ARM: VIXL32: Implement more codegen visitors.

~50 tests now start to pass.

Test: export ART_USE_VIXL_ARM_BACKEND=true && \
      mma test-art-host dist && mma test-art-target dist

Change-Id: I5b1a032f11e2c0fda2ea2c6a0f1b1a4b2668964a
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 9e74a9d..665e5e1 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -65,6 +65,7 @@
 
 static constexpr int kCurrentMethodStackOffset = 0;
 static constexpr size_t kArmInstrMaxSizeInBytes = 4u;
+static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;  // Max compare/jump entries.
 
 #ifdef __
 #error "ARM Codegen VIXL macro-assembler macro already defined."
@@ -657,6 +658,7 @@
                     compiler_options,
                     stats),
       block_labels_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       location_builder_(graph, this),
       instruction_visitor_(graph, this),
       move_resolver_(graph->GetArena(), this),
@@ -675,9 +677,44 @@
   GetVIXLAssembler()->GetScratchVRegisterList()->Combine(d15);
 }
 
+void JumpTableARMVIXL::EmitTable(CodeGeneratorARMVIXL* codegen) {
+  uint32_t num_entries = switch_instr_->GetNumEntries();
+  DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold);
+  // Emits one int32_t per entry: the offset of that successor's label from `table_start_`.
+  // The entries are placed through the assembler directly, so the scope below is meant to make
+  // sure the code buffer is large enough and the table is generated with the right size.
+  codegen->GetVIXLAssembler()->GetBuffer().Align();
+  AssemblerAccurateScope aas(codegen->GetVIXLAssembler(),
+                             num_entries * sizeof(int32_t),
+                             CodeBufferCheckScope::kMaximumSize);
+  // TODO(VIXL): Check that using lower case bind is fine here.
+  codegen->GetVIXLAssembler()->bind(&table_start_);
+  const ArenaVector<HBasicBlock*>& successors = switch_instr_->GetBlock()->GetSuccessors();
+  for (uint32_t i = 0; i < num_entries; i++) {
+    vixl32::Label* target_label = codegen->GetLabelOf(successors[i]);
+    DCHECK(target_label->IsBound());
+    int32_t jump_offset = target_label->GetLocation() - table_start_.GetLocation();
+    // In T32, the address used by BX must have its lowest bit set to 1.
+    if (codegen->GetVIXLAssembler()->IsUsingT32()) {
+      jump_offset++;
+    }
+    DCHECK_GT(jump_offset, std::numeric_limits<int32_t>::min());
+    DCHECK_LE(jump_offset, std::numeric_limits<int32_t>::max());
+    vixl32::Literal<int32_t> literal(jump_offset);
+    codegen->GetVIXLAssembler()->place(&literal);
+  }
+}
+
+void CodeGeneratorARMVIXL::EmitJumpTables() {
+  for (auto&& jump_table : jump_tables_) {
+    jump_table->EmitTable(this);  // Places the table's literals into the code buffer.
+  }
+}
+
 #define __ reinterpret_cast<ArmVIXLAssembler*>(GetAssembler())->GetVIXLAssembler()->  // NOLINT
 
 void CodeGeneratorARMVIXL::Finalize(CodeAllocator* allocator) {
+  EmitJumpTables();  // Emit the jump tables before the assembler finalizes the code.
   GetAssembler()->FinalizeCode();
   CodeGenerator::Finalize(allocator);
 }
@@ -1253,6 +1290,14 @@
   __ Bind(&false_target);
 }
 
+void LocationsBuilderARMVIXL::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+  new (GetGraph()->GetArena()) LocationSummary(info);  // No inputs, temps or output are set.
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitNativeDebugInfo(HNativeDebugInfo*) {
+  // Nothing to emit: MaybeRecordNativeDebugInfo is already called in CodeGenerator::Compile.
+}
+
 void CodeGeneratorARMVIXL::GenerateNop() {
   __ Nop();
 }
@@ -2495,7 +2540,12 @@
         locations->SetInAt(1, Location::RequiresRegister());
         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       } else {
-        TODO_VIXL32(FATAL);
+        InvokeRuntimeCallingConventionARMVIXL calling_convention;
+        locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
+        locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
+        // Note: divrem will compute both the quotient and the remainder as the pair R0 and R1, but
+        //       we only need the former.
+        locations->SetOut(LocationFrom(r0));
       }
       break;
     }
@@ -2532,7 +2582,13 @@
       } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
         __ Sdiv(OutputRegister(div), InputRegisterAt(div, 0), InputRegisterAt(div, 1));
       } else {
-        TODO_VIXL32(FATAL);
+        InvokeRuntimeCallingConventionARMVIXL calling_convention;
+        DCHECK(calling_convention.GetRegisterAt(0).Is(RegisterFrom(lhs)));
+        DCHECK(calling_convention.GetRegisterAt(1).Is(RegisterFrom(rhs)));
+        DCHECK(r0.Is(OutputRegister(div)));
+
+        codegen_->InvokeRuntime(kQuickIdivmod, div, div->GetDexPc());
+        CheckEntrypointTypes<kQuickIdivmod, int32_t, int32_t, int32_t>();
       }
       break;
     }
@@ -2561,6 +2617,140 @@
   }
 }
 
+void LocationsBuilderARMVIXL::VisitRem(HRem* rem) {
+  Primitive::Type type = rem->GetResultType();
+  // Chooses the call kind and the operand locations of `rem` from its result type and divisor.
+  // Most remainders are implemented in the runtime.
+  LocationSummary::CallKind call_kind = LocationSummary::kCallOnMainOnly;
+  if (rem->GetResultType() == Primitive::kPrimInt && rem->InputAt(1)->IsConstant()) {
+    // A constant divisor is handled by an inline instruction sequence instead of sdiv.
+    call_kind = LocationSummary::kNoCall;
+  } else if ((rem->GetResultType() == Primitive::kPrimInt)
+             && codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
+    // Hardware divide is available for int: the remainder is computed inline (sdiv, then mls).
+    call_kind = LocationSummary::kNoCall;
+  }
+
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
+
+  switch (type) {
+    case Primitive::kPrimInt: {
+      if (rem->InputAt(1)->IsConstant()) {
+        locations->SetInAt(0, Location::RequiresRegister());
+        locations->SetInAt(1, Location::ConstantLocation(rem->InputAt(1)->AsConstant()));
+        locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+        int32_t value = rem->InputAt(1)->AsIntConstant()->GetValue();
+        if (value == 1 || value == 0 || value == -1) {
+          // No temp register required.
+        } else {
+          locations->AddTemp(Location::RequiresRegister());
+          if (!IsPowerOfTwo(AbsOrMin(value))) {
+            locations->AddTemp(Location::RequiresRegister());
+          }
+        }
+      } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
+        locations->SetInAt(0, Location::RequiresRegister());
+        locations->SetInAt(1, Location::RequiresRegister());
+        locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+        locations->AddTemp(Location::RequiresRegister());  // Receives the sdiv quotient.
+      } else {
+        InvokeRuntimeCallingConventionARMVIXL calling_convention;
+        locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
+        locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
+        // Note: divrem will compute both the quotient and the remainder as the pair R0 and R1, but
+        //       we only need the latter.
+        locations->SetOut(LocationFrom(r1));
+      }
+      break;
+    }
+    case Primitive::kPrimLong: {
+      InvokeRuntimeCallingConventionARMVIXL calling_convention;
+      locations->SetInAt(0, LocationFrom(
+          calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
+      locations->SetInAt(1, LocationFrom(
+          calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
+      // The runtime helper puts the output in R2,R3.
+      locations->SetOut(LocationFrom(r2, r3));
+      break;
+    }
+    case Primitive::kPrimFloat: {
+      InvokeRuntimeCallingConventionARMVIXL calling_convention;
+      locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
+      locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1)));
+      locations->SetOut(LocationFrom(s0));
+      break;
+    }
+
+    case Primitive::kPrimDouble: {
+      InvokeRuntimeCallingConventionARMVIXL calling_convention;
+      locations->SetInAt(0, LocationFrom(
+          calling_convention.GetFpuRegisterAt(0), calling_convention.GetFpuRegisterAt(1)));
+      locations->SetInAt(1, LocationFrom(
+          calling_convention.GetFpuRegisterAt(2), calling_convention.GetFpuRegisterAt(3)));
+      locations->SetOut(LocationFrom(s0, s1));
+      break;
+    }
+
+    default:
+      LOG(FATAL) << "Unexpected rem type " << type;
+  }
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitRem(HRem* rem) {
+  LocationSummary* locations = rem->GetLocations();
+  Location second = locations->InAt(1);
+  // Int remainders are emitted inline when possible; every other case calls a runtime entrypoint.
+  Primitive::Type type = rem->GetResultType();
+  switch (type) {
+    case Primitive::kPrimInt: {
+      vixl32::Register reg1 = InputRegisterAt(rem, 0);
+      vixl32::Register out_reg = OutputRegister(rem);
+      if (second.IsConstant()) {
+        GenerateDivRemConstantIntegral(rem);
+      } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
+        vixl32::Register reg2 = RegisterFrom(second);
+        vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
+
+        // temp = reg1 / reg2  (integer division)
+        // dest = reg1 - temp * reg2
+        __ Sdiv(temp, reg1, reg2);
+        __ Mls(out_reg, temp, reg2, reg1);
+      } else {
+        InvokeRuntimeCallingConventionARMVIXL calling_convention;
+        DCHECK(reg1.Is(calling_convention.GetRegisterAt(0)));
+        DCHECK(RegisterFrom(second).Is(calling_convention.GetRegisterAt(1)));
+        DCHECK(out_reg.Is(r1));
+
+        codegen_->InvokeRuntime(kQuickIdivmod, rem, rem->GetDexPc());
+        CheckEntrypointTypes<kQuickIdivmod, int32_t, int32_t, int32_t>();
+      }
+      break;
+    }
+
+    case Primitive::kPrimLong: {
+      codegen_->InvokeRuntime(kQuickLmod, rem, rem->GetDexPc());
+      CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>();
+      break;
+    }
+
+    case Primitive::kPrimFloat: {
+      codegen_->InvokeRuntime(kQuickFmodf, rem, rem->GetDexPc());
+      CheckEntrypointTypes<kQuickFmodf, float, float, float>();
+      break;
+    }
+
+    case Primitive::kPrimDouble: {
+      codegen_->InvokeRuntime(kQuickFmod, rem, rem->GetDexPc());
+      CheckEntrypointTypes<kQuickFmod, double, double, double>();
+      break;
+    }
+
+    default:
+      LOG(FATAL) << "Unexpected rem type " << type;
+  }
+}
+
+
 void LocationsBuilderARMVIXL::VisitDivZeroCheck(HDivZeroCheck* instruction) {
   // TODO(VIXL): https://android-review.googlesource.com/#/c/275337/
   LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
@@ -5254,6 +5444,24 @@
   __ Bind(type_check_slow_path->GetExitLabel());
 }
 
+void LocationsBuilderARMVIXL::VisitMonitorOperation(HMonitorOperation* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
+  InvokeRuntimeCallingConventionARMVIXL calling_convention;  // Input 0 is passed in arg reg 0.
+  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitMonitorOperation(HMonitorOperation* instruction) {
+  codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
+                          instruction,
+                          instruction->GetDexPc());
+  if (instruction->IsEnter()) {  // Mirrors the entrypoint selection above.
+    CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
+  } else {
+    CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
+  }
+}
+
 void LocationsBuilderARMVIXL::VisitAnd(HAnd* instruction) {
   HandleBitwiseOperation(instruction, AND);
 }
@@ -5656,6 +5864,103 @@
   __ Blx(lr);
 }
 
+void LocationsBuilderARMVIXL::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
+  // Never expected here: this should be removed during prepare for register allocator.
+  LOG(FATAL) << "Unreachable";
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
+  // Never expected here: this should be removed during prepare for register allocator.
+  LOG(FATAL) << "Unreachable";
+}
+
+// Packed switch: cascaded compare/jumps, or a jump table for larger switches when using T32.
+void LocationsBuilderARMVIXL::VisitPackedSwitch(HPackedSwitch* switch_instr) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RequiresRegister());  // The switch value.
+  if (switch_instr->GetNumEntries() > kPackedSwitchCompareJumpThreshold &&
+      codegen_->GetAssembler()->GetVIXLAssembler()->IsUsingT32()) {  // Jump-table path.
+    locations->AddTemp(Location::RequiresRegister());  // We need a temp for the table base.
+    if (switch_instr->GetStartValue() != 0) {
+      locations->AddTemp(Location::RequiresRegister());  // We need a temp for the bias.
+    }
+  }
+}
+
+// TODO(VIXL): Investigate and reach the parity with old arm codegen.
+void InstructionCodeGeneratorARMVIXL::VisitPackedSwitch(HPackedSwitch* switch_instr) {
+  int32_t lower_bound = switch_instr->GetStartValue();
+  uint32_t num_entries = switch_instr->GetNumEntries();
+  LocationSummary* locations = switch_instr->GetLocations();
+  vixl32::Register value_reg = InputRegisterAt(switch_instr, 0);
+  HBasicBlock* default_block = switch_instr->GetDefaultBlock();
+  // Small switches, or any switch outside T32, use compare/jumps; others use a jump table.
+  if (num_entries <= kPackedSwitchCompareJumpThreshold ||
+      !codegen_->GetAssembler()->GetVIXLAssembler()->IsUsingT32()) {
+    // Create a series of compare/jumps.
+    UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
+    vixl32::Register temp_reg = temps.Acquire();
+    // Note: the first Adds() below may need a register to materialize the immediate; that is
+    // presumably fine because the scratch `temp_reg` is also its destination (TODO confirm for
+    // VIXL). The later Adds()/Cmp() use the small constants -2 and 1. This note was written for
+    // AddConstantSetFlags()/GenerateCompareWithImmediate(), which are not called here.
+    __ Adds(temp_reg, value_reg, -lower_bound);
+
+    const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
+    // Jump to successors[0] if value == lower_bound.
+    __ B(eq, codegen_->GetLabelOf(successors[0]));
+    int32_t last_index = 0;
+    for (; num_entries - last_index > 2; last_index += 2) {
+      __ Adds(temp_reg, temp_reg, -2);
+      // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
+      __ B(lo, codegen_->GetLabelOf(successors[last_index + 1]));
+      // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
+      __ B(eq, codegen_->GetLabelOf(successors[last_index + 2]));
+    }
+    if (num_entries - last_index == 2) {
+      // The last missing case_value.
+      __ Cmp(temp_reg, 1);
+      __ B(eq, codegen_->GetLabelOf(successors[last_index + 1]));
+    }
+
+    // And the default for any other value.
+    if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
+      __ B(codegen_->GetLabelOf(default_block));
+    }
+  } else {
+    // Create a table lookup.
+    vixl32::Register table_base = RegisterFrom(locations->GetTemp(0));
+
+    JumpTableARMVIXL* jump_table = codegen_->CreateJumpTable(switch_instr);
+
+    // Remove the bias.
+    vixl32::Register key_reg;
+    if (lower_bound != 0) {
+      key_reg = RegisterFrom(locations->GetTemp(1));
+      __ Sub(key_reg, value_reg, lower_bound);
+    } else {
+      key_reg = value_reg;
+    }
+
+    // Check whether the value is in the table, jump to default block if not.
+    __ Cmp(key_reg, num_entries - 1);
+    __ B(hi, codegen_->GetLabelOf(default_block));
+
+    UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
+    vixl32::Register jump_offset = temps.Acquire();
+
+    // Load jump offset from the table.
+    __ Adr(table_base, jump_table->GetTableStartLabel());
+    __ Ldr(jump_offset, MemOperand(table_base, key_reg, vixl32::LSL, 2));
+
+    // Jump to the target block by branching to table_base (pc-relative) + offset.
+    vixl32::Register target_address = table_base;
+    __ Add(target_address, table_base, jump_offset);
+    __ Bx(target_address);
+  }
+}
+
 // Copy the result of a call into the given target.
 void CodeGeneratorARMVIXL::MoveFromReturnRegister(Location trg, Primitive::Type type) {
   if (!trg.IsValid()) {
@@ -5684,6 +5989,17 @@
   }
 }
 
+void LocationsBuilderARMVIXL::VisitClassTableGet(
+    HClassTableGet* instruction ATTRIBUTE_UNUSED) {
+  TODO_VIXL32(FATAL);  // Placeholder; presumably aborts until this visitor is implemented.
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitClassTableGet(
+    HClassTableGet* instruction ATTRIBUTE_UNUSED) {
+  TODO_VIXL32(FATAL);  // Placeholder; presumably aborts until this visitor is implemented.
+}
+
+
 #undef __
 #undef QUICK_ENTRY_POINT
 #undef TODO_VIXL32