Opt compiler: Add arm64 support for register allocation.
Change-Id: Idc6e84eee66170de4a9c0a5844c3da038c083aa7
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index a61ef2d..b048c07 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -90,10 +90,12 @@
}
Register XRegisterFrom(Location location) {
+ DCHECK(location.IsRegister());
return Register::XRegFromCode(VIXLRegCodeFromART(location.reg()));
}
Register WRegisterFrom(Location location) {
+ DCHECK(location.IsRegister());
return Register::WRegFromCode(VIXLRegCodeFromART(location.reg()));
}
@@ -112,10 +114,12 @@
}
FPRegister DRegisterFrom(Location location) {
+ DCHECK(location.IsFpuRegister());
return FPRegister::DRegFromCode(location.reg());
}
FPRegister SRegisterFrom(Location location) {
+ DCHECK(location.IsFpuRegister());
return FPRegister::SRegFromCode(location.reg());
}
@@ -133,6 +137,11 @@
instr->InputAt(input_index)->GetType());
}
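+// Returns a CPURegister view of `location`: an FP register for floating point
+// types, a core register otherwise.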
+CPURegister CPURegisterFrom(Location location, Primitive::Type type) {
+ return IsFPType(type) ? CPURegister(FPRegisterFrom(location, type))
+ : CPURegister(RegisterFrom(location, type));
+}
+
CPURegister OutputCPURegister(HInstruction* instr) {
return IsFPType(instr->GetType()) ? static_cast<CPURegister>(OutputFPRegister(instr))
: static_cast<CPURegister>(OutputRegister(instr));
@@ -266,14 +275,32 @@
class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 {
public:
- BoundsCheckSlowPathARM64() {}
+ BoundsCheckSlowPathARM64(HBoundsCheck* instruction,
+ Location index_location,
+ Location length_location)
+ : instruction_(instruction),
+ index_location_(index_location),
+ length_location_(length_location) {}
+
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
__ Bind(GetEntryLabel());
- __ Brk(__LINE__); // TODO: Unimplemented BoundsCheckSlowPathARM64.
+ // We're moving two locations to locations that could overlap, so we need a parallel
+ // move resolver.
+ InvokeRuntimeCallingConvention calling_convention;
+ codegen->EmitParallelMoves(
+ index_location_, LocationFrom(calling_convention.GetRegisterAt(0)),
+ length_location_, LocationFrom(calling_convention.GetRegisterAt(1)));
+ arm64_codegen->InvokeRuntime(
+ QUICK_ENTRY_POINT(pThrowArrayBounds), instruction_, instruction_->GetDexPc());
}
private:
+ HBoundsCheck* const instruction_;
+ const Location index_location_;
+ const Location length_location_;
+
DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARM64);
};
@@ -322,7 +349,7 @@
if (out.IsValid()) {
DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
Primitive::Type type = at_->GetType();
- arm64_codegen->MoveHelper(out, calling_convention.GetReturnLocation(type), type);
+ arm64_codegen->MoveLocation(out, calling_convention.GetReturnLocation(type), type);
}
codegen->RestoreLiveRegisters(locations);
@@ -364,7 +391,7 @@
arm64_codegen->InvokeRuntime(
QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc());
Primitive::Type type = instruction_->GetType();
- arm64_codegen->MoveHelper(locations->Out(), calling_convention.GetReturnLocation(type), type);
+ arm64_codegen->MoveLocation(locations->Out(), calling_convention.GetReturnLocation(type), type);
codegen->RestoreLiveRegisters(locations);
__ B(GetExitLabel());
@@ -445,15 +472,51 @@
class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 {
public:
- TypeCheckSlowPathARM64() {}
+ TypeCheckSlowPathARM64(HInstruction* instruction,
+ Location class_to_check,
+ Location object_class,
+ uint32_t dex_pc)
+ : instruction_(instruction),
+ class_to_check_(class_to_check),
+ object_class_(object_class),
+ dex_pc_(dex_pc) {}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
+ DCHECK(instruction_->IsCheckCast()
+ || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
+ CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
+
__ Bind(GetEntryLabel());
- __ Brk(__LINE__); // TODO: Unimplemented TypeCheckSlowPathARM64.
+ codegen->SaveLiveRegisters(locations);
+
+ // We're moving two locations to locations that could overlap, so we need a parallel
+ // move resolver.
+ InvokeRuntimeCallingConvention calling_convention;
+ codegen->EmitParallelMoves(
+ class_to_check_, LocationFrom(calling_convention.GetRegisterAt(0)),
+ object_class_, LocationFrom(calling_convention.GetRegisterAt(1)));
+
+ if (instruction_->IsInstanceOf()) {
+ arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial), instruction_, dex_pc_);
+ Primitive::Type ret_type = instruction_->GetType();
+ Location ret_loc = calling_convention.GetReturnLocation(ret_type);
+ arm64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
+ } else {
+ DCHECK(instruction_->IsCheckCast());
+ arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, dex_pc_);
+ }
+
+ codegen->RestoreLiveRegisters(locations);
__ B(GetExitLabel());
}
private:
+ HInstruction* const instruction_;
+ const Location class_to_check_;
+ const Location object_class_;
+ uint32_t dex_pc_;
+
DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARM64);
};
@@ -487,7 +550,8 @@
kNumberOfAllocatableRegisterPairs),
block_labels_(nullptr),
location_builder_(graph, this),
- instruction_visitor_(graph, this) {}
+ instruction_visitor_(graph, this),
+ move_resolver_(graph->GetArena(), this) {}
#undef __
#define __ GetVIXLAssembler()->
@@ -498,6 +562,24 @@
CodeGenerator::Finalize(allocator);
}
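+// The parallel move resolver defers the actual work to the code generator. No
+// type is passed, so MoveLocation() infers the width of each move from its
+// source and destination locations.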
+void ParallelMoveResolverARM64::EmitMove(size_t index) {
+ MoveOperands* move = moves_.Get(index);
+ codegen_->MoveLocation(move->GetDestination(), move->GetSource());
+}
+
+void ParallelMoveResolverARM64::EmitSwap(size_t index) {
+ MoveOperands* move = moves_.Get(index);
+ codegen_->SwapLocations(move->GetDestination(), move->GetSource());
+}
+
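+// Core scratch registers are preserved across their use with a 64-bit
+// push/pop pair.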
+void ParallelMoveResolverARM64::RestoreScratch(int reg) {
+ __ Pop(Register(VIXLRegCodeFromART(reg), kXRegSize));
+}
+
+void ParallelMoveResolverARM64::SpillScratch(int reg) {
+ __ Push(Register(VIXLRegCodeFromART(reg), kXRegSize));
+}
+
void CodeGeneratorARM64::GenerateFrameEntry() {
bool do_overflow_check = FrameNeedsStackCheck(GetFrameSize(), kArm64) || !IsLeafMethod();
if (do_overflow_check) {
@@ -571,18 +653,18 @@
}
} else if (instruction->IsTemporary()) {
Location temp_location = GetTemporaryLocation(instruction->AsTemporary());
- MoveHelper(location, temp_location, type);
+ MoveLocation(location, temp_location, type);
} else if (instruction->IsLoadLocal()) {
uint32_t stack_slot = GetStackSlot(instruction->AsLoadLocal()->GetLocal());
if (Is64BitType(type)) {
- MoveHelper(location, Location::DoubleStackSlot(stack_slot), type);
+ MoveLocation(location, Location::DoubleStackSlot(stack_slot), type);
} else {
- MoveHelper(location, Location::StackSlot(stack_slot), type);
+ MoveLocation(location, Location::StackSlot(stack_slot), type);
}
} else {
DCHECK((instruction->GetNext() == move_for) || instruction->GetNext()->IsTemporary());
- MoveHelper(location, locations->Out(), type);
+ MoveLocation(location, locations->Out(), type);
}
}
@@ -665,6 +747,30 @@
}
}
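+// Register save/restore helpers used when spilling live registers around slow
+// paths. Core registers are stored as 64-bit X registers and floating point
+// registers as 64-bit D registers; each one occupies a single kArm64WordSize
+// stack slot.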
+size_t CodeGeneratorARM64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
+ Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize);
+ __ Str(reg, MemOperand(sp, stack_index));
+ return kArm64WordSize;
+}
+
+size_t CodeGeneratorARM64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
+ Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize);
+ __ Ldr(reg, MemOperand(sp, stack_index));
+ return kArm64WordSize;
+}
+
+size_t CodeGeneratorARM64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
+ FPRegister reg = FPRegister(reg_id, kDRegSize);
+ __ Str(reg, MemOperand(sp, stack_index));
+ return kArm64WordSize;
+}
+
+size_t CodeGeneratorARM64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
+ FPRegister reg = FPRegister(reg_id, kDRegSize);
+ __ Ldr(reg, MemOperand(sp, stack_index));
+ return kArm64WordSize;
+}
+
void CodeGeneratorARM64::DumpCoreRegister(std::ostream& stream, int reg) const {
stream << Arm64ManagedRegister::FromXRegister(XRegister(reg));
}
@@ -686,58 +792,162 @@
}
}
-void CodeGeneratorARM64::MoveHelper(Location destination,
- Location source,
- Primitive::Type type) {
+
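+// Used in DCHECKs: returns whether a constant location is consistent with the
+// primitive type requested for the move.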
+static bool CoherentConstantAndType(Location constant, Primitive::Type type) {
+ DCHECK(constant.IsConstant());
+ HConstant* cst = constant.GetConstant();
+ return (cst->IsIntConstant() && type == Primitive::kPrimInt) ||
+ (cst->IsLongConstant() && type == Primitive::kPrimLong) ||
+ (cst->IsFloatConstant() && type == Primitive::kPrimFloat) ||
+ (cst->IsDoubleConstant() && type == Primitive::kPrimDouble);
+}
+
+void CodeGeneratorARM64::MoveLocation(Location destination, Location source, Primitive::Type type) {
if (source.Equals(destination)) {
return;
}
- if (destination.IsRegister()) {
- Register dst = RegisterFrom(destination, type);
- if (source.IsStackSlot() || source.IsDoubleStackSlot()) {
- DCHECK(dst.Is64Bits() == source.IsDoubleStackSlot());
- __ Ldr(dst, StackOperandFrom(source));
- } else {
- __ Mov(dst, OperandFrom(source, type));
- }
- } else if (destination.IsFpuRegister()) {
- FPRegister dst = FPRegisterFrom(destination, type);
- if (source.IsStackSlot() || source.IsDoubleStackSlot()) {
- DCHECK(dst.Is64Bits() == source.IsDoubleStackSlot());
- __ Ldr(dst, StackOperandFrom(source));
- } else if (source.IsFpuRegister()) {
- __ Fmov(dst, FPRegisterFrom(source, type));
- } else {
- MoveConstant(dst, source.GetConstant());
- }
- } else {
- DCHECK(destination.IsStackSlot() || destination.IsDoubleStackSlot());
- if (source.IsRegister()) {
- __ Str(RegisterFrom(source, type), StackOperandFrom(destination));
- } else if (source.IsFpuRegister()) {
- __ Str(FPRegisterFrom(source, type), StackOperandFrom(destination));
- } else if (source.IsConstant()) {
- UseScratchRegisterScope temps(GetVIXLAssembler());
- HConstant* cst = source.GetConstant();
- CPURegister temp;
- if (cst->IsIntConstant() || cst->IsLongConstant()) {
- temp = cst->IsIntConstant() ? temps.AcquireW() : temps.AcquireX();
+
+ // A valid move can always be inferred from the destination and source
+ // locations. When moving from and to a register, the argument type can be
+ // used to generate 32bit instead of 64bit moves. In debug mode we also
+ // check the coherency of the locations and the type.
+ bool unspecified_type = (type == Primitive::kPrimVoid);
+
+ if (destination.IsRegister() || destination.IsFpuRegister()) {
+ if (unspecified_type) {
+ HConstant* src_cst = source.IsConstant() ? source.GetConstant() : nullptr;
+ if (source.IsStackSlot() ||
+ (src_cst != nullptr && (src_cst->IsIntConstant() || src_cst->IsFloatConstant()))) {
+ // For stack slots and 32bit constants, a 32bit type is appropriate.
+ type = destination.IsRegister() ? Primitive::kPrimInt : Primitive::kPrimFloat;
} else {
- DCHECK(cst->IsFloatConstant() || cst->IsDoubleConstant());
- temp = cst->IsFloatConstant() ? temps.AcquireS() : temps.AcquireD();
+ // If the source is a double stack slot or a 64bit constant, a 64bit
+ // type is appropriate. Otherwise the source is a register, and since the
+ // type has not been specified, we choose a 64bit type to force a 64bit
+ // move.
+ type = destination.IsRegister() ? Primitive::kPrimLong : Primitive::kPrimDouble;
}
- MoveConstant(temp, cst);
+ }
+ DCHECK((destination.IsFpuRegister() && IsFPType(type)) ||
+ (destination.IsRegister() && !IsFPType(type)));
+ CPURegister dst = CPURegisterFrom(destination, type);
+ if (source.IsStackSlot() || source.IsDoubleStackSlot()) {
+ DCHECK(dst.Is64Bits() == source.IsDoubleStackSlot());
+ __ Ldr(dst, StackOperandFrom(source));
+ } else if (source.IsConstant()) {
+ DCHECK(CoherentConstantAndType(source, type));
+ MoveConstant(dst, source.GetConstant());
+ } else {
+ if (destination.IsRegister()) {
+ __ Mov(Register(dst), RegisterFrom(source, type));
+ } else {
+ __ Fmov(FPRegister(dst), FPRegisterFrom(source, type));
+ }
+ }
+
+ } else { // The destination is not a register. It must be a stack slot.
+ DCHECK(destination.IsStackSlot() || destination.IsDoubleStackSlot());
+ if (source.IsRegister() || source.IsFpuRegister()) {
+ if (unspecified_type) {
+ if (source.IsRegister()) {
+ type = destination.IsStackSlot() ? Primitive::kPrimInt : Primitive::kPrimLong;
+ } else {
+ type = destination.IsStackSlot() ? Primitive::kPrimFloat : Primitive::kPrimDouble;
+ }
+ }
+ DCHECK((destination.IsDoubleStackSlot() == Is64BitType(type)) &&
+ (source.IsFpuRegister() == IsFPType(type)));
+ __ Str(CPURegisterFrom(source, type), StackOperandFrom(destination));
+ } else if (source.IsConstant()) {
+ DCHECK(unspecified_type || CoherentConstantAndType(source, type));
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ HConstant* src_cst = source.GetConstant();
+ CPURegister temp;
+ if (src_cst->IsIntConstant()) {
+ temp = temps.AcquireW();
+ } else if (src_cst->IsLongConstant()) {
+ temp = temps.AcquireX();
+ } else if (src_cst->IsFloatConstant()) {
+ temp = temps.AcquireS();
+ } else {
+ DCHECK(src_cst->IsDoubleConstant());
+ temp = temps.AcquireD();
+ }
+ MoveConstant(temp, src_cst);
__ Str(temp, StackOperandFrom(destination));
} else {
DCHECK(source.IsStackSlot() || source.IsDoubleStackSlot());
+ DCHECK(source.IsDoubleStackSlot() == destination.IsDoubleStackSlot());
UseScratchRegisterScope temps(GetVIXLAssembler());
- Register temp = destination.IsDoubleStackSlot() ? temps.AcquireX() : temps.AcquireW();
+ // There is generally less pressure on FP registers.
+ FPRegister temp = destination.IsDoubleStackSlot() ? temps.AcquireD() : temps.AcquireS();
__ Ldr(temp, StackOperandFrom(source));
__ Str(temp, StackOperandFrom(destination));
}
}
}
+void CodeGeneratorARM64::SwapLocations(Location loc1, Location loc2) {
+ DCHECK(!loc1.IsConstant());
+ DCHECK(!loc2.IsConstant());
+
+ if (loc1.Equals(loc2)) {
+ return;
+ }
+
+ UseScratchRegisterScope temps(GetAssembler()->vixl_masm_);
+
+ bool is_slot1 = loc1.IsStackSlot() || loc1.IsDoubleStackSlot();
+ bool is_slot2 = loc2.IsStackSlot() || loc2.IsDoubleStackSlot();
+ bool is_fp_reg1 = loc1.IsFpuRegister();
+ bool is_fp_reg2 = loc2.IsFpuRegister();
+
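+ // Handle swaps between two core registers, two FP registers, a register and
+ // a stack slot, or two stack slots; other combinations are not supported.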
+ if (loc2.IsRegister() && loc1.IsRegister()) {
+ Register r1 = XRegisterFrom(loc1);
+ Register r2 = XRegisterFrom(loc2);
+ Register tmp = temps.AcquireSameSizeAs(r1);
+ __ Mov(tmp, r2);
+ __ Mov(r2, r1);
+ __ Mov(r1, tmp);
+ } else if (is_fp_reg2 && is_fp_reg1) {
+ FPRegister r1 = DRegisterFrom(loc1);
+ FPRegister r2 = DRegisterFrom(loc2);
+ FPRegister tmp = temps.AcquireSameSizeAs(r1);
+ __ Fmov(tmp, r2);
+ __ Fmov(r2, r1);
+ __ Fmov(r1, tmp);
+ } else if (is_slot1 != is_slot2) {
+ MemOperand mem = StackOperandFrom(is_slot1 ? loc1 : loc2);
+ Location reg_loc = is_slot1 ? loc2 : loc1;
+ CPURegister reg, tmp;
+ if (reg_loc.IsFpuRegister()) {
+ reg = DRegisterFrom(reg_loc);
+ tmp = temps.AcquireD();
+ } else {
+ reg = XRegisterFrom(reg_loc);
+ tmp = temps.AcquireX();
+ }
+ __ Ldr(tmp, mem);
+ __ Str(reg, mem);
+ if (reg_loc.IsFpuRegister()) {
+ __ Fmov(FPRegister(reg), FPRegister(tmp));
+ } else {
+ __ Mov(Register(reg), Register(tmp));
+ }
+ } else if (is_slot1 && is_slot2) {
+ MemOperand mem1 = StackOperandFrom(loc1);
+ MemOperand mem2 = StackOperandFrom(loc2);
+ Register tmp1 = loc1.IsStackSlot() ? temps.AcquireW() : temps.AcquireX();
+ Register tmp2 = temps.AcquireSameSizeAs(tmp1);
+ __ Ldr(tmp1, mem1);
+ __ Ldr(tmp2, mem2);
+ __ Str(tmp1, mem2);
+ __ Str(tmp2, mem1);
+ } else {
+ LOG(FATAL) << "Unimplemented";
+ }
+}
+
void CodeGeneratorARM64::Load(Primitive::Type type,
vixl::CPURegister dst,
const vixl::MemOperand& src) {
@@ -850,7 +1060,7 @@
codegen_(codegen) {}
#define FOR_EACH_UNIMPLEMENTED_INSTRUCTION(M) \
- M(ParallelMove) \
+ /* No unimplemented IR. */
#define UNIMPLEMENTED_INSTRUCTION_BREAK_CODE(name) name##UnimplementedInstructionBreakCode
@@ -1113,7 +1323,9 @@
}
void InstructionCodeGeneratorARM64::VisitBoundsCheck(HBoundsCheck* instruction) {
- BoundsCheckSlowPathARM64* slow_path = new (GetGraph()->GetArena()) BoundsCheckSlowPathARM64();
+ LocationSummary* locations = instruction->GetLocations();
+ BoundsCheckSlowPathARM64* slow_path = new (GetGraph()->GetArena()) BoundsCheckSlowPathARM64(
+ instruction, locations->InAt(0), locations->InAt(1));
codegen_->AddSlowPath(slow_path);
__ Cmp(InputRegisterAt(instruction, 0), InputOperandAt(instruction, 1));
@@ -1125,22 +1337,24 @@
instruction, LocationSummary::kCallOnSlowPath);
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
}
void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
- UseScratchRegisterScope temps(GetVIXLAssembler());
+ LocationSummary* locations = instruction->GetLocations();
Register obj = InputRegisterAt(instruction, 0);
Register cls = InputRegisterAt(instruction, 1);
- Register temp = temps.AcquireW();
+ Register obj_cls = WRegisterFrom(locations->GetTemp(0));
- SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64();
+ SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(
+ instruction, locations->InAt(1), LocationFrom(obj_cls), instruction->GetDexPc());
codegen_->AddSlowPath(slow_path);
// TODO: avoid this check if we know obj is not null.
__ Cbz(obj, slow_path->GetExitLabel());
// Compare the class of `obj` with `cls`.
- __ Ldr(temp, HeapOperand(obj, mirror::Object::ClassOffset()));
- __ Cmp(temp, cls);
+ __ Ldr(obj_cls, HeapOperand(obj, mirror::Object::ClassOffset()));
+ __ Cmp(obj_cls, cls);
__ B(ne, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
}
@@ -1316,12 +1530,20 @@
codegen_->AddSlowPath(slow_path);
Location value = instruction->GetLocations()->InAt(0);
+ Primitive::Type type = instruction->GetType();
+
+ if ((type != Primitive::kPrimInt) && (type != Primitive::kPrimLong)) {
+ LOG(FATAL) << "Unexpected type " << type << " for DivZeroCheck.";
+ return;
+ }
+
if (value.IsConstant()) {
int64_t divisor = Int64ConstantFrom(value);
if (divisor == 0) {
__ B(slow_path->GetEntryLabel());
} else {
- LOG(FATAL) << "Divisions by non-null constants should have been optimized away.";
+ // A division by a non-zero constant is valid. We don't need to perform
+ // any check, so simply fall through.
}
} else {
__ Cbz(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel());
@@ -1496,7 +1718,8 @@
// If the classes are not equal, we go into a slow path.
DCHECK(locations->OnlyCallsOnSlowPath());
SlowPathCodeARM64* slow_path =
- new (GetGraph()->GetArena()) TypeCheckSlowPathARM64();
+ new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(
+ instruction, locations->InAt(1), locations->Out(), instruction->GetDexPc());
codegen_->AddSlowPath(slow_path);
__ B(ne, slow_path->GetEntryLabel());
__ Mov(out, 1);
@@ -1914,6 +2137,14 @@
HandleBinaryOp(instruction);
}
+void LocationsBuilderARM64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
+ LOG(FATAL) << "Unreachable";
+}
+
+void InstructionCodeGeneratorARM64::VisitParallelMove(HParallelMove* instruction) {
+ codegen_->GetMoveResolver()->EmitNativeCode(instruction);
+}
+
void LocationsBuilderARM64::VisitParameterValue(HParameterValue* instruction) {
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
@@ -1989,7 +2220,7 @@
void InstructionCodeGeneratorARM64::VisitReturn(HReturn* instruction) {
UNUSED(instruction);
codegen_->GenerateFrameExit();
- __ Br(lr);
+ __ Ret();
}
void LocationsBuilderARM64::VisitReturnVoid(HReturnVoid* instruction) {
@@ -1999,7 +2230,7 @@
void InstructionCodeGeneratorARM64::VisitReturnVoid(HReturnVoid* instruction) {
UNUSED(instruction);
codegen_->GenerateFrameExit();
- __ Br(lr);
+ __ Ret();
}
void LocationsBuilderARM64::VisitShl(HShl* shl) {
@@ -2157,17 +2388,18 @@
if (IsIntegralType(result_type) && IsIntegralType(input_type)) {
int result_size = Primitive::ComponentSize(result_type);
int input_size = Primitive::ComponentSize(input_type);
- int min_size = kBitsPerByte * std::min(result_size, input_size);
+ int min_size = std::min(result_size, input_size);
Register output = OutputRegister(conversion);
Register source = InputRegisterAt(conversion, 0);
- if ((result_type == Primitive::kPrimChar) ||
- ((input_type == Primitive::kPrimChar) && (result_size > input_size))) {
- __ Ubfx(output, output.IsX() ? source.X() : source.W(), 0, min_size);
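+ // A widening conversion to char zero-extends the low 16 bits of the source.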
+ if ((result_type == Primitive::kPrimChar) && (input_size < result_size)) {
+ __ Ubfx(output, source, 0, result_size * kBitsPerByte);
+ } else if ((result_type == Primitive::kPrimChar) ||
+ ((input_type == Primitive::kPrimChar) && (result_size > input_size))) {
+ __ Ubfx(output, output.IsX() ? source.X() : source.W(), 0, min_size * kBitsPerByte);
} else {
- __ Sbfx(output, output.IsX() ? source.X() : source.W(), 0, min_size);
+ __ Sbfx(output, output.IsX() ? source.X() : source.W(), 0, min_size * kBitsPerByte);
}
} else if (IsFPType(result_type) && IsIntegralType(input_type)) {
- CHECK(input_type == Primitive::kPrimInt || input_type == Primitive::kPrimLong);
__ Scvtf(OutputFPRegister(conversion), InputRegisterAt(conversion, 0));
} else if (IsIntegralType(result_type) && IsFPType(input_type)) {
CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong);
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 0e3d25f..1d5bfb7 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -139,6 +139,27 @@
DISALLOW_COPY_AND_ASSIGN(LocationsBuilderARM64);
};
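+// Resolves HParallelMove instructions on arm64 by delegating to the code
+// generator's MoveLocation() and SwapLocations() helpers.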
+class ParallelMoveResolverARM64 : public ParallelMoveResolver {
+ public:
+ ParallelMoveResolverARM64(ArenaAllocator* allocator, CodeGeneratorARM64* codegen)
+ : ParallelMoveResolver(allocator), codegen_(codegen) {}
+
+ void EmitMove(size_t index) OVERRIDE;
+ void EmitSwap(size_t index) OVERRIDE;
+ void RestoreScratch(int reg) OVERRIDE;
+ void SpillScratch(int reg) OVERRIDE;
+
+ private:
+ Arm64Assembler* GetAssembler() const;
+ vixl::MacroAssembler* GetVIXLAssembler() const {
+ return GetAssembler()->vixl_masm_;
+ }
+
+ CodeGeneratorARM64* const codegen_;
+
+ DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverARM64);
+};
+
class CodeGeneratorARM64 : public CodeGenerator {
public:
explicit CodeGeneratorARM64(HGraph* graph);
@@ -193,19 +214,10 @@
Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
- size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
- UNUSED(stack_index);
- UNUSED(reg_id);
- LOG(INFO) << "CodeGeneratorARM64::SaveCoreRegister()";
- return kArm64WordSize;
- }
-
- size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
- UNUSED(stack_index);
- UNUSED(reg_id);
- LOG(INFO) << "CodeGeneratorARM64::RestoreCoreRegister()";
- return kArm64WordSize;
- }
+ size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id);
+ size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id);
+ size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id);
+ size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id);
// The number of registers that can be allocated. The register allocator may
// decide to reserve and not use a few of them.
@@ -237,7 +249,11 @@
// Code generation helpers.
void MoveConstant(vixl::CPURegister destination, HConstant* constant);
- void MoveHelper(Location destination, Location source, Primitive::Type type);
+ // The type is optional. When specified, it must be coherent with the
+ // locations, and is used for optimization and debugging.
+ void MoveLocation(Location destination, Location source,
+ Primitive::Type type = Primitive::kPrimVoid);
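+ // Swaps the contents of two locations. Constants are not supported; both
+ // locations must be registers or stack slots.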
+ void SwapLocations(Location loc1, Location loc2);
void Load(Primitive::Type type, vixl::CPURegister dst, const vixl::MemOperand& src);
void Store(Primitive::Type type, vixl::CPURegister rt, const vixl::MemOperand& dst);
void LoadCurrentMethod(vixl::Register current_method);
@@ -245,10 +261,7 @@
// Generate code to invoke a runtime entry point.
void InvokeRuntime(int32_t offset, HInstruction* instruction, uint32_t dex_pc);
- ParallelMoveResolver* GetMoveResolver() OVERRIDE {
- UNIMPLEMENTED(INFO) << "TODO: MoveResolver";
- return nullptr;
- }
+ ParallelMoveResolverARM64* GetMoveResolver() OVERRIDE { return &move_resolver_; }
private:
// Labels for each block that will be compiled.
@@ -256,11 +269,16 @@
LocationsBuilderARM64 location_builder_;
InstructionCodeGeneratorARM64 instruction_visitor_;
+ ParallelMoveResolverARM64 move_resolver_;
Arm64Assembler assembler_;
DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM64);
};
+inline Arm64Assembler* ParallelMoveResolverARM64::GetAssembler() const {
+ return codegen_->GetAssembler();
+}
+
} // namespace arm64
} // namespace art
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index fee3ea6..8b75cc7 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -129,12 +129,15 @@
std::function<void(HGraph*)> hook_before_codegen,
bool has_result,
Expected expected) {
- if (kRuntimeISA == kX86) {
- x86::CodeGeneratorX86 codegenX86(graph);
- RunCodeOptimized(&codegenX86, graph, hook_before_codegen, has_result, expected);
- } else if (kRuntimeISA == kArm || kRuntimeISA == kThumb2) {
+ if (kRuntimeISA == kArm || kRuntimeISA == kThumb2) {
arm::CodeGeneratorARM codegenARM(graph);
RunCodeOptimized(&codegenARM, graph, hook_before_codegen, has_result, expected);
+ } else if (kRuntimeISA == kArm64) {
+ arm64::CodeGeneratorARM64 codegenARM64(graph);
+ RunCodeOptimized(&codegenARM64, graph, hook_before_codegen, has_result, expected);
+ } else if (kRuntimeISA == kX86) {
+ x86::CodeGeneratorX86 codegenX86(graph);
+ RunCodeOptimized(&codegenX86, graph, hook_before_codegen, has_result, expected);
} else if (kRuntimeISA == kX86_64) {
x86_64::CodeGeneratorX86_64 codegenX86_64(graph);
RunCodeOptimized(&codegenX86_64, graph, hook_before_codegen, has_result, expected);
@@ -362,11 +365,7 @@
#undef NOT_LONG_TEST
-#if defined(__aarch64__)
-TEST(CodegenTest, DISABLED_IntToLongOfLongToInt) {
-#else
TEST(CodegenTest, IntToLongOfLongToInt) {
-#endif
const int64_t input = INT64_C(4294967296); // 2^32
const uint16_t word0 = Low16Bits(Low32Bits(input)); // LSW.
const uint16_t word1 = High16Bits(Low32Bits(input));
@@ -493,10 +492,8 @@
TestCode(data, true, 12); \
}
-#if !defined(__aarch64__)
MUL_TEST(INT, MulInt);
MUL_TEST(LONG, MulLong);
-#endif
TEST(CodegenTest, ReturnMulIntLit8) {
const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
@@ -633,11 +630,7 @@
}
}
-#if defined(__aarch64__)
-TEST(CodegenTest, DISABLED_ReturnDivIntLit8) {
-#else
TEST(CodegenTest, ReturnDivIntLit8) {
-#endif
const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 4 << 12 | 0 << 8,
Instruction::DIV_INT_LIT8, 3 << 8 | 0,
@@ -646,11 +639,7 @@
TestCode(data, true, 1);
}
-#if defined(__aarch64__)
-TEST(CodegenTest, DISABLED_ReturnDivInt2Addr) {
-#else
TEST(CodegenTest, ReturnDivInt2Addr) {
-#endif
const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
Instruction::CONST_4 | 4 << 12 | 0,
Instruction::CONST_4 | 2 << 12 | 1 << 8,
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index a6c0635..c1c805d 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -64,15 +64,17 @@
if (!Supports(instruction_set)) {
return false;
}
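+ // The arm64 and x86_64 backends support register allocation for all
+ // primitive types, so there is no need to scan the graph for long or
+ // floating point instructions.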
+ if (instruction_set == kArm64 || instruction_set == kX86_64) {
+ return true;
+ }
for (size_t i = 0, e = graph.GetBlocks().Size(); i < e; ++i) {
for (HInstructionIterator it(graph.GetBlocks().Get(i)->GetInstructions());
!it.Done();
it.Advance()) {
HInstruction* current = it.Current();
- if (current->GetType() == Primitive::kPrimLong && instruction_set != kX86_64) return false;
- if ((current->GetType() == Primitive::kPrimFloat
- || current->GetType() == Primitive::kPrimDouble)
- && instruction_set != kX86_64) {
+ if (current->GetType() == Primitive::kPrimLong ||
+ current->GetType() == Primitive::kPrimFloat ||
+ current->GetType() == Primitive::kPrimDouble) {
return false;
}
}
diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h
index 976ee39..cbe741c 100644
--- a/compiler/optimizing/register_allocator.h
+++ b/compiler/optimizing/register_allocator.h
@@ -67,10 +67,11 @@
static bool CanAllocateRegistersFor(const HGraph& graph, InstructionSet instruction_set);
static bool Supports(InstructionSet instruction_set) {
- return instruction_set == kX86
- || instruction_set == kArm
- || instruction_set == kX86_64
- || instruction_set == kThumb2;
+ return instruction_set == kArm
+ || instruction_set == kArm64
+ || instruction_set == kThumb2
+ || instruction_set == kX86
+ || instruction_set == kX86_64;
}
size_t GetNumberOfSpillSlots() const {