| field | value |
|---|---|
| author | 2014-12-08 18:38:42 +0000 |
| committer | 2014-12-08 18:38:43 +0000 |
| commit | 6c964c98400b8c0949d5e369968da2d4809b772f (patch) |
| tree | 82c1893c0dbbd5a9b849b9c236fc775b4d20f3cc |
| parent | c4925d4c02dc8f8d51cb2653b5e7a99f6c9fd7d7 (diff) |
| parent | 717a3e447c6f7a922cf9c3efe522747a187a045d (diff) |
Merge "Re-factor Quick ABI support"
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | compiler/dex/mir_graph.cc | 6 |
| -rw-r--r-- | compiler/dex/mir_graph.h | 1 |
| -rw-r--r-- | compiler/dex/quick/arm/codegen_arm.h | 74 |
| -rw-r--r-- | compiler/dex/quick/arm/target_arm.cc | 293 |
| -rw-r--r-- | compiler/dex/quick/arm64/codegen_arm64.h | 57 |
| -rw-r--r-- | compiler/dex/quick/arm64/target_arm64.cc | 403 |
| -rw-r--r-- | compiler/dex/quick/codegen_util.cc | 3 |
| -rwxr-xr-x | compiler/dex/quick/gen_invoke.cc | 605 |
| -rw-r--r-- | compiler/dex/quick/mips/codegen_mips.h | 21 |
| -rw-r--r-- | compiler/dex/quick/mips/target_mips.cc | 26 |
| -rw-r--r-- | compiler/dex/quick/mir_to_lir-inl.h | 18 |
| -rw-r--r-- | compiler/dex/quick/mir_to_lir.cc | 187 |
| -rw-r--r-- | compiler/dex/quick/mir_to_lir.h | 86 |
| -rw-r--r-- | compiler/dex/quick/x86/codegen_x86.h | 79 |
| -rwxr-xr-x | compiler/dex/quick/x86/target_x86.cc | 566 |
| -rw-r--r-- | test/800-smali/expected.txt | 1 |
| -rw-r--r-- | test/800-smali/smali/FloatIntConstPassing.smali | 29 |
| -rw-r--r-- | test/800-smali/src/Main.java | 1 |
18 files changed, 679 insertions, 1777 deletions
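The hunks below replace the per-backend `InToRegStorageMapper`/`InToRegStorageMapping` copies (and the separate `GenDalvikArgsNoRange`/`GenDalvikArgsRange` paths) with one shorty-driven mapper interface plus a per-target `GenDalvikArgsBulkCopy` hook. The standalone sketch that follows illustrates that contract; `ShortyArg`, `RegStorage`, and the register counts here are simplified stand-ins for the real definitions in mir_to_lir.h and the per-target files in this diff, not the actual ART types.

```cpp
// Condensed sketch of the mapper contract introduced by this change.
// All types below are simplified stand-ins, not the ART definitions.
#include <cstdio>

struct RegStorage {              // stand-in for art::RegStorage
  int reg;
  bool fp;
  bool Valid() const { return reg >= 0; }
  static RegStorage Invalid() { return RegStorage{-1, false}; }
};

class ShortyArg {                // stand-in: wraps one shorty character
 public:
  explicit ShortyArg(char c) : c_(c) {}
  bool IsFP() const { return c_ == 'F' || c_ == 'D'; }
  bool IsWide() const { return c_ == 'J' || c_ == 'D'; }
  bool IsRef() const { return c_ == 'L'; }
 private:
  char c_;
};

// Shared base class: every Quick backend now implements the same two hooks.
class InToRegStorageMapper {
 public:
  virtual RegStorage GetNextReg(ShortyArg arg) = 0;
  virtual void Reset() = 0;
  virtual ~InToRegStorageMapper() {}
};

// Example backend mapper, shaped like InToRegStorageArm64Mapper: integer args
// go to x1..x7, FP args to the first eight FP registers, the rest to the stack.
class InToRegStorageExampleMapper final : public InToRegStorageMapper {
 public:
  RegStorage GetNextReg(ShortyArg arg) override {
    if (arg.IsFP()) {
      return (cur_fp_reg_ < kMaxFpArgs) ? RegStorage{cur_fp_reg_++, true}
                                        : RegStorage::Invalid();
    }
    return (cur_core_reg_ < kMaxCoreArgs) ? RegStorage{1 + cur_core_reg_++, false}
                                          : RegStorage::Invalid();
  }
  void Reset() override { cur_core_reg_ = 0; cur_fp_reg_ = 0; }

 private:
  static constexpr int kMaxCoreArgs = 7;
  static constexpr int kMaxFpArgs = 8;
  int cur_core_reg_ = 0;
  int cur_fp_reg_ = 0;
};

int main() {
  // Common code walks the callee's shorty (skipping the return type) and asks
  // the mapper for a register per argument; an invalid result means the value
  // is passed on the stack, mirroring InToRegStorageMapping::Initialize.
  const char* shorty = "VJFLI";  // void f(long, float, Object, int)
  InToRegStorageExampleMapper mapper;
  mapper.Reset();
  for (const char* p = shorty + 1; *p != '\0'; ++p) {
    RegStorage reg = mapper.GetNextReg(ShortyArg(*p));
    std::printf("arg '%c' -> %s\n", *p, reg.Valid() ? "register" : "stack");
  }
  return 0;
}
```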
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc index 023abca64e..6b4d737316 100644 --- a/compiler/dex/mir_graph.cc +++ b/compiler/dex/mir_graph.cc @@ -1590,6 +1590,12 @@ const char* MIRGraph::GetShortyFromTargetIdx(int target_idx) { return cu_->dex_file->GetShorty(method_id.proto_idx_); } +const char* MIRGraph::GetShortyFromMethodReference(const MethodReference& target_method) { + const DexFile::MethodId& method_id = + target_method.dex_file->GetMethodId(target_method.dex_method_index); + return target_method.dex_file->GetShorty(method_id.proto_idx_); +} + /* Debug Utility - dump a compilation unit */ void MIRGraph::DumpMIRGraph() { const char* block_type_names[] = { diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h index 1a1884131a..da0dd88e84 100644 --- a/compiler/dex/mir_graph.h +++ b/compiler/dex/mir_graph.h @@ -1113,6 +1113,7 @@ class MIRGraph { std::string GetSSANameWithConst(int ssa_reg, bool singles_only); void GetBlockName(BasicBlock* bb, char* name); const char* GetShortyFromTargetIdx(int); + const char* GetShortyFromMethodReference(const MethodReference& target_method); void DumpMIRGraph(); CallInfo* NewMemCallInfo(BasicBlock* bb, MIR* mir, InvokeType type, bool is_range); BasicBlock* NewMemBB(BBType block_type, int block_id); diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h index 0bc4c3b7bf..0ae7ee3560 100644 --- a/compiler/dex/quick/arm/codegen_arm.h +++ b/compiler/dex/quick/arm/codegen_arm.h @@ -26,16 +26,6 @@ namespace art { class ArmMir2Lir FINAL : public Mir2Lir { protected: - // TODO: Consolidate hard float target support. - // InToRegStorageMapper and InToRegStorageMapping can be shared with all backends. - // Base class used to get RegStorage for next argument. - class InToRegStorageMapper { - public: - virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide) = 0; - virtual ~InToRegStorageMapper() { - } - }; - // Inherited class for ARM backend. class InToRegStorageArmMapper FINAL : public InToRegStorageMapper { public: @@ -43,46 +33,26 @@ class ArmMir2Lir FINAL : public Mir2Lir { : cur_core_reg_(0), cur_fp_reg_(0), cur_fp_double_reg_(0) { } - virtual ~InToRegStorageArmMapper() { - } - - RegStorage GetNextReg(bool is_double_or_float, bool is_wide) OVERRIDE; + RegStorage GetNextReg(ShortyArg arg) OVERRIDE; - private: - uint32_t cur_core_reg_; - uint32_t cur_fp_reg_; - uint32_t cur_fp_double_reg_; - }; - - // Class to map argument to RegStorage. The mapping object is initialized by a mapper. 
- class InToRegStorageMapping FINAL { - public: - InToRegStorageMapping() - : max_mapped_in_(0), is_there_stack_mapped_(false), initialized_(false) { + virtual void Reset() OVERRIDE { + cur_core_reg_ = 0; + cur_fp_reg_ = 0; + cur_fp_double_reg_ = 0; } - int GetMaxMappedIn() const { - return max_mapped_in_; - } - - bool IsThereStackMapped() const { - return is_there_stack_mapped_; - } - - bool IsInitialized() const { - return initialized_; - } - - void Initialize(RegLocation* arg_locs, int count, InToRegStorageMapper* mapper); - RegStorage Get(int in_position) const; - private: - std::map<int, RegStorage> mapping_; - int max_mapped_in_; - bool is_there_stack_mapped_; - bool initialized_; + size_t cur_core_reg_; + size_t cur_fp_reg_; + size_t cur_fp_double_reg_; }; + InToRegStorageArmMapper in_to_reg_storage_arm_mapper_; + InToRegStorageMapper* GetResetedInToRegStorageMapper() OVERRIDE { + in_to_reg_storage_arm_mapper_.Reset(); + return &in_to_reg_storage_arm_mapper_; + } + public: ArmMir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena); @@ -127,7 +97,6 @@ class ArmMir2Lir FINAL : public Mir2Lir { } } - RegStorage GetArgMappingToPhysicalReg(int arg_num) OVERRIDE; RegLocation GetReturnAlt() OVERRIDE; RegLocation GetReturnWideAlt() OVERRIDE; RegLocation LocCReturn() OVERRIDE; @@ -290,19 +259,6 @@ class ArmMir2Lir FINAL : public Mir2Lir { LIR* InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) OVERRIDE; size_t GetInstructionOffset(LIR* lir); - int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel, - NextCallInsn next_call_insn, - const MethodReference& target_method, - uint32_t vtable_idx, - uintptr_t direct_code, uintptr_t direct_method, InvokeType type, - bool skip_this) OVERRIDE; - int GenDalvikArgsRange(CallInfo* info, int call_state, LIR** pcrLabel, - NextCallInsn next_call_insn, - const MethodReference& target_method, - uint32_t vtable_idx, - uintptr_t direct_code, uintptr_t direct_method, InvokeType type, - bool skip_this) OVERRIDE; - private: void GenNegLong(RegLocation rl_dest, RegLocation rl_src); void GenMulLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, @@ -361,7 +317,7 @@ class ArmMir2Lir FINAL : public Mir2Lir { RegStorage::FloatSolo32(reg_num * 2 + 1)); } - InToRegStorageMapping in_to_reg_storage_mapping_; + int GenDalvikArgsBulkCopy(CallInfo* info, int first, int count) OVERRIDE; }; } // namespace art diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc index 0e8f64556d..7190a49c26 100644 --- a/compiler/dex/quick/arm/target_arm.cc +++ b/compiler/dex/quick/arm/target_arm.cc @@ -896,7 +896,7 @@ void ArmMir2Lir::InstallLiteralPools() { Mir2Lir::InstallLiteralPools(); } -RegStorage ArmMir2Lir::InToRegStorageArmMapper::GetNextReg(bool is_double_or_float, bool is_wide) { +RegStorage ArmMir2Lir::InToRegStorageArmMapper::GetNextReg(ShortyArg arg) { const RegStorage coreArgMappingToPhysicalReg[] = {rs_r1, rs_r2, rs_r3}; const int coreArgMappingToPhysicalRegSize = arraysize(coreArgMappingToPhysicalReg); @@ -906,28 +906,18 @@ RegStorage ArmMir2Lir::InToRegStorageArmMapper::GetNextReg(bool is_double_or_flo constexpr uint32_t fpArgMappingToPhysicalRegSize = arraysize(fpArgMappingToPhysicalReg); static_assert(fpArgMappingToPhysicalRegSize % 2 == 0, "Number of FP Arg regs is not even"); - if (kArm32QuickCodeUseSoftFloat) { - is_double_or_float = false; // Regard double as long, float as int. - is_wide = false; // Map long separately. 
- } - RegStorage result = RegStorage::InvalidReg(); - if (is_double_or_float) { - // TODO: Remove "cur_fp_double_reg_ % 2 != 0" when we return double as double. - if (is_wide || cur_fp_double_reg_ % 2 != 0) { + // Regard double as long, float as int for kArm32QuickCodeUseSoftFloat. + if (arg.IsFP() && !kArm32QuickCodeUseSoftFloat) { + if (arg.IsWide()) { cur_fp_double_reg_ = std::max(cur_fp_double_reg_, RoundUp(cur_fp_reg_, 2)); if (cur_fp_double_reg_ < fpArgMappingToPhysicalRegSize) { - // TODO: Replace by following code in the branch when FlushIns() support 64-bit registers. - // result = RegStorage::MakeRegPair(fpArgMappingToPhysicalReg[cur_fp_double_reg_], - // fpArgMappingToPhysicalReg[cur_fp_double_reg_ + 1]); - // result = As64BitFloatReg(result); - // cur_fp_double_reg_ += 2; - result = fpArgMappingToPhysicalReg[cur_fp_double_reg_]; - cur_fp_double_reg_++; + result = RegStorage::MakeRegPair(fpArgMappingToPhysicalReg[cur_fp_double_reg_], + fpArgMappingToPhysicalReg[cur_fp_double_reg_ + 1]); + result = As64BitFloatReg(result); + cur_fp_double_reg_ += 2; } } else { - // TODO: Remove the check when we return double as double. - DCHECK_EQ(cur_fp_double_reg_ % 2, 0U); if (cur_fp_reg_ % 2 == 0) { cur_fp_reg_ = std::max(cur_fp_double_reg_, cur_fp_reg_); } @@ -939,270 +929,23 @@ RegStorage ArmMir2Lir::InToRegStorageArmMapper::GetNextReg(bool is_double_or_flo } else { if (cur_core_reg_ < coreArgMappingToPhysicalRegSize) { result = coreArgMappingToPhysicalReg[cur_core_reg_++]; - // TODO: Enable following code when FlushIns() support 64-bit registers. - // if (is_wide && cur_core_reg_ < coreArgMappingToPhysicalRegSize) { - // result = RegStorage::MakeRegPair(result, coreArgMappingToPhysicalReg[cur_core_reg_++]); - // } + if (arg.IsWide() && cur_core_reg_ < coreArgMappingToPhysicalRegSize) { + result = RegStorage::MakeRegPair(result, coreArgMappingToPhysicalReg[cur_core_reg_++]); + } } } return result; } -RegStorage ArmMir2Lir::InToRegStorageMapping::Get(int in_position) const { - DCHECK(IsInitialized()); - auto res = mapping_.find(in_position); - return res != mapping_.end() ? res->second : RegStorage::InvalidReg(); -} - -void ArmMir2Lir::InToRegStorageMapping::Initialize(RegLocation* arg_locs, int count, - InToRegStorageMapper* mapper) { - DCHECK(mapper != nullptr); - max_mapped_in_ = -1; - is_there_stack_mapped_ = false; - for (int in_position = 0; in_position < count; in_position++) { - RegStorage reg = mapper->GetNextReg(arg_locs[in_position].fp, - arg_locs[in_position].wide); - if (reg.Valid()) { - mapping_[in_position] = reg; - // TODO: Enable the following code when FlushIns() support 64-bit argument registers. - // if (arg_locs[in_position].wide) { - // if (reg.Is32Bit()) { - // // As it is a split long, the hi-part is on stack. - // is_there_stack_mapped_ = true; - // } - // // We covered 2 v-registers, so skip the next one - // in_position++; - // } - max_mapped_in_ = std::max(max_mapped_in_, in_position); - } else { - is_there_stack_mapped_ = true; - } - } - initialized_ = true; -} - -// TODO: Should be able to return long, double registers. -// Need check some common code as it will break some assumption. 
-RegStorage ArmMir2Lir::GetArgMappingToPhysicalReg(int arg_num) { - if (!in_to_reg_storage_mapping_.IsInitialized()) { - int start_vreg = mir_graph_->GetFirstInVR(); - RegLocation* arg_locs = &mir_graph_->reg_location_[start_vreg]; - - InToRegStorageArmMapper mapper; - in_to_reg_storage_mapping_.Initialize(arg_locs, mir_graph_->GetNumOfInVRs(), &mapper); - } - return in_to_reg_storage_mapping_.Get(arg_num); -} - -int ArmMir2Lir::GenDalvikArgsNoRange(CallInfo* info, - int call_state, LIR** pcrLabel, NextCallInsn next_call_insn, - const MethodReference& target_method, - uint32_t vtable_idx, uintptr_t direct_code, - uintptr_t direct_method, InvokeType type, bool skip_this) { +int ArmMir2Lir::GenDalvikArgsBulkCopy(CallInfo* info, int first, int count) { if (kArm32QuickCodeUseSoftFloat) { - return Mir2Lir::GenDalvikArgsNoRange(info, call_state, pcrLabel, next_call_insn, target_method, - vtable_idx, direct_code, direct_method, type, skip_this); - } else { - return GenDalvikArgsRange(info, call_state, pcrLabel, next_call_insn, target_method, vtable_idx, - direct_code, direct_method, type, skip_this); - } -} - -int ArmMir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, - LIR** pcrLabel, NextCallInsn next_call_insn, - const MethodReference& target_method, - uint32_t vtable_idx, uintptr_t direct_code, - uintptr_t direct_method, InvokeType type, bool skip_this) { - if (kArm32QuickCodeUseSoftFloat) { - return Mir2Lir::GenDalvikArgsRange(info, call_state, pcrLabel, next_call_insn, target_method, - vtable_idx, direct_code, direct_method, type, skip_this); - } - - // TODO: Rework the implementation when argument register can be long or double. - - /* If no arguments, just return */ - if (info->num_arg_words == 0) { - return call_state; - } - - const int start_index = skip_this ? 1 : 0; - - InToRegStorageArmMapper mapper; - InToRegStorageMapping in_to_reg_storage_mapping; - in_to_reg_storage_mapping.Initialize(info->args, info->num_arg_words, &mapper); - const int last_mapped_in = in_to_reg_storage_mapping.GetMaxMappedIn(); - int regs_left_to_pass_via_stack = info->num_arg_words - (last_mapped_in + 1); - - // First of all, check whether it makes sense to use bulk copying. - // Bulk copying is done only for the range case. - // TODO: make a constant instead of 2 - if (info->is_range && regs_left_to_pass_via_stack >= 2) { - // Scan the rest of the args - if in phys_reg flush to memory - for (int next_arg = last_mapped_in + 1; next_arg < info->num_arg_words;) { - RegLocation loc = info->args[next_arg]; - if (loc.wide) { - // TODO: Only flush hi-part. 
- if (loc.high_word) { - loc = info->args[--next_arg]; - } - loc = UpdateLocWide(loc); - if (loc.location == kLocPhysReg) { - ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - StoreBaseDisp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k64, kNotVolatile); - } - next_arg += 2; - } else { - loc = UpdateLoc(loc); - if (loc.location == kLocPhysReg) { - ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - if (loc.ref) { - StoreRefDisp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, kNotVolatile); - } else { - StoreBaseDisp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k32, - kNotVolatile); - } - } - next_arg++; - } - } - - // The rest can be copied together - int start_offset = SRegOffset(info->args[last_mapped_in + 1].s_reg_low); - int outs_offset = StackVisitor::GetOutVROffset(last_mapped_in + 1, - cu_->instruction_set); - - int current_src_offset = start_offset; - int current_dest_offset = outs_offset; - - // Only davik regs are accessed in this loop; no next_call_insn() calls. - ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - while (regs_left_to_pass_via_stack > 0) { - /* - * TODO: Improve by adding block copy for large number of arguments. This - * should be done, if possible, as a target-depending helper. For now, just - * copy a Dalvik vreg at a time. - */ - // Moving 32-bits via general purpose register. - size_t bytes_to_move = sizeof(uint32_t); - - // Instead of allocating a new temp, simply reuse one of the registers being used - // for argument passing. - RegStorage temp = TargetReg(kArg3, kNotWide); - - // Now load the argument VR and store to the outs. - Load32Disp(TargetPtrReg(kSp), current_src_offset, temp); - Store32Disp(TargetPtrReg(kSp), current_dest_offset, temp); - - current_src_offset += bytes_to_move; - current_dest_offset += bytes_to_move; - regs_left_to_pass_via_stack -= (bytes_to_move >> 2); - } - DCHECK_EQ(regs_left_to_pass_via_stack, 0); - } - - // Now handle rest not registers if they are - if (in_to_reg_storage_mapping.IsThereStackMapped()) { - RegStorage regWide = TargetReg(kArg2, kWide); - for (int i = start_index; i <= last_mapped_in + regs_left_to_pass_via_stack; i++) { - RegLocation rl_arg = info->args[i]; - rl_arg = UpdateRawLoc(rl_arg); - RegStorage reg = in_to_reg_storage_mapping.Get(i); - // TODO: Only pass split wide hi-part via stack. 
- if (!reg.Valid() || rl_arg.wide) { - int out_offset = StackVisitor::GetOutVROffset(i, cu_->instruction_set); - - { - ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - if (rl_arg.wide) { - if (rl_arg.location == kLocPhysReg) { - StoreBaseDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, k64, kNotVolatile); - } else { - LoadValueDirectWideFixed(rl_arg, regWide); - StoreBaseDisp(TargetPtrReg(kSp), out_offset, regWide, k64, kNotVolatile); - } - } else { - if (rl_arg.location == kLocPhysReg) { - if (rl_arg.ref) { - StoreRefDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, kNotVolatile); - } else { - StoreBaseDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, k32, kNotVolatile); - } - } else { - if (rl_arg.ref) { - RegStorage regSingle = TargetReg(kArg2, kRef); - LoadValueDirectFixed(rl_arg, regSingle); - StoreRefDisp(TargetPtrReg(kSp), out_offset, regSingle, kNotVolatile); - } else { - RegStorage regSingle = TargetReg(kArg2, kNotWide); - LoadValueDirectFixed(rl_arg, regSingle); - StoreBaseDisp(TargetPtrReg(kSp), out_offset, regSingle, k32, kNotVolatile); - } - } - } - } - - call_state = next_call_insn(cu_, info, call_state, target_method, - vtable_idx, direct_code, direct_method, type); - } - if (rl_arg.wide) { - i++; - } - } - } - - // Finish with mapped registers - for (int i = start_index; i <= last_mapped_in; i++) { - RegLocation rl_arg = info->args[i]; - rl_arg = UpdateRawLoc(rl_arg); - RegStorage reg = in_to_reg_storage_mapping.Get(i); - if (reg.Valid()) { - if (reg.Is64Bit()) { - LoadValueDirectWideFixed(rl_arg, reg); - } else { - // TODO: Only split long should be the case we need to care about. - if (rl_arg.wide) { - ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - int high_word = rl_arg.high_word ? 1 : 0; - rl_arg = high_word ? info->args[i - 1] : rl_arg; - if (rl_arg.location == kLocPhysReg) { - RegStorage rs_arg = rl_arg.reg; - if (rs_arg.IsDouble() && rs_arg.Is64BitSolo()) { - rs_arg = As64BitFloatRegPair(rs_arg); - } - RegStorage rs_arg_low = rs_arg.GetLow(); - RegStorage rs_arg_high = rs_arg.GetHigh(); - OpRegCopy(reg, high_word ? rs_arg_high : rs_arg_low); - } else { - Load32Disp(TargetPtrReg(kSp), SRegOffset(rl_arg.s_reg_low + high_word), reg); - } - } else { - LoadValueDirectFixed(rl_arg, reg); - } - } - call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, - direct_code, direct_method, type); - } - if (reg.Is64Bit()) { - i++; - } - } - - call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, - direct_code, direct_method, type); - if (pcrLabel) { - if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitNullChecks()) { - *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1, kRef), info->opt_flags); - } else { - *pcrLabel = nullptr; - // In lieu of generating a check for kArg1 being null, we need to - // perform a load when doing implicit checks. - RegStorage tmp = AllocTemp(); - Load32Disp(TargetReg(kArg1, kRef), 0, tmp); - MarkPossibleNullPointerException(info->opt_flags); - FreeTemp(tmp); - } + return Mir2Lir::GenDalvikArgsBulkCopy(info, first, count); } - return call_state; + /* + * TODO: Improve by adding block copy for large number of arguments. For now, just + * copy a Dalvik vreg at a time. 
+ */ + return count; } } // namespace art diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h index 5e10f80fa5..766ac23ef9 100644 --- a/compiler/dex/quick/arm64/codegen_arm64.h +++ b/compiler/dex/quick/arm64/codegen_arm64.h @@ -27,38 +27,25 @@ namespace art { class Arm64Mir2Lir FINAL : public Mir2Lir { protected: - // TODO: consolidate 64-bit target support. - class InToRegStorageMapper { - public: - virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide, bool is_ref) = 0; - virtual ~InToRegStorageMapper() {} - }; - class InToRegStorageArm64Mapper : public InToRegStorageMapper { public: InToRegStorageArm64Mapper() : cur_core_reg_(0), cur_fp_reg_(0) {} virtual ~InToRegStorageArm64Mapper() {} - virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide, bool is_ref); + virtual RegStorage GetNextReg(ShortyArg arg); + virtual void Reset() OVERRIDE { + cur_core_reg_ = 0; + cur_fp_reg_ = 0; + } private: - int cur_core_reg_; - int cur_fp_reg_; + size_t cur_core_reg_; + size_t cur_fp_reg_; }; - class InToRegStorageMapping { - public: - InToRegStorageMapping() : max_mapped_in_(0), is_there_stack_mapped_(false), - initialized_(false) {} - void Initialize(RegLocation* arg_locs, int count, InToRegStorageMapper* mapper); - int GetMaxMappedIn() { return max_mapped_in_; } - bool IsThereStackMapped() { return is_there_stack_mapped_; } - RegStorage Get(int in_position); - bool IsInitialized() { return initialized_; } - private: - std::map<int, RegStorage> mapping_; - int max_mapped_in_; - bool is_there_stack_mapped_; - bool initialized_; - }; + InToRegStorageArm64Mapper in_to_reg_storage_arm64_mapper_; + InToRegStorageMapper* GetResetedInToRegStorageMapper() OVERRIDE { + in_to_reg_storage_arm64_mapper_.Reset(); + return &in_to_reg_storage_arm64_mapper_; + } public: Arm64Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena); @@ -113,7 +100,6 @@ class Arm64Mir2Lir FINAL : public Mir2Lir { RegStorage TargetPtrReg(SpecialTargetRegister symbolic_reg) OVERRIDE { return As64BitReg(TargetReg(symbolic_reg)); } - RegStorage GetArgMappingToPhysicalReg(int arg_num) OVERRIDE; RegLocation GetReturnAlt() OVERRIDE; RegLocation GetReturnWideAlt() OVERRIDE; RegLocation LocCReturn() OVERRIDE; @@ -240,22 +226,6 @@ class Arm64Mir2Lir FINAL : public Mir2Lir { bool InexpensiveConstantLong(int64_t value) OVERRIDE; bool InexpensiveConstantDouble(int64_t value) OVERRIDE; - void FlushIns(RegLocation* ArgLocs, RegLocation rl_method) OVERRIDE; - - int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel, - NextCallInsn next_call_insn, - const MethodReference& target_method, - uint32_t vtable_idx, - uintptr_t direct_code, uintptr_t direct_method, InvokeType type, - bool skip_this) OVERRIDE; - - int GenDalvikArgsRange(CallInfo* info, int call_state, LIR** pcrLabel, - NextCallInsn next_call_insn, - const MethodReference& target_method, - uint32_t vtable_idx, - uintptr_t direct_code, uintptr_t direct_method, InvokeType type, - bool skip_this) OVERRIDE; - bool WideGPRsAreAliases() const OVERRIDE { return true; // 64b architecture. 
} @@ -422,10 +392,11 @@ class Arm64Mir2Lir FINAL : public Mir2Lir { void GenDivRemLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, bool is_div, int flags); - InToRegStorageMapping in_to_reg_storage_mapping_; static const A64EncodingMap EncodingMap[kA64Last]; ArenaVector<LIR*> call_method_insns_; + + int GenDalvikArgsBulkCopy(CallInfo* info, int first, int count) OVERRIDE; }; } // namespace art diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc index 094ff51eee..e7fa8ed475 100644 --- a/compiler/dex/quick/arm64/target_arm64.cc +++ b/compiler/dex/quick/arm64/target_arm64.cc @@ -790,27 +790,23 @@ const char* Arm64Mir2Lir::GetTargetInstFmt(int opcode) { return Arm64Mir2Lir::EncodingMap[UNWIDE(opcode)].fmt; } -RegStorage Arm64Mir2Lir::InToRegStorageArm64Mapper::GetNextReg(bool is_double_or_float, - bool is_wide, - bool is_ref) { +RegStorage Arm64Mir2Lir::InToRegStorageArm64Mapper::GetNextReg(ShortyArg arg) { const RegStorage coreArgMappingToPhysicalReg[] = {rs_x1, rs_x2, rs_x3, rs_x4, rs_x5, rs_x6, rs_x7}; - const int coreArgMappingToPhysicalRegSize = - sizeof(coreArgMappingToPhysicalReg) / sizeof(RegStorage); + const size_t coreArgMappingToPhysicalRegSize = arraysize(coreArgMappingToPhysicalReg); const RegStorage fpArgMappingToPhysicalReg[] = {rs_f0, rs_f1, rs_f2, rs_f3, rs_f4, rs_f5, rs_f6, rs_f7}; - const int fpArgMappingToPhysicalRegSize = - sizeof(fpArgMappingToPhysicalReg) / sizeof(RegStorage); + const size_t fpArgMappingToPhysicalRegSize = arraysize(fpArgMappingToPhysicalReg); RegStorage result = RegStorage::InvalidReg(); - if (is_double_or_float) { + if (arg.IsFP()) { if (cur_fp_reg_ < fpArgMappingToPhysicalRegSize) { - DCHECK(!is_ref); + DCHECK(!arg.IsRef()); result = fpArgMappingToPhysicalReg[cur_fp_reg_++]; if (result.Valid()) { // TODO: switching between widths remains a bit ugly. Better way? int res_reg = result.GetReg(); - result = is_wide ? RegStorage::FloatSolo64(res_reg) : RegStorage::FloatSolo32(res_reg); + result = arg.IsWide() ? RegStorage::FloatSolo64(res_reg) : RegStorage::FloatSolo32(res_reg); } } } else { @@ -819,388 +815,15 @@ RegStorage Arm64Mir2Lir::InToRegStorageArm64Mapper::GetNextReg(bool is_double_or if (result.Valid()) { // TODO: switching between widths remains a bit ugly. Better way? int res_reg = result.GetReg(); - DCHECK(!(is_wide && is_ref)); - result = (is_wide || is_ref) ? RegStorage::Solo64(res_reg) : RegStorage::Solo32(res_reg); + DCHECK(!(arg.IsWide() && arg.IsRef())); + result = (arg.IsWide() || arg.IsRef()) ? + RegStorage::Solo64(res_reg) : RegStorage::Solo32(res_reg); } } } return result; } -RegStorage Arm64Mir2Lir::InToRegStorageMapping::Get(int in_position) { - DCHECK(IsInitialized()); - auto res = mapping_.find(in_position); - return res != mapping_.end() ? 
res->second : RegStorage::InvalidReg(); -} - -void Arm64Mir2Lir::InToRegStorageMapping::Initialize(RegLocation* arg_locs, int count, - InToRegStorageMapper* mapper) { - DCHECK(mapper != nullptr); - max_mapped_in_ = -1; - is_there_stack_mapped_ = false; - for (int in_position = 0; in_position < count; in_position++) { - RegStorage reg = mapper->GetNextReg(arg_locs[in_position].fp, - arg_locs[in_position].wide, - arg_locs[in_position].ref); - if (reg.Valid()) { - mapping_[in_position] = reg; - if (arg_locs[in_position].wide) { - // We covered 2 args, so skip the next one - in_position++; - } - max_mapped_in_ = std::max(max_mapped_in_, in_position); - } else { - is_there_stack_mapped_ = true; - } - } - initialized_ = true; -} - - -// Deprecate. Use the new mechanism. -// TODO(Arm64): reuse info in QuickArgumentVisitor? -static RegStorage GetArgPhysicalReg(RegLocation* loc, int* num_gpr_used, int* num_fpr_used, - OpSize* op_size) { - if (loc->fp) { - int n = *num_fpr_used; - if (n < 8) { - *num_fpr_used = n + 1; - RegStorage::RegStorageKind reg_kind; - if (loc->wide) { - *op_size = kDouble; - reg_kind = RegStorage::k64BitSolo; - } else { - *op_size = kSingle; - reg_kind = RegStorage::k32BitSolo; - } - return RegStorage(RegStorage::kValid | reg_kind | RegStorage::kFloatingPoint | n); - } - } else { - int n = *num_gpr_used; - if (n < 8) { - *num_gpr_used = n + 1; - if (loc->wide || loc->ref) { - *op_size = k64; - return RegStorage::Solo64(n); - } else { - *op_size = k32; - return RegStorage::Solo32(n); - } - } - } - *op_size = kWord; - return RegStorage::InvalidReg(); -} - -RegStorage Arm64Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) { - if (!in_to_reg_storage_mapping_.IsInitialized()) { - int start_vreg = mir_graph_->GetFirstInVR(); - RegLocation* arg_locs = &mir_graph_->reg_location_[start_vreg]; - - InToRegStorageArm64Mapper mapper; - in_to_reg_storage_mapping_.Initialize(arg_locs, mir_graph_->GetNumOfInVRs(), &mapper); - } - return in_to_reg_storage_mapping_.Get(arg_num); -} - - -/* - * If there are any ins passed in registers that have not been promoted - * to a callee-save register, flush them to the frame. Perform initial - * assignment of promoted arguments. - * - * ArgLocs is an array of location records describing the incoming arguments - * with one location record per word of argument. - */ -void Arm64Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) { - int num_gpr_used = 1; - int num_fpr_used = 0; - - /* - * Dummy up a RegLocation for the incoming StackReference<mirror::ArtMethod> - * It will attempt to keep kArg0 live (or copy it to home location - * if promoted). - */ - RegLocation rl_src = rl_method; - rl_src.location = kLocPhysReg; - rl_src.reg = TargetReg(kArg0, kRef); - rl_src.home = false; - MarkLive(rl_src); - StoreValue(rl_method, rl_src); - // If Method* has been promoted, explicitly flush - if (rl_method.location == kLocPhysReg) { - StoreRefDisp(TargetPtrReg(kSp), 0, rl_src.reg, kNotVolatile); - } - - if (mir_graph_->GetNumOfInVRs() == 0) { - return; - } - - // Handle dalvik registers. - ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - int start_vreg = mir_graph_->GetFirstInVR(); - for (uint32_t i = 0; i < mir_graph_->GetNumOfInVRs(); i++) { - RegLocation* t_loc = &ArgLocs[i]; - OpSize op_size; - RegStorage reg = GetArgPhysicalReg(t_loc, &num_gpr_used, &num_fpr_used, &op_size); - - if (reg.Valid()) { - // If arriving in register. - - // We have already updated the arg location with promoted info - // so we can be based on it. 
- if (t_loc->location == kLocPhysReg) { - // Just copy it. - OpRegCopy(t_loc->reg, reg); - } else { - // Needs flush. - if (t_loc->ref) { - StoreRefDisp(TargetPtrReg(kSp), SRegOffset(start_vreg + i), reg, kNotVolatile); - } else { - StoreBaseDisp(TargetPtrReg(kSp), SRegOffset(start_vreg + i), reg, t_loc->wide ? k64 : k32, - kNotVolatile); - } - } - } else { - // If arriving in frame & promoted. - if (t_loc->location == kLocPhysReg) { - if (t_loc->ref) { - LoadRefDisp(TargetPtrReg(kSp), SRegOffset(start_vreg + i), t_loc->reg, kNotVolatile); - } else { - LoadBaseDisp(TargetPtrReg(kSp), SRegOffset(start_vreg + i), t_loc->reg, - t_loc->wide ? k64 : k32, kNotVolatile); - } - } - } - if (t_loc->wide) { - // Increment i to skip the next one. - i++; - } - // if ((v_map->core_location == kLocPhysReg) && !t_loc->fp) { - // OpRegCopy(RegStorage::Solo32(v_map->core_reg), reg); - // } else if ((v_map->fp_location == kLocPhysReg) && t_loc->fp) { - // OpRegCopy(RegStorage::Solo32(v_map->fp_reg), reg); - // } else { - // StoreBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i), reg, op_size, kNotVolatile); - // if (reg.Is64Bit()) { - // if (SRegOffset(start_vreg + i) + 4 != SRegOffset(start_vreg + i + 1)) { - // LOG(FATAL) << "64 bit value stored in non-consecutive 4 bytes slots"; - // } - // i += 1; - // } - // } - // } else { - // // If arriving in frame & promoted - // if (v_map->core_location == kLocPhysReg) { - // LoadWordDisp(TargetReg(kSp), SRegOffset(start_vreg + i), - // RegStorage::Solo32(v_map->core_reg)); - // } - // if (v_map->fp_location == kLocPhysReg) { - // LoadWordDisp(TargetReg(kSp), SRegOffset(start_vreg + i), RegStorage::Solo32(v_map->fp_reg)); - // } - } -} - -/* - * Load up to 5 arguments, the first three of which will be in - * kArg1 .. kArg3. On entry kArg0 contains the current method pointer, - * and as part of the load sequence, it must be replaced with - * the target method pointer. - */ -int Arm64Mir2Lir::GenDalvikArgsNoRange(CallInfo* info, - int call_state, LIR** pcrLabel, NextCallInsn next_call_insn, - const MethodReference& target_method, - uint32_t vtable_idx, uintptr_t direct_code, - uintptr_t direct_method, InvokeType type, bool skip_this) { - return GenDalvikArgsRange(info, - call_state, pcrLabel, next_call_insn, - target_method, - vtable_idx, direct_code, - direct_method, type, skip_this); -} - -/* - * May have 0+ arguments (also used for jumbo). Note that - * source virtual registers may be in physical registers, so may - * need to be flushed to home location before copying. This - * applies to arg3 and above (see below). - * - * FIXME: update comments. - * - * Two general strategies: - * If < 20 arguments - * Pass args 3-18 using vldm/vstm block copy - * Pass arg0, arg1 & arg2 in kArg1-kArg3 - * If 20+ arguments - * Pass args arg19+ using memcpy block copy - * Pass arg0, arg1 & arg2 in kArg1-kArg3 - * - */ -int Arm64Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, - LIR** pcrLabel, NextCallInsn next_call_insn, - const MethodReference& target_method, - uint32_t vtable_idx, uintptr_t direct_code, - uintptr_t direct_method, InvokeType type, bool skip_this) { - /* If no arguments, just return */ - if (info->num_arg_words == 0) - return call_state; - - const int start_index = skip_this ? 
1 : 0; - - InToRegStorageArm64Mapper mapper; - InToRegStorageMapping in_to_reg_storage_mapping; - in_to_reg_storage_mapping.Initialize(info->args, info->num_arg_words, &mapper); - const int last_mapped_in = in_to_reg_storage_mapping.GetMaxMappedIn(); - int regs_left_to_pass_via_stack = info->num_arg_words - (last_mapped_in + 1); - - // First of all, check whether it makes sense to use bulk copying. - // Bulk copying is done only for the range case. - // TODO: make a constant instead of 2 - if (info->is_range && regs_left_to_pass_via_stack >= 2) { - // Scan the rest of the args - if in phys_reg flush to memory - for (int next_arg = last_mapped_in + 1; next_arg < info->num_arg_words;) { - RegLocation loc = info->args[next_arg]; - if (loc.wide) { - loc = UpdateLocWide(loc); - if (loc.location == kLocPhysReg) { - ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - StoreBaseDisp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k64, kNotVolatile); - } - next_arg += 2; - } else { - loc = UpdateLoc(loc); - if (loc.location == kLocPhysReg) { - ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - if (loc.ref) { - StoreRefDisp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, kNotVolatile); - } else { - StoreBaseDisp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k32, - kNotVolatile); - } - } - next_arg++; - } - } - - // The rest can be copied together - int start_offset = SRegOffset(info->args[last_mapped_in + 1].s_reg_low); - int outs_offset = StackVisitor::GetOutVROffset(last_mapped_in + 1, - cu_->instruction_set); - - int current_src_offset = start_offset; - int current_dest_offset = outs_offset; - - // Only davik regs are accessed in this loop; no next_call_insn() calls. - ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - while (regs_left_to_pass_via_stack > 0) { - /* - * TODO: Improve by adding block copy for large number of arguments. This - * should be done, if possible, as a target-depending helper. For now, just - * copy a Dalvik vreg at a time. - */ - // Moving 32-bits via general purpose register. - size_t bytes_to_move = sizeof(uint32_t); - - // Instead of allocating a new temp, simply reuse one of the registers being used - // for argument passing. - RegStorage temp = TargetReg(kArg3, kNotWide); - - // Now load the argument VR and store to the outs. 
- Load32Disp(TargetPtrReg(kSp), current_src_offset, temp); - Store32Disp(TargetPtrReg(kSp), current_dest_offset, temp); - - current_src_offset += bytes_to_move; - current_dest_offset += bytes_to_move; - regs_left_to_pass_via_stack -= (bytes_to_move >> 2); - } - DCHECK_EQ(regs_left_to_pass_via_stack, 0); - } - - // Now handle rest not registers if they are - if (in_to_reg_storage_mapping.IsThereStackMapped()) { - RegStorage regWide = TargetReg(kArg3, kWide); - for (int i = start_index; i <= last_mapped_in + regs_left_to_pass_via_stack; i++) { - RegLocation rl_arg = info->args[i]; - rl_arg = UpdateRawLoc(rl_arg); - RegStorage reg = in_to_reg_storage_mapping.Get(i); - if (!reg.Valid()) { - int out_offset = StackVisitor::GetOutVROffset(i, cu_->instruction_set); - - { - ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - if (rl_arg.wide) { - if (rl_arg.location == kLocPhysReg) { - StoreBaseDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, k64, kNotVolatile); - } else { - LoadValueDirectWideFixed(rl_arg, regWide); - StoreBaseDisp(TargetPtrReg(kSp), out_offset, regWide, k64, kNotVolatile); - } - } else { - if (rl_arg.location == kLocPhysReg) { - if (rl_arg.ref) { - StoreRefDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, kNotVolatile); - } else { - StoreBaseDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, k32, kNotVolatile); - } - } else { - if (rl_arg.ref) { - RegStorage regSingle = TargetReg(kArg2, kRef); - LoadValueDirectFixed(rl_arg, regSingle); - StoreRefDisp(TargetPtrReg(kSp), out_offset, regSingle, kNotVolatile); - } else { - RegStorage regSingle = TargetReg(kArg2, kNotWide); - LoadValueDirectFixed(rl_arg, regSingle); - StoreBaseDisp(TargetPtrReg(kSp), out_offset, regSingle, k32, kNotVolatile); - } - } - } - } - call_state = next_call_insn(cu_, info, call_state, target_method, - vtable_idx, direct_code, direct_method, type); - } - if (rl_arg.wide) { - i++; - } - } - } - - // Finish with mapped registers - for (int i = start_index; i <= last_mapped_in; i++) { - RegLocation rl_arg = info->args[i]; - rl_arg = UpdateRawLoc(rl_arg); - RegStorage reg = in_to_reg_storage_mapping.Get(i); - if (reg.Valid()) { - if (rl_arg.wide) { - LoadValueDirectWideFixed(rl_arg, reg); - } else { - LoadValueDirectFixed(rl_arg, reg); - } - call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, - direct_code, direct_method, type); - } - if (rl_arg.wide) { - i++; - } - } - - call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, - direct_code, direct_method, type); - if (pcrLabel) { - if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitNullChecks()) { - *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1, kRef), info->opt_flags); - } else { - *pcrLabel = nullptr; - // In lieu of generating a check for kArg1 being null, we need to - // perform a load when doing implicit checks. - RegStorage tmp = AllocTemp(); - Load32Disp(TargetReg(kArg1, kRef), 0, tmp); - MarkPossibleNullPointerException(info->opt_flags); - FreeTemp(tmp); - } - } - return call_state; -} - void Arm64Mir2Lir::InstallLiteralPools() { // PC-relative calls to methods. patches_.reserve(call_method_insns_.size()); @@ -1218,4 +841,12 @@ void Arm64Mir2Lir::InstallLiteralPools() { Mir2Lir::InstallLiteralPools(); } +int Arm64Mir2Lir::GenDalvikArgsBulkCopy(CallInfo* /*info*/, int /*first*/, int count) { + /* + * TODO: Improve by adding block copy for large number of arguments. For now, just + * copy a Dalvik vreg at a time. 
+ */ + return count; +} + } // namespace art diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc index 066041c6ad..cc61e93d82 100644 --- a/compiler/dex/quick/codegen_util.cc +++ b/compiler/dex/quick/codegen_util.cc @@ -997,7 +997,8 @@ Mir2Lir::Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena last_lir_insn_(nullptr), slow_paths_(arena->Adapter(kArenaAllocSlowPaths)), mem_ref_type_(ResourceMask::kHeapRef), - mask_cache_(arena) { + mask_cache_(arena), + in_to_reg_storage_mapping_(arena) { switch_tables_.reserve(4); fill_array_data_.reserve(4); tempreg_info_.reserve(20); diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc index 31b81bfb92..9462d3d08f 100755 --- a/compiler/dex/quick/gen_invoke.cc +++ b/compiler/dex/quick/gen_invoke.cc @@ -401,59 +401,50 @@ void Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) { * half to memory as well. */ ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - for (uint32_t i = 0; i < mir_graph_->GetNumOfInVRs(); i++) { - PromotionMap* v_map = &promotion_map_[start_vreg + i]; + RegLocation* t_loc = nullptr; + for (uint32_t i = 0; i < mir_graph_->GetNumOfInVRs(); i += t_loc->wide ? 2 : 1) { + // get reg corresponding to input RegStorage reg = GetArgMappingToPhysicalReg(i); + t_loc = &ArgLocs[i]; + + // If the wide input appeared as single, flush it and go + // as it comes from memory. + if (t_loc->wide && reg.Valid() && !reg.Is64Bit()) { + StoreBaseDisp(TargetPtrReg(kSp), SRegOffset(start_vreg + i), reg, k32, kNotVolatile); + reg = RegStorage::InvalidReg(); + } if (reg.Valid()) { - // If arriving in register - bool need_flush = true; - RegLocation* t_loc = &ArgLocs[i]; - if ((v_map->core_location == kLocPhysReg) && !t_loc->fp) { - OpRegCopy(RegStorage::Solo32(v_map->core_reg), reg); - need_flush = false; - } else if ((v_map->fp_location == kLocPhysReg) && t_loc->fp) { - OpRegCopy(RegStorage::Solo32(v_map->fp_reg), reg); - need_flush = false; + // If arriving in register. + + // We have already updated the arg location with promoted info + // so we can be based on it. + if (t_loc->location == kLocPhysReg) { + // Just copy it. + if (t_loc->wide) { + OpRegCopyWide(t_loc->reg, reg); + } else { + OpRegCopy(t_loc->reg, reg); + } } else { - need_flush = true; - } - - // For wide args, force flush if not fully promoted - if (t_loc->wide) { - PromotionMap* p_map = v_map + (t_loc->high_word ? -1 : +1); - // Is only half promoted? - need_flush |= (p_map->core_location != v_map->core_location) || - (p_map->fp_location != v_map->fp_location); - if ((cu_->instruction_set == kThumb2) && t_loc->fp && !need_flush) { - /* - * In Arm, a double is represented as a pair of consecutive single float - * registers starting at an even number. It's possible that both Dalvik vRegs - * representing the incoming double were independently promoted as singles - but - * not in a form usable as a double. If so, we need to flush - even though the - * incoming arg appears fully in register. At this point in the code, both - * halves of the double are promoted. Make sure they are in a usable form. - */ - int lowreg_index = start_vreg + i + (t_loc->high_word ? -1 : 0); - int low_reg = promotion_map_[lowreg_index].fp_reg; - int high_reg = promotion_map_[lowreg_index + 1].fp_reg; - if (((low_reg & 0x1) != 0) || (high_reg != (low_reg + 1))) { - need_flush = true; - } + // Needs flush. 
+ int offset = SRegOffset(start_vreg + i); + if (t_loc->ref) { + StoreRefDisp(TargetPtrReg(kSp), offset, reg, kNotVolatile); + } else { + StoreBaseDisp(TargetPtrReg(kSp), offset, reg, t_loc->wide ? k64 : k32, kNotVolatile); } } - if (need_flush) { - Store32Disp(TargetPtrReg(kSp), SRegOffset(start_vreg + i), reg); - } } else { - // If arriving in frame & promoted - if (v_map->core_location == kLocPhysReg) { - Load32Disp(TargetPtrReg(kSp), SRegOffset(start_vreg + i), - RegStorage::Solo32(v_map->core_reg)); - } - if (v_map->fp_location == kLocPhysReg) { - Load32Disp(TargetPtrReg(kSp), SRegOffset(start_vreg + i), - RegStorage::Solo32(v_map->fp_reg)); + // If arriving in frame & promoted. + if (t_loc->location == kLocPhysReg) { + int offset = SRegOffset(start_vreg + i); + if (t_loc->ref) { + LoadRefDisp(TargetPtrReg(kSp), offset, t_loc->reg, kNotVolatile); + } else { + LoadBaseDisp(TargetPtrReg(kSp), offset, t_loc->reg, t_loc->wide ? k64 : k32, + kNotVolatile); + } } } } @@ -568,7 +559,7 @@ static int NextSDCallInsn(CompilationUnit* cu, CallInfo* info, * emit the next instruction in a virtual invoke sequence. * We can use kLr as a temp prior to target address loading * Note also that we'll load the first argument ("this") into - * kArg1 here rather than the standard LoadArgRegs. + * kArg1 here rather than the standard GenDalvikArgs. */ static int NextVCallInsn(CompilationUnit* cu, CallInfo* info, int state, const MethodReference& target_method, @@ -612,7 +603,7 @@ static int NextVCallInsn(CompilationUnit* cu, CallInfo* info, * Emit the next instruction in an invoke interface sequence. This will do a lookup in the * class's IMT, calling either the actual method or art_quick_imt_conflict_trampoline if * more than one interface method map to the same index. Note also that we'll load the first - * argument ("this") into kArg1 here rather than the standard LoadArgRegs. + * argument ("this") into kArg1 here rather than the standard GenDalvikArgs. */ static int NextInterfaceCallInsn(CompilationUnit* cu, CallInfo* info, int state, const MethodReference& target_method, @@ -719,158 +710,6 @@ static int NextInterfaceCallInsnWithAccessCheck(CompilationUnit* cu, target_method, 0); } -int Mir2Lir::LoadArgRegs(CallInfo* info, int call_state, - NextCallInsn next_call_insn, - const MethodReference& target_method, - uint32_t vtable_idx, uintptr_t direct_code, - uintptr_t direct_method, InvokeType type, bool skip_this) { - int last_arg_reg = 3 - 1; - int arg_regs[3] = {TargetReg(kArg1, kNotWide).GetReg(), TargetReg(kArg2, kNotWide).GetReg(), - TargetReg(kArg3, kNotWide).GetReg()}; - - int next_reg = 0; - int next_arg = 0; - if (skip_this) { - next_reg++; - next_arg++; - } - for (; (next_reg <= last_arg_reg) && (next_arg < info->num_arg_words); next_reg++) { - RegLocation rl_arg = info->args[next_arg++]; - rl_arg = UpdateRawLoc(rl_arg); - if (rl_arg.wide && (next_reg <= last_arg_reg - 1)) { - RegStorage r_tmp(RegStorage::k64BitPair, arg_regs[next_reg], arg_regs[next_reg + 1]); - LoadValueDirectWideFixed(rl_arg, r_tmp); - next_reg++; - next_arg++; - } else { - if (rl_arg.wide) { - rl_arg = NarrowRegLoc(rl_arg); - rl_arg.is_const = false; - } - LoadValueDirectFixed(rl_arg, RegStorage::Solo32(arg_regs[next_reg])); - } - call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, - direct_code, direct_method, type); - } - return call_state; -} - -/* - * Load up to 5 arguments, the first three of which will be in - * kArg1 .. kArg3. 
On entry kArg0 contains the current method pointer, - * and as part of the load sequence, it must be replaced with - * the target method pointer. Note, this may also be called - * for "range" variants if the number of arguments is 5 or fewer. - */ -int Mir2Lir::GenDalvikArgsNoRange(CallInfo* info, - int call_state, LIR** pcrLabel, NextCallInsn next_call_insn, - const MethodReference& target_method, - uint32_t vtable_idx, uintptr_t direct_code, - uintptr_t direct_method, InvokeType type, bool skip_this) { - RegLocation rl_arg; - - /* If no arguments, just return */ - if (info->num_arg_words == 0) - return call_state; - - call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, - direct_code, direct_method, type); - - DCHECK_LE(info->num_arg_words, 5); - if (info->num_arg_words > 3) { - int32_t next_use = 3; - // Detect special case of wide arg spanning arg3/arg4 - RegLocation rl_use0 = info->args[0]; - RegLocation rl_use1 = info->args[1]; - RegLocation rl_use2 = info->args[2]; - if (((!rl_use0.wide && !rl_use1.wide) || rl_use0.wide) && rl_use2.wide) { - RegStorage reg; - // Wide spans, we need the 2nd half of uses[2]. - rl_arg = UpdateLocWide(rl_use2); - if (rl_arg.location == kLocPhysReg) { - if (rl_arg.reg.IsPair()) { - reg = rl_arg.reg.GetHigh(); - } else { - RegisterInfo* reg_info = GetRegInfo(rl_arg.reg); - reg_info = reg_info->FindMatchingView(RegisterInfo::kHighSingleStorageMask); - if (reg_info == nullptr) { - // NOTE: For hard float convention we won't split arguments across reg/mem. - UNIMPLEMENTED(FATAL) << "Needs hard float api."; - } - reg = reg_info->GetReg(); - } - } else { - // kArg2 & rArg3 can safely be used here - reg = TargetReg(kArg3, kNotWide); - { - ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - Load32Disp(TargetPtrReg(kSp), SRegOffset(rl_arg.s_reg_low) + 4, reg); - } - call_state = next_call_insn(cu_, info, call_state, target_method, - vtable_idx, direct_code, direct_method, type); - } - { - ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - Store32Disp(TargetPtrReg(kSp), (next_use + 1) * 4, reg); - } - call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, - direct_code, direct_method, type); - next_use++; - } - // Loop through the rest - while (next_use < info->num_arg_words) { - RegStorage arg_reg; - rl_arg = info->args[next_use]; - rl_arg = UpdateRawLoc(rl_arg); - if (rl_arg.location == kLocPhysReg) { - arg_reg = rl_arg.reg; - } else { - arg_reg = TargetReg(kArg2, rl_arg.wide ? 
kWide : kNotWide); - if (rl_arg.wide) { - LoadValueDirectWideFixed(rl_arg, arg_reg); - } else { - LoadValueDirectFixed(rl_arg, arg_reg); - } - call_state = next_call_insn(cu_, info, call_state, target_method, - vtable_idx, direct_code, direct_method, type); - } - int outs_offset = (next_use + 1) * 4; - { - ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - if (rl_arg.wide) { - StoreBaseDisp(TargetPtrReg(kSp), outs_offset, arg_reg, k64, kNotVolatile); - next_use += 2; - } else { - Store32Disp(TargetPtrReg(kSp), outs_offset, arg_reg); - next_use++; - } - } - call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, - direct_code, direct_method, type); - } - } - - call_state = LoadArgRegs(info, call_state, next_call_insn, - target_method, vtable_idx, direct_code, direct_method, - type, skip_this); - - if (pcrLabel) { - if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitNullChecks()) { - *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1, kRef), info->opt_flags); - } else { - *pcrLabel = nullptr; - if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && - (info->opt_flags & MIR_IGNORE_NULL_CHECK)) { - return call_state; - } - // In lieu of generating a check for kArg1 being null, we need to - // perform a load when doing implicit checks. - GenImplicitNullCheck(TargetReg(kArg1, kRef), info->opt_flags); - } - } - return call_state; -} - // Default implementation of implicit null pointer check. // Overridden by arch specific as necessary. void Mir2Lir::GenImplicitNullCheck(RegStorage reg, int opt_flags) { @@ -883,210 +722,195 @@ void Mir2Lir::GenImplicitNullCheck(RegStorage reg, int opt_flags) { FreeTemp(tmp); } - -/* - * May have 0+ arguments (also used for jumbo). Note that - * source virtual registers may be in physical registers, so may - * need to be flushed to home location before copying. This - * applies to arg3 and above (see below). - * - * Two general strategies: - * If < 20 arguments - * Pass args 3-18 using vldm/vstm block copy - * Pass arg0, arg1 & arg2 in kArg1-kArg3 - * If 20+ arguments - * Pass args arg19+ using memcpy block copy - * Pass arg0, arg1 & arg2 in kArg1-kArg3 - * +/** + * @brief Used to flush promoted registers if they are used as argument + * in an invocation. + * @param info the infromation about arguments for invocation. + * @param start the first argument we should start to look from. */ -int Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, - LIR** pcrLabel, NextCallInsn next_call_insn, - const MethodReference& target_method, - uint32_t vtable_idx, uintptr_t direct_code, uintptr_t direct_method, - InvokeType type, bool skip_this) { - // If we can treat it as non-range (Jumbo ops will use range form) - if (info->num_arg_words <= 5) - return GenDalvikArgsNoRange(info, call_state, pcrLabel, - next_call_insn, target_method, vtable_idx, - direct_code, direct_method, type, skip_this); - /* - * First load the non-register arguments. Both forms expect all - * of the source arguments to be in their home frame location, so - * scan the s_reg names and flush any that have been promoted to - * frame backing storage. - */ +void Mir2Lir::GenDalvikArgsFlushPromoted(CallInfo* info, int start) { + if (cu_->disable_opt & (1 << kPromoteRegs)) { + // This make sense only if promotion is enabled. 
+ return; + } + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); // Scan the rest of the args - if in phys_reg flush to memory - for (int next_arg = 0; next_arg < info->num_arg_words;) { + for (int next_arg = start; next_arg < info->num_arg_words;) { RegLocation loc = info->args[next_arg]; if (loc.wide) { loc = UpdateLocWide(loc); - if ((next_arg >= 2) && (loc.location == kLocPhysReg)) { - ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); + if (loc.location == kLocPhysReg) { StoreBaseDisp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k64, kNotVolatile); } next_arg += 2; } else { loc = UpdateLoc(loc); - if ((next_arg >= 3) && (loc.location == kLocPhysReg)) { - ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - Store32Disp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg); + if (loc.location == kLocPhysReg) { + if (loc.ref) { + StoreRefDisp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, kNotVolatile); + } else { + StoreBaseDisp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k32, + kNotVolatile); + } } next_arg++; } } +} - // The first 3 arguments are passed via registers. - // TODO: For 64-bit, instead of hardcoding 4 for Method* size, we should either - // get size of uintptr_t or size of object reference according to model being used. - int outs_offset = 4 /* Method* */ + (3 * sizeof(uint32_t)); - int start_offset = SRegOffset(info->args[3].s_reg_low); - int regs_left_to_pass_via_stack = info->num_arg_words - 3; - DCHECK_GT(regs_left_to_pass_via_stack, 0); - - if (cu_->instruction_set == kThumb2 && regs_left_to_pass_via_stack <= 16) { - // Use vldm/vstm pair using kArg3 as a temp - call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, - direct_code, direct_method, type); - OpRegRegImm(kOpAdd, TargetReg(kArg3, kRef), TargetPtrReg(kSp), start_offset); - LIR* ld = nullptr; - { - ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - ld = OpVldm(TargetReg(kArg3, kRef), regs_left_to_pass_via_stack); - } - // TUNING: loosen barrier - ld->u.m.def_mask = &kEncodeAll; - call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, - direct_code, direct_method, type); - OpRegRegImm(kOpAdd, TargetReg(kArg3, kRef), TargetPtrReg(kSp), 4 /* Method* */ + (3 * 4)); - call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, - direct_code, direct_method, type); - LIR* st = nullptr; - { - ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - st = OpVstm(TargetReg(kArg3, kRef), regs_left_to_pass_via_stack); - } - st->u.m.def_mask = &kEncodeAll; - call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, - direct_code, direct_method, type); - } else if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) { - int current_src_offset = start_offset; - int current_dest_offset = outs_offset; - - // Only davik regs are accessed in this loop; no next_call_insn() calls. - ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - while (regs_left_to_pass_via_stack > 0) { - // This is based on the knowledge that the stack itself is 16-byte aligned. - bool src_is_16b_aligned = (current_src_offset & 0xF) == 0; - bool dest_is_16b_aligned = (current_dest_offset & 0xF) == 0; - size_t bytes_to_move; - - /* - * The amount to move defaults to 32-bit. If there are 4 registers left to move, then do a - * a 128-bit move because we won't get the chance to try to aligned. 
If there are more than - * 4 registers left to move, consider doing a 128-bit only if either src or dest are aligned. - * We do this because we could potentially do a smaller move to align. - */ - if (regs_left_to_pass_via_stack == 4 || - (regs_left_to_pass_via_stack > 4 && (src_is_16b_aligned || dest_is_16b_aligned))) { - // Moving 128-bits via xmm register. - bytes_to_move = sizeof(uint32_t) * 4; - - // Allocate a free xmm temp. Since we are working through the calling sequence, - // we expect to have an xmm temporary available. AllocTempDouble will abort if - // there are no free registers. - RegStorage temp = AllocTempDouble(); - - LIR* ld1 = nullptr; - LIR* ld2 = nullptr; - LIR* st1 = nullptr; - LIR* st2 = nullptr; - - /* - * The logic is similar for both loads and stores. If we have 16-byte alignment, - * do an aligned move. If we have 8-byte alignment, then do the move in two - * parts. This approach prevents possible cache line splits. Finally, fall back - * to doing an unaligned move. In most cases we likely won't split the cache - * line but we cannot prove it and thus take a conservative approach. - */ - bool src_is_8b_aligned = (current_src_offset & 0x7) == 0; - bool dest_is_8b_aligned = (current_dest_offset & 0x7) == 0; - - if (src_is_16b_aligned) { - ld1 = OpMovRegMem(temp, TargetPtrReg(kSp), current_src_offset, kMovA128FP); - } else if (src_is_8b_aligned) { - ld1 = OpMovRegMem(temp, TargetPtrReg(kSp), current_src_offset, kMovLo128FP); - ld2 = OpMovRegMem(temp, TargetPtrReg(kSp), current_src_offset + (bytes_to_move >> 1), - kMovHi128FP); - } else { - ld1 = OpMovRegMem(temp, TargetPtrReg(kSp), current_src_offset, kMovU128FP); - } +/** + * @brief Used to optimize the copying of VRs which are arguments of invocation. + * Please note that you should flush promoted registers first if you copy. + * If implementation does copying it may skip several of the first VRs but must copy + * till the end. Implementation must return the number of skipped VRs + * (it might be all VRs). + * @see GenDalvikArgsFlushPromoted + * @param info the information about arguments for invocation. + * @param first the first argument we should start to look from. + * @param count the number of remaining arguments we can handle. + * @return the number of arguments which we did not handle. Unhandled arguments + * must be attached to the first one. + */ +int Mir2Lir::GenDalvikArgsBulkCopy(CallInfo* info, int first, int count) { + // call is pretty expensive, let's use it if count is big. 
+ if (count > 16) { + GenDalvikArgsFlushPromoted(info, first); + int start_offset = SRegOffset(info->args[first].s_reg_low); + int outs_offset = StackVisitor::GetOutVROffset(first, cu_->instruction_set); - if (dest_is_16b_aligned) { - st1 = OpMovMemReg(TargetPtrReg(kSp), current_dest_offset, temp, kMovA128FP); - } else if (dest_is_8b_aligned) { - st1 = OpMovMemReg(TargetPtrReg(kSp), current_dest_offset, temp, kMovLo128FP); - st2 = OpMovMemReg(TargetPtrReg(kSp), current_dest_offset + (bytes_to_move >> 1), - temp, kMovHi128FP); - } else { - st1 = OpMovMemReg(TargetPtrReg(kSp), current_dest_offset, temp, kMovU128FP); - } + OpRegRegImm(kOpAdd, TargetReg(kArg0, kRef), TargetPtrReg(kSp), outs_offset); + OpRegRegImm(kOpAdd, TargetReg(kArg1, kRef), TargetPtrReg(kSp), start_offset); + CallRuntimeHelperRegRegImm(kQuickMemcpy, TargetReg(kArg0, kRef), TargetReg(kArg1, kRef), + count * 4, false); + count = 0; + } + return count; +} + +int Mir2Lir::GenDalvikArgs(CallInfo* info, int call_state, + LIR** pcrLabel, NextCallInsn next_call_insn, + const MethodReference& target_method, + uint32_t vtable_idx, uintptr_t direct_code, uintptr_t direct_method, + InvokeType type, bool skip_this) { + // If no arguments, just return. + if (info->num_arg_words == 0) + return call_state; - // TODO If we could keep track of aliasing information for memory accesses that are wider - // than 64-bit, we wouldn't need to set up a barrier. - if (ld1 != nullptr) { - if (ld2 != nullptr) { - // For 64-bit load we can actually set up the aliasing information. - AnnotateDalvikRegAccess(ld1, current_src_offset >> 2, true, true); - AnnotateDalvikRegAccess(ld2, (current_src_offset + (bytes_to_move >> 1)) >> 2, true, - true); + const int start_index = skip_this ? 1 : 0; + + // Get architecture dependent mapping between output VRs and physical registers + // basing on shorty of method to call. + InToRegStorageMapping in_to_reg_storage_mapping(arena_); + { + const char* target_shorty = mir_graph_->GetShortyFromMethodReference(target_method); + ShortyIterator shorty_iterator(target_shorty, type == kStatic); + in_to_reg_storage_mapping.Initialize(&shorty_iterator, GetResetedInToRegStorageMapper()); + } + + int stack_map_start = std::max(in_to_reg_storage_mapping.GetMaxMappedIn() + 1, start_index); + if ((stack_map_start < info->num_arg_words) && info->args[stack_map_start].high_word) { + // It is possible that the last mapped reg is 32 bit while arg is 64-bit. + // It will be handled together with low part mapped to register. + stack_map_start++; + } + int regs_left_to_pass_via_stack = info->num_arg_words - stack_map_start; + + // If it is a range case we can try to copy remaining VRs (not mapped to physical registers) + // using more optimal algorithm. + if (info->is_range && regs_left_to_pass_via_stack > 1) { + regs_left_to_pass_via_stack = GenDalvikArgsBulkCopy(info, stack_map_start, + regs_left_to_pass_via_stack); + } + + // Now handle any remaining VRs mapped to stack. + if (in_to_reg_storage_mapping.HasArgumentsOnStack()) { + // Two temps but do not use kArg1, it might be this which we can skip. + // Separate single and wide - it can give some advantage. 
+ RegStorage regRef = TargetReg(kArg3, kRef); + RegStorage regSingle = TargetReg(kArg3, kNotWide); + RegStorage regWide = TargetReg(kArg2, kWide); + for (int i = start_index; + i < stack_map_start + regs_left_to_pass_via_stack; i++) { + RegLocation rl_arg = info->args[i]; + rl_arg = UpdateRawLoc(rl_arg); + RegStorage reg = in_to_reg_storage_mapping.Get(i); + if (!reg.Valid()) { + int out_offset = StackVisitor::GetOutVROffset(i, cu_->instruction_set); + { + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); + if (rl_arg.wide) { + if (rl_arg.location == kLocPhysReg) { + StoreBaseDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, k64, kNotVolatile); + } else { + LoadValueDirectWideFixed(rl_arg, regWide); + StoreBaseDisp(TargetPtrReg(kSp), out_offset, regWide, k64, kNotVolatile); + } } else { - // Set barrier for 128-bit load. - ld1->u.m.def_mask = &kEncodeAll; + if (rl_arg.location == kLocPhysReg) { + if (rl_arg.ref) { + StoreRefDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, kNotVolatile); + } else { + StoreBaseDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, k32, kNotVolatile); + } + } else { + if (rl_arg.ref) { + LoadValueDirectFixed(rl_arg, regRef); + StoreRefDisp(TargetPtrReg(kSp), out_offset, regRef, kNotVolatile); + } else { + LoadValueDirectFixed(rl_arg, regSingle); + StoreBaseDisp(TargetPtrReg(kSp), out_offset, regSingle, k32, kNotVolatile); + } + } } } - if (st1 != nullptr) { - if (st2 != nullptr) { - // For 64-bit store we can actually set up the aliasing information. - AnnotateDalvikRegAccess(st1, current_dest_offset >> 2, false, true); - AnnotateDalvikRegAccess(st2, (current_dest_offset + (bytes_to_move >> 1)) >> 2, false, - true); + call_state = next_call_insn(cu_, info, call_state, target_method, + vtable_idx, direct_code, direct_method, type); + } + if (rl_arg.wide) { + i++; + } + } + } + + // Finish with VRs mapped to physical registers. + for (int i = start_index; i < stack_map_start; i++) { + RegLocation rl_arg = info->args[i]; + rl_arg = UpdateRawLoc(rl_arg); + RegStorage reg = in_to_reg_storage_mapping.Get(i); + if (reg.Valid()) { + if (rl_arg.wide) { + // if reg is not 64-bit (it is half of 64-bit) then handle it separately. + if (!reg.Is64Bit()) { + // TODO: REVISIT: This adds a spill of low part while we could just copy it. + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); + if (rl_arg.location == kLocPhysReg) { + int out_offset = StackVisitor::GetOutVROffset(i, cu_->instruction_set); + // Dump it to memory and then load only low part + StoreBaseDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, k64, kNotVolatile); + LoadBaseDisp(TargetPtrReg(kSp), out_offset, reg, k32, kNotVolatile); } else { - // Set barrier for 128-bit store. - st1->u.m.def_mask = &kEncodeAll; + int out_offset = StackVisitor::GetOutVROffset(i + 1, cu_->instruction_set); + // First, use target reg for high part. + LoadBaseDisp(TargetPtrReg(kSp), SRegOffset(rl_arg.s_reg_low + 1), reg, k32, + kNotVolatile); + StoreBaseDisp(TargetPtrReg(kSp), out_offset, reg, k32, kNotVolatile); + // Now load target reg with low part. + LoadBaseDisp(TargetPtrReg(kSp), SRegOffset(rl_arg.s_reg_low), reg, k32, kNotVolatile); } + } else { + LoadValueDirectWideFixed(rl_arg, reg); } - - // Free the temporary used for the data movement. - FreeTemp(temp); } else { - // Moving 32-bits via general purpose register. - bytes_to_move = sizeof(uint32_t); - - // Instead of allocating a new temp, simply reuse one of the registers being used - // for argument passing. 
- RegStorage temp = TargetReg(kArg3, kNotWide); - - // Now load the argument VR and store to the outs. - Load32Disp(TargetPtrReg(kSp), current_src_offset, temp); - Store32Disp(TargetPtrReg(kSp), current_dest_offset, temp); + LoadValueDirectFixed(rl_arg, reg); } - - current_src_offset += bytes_to_move; - current_dest_offset += bytes_to_move; - regs_left_to_pass_via_stack -= (bytes_to_move >> 2); + call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, + direct_code, direct_method, type); + } + if (rl_arg.wide) { + i++; } - } else { - // Generate memcpy - OpRegRegImm(kOpAdd, TargetReg(kArg0, kRef), TargetPtrReg(kSp), outs_offset); - OpRegRegImm(kOpAdd, TargetReg(kArg1, kRef), TargetPtrReg(kSp), start_offset); - CallRuntimeHelperRegRegImm(kQuickMemcpy, TargetReg(kArg0, kRef), TargetReg(kArg1, kRef), - (info->num_arg_words - 3) * 4, false); } - call_state = LoadArgRegs(info, call_state, next_call_insn, - target_method, vtable_idx, direct_code, direct_method, - type, skip_this); - call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, direct_code, direct_method, type); if (pcrLabel) { @@ -1094,18 +918,20 @@ int Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1, kRef), info->opt_flags); } else { *pcrLabel = nullptr; - if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && - (info->opt_flags & MIR_IGNORE_NULL_CHECK)) { - return call_state; - } - // In lieu of generating a check for kArg1 being null, we need to - // perform a load when doing implicit checks. GenImplicitNullCheck(TargetReg(kArg1, kRef), info->opt_flags); } } return call_state; } +RegStorage Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) { + if (!in_to_reg_storage_mapping_.IsInitialized()) { + ShortyIterator shorty_iterator(cu_->shorty, cu_->invoke_type == kStatic); + in_to_reg_storage_mapping_.Initialize(&shorty_iterator, GetResetedInToRegStorageMapper()); + } + return in_to_reg_storage_mapping_.Get(arg_num); +} + RegLocation Mir2Lir::InlineTarget(CallInfo* info) { RegLocation res; if (info->result.location == kLocInvalid) { @@ -1719,17 +1545,10 @@ void Mir2Lir::GenInvokeNoInline(CallInfo* info) { skip_this = fast_path; } MethodReference target_method = method_info.GetTargetMethod(); - if (!info->is_range) { - call_state = GenDalvikArgsNoRange(info, call_state, p_null_ck, - next_call_insn, target_method, method_info.VTableIndex(), - method_info.DirectCode(), method_info.DirectMethod(), - original_type, skip_this); - } else { - call_state = GenDalvikArgsRange(info, call_state, p_null_ck, - next_call_insn, target_method, method_info.VTableIndex(), - method_info.DirectCode(), method_info.DirectMethod(), - original_type, skip_this); - } + call_state = GenDalvikArgs(info, call_state, p_null_ck, + next_call_insn, target_method, method_info.VTableIndex(), + method_info.DirectCode(), method_info.DirectMethod(), + original_type, skip_this); // Finish up any of the call sequence not interleaved in arg loading while (call_state >= 0) { call_state = next_call_insn(cu_, info, call_state, target_method, method_info.VTableIndex(), diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h index e08846c325..8f976df09d 100644 --- a/compiler/dex/quick/mips/codegen_mips.h +++ b/compiler/dex/quick/mips/codegen_mips.h @@ -24,6 +24,26 @@ namespace art { class MipsMir2Lir FINAL : public Mir2Lir { + protected: + class InToRegStorageMipsMapper : public InToRegStorageMapper { + public: + explicit 
InToRegStorageMipsMapper(Mir2Lir* m2l) : m2l_(m2l), cur_core_reg_(0) {} + virtual RegStorage GetNextReg(ShortyArg arg); + virtual void Reset() OVERRIDE { + cur_core_reg_ = 0; + } + protected: + Mir2Lir* m2l_; + private: + size_t cur_core_reg_; + }; + + InToRegStorageMipsMapper in_to_reg_storage_mips_mapper_; + InToRegStorageMapper* GetResetedInToRegStorageMapper() OVERRIDE { + in_to_reg_storage_mips_mapper_.Reset(); + return &in_to_reg_storage_mips_mapper_; + } + public: MipsMir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena); @@ -56,7 +76,6 @@ class MipsMir2Lir FINAL : public Mir2Lir { // Required for target - register utilities. RegStorage Solo64ToPair64(RegStorage reg); RegStorage TargetReg(SpecialTargetRegister reg); - RegStorage GetArgMappingToPhysicalReg(int arg_num); RegLocation GetReturnAlt(); RegLocation GetReturnWideAlt(); RegLocation LocCReturn(); diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc index 185112dbf9..efa130c65d 100644 --- a/compiler/dex/quick/mips/target_mips.cc +++ b/compiler/dex/quick/mips/target_mips.cc @@ -122,18 +122,20 @@ RegStorage MipsMir2Lir::TargetReg(SpecialTargetRegister reg) { return res_reg; } -RegStorage MipsMir2Lir::GetArgMappingToPhysicalReg(int arg_num) { - // For the 32-bit internal ABI, the first 3 arguments are passed in registers. - switch (arg_num) { - case 0: - return rs_rMIPS_ARG1; - case 1: - return rs_rMIPS_ARG2; - case 2: - return rs_rMIPS_ARG3; - default: - return RegStorage::InvalidReg(); +RegStorage MipsMir2Lir::InToRegStorageMipsMapper::GetNextReg(ShortyArg arg) { + const SpecialTargetRegister coreArgMappingToPhysicalReg[] = {kArg1, kArg2, kArg3}; + const size_t coreArgMappingToPhysicalRegSize = arraysize(coreArgMappingToPhysicalReg); + + RegStorage result = RegStorage::InvalidReg(); + if (cur_core_reg_ < coreArgMappingToPhysicalRegSize) { + result = m2l_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++], + arg.IsRef() ? 
kRef : kNotWide); + if (arg.IsWide() && cur_core_reg_ < coreArgMappingToPhysicalRegSize) { + result = RegStorage::MakeRegPair( + result, m2l_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++], kNotWide)); + } } + return result; } /* @@ -602,7 +604,7 @@ RegisterClass MipsMir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volati } MipsMir2Lir::MipsMir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena) - : Mir2Lir(cu, mir_graph, arena) { + : Mir2Lir(cu, mir_graph, arena), in_to_reg_storage_mips_mapper_(this) { for (int i = 0; i < kMipsLast; i++) { DCHECK_EQ(MipsMir2Lir::EncodingMap[i].opcode, i) << "Encoding order for " << MipsMir2Lir::EncodingMap[i].name diff --git a/compiler/dex/quick/mir_to_lir-inl.h b/compiler/dex/quick/mir_to_lir-inl.h index 0aefc2dea8..144790e9d7 100644 --- a/compiler/dex/quick/mir_to_lir-inl.h +++ b/compiler/dex/quick/mir_to_lir-inl.h @@ -276,6 +276,24 @@ inline void Mir2Lir::CheckRegStorage(RegStorage rs, WidenessCheck wide, RefCheck } } +inline Mir2Lir::ShortyIterator::ShortyIterator(const char* shorty, bool is_static) + : cur_(shorty + 1), pending_this_(!is_static), initialized_(false) { + DCHECK(shorty != nullptr); + DCHECK_NE(*shorty, 0); +} + +inline bool Mir2Lir::ShortyIterator::Next() { + if (!initialized_) { + initialized_ = true; + } else if (pending_this_) { + pending_this_ = false; + } else if (*cur_ != 0) { + cur_++; + } + + return *cur_ != 0 || pending_this_; +} + } // namespace art #endif // ART_COMPILER_DEX_QUICK_MIR_TO_LIR_INL_H_ diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc index 320c0f4900..bd88091add 100644 --- a/compiler/dex/quick/mir_to_lir.cc +++ b/compiler/dex/quick/mir_to_lir.cc @@ -53,20 +53,14 @@ RegisterClass Mir2Lir::LocToRegClass(RegLocation loc) { return res; } -void Mir2Lir::LockArg(int in_position, bool wide) { - RegStorage reg_arg_low = GetArgMappingToPhysicalReg(in_position); - RegStorage reg_arg_high = wide ? GetArgMappingToPhysicalReg(in_position + 1) : - RegStorage::InvalidReg(); +void Mir2Lir::LockArg(int in_position, bool) { + RegStorage reg_arg = GetArgMappingToPhysicalReg(in_position); - if (reg_arg_low.Valid()) { - LockTemp(reg_arg_low); - } - if (reg_arg_high.Valid() && reg_arg_low.NotExactlyEquals(reg_arg_high)) { - LockTemp(reg_arg_high); + if (reg_arg.Valid()) { + LockTemp(reg_arg); } } -// TODO: simplify when 32-bit targets go hard-float. RegStorage Mir2Lir::LoadArg(int in_position, RegisterClass reg_class, bool wide) { ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); int offset = StackVisitor::GetOutVROffset(in_position, cu_->instruction_set); @@ -87,81 +81,38 @@ RegStorage Mir2Lir::LoadArg(int in_position, RegisterClass reg_class, bool wide) offset += sizeof(uint64_t); } - if (cu_->target64) { - RegStorage reg_arg = GetArgMappingToPhysicalReg(in_position); - if (!reg_arg.Valid()) { - RegStorage new_reg = - wide ? AllocTypedTempWide(false, reg_class) : AllocTypedTemp(false, reg_class); - LoadBaseDisp(TargetPtrReg(kSp), offset, new_reg, wide ? k64 : k32, kNotVolatile); - return new_reg; - } else { - // Check if we need to copy the arg to a different reg_class. 
- if (!RegClassMatches(reg_class, reg_arg)) { - if (wide) { - RegStorage new_reg = AllocTypedTempWide(false, reg_class); - OpRegCopyWide(new_reg, reg_arg); - reg_arg = new_reg; - } else { - RegStorage new_reg = AllocTypedTemp(false, reg_class); - OpRegCopy(new_reg, reg_arg); - reg_arg = new_reg; - } - } - } - return reg_arg; - } + RegStorage reg_arg = GetArgMappingToPhysicalReg(in_position); - RegStorage reg_arg_low = GetArgMappingToPhysicalReg(in_position); - RegStorage reg_arg_high = wide ? GetArgMappingToPhysicalReg(in_position + 1) : - RegStorage::InvalidReg(); - - // If the VR is wide and there is no register for high part, we need to load it. - if (wide && !reg_arg_high.Valid()) { - // If the low part is not in a reg, we allocate a pair. Otherwise, we just load to high reg. - if (!reg_arg_low.Valid()) { - RegStorage new_regs = AllocTypedTempWide(false, reg_class); - LoadBaseDisp(TargetPtrReg(kSp), offset, new_regs, k64, kNotVolatile); - return new_regs; // The reg_class is OK, we can return. - } else { - // Assume that no ABI allows splitting a wide fp reg between a narrow fp reg and memory, - // i.e. the low part is in a core reg. Load the second part in a core reg as well for now. - DCHECK(!reg_arg_low.IsFloat()); - reg_arg_high = AllocTemp(); - int offset_high = offset + sizeof(uint32_t); - Load32Disp(TargetPtrReg(kSp), offset_high, reg_arg_high); - // Continue below to check the reg_class. - } + // TODO: REVISIT: This adds a spill of low part while we could just copy it. + if (reg_arg.Valid() && wide && (reg_arg.GetWideKind() == kNotWide)) { + // For wide register we've got only half of it. + // Flush it to memory then. + StoreBaseDisp(TargetPtrReg(kSp), offset, reg_arg, k32, kNotVolatile); + reg_arg = RegStorage::InvalidReg(); } - // If the low part is not in a register yet, we need to load it. - if (!reg_arg_low.Valid()) { - // Assume that if the low part of a wide arg is passed in memory, so is the high part, - // thus we don't get here for wide args as it's handled above. Big-endian ABIs could - // conceivably break this assumption but Android supports only little-endian architectures. - DCHECK(!wide); - reg_arg_low = AllocTypedTemp(false, reg_class); - Load32Disp(TargetPtrReg(kSp), offset, reg_arg_low); - return reg_arg_low; // The reg_class is OK, we can return. - } - - RegStorage reg_arg = wide ? RegStorage::MakeRegPair(reg_arg_low, reg_arg_high) : reg_arg_low; - // Check if we need to copy the arg to a different reg_class. - if (!RegClassMatches(reg_class, reg_arg)) { - if (wide) { - RegStorage new_regs = AllocTypedTempWide(false, reg_class); - OpRegCopyWide(new_regs, reg_arg); - reg_arg = new_regs; - } else { - RegStorage new_reg = AllocTypedTemp(false, reg_class); - OpRegCopy(new_reg, reg_arg); - reg_arg = new_reg; + if (!reg_arg.Valid()) { + reg_arg = wide ? AllocTypedTempWide(false, reg_class) : AllocTypedTemp(false, reg_class); + LoadBaseDisp(TargetPtrReg(kSp), offset, reg_arg, wide ? k64 : k32, kNotVolatile); + } else { + // Check if we need to copy the arg to a different reg_class. + if (!RegClassMatches(reg_class, reg_arg)) { + if (wide) { + RegStorage new_reg = AllocTypedTempWide(false, reg_class); + OpRegCopyWide(new_reg, reg_arg); + reg_arg = new_reg; + } else { + RegStorage new_reg = AllocTypedTemp(false, reg_class); + OpRegCopy(new_reg, reg_arg); + reg_arg = new_reg; + } } } return reg_arg; } -// TODO: simpilfy when 32-bit targets go hard float. 
void Mir2Lir::LoadArgDirect(int in_position, RegLocation rl_dest) { + DCHECK_EQ(rl_dest.location, kLocPhysReg); ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); int offset = StackVisitor::GetOutVROffset(in_position, cu_->instruction_set); if (cu_->instruction_set == kX86) { @@ -180,48 +131,23 @@ void Mir2Lir::LoadArgDirect(int in_position, RegLocation rl_dest) { offset += sizeof(uint64_t); } - if (!rl_dest.wide) { - RegStorage reg = GetArgMappingToPhysicalReg(in_position); - if (reg.Valid()) { - OpRegCopy(rl_dest.reg, reg); - } else { - Load32Disp(TargetPtrReg(kSp), offset, rl_dest.reg); - } - } else { - if (cu_->target64) { - RegStorage reg = GetArgMappingToPhysicalReg(in_position); - if (reg.Valid()) { - OpRegCopy(rl_dest.reg, reg); - } else { - LoadBaseDisp(TargetPtrReg(kSp), offset, rl_dest.reg, k64, kNotVolatile); - } - return; - } - - RegStorage reg_arg_low = GetArgMappingToPhysicalReg(in_position); - RegStorage reg_arg_high = GetArgMappingToPhysicalReg(in_position + 1); + RegStorage reg_arg = GetArgMappingToPhysicalReg(in_position); - if (cu_->instruction_set == kX86) { - // Can't handle double split between reg & memory. Flush reg half to memory. - if (rl_dest.reg.IsDouble() && (reg_arg_low.Valid() != reg_arg_high.Valid())) { - DCHECK(reg_arg_low.Valid()); - DCHECK(!reg_arg_high.Valid()); - Store32Disp(TargetPtrReg(kSp), offset, reg_arg_low); - reg_arg_low = RegStorage::InvalidReg(); - } - } + // TODO: REVISIT: This adds a spill of low part while we could just copy it. + if (reg_arg.Valid() && rl_dest.wide && (reg_arg.GetWideKind() == kNotWide)) { + // For wide register we've got only half of it. + // Flush it to memory then. + StoreBaseDisp(TargetPtrReg(kSp), offset, reg_arg, k32, kNotVolatile); + reg_arg = RegStorage::InvalidReg(); + } - if (reg_arg_low.Valid() && reg_arg_high.Valid()) { - OpRegCopyWide(rl_dest.reg, RegStorage::MakeRegPair(reg_arg_low, reg_arg_high)); - } else if (reg_arg_low.Valid() && !reg_arg_high.Valid()) { - OpRegCopy(rl_dest.reg, reg_arg_low); - int offset_high = offset + sizeof(uint32_t); - Load32Disp(TargetPtrReg(kSp), offset_high, rl_dest.reg.GetHigh()); - } else if (!reg_arg_low.Valid() && reg_arg_high.Valid()) { - OpRegCopy(rl_dest.reg.GetHigh(), reg_arg_high); - Load32Disp(TargetPtrReg(kSp), offset, rl_dest.reg.GetLow()); + if (!reg_arg.Valid()) { + LoadBaseDisp(TargetPtrReg(kSp), offset, rl_dest.reg, rl_dest.wide ? k64 : k32, kNotVolatile); + } else { + if (rl_dest.wide) { + OpRegCopyWide(rl_dest.reg, reg_arg); } else { - LoadBaseDisp(TargetPtrReg(kSp), offset, rl_dest.reg, k64, kNotVolatile); + OpRegCopy(rl_dest.reg, reg_arg); } } } @@ -1372,4 +1298,35 @@ size_t Mir2Lir::GetInstructionOffset(LIR* lir) { UNREACHABLE(); } +void Mir2Lir::InToRegStorageMapping::Initialize(ShortyIterator* shorty, + InToRegStorageMapper* mapper) { + DCHECK(mapper != nullptr); + DCHECK(shorty != nullptr); + max_mapped_in_ = -1; + has_arguments_on_stack_ = false; + while (shorty->Next()) { + ShortyArg arg = shorty->GetArg(); + RegStorage reg = mapper->GetNextReg(arg); + if (reg.Valid()) { + mapping_.Put(count_, reg); + max_mapped_in_ = count_; + // If the VR is wide and was mapped as wide then account for it. + if (arg.IsWide() && reg.Is64Bit()) { + max_mapped_in_++; + } + } else { + has_arguments_on_stack_ = true; + } + count_ += arg.IsWide() ? 
2 : 1; + } + initialized_ = true; +} + +RegStorage Mir2Lir::InToRegStorageMapping::Get(int in_position) { + DCHECK(IsInitialized()); + DCHECK_LT(in_position, count_); + auto res = mapping_.find(in_position); + return res != mapping_.end() ? res->second : RegStorage::InvalidReg(); +} + } // namespace art diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h index 5d78a6e25c..dd0933018f 100644 --- a/compiler/dex/quick/mir_to_lir.h +++ b/compiler/dex/quick/mir_to_lir.h @@ -905,19 +905,14 @@ class Mir2Lir : public Backend { virtual LIR* GenCallInsn(const MirMethodLoweringInfo& method_info); virtual void FlushIns(RegLocation* ArgLocs, RegLocation rl_method); - virtual int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel, - NextCallInsn next_call_insn, - const MethodReference& target_method, - uint32_t vtable_idx, - uintptr_t direct_code, uintptr_t direct_method, InvokeType type, - bool skip_this); - virtual int GenDalvikArgsRange(CallInfo* info, int call_state, LIR** pcrLabel, - NextCallInsn next_call_insn, - const MethodReference& target_method, - uint32_t vtable_idx, - uintptr_t direct_code, uintptr_t direct_method, InvokeType type, - bool skip_this); - + virtual int GenDalvikArgs(CallInfo* info, int call_state, LIR** pcrLabel, + NextCallInsn next_call_insn, + const MethodReference& target_method, + uint32_t vtable_idx, + uintptr_t direct_code, uintptr_t direct_method, InvokeType type, + bool skip_this); + virtual int GenDalvikArgsBulkCopy(CallInfo* info, int first, int count); + virtual void GenDalvikArgsFlushPromoted(CallInfo* info, int start); /** * @brief Used to determine the register location of destination. * @details This is needed during generation of inline intrinsics because it finds destination @@ -958,12 +953,6 @@ class Mir2Lir : public Backend { bool GenInlinedUnsafeGet(CallInfo* info, bool is_long, bool is_volatile); bool GenInlinedUnsafePut(CallInfo* info, bool is_long, bool is_object, bool is_volatile, bool is_ordered); - virtual int LoadArgRegs(CallInfo* info, int call_state, - NextCallInsn next_call_insn, - const MethodReference& target_method, - uint32_t vtable_idx, - uintptr_t direct_code, uintptr_t direct_method, InvokeType type, - bool skip_this); // Shared by all targets - implemented in gen_loadstore.cc. RegLocation LoadCurrMethod(); @@ -1228,7 +1217,7 @@ class Mir2Lir : public Backend { } } - virtual RegStorage GetArgMappingToPhysicalReg(int arg_num) = 0; + RegStorage GetArgMappingToPhysicalReg(int arg_num); virtual RegLocation GetReturnAlt() = 0; virtual RegLocation GetReturnWideAlt() = 0; virtual RegLocation LocCReturn() = 0; @@ -1780,6 +1769,63 @@ class Mir2Lir : public Backend { // to deduplicate the masks. ResourceMaskCache mask_cache_; + protected: + // ABI support + class ShortyArg { + public: + explicit ShortyArg(char type) : type_(type) { } + bool IsFP() { return type_ == 'F' || type_ == 'D'; } + bool IsWide() { return type_ == 'J' || type_ == 'D'; } + bool IsRef() { return type_ == 'L'; } + char GetType() { return type_; } + private: + char type_; + }; + + class ShortyIterator { + public: + ShortyIterator(const char* shorty, bool is_static); + bool Next(); + ShortyArg GetArg() { return ShortyArg(pending_this_ ? 
'L' : *cur_); } + private: + const char* cur_; + bool pending_this_; + bool initialized_; + }; + + class InToRegStorageMapper { + public: + virtual RegStorage GetNextReg(ShortyArg arg) = 0; + virtual ~InToRegStorageMapper() {} + virtual void Reset() = 0; + }; + + class InToRegStorageMapping { + public: + explicit InToRegStorageMapping(ArenaAllocator* arena) + : mapping_(std::less<int>(), arena->Adapter()), count_(0), + max_mapped_in_(0), has_arguments_on_stack_(false), initialized_(false) {} + void Initialize(ShortyIterator* shorty, InToRegStorageMapper* mapper); + /** + * @return the index of last VR mapped to physical register. In other words + * any VR starting from (return value + 1) index is mapped to memory. + */ + int GetMaxMappedIn() { return max_mapped_in_; } + bool HasArgumentsOnStack() { return has_arguments_on_stack_; } + RegStorage Get(int in_position); + bool IsInitialized() { return initialized_; } + private: + ArenaSafeMap<int, RegStorage> mapping_; + int count_; + int max_mapped_in_; + bool has_arguments_on_stack_; + bool initialized_; + }; + + // Cached mapping of method input to reg storage according to ABI. + InToRegStorageMapping in_to_reg_storage_mapping_; + virtual InToRegStorageMapper* GetResetedInToRegStorageMapper() = 0; + private: static bool SizeMatchesTypeForEntrypoint(OpSize size, Primitive::Type type); }; // Class Mir2Lir diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h index 9cb0bf53e6..c7d83dda62 100644 --- a/compiler/dex/quick/x86/codegen_x86.h +++ b/compiler/dex/quick/x86/codegen_x86.h @@ -28,40 +28,48 @@ namespace art { class X86Mir2Lir : public Mir2Lir { protected: - class InToRegStorageMapper { - public: - virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide, bool is_ref) = 0; - virtual ~InToRegStorageMapper() {} - }; - class InToRegStorageX86_64Mapper : public InToRegStorageMapper { public: - explicit InToRegStorageX86_64Mapper(Mir2Lir* ml) : ml_(ml), cur_core_reg_(0), cur_fp_reg_(0) {} - virtual ~InToRegStorageX86_64Mapper() {} - virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide, bool is_ref); + explicit InToRegStorageX86_64Mapper(Mir2Lir* m2l) + : m2l_(m2l), cur_core_reg_(0), cur_fp_reg_(0) {} + virtual RegStorage GetNextReg(ShortyArg arg); + virtual void Reset() OVERRIDE { + cur_core_reg_ = 0; + cur_fp_reg_ = 0; + } protected: - Mir2Lir* ml_; + Mir2Lir* m2l_; private: - int cur_core_reg_; - int cur_fp_reg_; + size_t cur_core_reg_; + size_t cur_fp_reg_; }; - class InToRegStorageMapping { + class InToRegStorageX86Mapper : public InToRegStorageMapper { public: - InToRegStorageMapping() : max_mapped_in_(0), is_there_stack_mapped_(false), - initialized_(false) {} - void Initialize(RegLocation* arg_locs, int count, InToRegStorageMapper* mapper); - int GetMaxMappedIn() { return max_mapped_in_; } - bool IsThereStackMapped() { return is_there_stack_mapped_; } - RegStorage Get(int in_position); - bool IsInitialized() { return initialized_; } + explicit InToRegStorageX86Mapper(Mir2Lir* m2l) : m2l_(m2l), cur_core_reg_(0) {} + virtual RegStorage GetNextReg(ShortyArg arg); + virtual void Reset() OVERRIDE { + cur_core_reg_ = 0; + } + protected: + Mir2Lir* m2l_; private: - std::map<int, RegStorage> mapping_; - int max_mapped_in_; - bool is_there_stack_mapped_; - bool initialized_; + size_t cur_core_reg_; }; + InToRegStorageX86_64Mapper in_to_reg_storage_x86_64_mapper_; + InToRegStorageX86Mapper in_to_reg_storage_x86_mapper_; + InToRegStorageMapper* GetResetedInToRegStorageMapper() OVERRIDE { 
+ InToRegStorageMapper* res; + if (cu_->target64) { + res = &in_to_reg_storage_x86_64_mapper_; + } else { + res = &in_to_reg_storage_x86_mapper_; + } + res->Reset(); + return res; + } + class ExplicitTempRegisterLock { public: ExplicitTempRegisterLock(X86Mir2Lir* mir_to_lir, int n_regs, ...); @@ -71,6 +79,8 @@ class X86Mir2Lir : public Mir2Lir { X86Mir2Lir* const mir_to_lir_; }; + virtual int GenDalvikArgsBulkCopy(CallInfo* info, int first, int count) OVERRIDE; + public: X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena); @@ -125,8 +135,6 @@ class X86Mir2Lir : public Mir2Lir { return TargetReg(symbolic_reg, cu_->target64 ? kWide : kNotWide); } - RegStorage GetArgMappingToPhysicalReg(int arg_num) OVERRIDE; - RegLocation GetReturnAlt() OVERRIDE; RegLocation GetReturnWideAlt() OVERRIDE; RegLocation LocCReturn() OVERRIDE; @@ -350,22 +358,7 @@ class X86Mir2Lir : public Mir2Lir { void LoadClassType(const DexFile& dex_file, uint32_t type_idx, SpecialTargetRegister symbolic_reg) OVERRIDE; - void FlushIns(RegLocation* ArgLocs, RegLocation rl_method) OVERRIDE; - NextCallInsn GetNextSDCallInsn() OVERRIDE; - int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel, - NextCallInsn next_call_insn, - const MethodReference& target_method, - uint32_t vtable_idx, - uintptr_t direct_code, uintptr_t direct_method, InvokeType type, - bool skip_this) OVERRIDE; - - int GenDalvikArgsRange(CallInfo* info, int call_state, LIR** pcrLabel, - NextCallInsn next_call_insn, - const MethodReference& target_method, - uint32_t vtable_idx, - uintptr_t direct_code, uintptr_t direct_method, InvokeType type, - bool skip_this) OVERRIDE; /* * @brief Generate a relative call to the method that will be patched at link time. @@ -439,8 +432,6 @@ class X86Mir2Lir : public Mir2Lir { LIR* StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement, RegStorage r_src, OpSize size, int opt_flags = 0); - RegStorage GetCoreArgMappingToPhysicalReg(int core_arg_num) const; - int AssignInsnOffsets(); void AssignOffsets(); AssemblerStatus AssembleInstructions(CodeOffset start_addr); @@ -1000,8 +991,6 @@ class X86Mir2Lir : public Mir2Lir { */ static void DumpRegLocation(RegLocation loc); - InToRegStorageMapping in_to_reg_storage_mapping_; - private: void SwapBits(RegStorage result_reg, int shift, int32_t value); void SwapBits64(RegStorage result_reg, int shift, int64_t value); diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc index ae80e9f1c5..5f6cdda0d3 100755 --- a/compiler/dex/quick/x86/target_x86.cc +++ b/compiler/dex/quick/x86/target_x86.cc @@ -814,6 +814,7 @@ RegisterClass X86Mir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volatil X86Mir2Lir::X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena) : Mir2Lir(cu, mir_graph, arena), + in_to_reg_storage_x86_64_mapper_(this), in_to_reg_storage_x86_mapper_(this), base_of_code_(nullptr), store_method_addr_(false), store_method_addr_used_(false), method_address_insns_(arena->Adapter()), class_type_address_insns_(arena->Adapter()), @@ -2407,451 +2408,44 @@ LIR* X86Mir2Lir::AddVectorLiteral(int32_t* constants) { } // ------------ ABI support: mapping of args to physical registers ------------- -RegStorage X86Mir2Lir::InToRegStorageX86_64Mapper::GetNextReg(bool is_double_or_float, bool is_wide, - bool is_ref) { +RegStorage X86Mir2Lir::InToRegStorageX86_64Mapper::GetNextReg(ShortyArg arg) { const SpecialTargetRegister coreArgMappingToPhysicalReg[] = {kArg1, kArg2, 
kArg3, kArg4, kArg5}; - const int coreArgMappingToPhysicalRegSize = sizeof(coreArgMappingToPhysicalReg) / - sizeof(SpecialTargetRegister); + const size_t coreArgMappingToPhysicalRegSize = arraysize(coreArgMappingToPhysicalReg); const SpecialTargetRegister fpArgMappingToPhysicalReg[] = {kFArg0, kFArg1, kFArg2, kFArg3, kFArg4, kFArg5, kFArg6, kFArg7}; - const int fpArgMappingToPhysicalRegSize = sizeof(fpArgMappingToPhysicalReg) / - sizeof(SpecialTargetRegister); + const size_t fpArgMappingToPhysicalRegSize = arraysize(fpArgMappingToPhysicalReg); - if (is_double_or_float) { + if (arg.IsFP()) { if (cur_fp_reg_ < fpArgMappingToPhysicalRegSize) { - return ml_->TargetReg(fpArgMappingToPhysicalReg[cur_fp_reg_++], is_wide ? kWide : kNotWide); + return m2l_->TargetReg(fpArgMappingToPhysicalReg[cur_fp_reg_++], + arg.IsWide() ? kWide : kNotWide); } } else { if (cur_core_reg_ < coreArgMappingToPhysicalRegSize) { - return ml_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++], - is_ref ? kRef : (is_wide ? kWide : kNotWide)); + return m2l_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++], + arg.IsRef() ? kRef : (arg.IsWide() ? kWide : kNotWide)); } } return RegStorage::InvalidReg(); } -RegStorage X86Mir2Lir::InToRegStorageMapping::Get(int in_position) { - DCHECK(IsInitialized()); - auto res = mapping_.find(in_position); - return res != mapping_.end() ? res->second : RegStorage::InvalidReg(); -} - -void X86Mir2Lir::InToRegStorageMapping::Initialize(RegLocation* arg_locs, int count, - InToRegStorageMapper* mapper) { - DCHECK(mapper != nullptr); - max_mapped_in_ = -1; - is_there_stack_mapped_ = false; - for (int in_position = 0; in_position < count; in_position++) { - RegStorage reg = mapper->GetNextReg(arg_locs[in_position].fp, - arg_locs[in_position].wide, arg_locs[in_position].ref); - if (reg.Valid()) { - mapping_[in_position] = reg; - max_mapped_in_ = std::max(max_mapped_in_, in_position); - if (arg_locs[in_position].wide) { - // We covered 2 args, so skip the next one - in_position++; - } - } else { - is_there_stack_mapped_ = true; - } - } - initialized_ = true; -} - -RegStorage X86Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) { - if (!cu_->target64) { - return GetCoreArgMappingToPhysicalReg(arg_num); - } - - if (!in_to_reg_storage_mapping_.IsInitialized()) { - int start_vreg = cu_->mir_graph->GetFirstInVR(); - RegLocation* arg_locs = &mir_graph_->reg_location_[start_vreg]; - - InToRegStorageX86_64Mapper mapper(this); - in_to_reg_storage_mapping_.Initialize(arg_locs, mir_graph_->GetNumOfInVRs(), &mapper); - } - return in_to_reg_storage_mapping_.Get(arg_num); -} - -RegStorage X86Mir2Lir::GetCoreArgMappingToPhysicalReg(int core_arg_num) const { - // For the 32-bit internal ABI, the first 3 arguments are passed in registers. - // Not used for 64-bit, TODO: Move X86_32 to the same framework - switch (core_arg_num) { - case 0: return TargetReg32(kArg1); - case 1: return TargetReg32(kArg2); - case 2: return TargetReg32(kArg3); - default: return RegStorage::InvalidReg(); - } -} +RegStorage X86Mir2Lir::InToRegStorageX86Mapper::GetNextReg(ShortyArg arg) { + const SpecialTargetRegister coreArgMappingToPhysicalReg[] = {kArg1, kArg2, kArg3}; + const size_t coreArgMappingToPhysicalRegSize = arraysize(coreArgMappingToPhysicalReg); -// ---------End of ABI support: mapping of args to physical registers ------------- - -/* - * If there are any ins passed in registers that have not been promoted - * to a callee-save register, flush them to the frame. Perform initial - * assignment of promoted arguments. 
- * - * ArgLocs is an array of location records describing the incoming arguments - * with one location record per word of argument. - */ -void X86Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) { - if (!cu_->target64) return Mir2Lir::FlushIns(ArgLocs, rl_method); - /* - * Dummy up a RegLocation for the incoming Method* - * It will attempt to keep kArg0 live (or copy it to home location - * if promoted). - */ - - RegLocation rl_src = rl_method; - rl_src.location = kLocPhysReg; - rl_src.reg = TargetReg(kArg0, kRef); - rl_src.home = false; - MarkLive(rl_src); - StoreValue(rl_method, rl_src); - // If Method* has been promoted, explicitly flush - if (rl_method.location == kLocPhysReg) { - const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32; - StoreRefDisp(rs_rSP, 0, As32BitReg(TargetReg(kArg0, kRef)), kNotVolatile); - } - - if (mir_graph_->GetNumOfInVRs() == 0) { - return; - } - - int start_vreg = cu_->mir_graph->GetFirstInVR(); - /* - * Copy incoming arguments to their proper home locations. - * NOTE: an older version of dx had an issue in which - * it would reuse static method argument registers. - * This could result in the same Dalvik virtual register - * being promoted to both core and fp regs. To account for this, - * we only copy to the corresponding promoted physical register - * if it matches the type of the SSA name for the incoming - * argument. It is also possible that long and double arguments - * end up half-promoted. In those cases, we must flush the promoted - * half to memory as well. - */ - ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - for (uint32_t i = 0; i < mir_graph_->GetNumOfInVRs(); i++) { - // get reg corresponding to input - RegStorage reg = GetArgMappingToPhysicalReg(i); - - RegLocation* t_loc = &ArgLocs[i]; - if (reg.Valid()) { - // If arriving in register. - - // We have already updated the arg location with promoted info - // so we can be based on it. - if (t_loc->location == kLocPhysReg) { - // Just copy it. - OpRegCopy(t_loc->reg, reg); - } else { - // Needs flush. - if (t_loc->ref) { - StoreRefDisp(rs_rX86_SP_64, SRegOffset(start_vreg + i), reg, kNotVolatile); - } else { - StoreBaseDisp(rs_rX86_SP_64, SRegOffset(start_vreg + i), reg, t_loc->wide ? k64 : k32, - kNotVolatile); - } - } - } else { - // If arriving in frame & promoted. - if (t_loc->location == kLocPhysReg) { - if (t_loc->ref) { - LoadRefDisp(rs_rX86_SP_64, SRegOffset(start_vreg + i), t_loc->reg, kNotVolatile); - } else { - LoadBaseDisp(rs_rX86_SP_64, SRegOffset(start_vreg + i), t_loc->reg, - t_loc->wide ? k64 : k32, kNotVolatile); - } - } - } - if (t_loc->wide) { - // Increment i to skip the next one. - i++; + RegStorage result = RegStorage::InvalidReg(); + if (cur_core_reg_ < coreArgMappingToPhysicalRegSize) { + result = m2l_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++], + arg.IsRef() ? kRef : kNotWide); + if (arg.IsWide() && cur_core_reg_ < coreArgMappingToPhysicalRegSize) { + result = RegStorage::MakeRegPair( + result, m2l_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++], kNotWide)); } } + return result; } -/* - * Load up to 5 arguments, the first three of which will be in - * kArg1 .. kArg3. On entry kArg0 contains the current method pointer, - * and as part of the load sequence, it must be replaced with - * the target method pointer. Note, this may also be called - * for "range" variants if the number of arguments is 5 or fewer. 
- */ -int X86Mir2Lir::GenDalvikArgsNoRange(CallInfo* info, - int call_state, LIR** pcrLabel, NextCallInsn next_call_insn, - const MethodReference& target_method, - uint32_t vtable_idx, uintptr_t direct_code, - uintptr_t direct_method, InvokeType type, bool skip_this) { - if (!cu_->target64) { - return Mir2Lir::GenDalvikArgsNoRange(info, - call_state, pcrLabel, next_call_insn, - target_method, - vtable_idx, direct_code, - direct_method, type, skip_this); - } - return GenDalvikArgsRange(info, - call_state, pcrLabel, next_call_insn, - target_method, - vtable_idx, direct_code, - direct_method, type, skip_this); -} - -/* - * May have 0+ arguments (also used for jumbo). Note that - * source virtual registers may be in physical registers, so may - * need to be flushed to home location before copying. This - * applies to arg3 and above (see below). - * - * Two general strategies: - * If < 20 arguments - * Pass args 3-18 using vldm/vstm block copy - * Pass arg0, arg1 & arg2 in kArg1-kArg3 - * If 20+ arguments - * Pass args arg19+ using memcpy block copy - * Pass arg0, arg1 & arg2 in kArg1-kArg3 - * - */ -int X86Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, - LIR** pcrLabel, NextCallInsn next_call_insn, - const MethodReference& target_method, - uint32_t vtable_idx, uintptr_t direct_code, uintptr_t direct_method, - InvokeType type, bool skip_this) { - if (!cu_->target64) { - return Mir2Lir::GenDalvikArgsRange(info, call_state, - pcrLabel, next_call_insn, - target_method, - vtable_idx, direct_code, direct_method, - type, skip_this); - } - - /* If no arguments, just return */ - if (info->num_arg_words == 0) - return call_state; - - const int start_index = skip_this ? 1 : 0; - - InToRegStorageX86_64Mapper mapper(this); - InToRegStorageMapping in_to_reg_storage_mapping; - in_to_reg_storage_mapping.Initialize(info->args, info->num_arg_words, &mapper); - const int last_mapped_in = in_to_reg_storage_mapping.GetMaxMappedIn(); - const int size_of_the_last_mapped = last_mapped_in == -1 ? 1 : - info->args[last_mapped_in].wide ? 2 : 1; - int regs_left_to_pass_via_stack = info->num_arg_words - (last_mapped_in + size_of_the_last_mapped); - - // Fisrt of all, check whether it make sense to use bulk copying - // Optimization is aplicable only for range case - // TODO: make a constant instead of 2 - if (info->is_range && regs_left_to_pass_via_stack >= 2) { - // Scan the rest of the args - if in phys_reg flush to memory - for (int next_arg = last_mapped_in + size_of_the_last_mapped; next_arg < info->num_arg_words;) { - RegLocation loc = info->args[next_arg]; - if (loc.wide) { - loc = UpdateLocWide(loc); - if (loc.location == kLocPhysReg) { - ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - StoreBaseDisp(rs_rX86_SP_64, SRegOffset(loc.s_reg_low), loc.reg, k64, kNotVolatile); - } - next_arg += 2; - } else { - loc = UpdateLoc(loc); - if (loc.location == kLocPhysReg) { - ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - StoreBaseDisp(rs_rX86_SP_64, SRegOffset(loc.s_reg_low), loc.reg, k32, kNotVolatile); - } - next_arg++; - } - } - - // The rest can be copied together - int start_offset = SRegOffset(info->args[last_mapped_in + size_of_the_last_mapped].s_reg_low); - int outs_offset = StackVisitor::GetOutVROffset(last_mapped_in + size_of_the_last_mapped, - cu_->instruction_set); - - int current_src_offset = start_offset; - int current_dest_offset = outs_offset; - - // Only davik regs are accessed in this loop; no next_call_insn() calls. 
- ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - while (regs_left_to_pass_via_stack > 0) { - // This is based on the knowledge that the stack itself is 16-byte aligned. - bool src_is_16b_aligned = (current_src_offset & 0xF) == 0; - bool dest_is_16b_aligned = (current_dest_offset & 0xF) == 0; - size_t bytes_to_move; - - /* - * The amount to move defaults to 32-bit. If there are 4 registers left to move, then do a - * a 128-bit move because we won't get the chance to try to aligned. If there are more than - * 4 registers left to move, consider doing a 128-bit only if either src or dest are aligned. - * We do this because we could potentially do a smaller move to align. - */ - if (regs_left_to_pass_via_stack == 4 || - (regs_left_to_pass_via_stack > 4 && (src_is_16b_aligned || dest_is_16b_aligned))) { - // Moving 128-bits via xmm register. - bytes_to_move = sizeof(uint32_t) * 4; - - // Allocate a free xmm temp. Since we are working through the calling sequence, - // we expect to have an xmm temporary available. AllocTempDouble will abort if - // there are no free registers. - RegStorage temp = AllocTempDouble(); - - LIR* ld1 = nullptr; - LIR* ld2 = nullptr; - LIR* st1 = nullptr; - LIR* st2 = nullptr; - - /* - * The logic is similar for both loads and stores. If we have 16-byte alignment, - * do an aligned move. If we have 8-byte alignment, then do the move in two - * parts. This approach prevents possible cache line splits. Finally, fall back - * to doing an unaligned move. In most cases we likely won't split the cache - * line but we cannot prove it and thus take a conservative approach. - */ - bool src_is_8b_aligned = (current_src_offset & 0x7) == 0; - bool dest_is_8b_aligned = (current_dest_offset & 0x7) == 0; - - ScopedMemRefType mem_ref_type2(this, ResourceMask::kDalvikReg); - if (src_is_16b_aligned) { - ld1 = OpMovRegMem(temp, rs_rX86_SP_64, current_src_offset, kMovA128FP); - } else if (src_is_8b_aligned) { - ld1 = OpMovRegMem(temp, rs_rX86_SP_64, current_src_offset, kMovLo128FP); - ld2 = OpMovRegMem(temp, rs_rX86_SP_64, current_src_offset + (bytes_to_move >> 1), - kMovHi128FP); - } else { - ld1 = OpMovRegMem(temp, rs_rX86_SP_64, current_src_offset, kMovU128FP); - } - - if (dest_is_16b_aligned) { - st1 = OpMovMemReg(rs_rX86_SP_64, current_dest_offset, temp, kMovA128FP); - } else if (dest_is_8b_aligned) { - st1 = OpMovMemReg(rs_rX86_SP_64, current_dest_offset, temp, kMovLo128FP); - st2 = OpMovMemReg(rs_rX86_SP_64, current_dest_offset + (bytes_to_move >> 1), - temp, kMovHi128FP); - } else { - st1 = OpMovMemReg(rs_rX86_SP_64, current_dest_offset, temp, kMovU128FP); - } - - // TODO If we could keep track of aliasing information for memory accesses that are wider - // than 64-bit, we wouldn't need to set up a barrier. - if (ld1 != nullptr) { - if (ld2 != nullptr) { - // For 64-bit load we can actually set up the aliasing information. - AnnotateDalvikRegAccess(ld1, current_src_offset >> 2, true, true); - AnnotateDalvikRegAccess(ld2, (current_src_offset + (bytes_to_move >> 1)) >> 2, true, true); - } else { - // Set barrier for 128-bit load. - ld1->u.m.def_mask = &kEncodeAll; - } - } - if (st1 != nullptr) { - if (st2 != nullptr) { - // For 64-bit store we can actually set up the aliasing information. - AnnotateDalvikRegAccess(st1, current_dest_offset >> 2, false, true); - AnnotateDalvikRegAccess(st2, (current_dest_offset + (bytes_to_move >> 1)) >> 2, false, true); - } else { - // Set barrier for 128-bit store. 
- st1->u.m.def_mask = &kEncodeAll; - } - } - - // Free the temporary used for the data movement. - FreeTemp(temp); - } else { - // Moving 32-bits via general purpose register. - bytes_to_move = sizeof(uint32_t); - - // Instead of allocating a new temp, simply reuse one of the registers being used - // for argument passing. - RegStorage temp = TargetReg(kArg3, kNotWide); - - // Now load the argument VR and store to the outs. - Load32Disp(rs_rX86_SP_64, current_src_offset, temp); - Store32Disp(rs_rX86_SP_64, current_dest_offset, temp); - } - - current_src_offset += bytes_to_move; - current_dest_offset += bytes_to_move; - regs_left_to_pass_via_stack -= (bytes_to_move >> 2); - } - DCHECK_EQ(regs_left_to_pass_via_stack, 0); - } - - // Now handle rest not registers if they are - if (in_to_reg_storage_mapping.IsThereStackMapped()) { - RegStorage regSingle = TargetReg(kArg2, kNotWide); - RegStorage regWide = TargetReg(kArg3, kWide); - for (int i = start_index; - i < last_mapped_in + size_of_the_last_mapped + regs_left_to_pass_via_stack; i++) { - RegLocation rl_arg = info->args[i]; - rl_arg = UpdateRawLoc(rl_arg); - RegStorage reg = in_to_reg_storage_mapping.Get(i); - if (!reg.Valid()) { - int out_offset = StackVisitor::GetOutVROffset(i, cu_->instruction_set); - - { - ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - if (rl_arg.wide) { - if (rl_arg.location == kLocPhysReg) { - StoreBaseDisp(rs_rX86_SP_64, out_offset, rl_arg.reg, k64, kNotVolatile); - } else { - LoadValueDirectWideFixed(rl_arg, regWide); - StoreBaseDisp(rs_rX86_SP_64, out_offset, regWide, k64, kNotVolatile); - } - } else { - if (rl_arg.location == kLocPhysReg) { - StoreBaseDisp(rs_rX86_SP_64, out_offset, rl_arg.reg, k32, kNotVolatile); - } else { - LoadValueDirectFixed(rl_arg, regSingle); - StoreBaseDisp(rs_rX86_SP_64, out_offset, regSingle, k32, kNotVolatile); - } - } - } - call_state = next_call_insn(cu_, info, call_state, target_method, - vtable_idx, direct_code, direct_method, type); - } - if (rl_arg.wide) { - i++; - } - } - } - - // Finish with mapped registers - for (int i = start_index; i <= last_mapped_in; i++) { - RegLocation rl_arg = info->args[i]; - rl_arg = UpdateRawLoc(rl_arg); - RegStorage reg = in_to_reg_storage_mapping.Get(i); - if (reg.Valid()) { - if (rl_arg.wide) { - LoadValueDirectWideFixed(rl_arg, reg); - } else { - LoadValueDirectFixed(rl_arg, reg); - } - call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, - direct_code, direct_method, type); - } - if (rl_arg.wide) { - i++; - } - } - - call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, - direct_code, direct_method, type); - if (pcrLabel) { - if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitNullChecks()) { - *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1, kRef), info->opt_flags); - } else { - *pcrLabel = nullptr; - // In lieu of generating a check for kArg1 being null, we need to - // perform a load when doing implicit checks. 
- RegStorage tmp = AllocTemp(); - Load32Disp(TargetReg(kArg1, kRef), 0, tmp); - MarkPossibleNullPointerException(info->opt_flags); - FreeTemp(tmp); - } - } - return call_state; -} +// ---------End of ABI support: mapping of args to physical registers ------------- bool X86Mir2Lir::GenInlinedCharAt(CallInfo* info) { // Location of reference to data array @@ -2980,4 +2574,122 @@ X86Mir2Lir::ExplicitTempRegisterLock::~ExplicitTempRegisterLock() { } } +int X86Mir2Lir::GenDalvikArgsBulkCopy(CallInfo* info, int first, int count) { + if (count < 4) { + // It does not make sense to use this utility if we have no chance to use + // 128-bit move. + return count; + } + GenDalvikArgsFlushPromoted(info, first); + + // The rest can be copied together + int current_src_offset = SRegOffset(info->args[first].s_reg_low); + int current_dest_offset = StackVisitor::GetOutVROffset(first, cu_->instruction_set); + + // Only davik regs are accessed in this loop; no next_call_insn() calls. + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); + while (count > 0) { + // This is based on the knowledge that the stack itself is 16-byte aligned. + bool src_is_16b_aligned = (current_src_offset & 0xF) == 0; + bool dest_is_16b_aligned = (current_dest_offset & 0xF) == 0; + size_t bytes_to_move; + + /* + * The amount to move defaults to 32-bit. If there are 4 registers left to move, then do a + * a 128-bit move because we won't get the chance to try to aligned. If there are more than + * 4 registers left to move, consider doing a 128-bit only if either src or dest are aligned. + * We do this because we could potentially do a smaller move to align. + */ + if (count == 4 || (count > 4 && (src_is_16b_aligned || dest_is_16b_aligned))) { + // Moving 128-bits via xmm register. + bytes_to_move = sizeof(uint32_t) * 4; + + // Allocate a free xmm temp. Since we are working through the calling sequence, + // we expect to have an xmm temporary available. AllocTempDouble will abort if + // there are no free registers. + RegStorage temp = AllocTempDouble(); + + LIR* ld1 = nullptr; + LIR* ld2 = nullptr; + LIR* st1 = nullptr; + LIR* st2 = nullptr; + + /* + * The logic is similar for both loads and stores. If we have 16-byte alignment, + * do an aligned move. If we have 8-byte alignment, then do the move in two + * parts. This approach prevents possible cache line splits. Finally, fall back + * to doing an unaligned move. In most cases we likely won't split the cache + * line but we cannot prove it and thus take a conservative approach. 
+ */ + bool src_is_8b_aligned = (current_src_offset & 0x7) == 0; + bool dest_is_8b_aligned = (current_dest_offset & 0x7) == 0; + + if (src_is_16b_aligned) { + ld1 = OpMovRegMem(temp, TargetPtrReg(kSp), current_src_offset, kMovA128FP); + } else if (src_is_8b_aligned) { + ld1 = OpMovRegMem(temp, TargetPtrReg(kSp), current_src_offset, kMovLo128FP); + ld2 = OpMovRegMem(temp, TargetPtrReg(kSp), current_src_offset + (bytes_to_move >> 1), + kMovHi128FP); + } else { + ld1 = OpMovRegMem(temp, TargetPtrReg(kSp), current_src_offset, kMovU128FP); + } + + if (dest_is_16b_aligned) { + st1 = OpMovMemReg(TargetPtrReg(kSp), current_dest_offset, temp, kMovA128FP); + } else if (dest_is_8b_aligned) { + st1 = OpMovMemReg(TargetPtrReg(kSp), current_dest_offset, temp, kMovLo128FP); + st2 = OpMovMemReg(TargetPtrReg(kSp), current_dest_offset + (bytes_to_move >> 1), + temp, kMovHi128FP); + } else { + st1 = OpMovMemReg(TargetPtrReg(kSp), current_dest_offset, temp, kMovU128FP); + } + + // TODO If we could keep track of aliasing information for memory accesses that are wider + // than 64-bit, we wouldn't need to set up a barrier. + if (ld1 != nullptr) { + if (ld2 != nullptr) { + // For 64-bit load we can actually set up the aliasing information. + AnnotateDalvikRegAccess(ld1, current_src_offset >> 2, true, true); + AnnotateDalvikRegAccess(ld2, (current_src_offset + (bytes_to_move >> 1)) >> 2, true, + true); + } else { + // Set barrier for 128-bit load. + ld1->u.m.def_mask = &kEncodeAll; + } + } + if (st1 != nullptr) { + if (st2 != nullptr) { + // For 64-bit store we can actually set up the aliasing information. + AnnotateDalvikRegAccess(st1, current_dest_offset >> 2, false, true); + AnnotateDalvikRegAccess(st2, (current_dest_offset + (bytes_to_move >> 1)) >> 2, false, + true); + } else { + // Set barrier for 128-bit store. + st1->u.m.def_mask = &kEncodeAll; + } + } + + // Free the temporary used for the data movement. + FreeTemp(temp); + } else { + // Moving 32-bits via general purpose register. + bytes_to_move = sizeof(uint32_t); + + // Instead of allocating a new temp, simply reuse one of the registers being used + // for argument passing. + RegStorage temp = TargetReg(kArg3, kNotWide); + + // Now load the argument VR and store to the outs. + Load32Disp(TargetPtrReg(kSp), current_src_offset, temp); + Store32Disp(TargetPtrReg(kSp), current_dest_offset, temp); + } + + current_src_offset += bytes_to_move; + current_dest_offset += bytes_to_move; + count -= (bytes_to_move >> 2); + } + DCHECK_EQ(count, 0); + return count; +} + } // namespace art diff --git a/test/800-smali/expected.txt b/test/800-smali/expected.txt index 0f7001fc68..5f86f1e047 100644 --- a/test/800-smali/expected.txt +++ b/test/800-smali/expected.txt @@ -8,4 +8,5 @@ b/18380491 invoke-super abstract BadCaseInOpRegRegReg CmpLong +FloatIntConstPassing Done! 
diff --git a/test/800-smali/smali/FloatIntConstPassing.smali b/test/800-smali/smali/FloatIntConstPassing.smali new file mode 100644 index 0000000000..a2916c5dbb --- /dev/null +++ b/test/800-smali/smali/FloatIntConstPassing.smali @@ -0,0 +1,29 @@ +.class public LFloatIntConstPassing; + +.super Ljava/lang/Object; + +.method public static getInt(I)I + .registers 2 + const/4 v0, 1 + add-int/2addr v0, p0 + return v0 +.end method + +.method public static getFloat(F)F + .registers 2 + const/4 v0, 0 + mul-float/2addr v0, p0 + return v0 +.end method + +.method public static run()I + .registers 3 + const/4 v0, 1 + invoke-static {v0}, LFloatIntConstPassing;->getInt(I)I + move-result v1 + invoke-static {v0}, LFloatIntConstPassing;->getFloat(F)F + move-result v2 + float-to-int v2, v2 + add-int/2addr v1, v2 + return v1 +.end method diff --git a/test/800-smali/src/Main.java b/test/800-smali/src/Main.java index f2c1ab57e7..a2db05135d 100644 --- a/test/800-smali/src/Main.java +++ b/test/800-smali/src/Main.java @@ -64,6 +64,7 @@ public class Main { new Object[]{0}, new AbstractMethodError(), null)); testCases.add(new TestCase("BadCaseInOpRegRegReg", "BadCaseInOpRegRegReg", "getInt", null, null, 2)); testCases.add(new TestCase("CmpLong", "CmpLong", "run", null, null, 0)); + testCases.add(new TestCase("FloatIntConstPassing", "FloatIntConstPassing", "run", null, null, 2)); } public void runTests() { |