Rewrite use/def masks to support 128 bits.

Reduce LIR memory usage by holding masks by pointers in the
LIR rather than directly and using pre-defined const masks
for the common cases, allocating very few on the arena.

Change-Id: I0f6d27ef6867acd157184c8c74f9612cebfe6c16
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index a050a05..4a77df2 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -794,34 +794,61 @@
     RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_rBX, rs_rCX);
     LoadValueDirectWideFixed(rl_src_expected, r_tmp1);
     LoadValueDirectWideFixed(rl_src_new_value, r_tmp2);
-    NewLIR1(kX86Push32R, rs_rDI.GetReg());
-    MarkTemp(rs_rDI);
-    LockTemp(rs_rDI);
-    NewLIR1(kX86Push32R, rs_rSI.GetReg());
-    MarkTemp(rs_rSI);
-    LockTemp(rs_rSI);
-    const int push_offset = 4 /* push edi */ + 4 /* push esi */;
-    int srcObjSp = IsInReg(this, rl_src_obj, rs_rSI) ? 0
-                : (IsInReg(this, rl_src_obj, rs_rDI) ? 4
-                : (SRegOffset(rl_src_obj.s_reg_low) + push_offset));
     // FIXME: needs 64-bit update.
-    LoadWordDisp(TargetReg(kSp), srcObjSp, rs_rDI);
-    int srcOffsetSp = IsInReg(this, rl_src_offset, rs_rSI) ? 0
-                   : (IsInReg(this, rl_src_offset, rs_rDI) ? 4
-                   : (SRegOffset(rl_src_offset.s_reg_low) + push_offset));
-    LoadWordDisp(TargetReg(kSp), srcOffsetSp, rs_rSI);
-    NewLIR4(kX86LockCmpxchg64A, rs_rDI.GetReg(), rs_rSI.GetReg(), 0, 0);
+    const bool obj_in_di = IsInReg(this, rl_src_obj, rs_rDI);
+    const bool obj_in_si = IsInReg(this, rl_src_obj, rs_rSI);
+    DCHECK(!obj_in_si || !obj_in_di);
+    const bool off_in_di = IsInReg(this, rl_src_offset, rs_rDI);
+    const bool off_in_si = IsInReg(this, rl_src_offset, rs_rSI);
+    DCHECK(!off_in_si || !off_in_di);
+    // If obj/offset is in a reg, use that reg. Otherwise, use the empty reg.
+    RegStorage rs_obj = obj_in_di ? rs_rDI : obj_in_si ? rs_rSI : !off_in_di ? rs_rDI : rs_rSI;
+    RegStorage rs_off = off_in_si ? rs_rSI : off_in_di ? rs_rDI : !obj_in_si ? rs_rSI : rs_rDI;
+    bool push_di = (!obj_in_di && !off_in_di) && (rs_obj == rs_rDI || rs_off == rs_rDI);
+    bool push_si = (!obj_in_si && !off_in_si) && (rs_obj == rs_rSI || rs_off == rs_rSI);
+    if (push_di) {
+      NewLIR1(kX86Push32R, rs_rDI.GetReg());
+      MarkTemp(rs_rDI);
+      LockTemp(rs_rDI);
+    }
+    if (push_si) {
+      NewLIR1(kX86Push32R, rs_rSI.GetReg());
+      MarkTemp(rs_rSI);
+      LockTemp(rs_rSI);
+    }
+    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
+    const size_t push_offset = (push_si ? 4u : 0u) + (push_di ? 4u : 0u);
+    if (!obj_in_si && !obj_in_di) {
+      LoadWordDisp(TargetReg(kSp), SRegOffset(rl_src_obj.s_reg_low) + push_offset, rs_obj);
+      // Dalvik register annotation in LoadBaseIndexedDisp() used wrong offset. Fix it.
+      DCHECK(!DECODE_ALIAS_INFO_WIDE(last_lir_insn_->flags.alias_info));
+      int reg_id = DECODE_ALIAS_INFO_REG(last_lir_insn_->flags.alias_info) - push_offset / 4u;
+      AnnotateDalvikRegAccess(last_lir_insn_, reg_id, true, false);
+    }
+    if (!off_in_si && !off_in_di) {
+      LoadWordDisp(TargetReg(kSp), SRegOffset(rl_src_offset.s_reg_low) + push_offset, rs_off);
+      // Dalvik register annotation in LoadBaseIndexedDisp() used wrong offset. Fix it.
+      DCHECK(!DECODE_ALIAS_INFO_WIDE(last_lir_insn_->flags.alias_info));
+      int reg_id = DECODE_ALIAS_INFO_REG(last_lir_insn_->flags.alias_info) - push_offset / 4u;
+      AnnotateDalvikRegAccess(last_lir_insn_, reg_id, true, false);
+    }
+    NewLIR4(kX86LockCmpxchg64A, rs_obj.GetReg(), rs_off.GetReg(), 0, 0);
 
     // After a store we need to insert barrier in case of potential load. Since the
     // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated.
     GenMemBarrier(kStoreLoad);
 
-    FreeTemp(rs_rSI);
-    UnmarkTemp(rs_rSI);
-    NewLIR1(kX86Pop32R, rs_rSI.GetReg());
-    FreeTemp(rs_rDI);
-    UnmarkTemp(rs_rDI);
-    NewLIR1(kX86Pop32R, rs_rDI.GetReg());
+
+    if (push_si) {
+      FreeTemp(rs_rSI);
+      UnmarkTemp(rs_rSI);
+      NewLIR1(kX86Pop32R, rs_rSI.GetReg());
+    }
+    if (push_di) {
+      FreeTemp(rs_rDI);
+      UnmarkTemp(rs_rDI);
+      NewLIR1(kX86Pop32R, rs_rDI.GetReg());
+    }
     FreeCallTemps();
   } else {
     // EAX must hold expected for CMPXCHG. Neither rl_new_value, nor r_ptr may be in EAX.
@@ -885,11 +912,11 @@
   // We don't know the proper offset for the value, so pick one that will force
   // 4 byte offset.  We will fix this up in the assembler later to have the right
   // value.
+  ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
   LIR *res = RawLIR(current_dalvik_offset_, kX86Mov32RM, reg.GetReg(), reg.GetReg(), 256,
                     0, 0, target);
   res->target = target;
   res->flags.fixup = kFixupLoad;
-  SetMemRefType(res, true, kLiteral);
   store_method_addr_used_ = true;
   return res;
 }
@@ -1077,6 +1104,9 @@
 }
 
 void X86Mir2Lir::GenImulMemImm(RegStorage dest, int sreg, int displacement, int val) {
+  // All memory accesses below reference dalvik regs.
+  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
+
   LIR *m;
   switch (val) {
     case 0:
@@ -1095,6 +1125,9 @@
 
 void X86Mir2Lir::GenMulLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) {
+  // All memory accesses below reference dalvik regs.
+  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
+
   if (Gen64Bit()) {
     if (rl_src1.is_const) {
       std::swap(rl_src1, rl_src2);
@@ -1346,6 +1379,7 @@
   int r_base = TargetReg(kSp).GetReg();
   int displacement = SRegOffset(rl_src.s_reg_low);
 
+  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
   LIR *lir = NewLIR3(x86op, Gen64Bit() ? rl_dest.reg.GetReg() : rl_dest.reg.GetLowReg(), r_base, displacement + LOWORD_OFFSET);
   AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
                           true /* is_load */, true /* is64bit */);
@@ -1379,6 +1413,7 @@
   int r_base = TargetReg(kSp).GetReg();
   int displacement = SRegOffset(rl_dest.s_reg_low);
 
+  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
   LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET,
                      Gen64Bit() ? rl_src.reg.GetReg() : rl_src.reg.GetLowReg());
   AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
@@ -2061,6 +2096,7 @@
       int r_base = TargetReg(kSp).GetReg();
       int displacement = SRegOffset(rl_dest.s_reg_low);
 
+      ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
       X86OpCode x86op = GetOpcode(op, rl_dest, false, val);
       LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, val);
       AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
@@ -2091,6 +2127,7 @@
     int r_base = TargetReg(kSp).GetReg();
     int displacement = SRegOffset(rl_dest.s_reg_low);
 
+    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
     if (!IsNoOp(op, val_lo)) {
       X86OpCode x86op = GetOpcode(op, rl_dest, false, val_lo);
       LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, val_lo);
@@ -2469,6 +2506,9 @@
     return;
   }
 
+  // If we generate any memory access below, it will reference a dalvik reg.
+  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
+
   if (unary) {
     rl_lhs = LoadValue(rl_lhs, kCoreReg);
     rl_result = UpdateLocTyped(rl_dest, kCoreReg);
@@ -2620,6 +2660,7 @@
     NewLIR2(kX86MovsxdRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
   } else {
     int displacement = SRegOffset(rl_src.s_reg_low);
+    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
     LIR *m = NewLIR3(kX86MovsxdRM, rl_result.reg.GetReg(), rs_rX86_SP.GetReg(),
                      displacement + LOWORD_OFFSET);
     AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
@@ -2670,6 +2711,7 @@
     rl_result = UpdateLocWideTyped(rl_dest, kCoreReg);
     if (rl_result.location != kLocPhysReg) {
       // Okay, we can do this into memory
+      ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
       OpMemReg(op, rl_result, t_reg.GetReg());
     } else if (!rl_result.reg.IsFloat()) {
       // Can do this directly into the result register