Add byte swap instructions for ARM and x86.

Change-Id: I03fdd61ffc811ae521141f532b3e04dda566c77d
diff --git a/compiler/dex/quick/arm/arm_lir.h b/compiler/dex/quick/arm/arm_lir.h
index d184673..2ff7f1c 100644
--- a/compiler/dex/quick/arm/arm_lir.h
+++ b/compiler/dex/quick/arm/arm_lir.h
@@ -296,6 +296,8 @@
   kThumbOrr,         // orr   [0100001100] rm[5..3] rd[2..0].
   kThumbPop,         // pop   [1011110] r[8..8] rl[7..0].
   kThumbPush,        // push  [1011010] r[8..8] rl[7..0].
+  kThumbRev,         // rev   [1011101000] rm[5..3] rd[2..0]
+  kThumbRevsh,       // revsh   [1011101011] rm[5..3] rd[2..0]
   kThumbRorRR,       // ror   [0100000111] rs[5..3] rd[2..0].
   kThumbSbc,         // sbc   [0100000110] rm[5..3] rd[2..0].
   kThumbStmia,       // stmia   [11000] rn[10..8] reglist [7.. 0].
@@ -399,6 +401,8 @@
   kThumb2AdcRRI8,    // adc [111100010101] rn[19..16] [0] imm3 rd[11..8] imm8.
   kThumb2SubRRI8,    // sub [111100011011] rn[19..16] [0] imm3 rd[11..8] imm8.
   kThumb2SbcRRI8,    // sbc [111100010111] rn[19..16] [0] imm3 rd[11..8] imm8.
+  kThumb2RevRR,      // rev [111110101001] rm[19..16] [1111] rd[11..8] 1000 rm[3..0]
+  kThumb2RevshRR,    // rev [111110101001] rm[19..16] [1111] rd[11..8] 1011 rm[3..0]
   kThumb2It,         // it [10111111] firstcond[7-4] mask[3-0].
   kThumb2Fmstat,     // fmstat [11101110111100011111101000010000].
   kThumb2Vcmpd,      // vcmp [111011101] D [11011] rd[15-12] [1011] E [1] M [0] rm[3-0].
diff --git a/compiler/dex/quick/arm/assemble_arm.cc b/compiler/dex/quick/arm/assemble_arm.cc
index 2a6e656..e8c188c 100644
--- a/compiler/dex/quick/arm/assemble_arm.cc
+++ b/compiler/dex/quick/arm/assemble_arm.cc
@@ -327,6 +327,16 @@
                  kFmtUnused, -1, -1,
                  IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE_LIST0
                  | IS_STORE, "push", "<!0R>", 2, kFixupNone),
+    ENCODING_MAP(kThumbRev,           0xba00,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0_USE1,
+                 "rev", "!0C, !1C", 2, kFixupNone),
+    ENCODING_MAP(kThumbRevsh,         0xbac0,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0_USE1,
+                 "rev", "!0C, !1C", 2, kFixupNone),
     ENCODING_MAP(kThumbRorRR,        0x41c0,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
@@ -768,6 +778,16 @@
                  kFmtUnused, -1, -1,
                  IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES | USES_CCODES,
                  "sbcs", "!0C, !1C, #!2m", 4, kFixupNone),
+    ENCODING_MAP(kThumb2RevRR, 0xfa90f080,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_DEF0_USE12,  // Binary, but rm is stored twice.
+                 "rev", "!0C, !1C", 4, kFixupNone),
+    ENCODING_MAP(kThumb2RevshRR, 0xfa90f0b0,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_DEF0_USE12,  // Binary, but rm is stored twice.
+                 "revsh", "!0C, !1C", 4, kFixupNone),
     ENCODING_MAP(kThumb2It,  0xbf00,
                  kFmtBitBlt, 7, 4, kFmtBitBlt, 3, 0, kFmtModImm, -1, -1,
                  kFmtUnused, -1, -1, IS_BINARY_OP | IS_IT | USES_CCODES,
diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc
index a2ac6ef..3ceeacf 100644
--- a/compiler/dex/quick/arm/utility_arm.cc
+++ b/compiler/dex/quick/arm/utility_arm.cc
@@ -314,6 +314,22 @@
     case kOpSub:
       opcode = (thumb_form) ? kThumbSubRRR : kThumb2SubRRR;
       break;
+    case kOpRev:
+      DCHECK_EQ(shift, 0);
+      if (!thumb_form) {
+        // Binary, but rm is encoded twice.
+        return NewLIR3(kThumb2RevRR, r_dest_src1, r_src2, r_src2);
+      }
+      opcode = kThumbRev;
+      break;
+    case kOpRevsh:
+      DCHECK_EQ(shift, 0);
+      if (!thumb_form) {
+        // Binary, but rm is encoded twice.
+        return NewLIR3(kThumb2RevshRR, r_dest_src1, r_src2, r_src2);
+      }
+      opcode = kThumbRevsh;
+      break;
     case kOp2Byte:
       DCHECK_EQ(shift, 0);
       return NewLIR4(kThumb2Sbfx, r_dest_src1, r_src2, 0, 8);
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index 92d58d5..2047f30 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -246,6 +246,8 @@
   UNARY_ENCODING_MAP(Idivmod, 0x7, 0, SETS_CCODES, DaR, kRegRegReg, IS_UNARY_OP | REG_USE0, DaM, kRegRegMem, IS_BINARY_OP | REG_USE0, DaA, kRegRegArray, IS_QUAD_OP | REG_USE01, 0, REG_DEFA_USEA, REG_DEFAD_USEAD, REG_DEFAD_USEAD, "ah:al,ax,", "dx:ax,dx:ax,", "edx:eax,edx:eax,"),
 #undef UNARY_ENCODING_MAP
 
+  { kX86Bswap32R, kRegOpcode, IS_UNARY_OP | REG_DEF0_USE0, { 0, 0, 0x0F, 0xC8, 0, 0, 0, 0 }, "Bswap32R", "!0r" },
+
 #define EXT_0F_ENCODING_MAP(opname, prefix, opcode, reg_def) \
 { kX86 ## opname ## RR, kRegReg,             IS_BINARY_OP   | reg_def | REG_USE01,  { prefix, 0, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RR", "!0r,!1r" }, \
 { kX86 ## opname ## RM, kRegMem,   IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE01,  { prefix, 0, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RM", "!0r,[!1r+!2d]" }, \
@@ -371,6 +373,8 @@
       return lir->operands[0];  // length of nop is sole operand
     case kNullary:
       return 1;  // 1 byte of opcode
+    case kRegOpcode:  // lir operands - 0: reg
+      return ComputeSize(entry, 0, 0, false) - 1;  // substract 1 for modrm
     case kReg:  // lir operands - 0: reg
       return ComputeSize(entry, 0, 0, false);
     case kMem:  // lir operands - 0: base, 1: disp
@@ -514,6 +518,33 @@
   }
 }
 
+void X86Mir2Lir::EmitOpRegOpcode(const X86EncodingMap* entry, uint8_t reg) {
+  if (entry->skeleton.prefix1 != 0) {
+    code_buffer_.push_back(entry->skeleton.prefix1);
+    if (entry->skeleton.prefix2 != 0) {
+      code_buffer_.push_back(entry->skeleton.prefix2);
+    }
+  } else {
+    DCHECK_EQ(0, entry->skeleton.prefix2);
+  }
+  code_buffer_.push_back(entry->skeleton.opcode);
+  if (entry->skeleton.opcode == 0x0F) {
+    code_buffer_.push_back(entry->skeleton.extra_opcode1);
+    // There's no 3-byte instruction with +rd
+    DCHECK_NE(0x38, entry->skeleton.extra_opcode1);
+    DCHECK_NE(0x3A, entry->skeleton.extra_opcode1);
+    DCHECK_EQ(0, entry->skeleton.extra_opcode2);
+  } else {
+    DCHECK_EQ(0, entry->skeleton.extra_opcode1);
+    DCHECK_EQ(0, entry->skeleton.extra_opcode2);
+  }
+  DCHECK(!X86_FPREG(reg));
+  DCHECK_LT(reg, 8);
+  code_buffer_.back() += reg;
+  DCHECK_EQ(0, entry->skeleton.ax_opcode);
+  DCHECK_EQ(0, entry->skeleton.immediate_bytes);
+}
+
 void X86Mir2Lir::EmitOpReg(const X86EncodingMap* entry, uint8_t reg) {
   if (entry->skeleton.prefix1 != 0) {
     code_buffer_.push_back(entry->skeleton.prefix1);
@@ -1303,6 +1334,9 @@
         DCHECK_EQ(0, entry->skeleton.ax_opcode);
         DCHECK_EQ(0, entry->skeleton.immediate_bytes);
         break;
+      case kRegOpcode:  // lir operands - 0: reg
+        EmitOpRegOpcode(entry, lir->operands[0]);
+        break;
       case kReg:  // lir operands - 0: reg
         EmitOpReg(entry, lir->operands[0]);
         break;
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index b1d95ff..b28d7ef 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -170,6 +170,7 @@
 
   private:
     void EmitDisp(int base, int disp);
+    void EmitOpRegOpcode(const X86EncodingMap* entry, uint8_t reg);
     void EmitOpReg(const X86EncodingMap* entry, uint8_t reg);
     void EmitOpMem(const X86EncodingMap* entry, uint8_t base, int disp);
     void EmitMemReg(const X86EncodingMap* entry, uint8_t base, int disp, uint8_t reg);
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index c519bfe..6ec7ebb 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -117,6 +117,7 @@
   switch (op) {
     case kOpNeg: opcode = kX86Neg32R; break;
     case kOpNot: opcode = kX86Not32R; break;
+    case kOpRev: opcode = kX86Bswap32R; break;
     case kOpBlx: opcode = kX86CallR; break;
     default:
       LOG(FATAL) << "Bad case in OpReg " << op;
@@ -161,6 +162,13 @@
       case kOpNeg:
         OpRegCopy(r_dest_src1, r_src2);
         return OpReg(kOpNeg, r_dest_src1);
+      case kOpRev:
+        OpRegCopy(r_dest_src1, r_src2);
+        return OpReg(kOpRev, r_dest_src1);
+      case kOpRevsh:
+        OpRegCopy(r_dest_src1, r_src2);
+        OpReg(kOpRev, r_dest_src1);
+        return OpRegImm(kOpAsr, r_dest_src1, 16);
         // X86 binary opcodes
       case kOpSub: opcode = kX86Sub32RR; break;
       case kOpSbc: opcode = kX86Sbb32RR; break;
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index f1b91ca..3518131 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -313,6 +313,7 @@
   UnaryOpcode(kX86Imul, DaR, DaM, DaA),
   UnaryOpcode(kX86Divmod,  DaR, DaM, DaA),
   UnaryOpcode(kX86Idivmod, DaR, DaM, DaA),
+  kX86Bswap32R,
 #undef UnaryOpcode
 #define Binary0fOpCode(opcode) \
   opcode ## RR, opcode ## RM, opcode ## RA
@@ -381,6 +382,7 @@
   kData,                                   // Special case for raw data.
   kNop,                                    // Special case for variable length nop.
   kNullary,                                // Opcode that takes no arguments.
+  kRegOpcode,                              // Shorter form of R instruction kind (opcode+rd)
   kReg, kMem, kArray,                      // R, M and A instruction kinds.
   kMemReg, kArrayReg, kThreadReg,          // MR, AR and TR instruction kinds.
   kRegReg, kRegMem, kRegArray, kRegThread,  // RR, RM, RA and RT instruction kinds.