Thumb2 assembler for JNI compiler and optimizing compiler

This provides a programmatic assembler for the thumb2 instruction set for
ARM.  The interface is the same as that of the ARM assembler, which has
been moved into Arm32Assembler.  The assembler handles most 16 and 32 bit instructions
and also allows relocations due to branch expansion.  It will also rewrite cbz/cbnz
instructions if they go out of range.

It also changes the JNI compiler to use the thumb2 assembler as opposed
to forcing it to use ARM32.  The trampoline compiler still uses ARM due to the
way it returns the address of its generated code.  A trampoline in thumb2 is the
same size as that in ARM anyway (8 bytes).

Provides a gtest for testing the thumb2 instruction output.  This gtest only runs
on the host as it uses arm-eabi-objdump to disassemble the generated code.  On the
target the output is not checked, but the assembler will still be run to perform
all its checks.

Change-Id: Icd9742b6f13541bec5b23097896727392e3a6fb6
diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc
index 64685c1..b607a1d 100644
--- a/compiler/utils/arm/assembler_arm.cc
+++ b/compiler/utils/arm/assembler_arm.cc
@@ -25,66 +25,16 @@
 namespace art {
 namespace arm {
 
-// Instruction encoding bits.
-enum {
-  H   = 1 << 5,   // halfword (or byte)
-  L   = 1 << 20,  // load (or store)
-  S   = 1 << 20,  // set condition code (or leave unchanged)
-  W   = 1 << 21,  // writeback base register (or leave unchanged)
-  A   = 1 << 21,  // accumulate in multiply instruction (or not)
-  B   = 1 << 22,  // unsigned byte (or word)
-  N   = 1 << 22,  // long (or short)
-  U   = 1 << 23,  // positive (or negative) offset/index
-  P   = 1 << 24,  // offset/pre-indexed addressing (or post-indexed addressing)
-  I   = 1 << 25,  // immediate shifter operand (or not)
-
-  B0 = 1,
-  B1 = 1 << 1,
-  B2 = 1 << 2,
-  B3 = 1 << 3,
-  B4 = 1 << 4,
-  B5 = 1 << 5,
-  B6 = 1 << 6,
-  B7 = 1 << 7,
-  B8 = 1 << 8,
-  B9 = 1 << 9,
-  B10 = 1 << 10,
-  B11 = 1 << 11,
-  B12 = 1 << 12,
-  B16 = 1 << 16,
-  B17 = 1 << 17,
-  B18 = 1 << 18,
-  B19 = 1 << 19,
-  B20 = 1 << 20,
-  B21 = 1 << 21,
-  B22 = 1 << 22,
-  B23 = 1 << 23,
-  B24 = 1 << 24,
-  B25 = 1 << 25,
-  B26 = 1 << 26,
-  B27 = 1 << 27,
-
-  // Instruction bit masks.
-  RdMask = 15 << 12,  // in str instruction
-  CondMask = 15 << 28,
-  CoprocessorMask = 15 << 8,
-  OpCodeMask = 15 << 21,  // in data-processing instructions
-  Imm24Mask = (1 << 24) - 1,
-  Off12Mask = (1 << 12) - 1,
-
-  // ldrex/strex register field encodings.
-  kLdExRnShift = 16,
-  kLdExRtShift = 12,
-  kStrExRnShift = 16,
-  kStrExRdShift = 12,
-  kStrExRtShift = 0,
-};
-
-
-static const char* kRegisterNames[] = {
+const char* kRegisterNames[] = {
   "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
   "fp", "ip", "sp", "lr", "pc"
 };
+
+const char* kConditionNames[] = {
+  "EQ", "NE", "CS", "CC", "MI", "PL", "VS", "VC", "HI", "LS", "GE", "LT", "GT",
+  "LE", "AL",
+};
+
 std::ostream& operator<<(std::ostream& os, const Register& rhs) {
   if (rhs >= R0 && rhs <= PC) {
     os << kRegisterNames[rhs];
@@ -114,11 +64,6 @@
   return os;
 }
 
-
-static const char* kConditionNames[] = {
-  "EQ", "NE", "CS", "CC", "MI", "PL", "VS", "VC", "HI", "LS", "GE", "LT", "GT",
-  "LE", "AL",
-};
 std::ostream& operator<<(std::ostream& os, const Condition& rhs) {
   if (rhs >= EQ && rhs <= AL) {
     os << kConditionNames[rhs];
@@ -128,1084 +73,218 @@
   return os;
 }
 
-void ArmAssembler::Emit(int32_t value) {
-  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
-  buffer_.Emit<int32_t>(value);
-}
 
 
-void ArmAssembler::EmitType01(Condition cond,
-                              int type,
-                              Opcode opcode,
-                              int set_cc,
-                              Register rn,
-                              Register rd,
-                              ShifterOperand so) {
-  CHECK_NE(rd, kNoRegister);
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = static_cast<int32_t>(cond) << kConditionShift |
-                     type << kTypeShift |
-                     static_cast<int32_t>(opcode) << kOpcodeShift |
-                     set_cc << kSShift |
-                     static_cast<int32_t>(rn) << kRnShift |
-                     static_cast<int32_t>(rd) << kRdShift |
-                     so.encoding();
-  Emit(encoding);
-}
-
-
-void ArmAssembler::EmitType5(Condition cond, int offset, bool link) {
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = static_cast<int32_t>(cond) << kConditionShift |
-                     5 << kTypeShift |
-                     (link ? 1 : 0) << kLinkShift;
-  Emit(ArmAssembler::EncodeBranchOffset(offset, encoding));
-}
-
-
-void ArmAssembler::EmitMemOp(Condition cond,
-                             bool load,
-                             bool byte,
-                             Register rd,
-                             Address ad) {
-  CHECK_NE(rd, kNoRegister);
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
-                     B26 |
-                     (load ? L : 0) |
-                     (byte ? B : 0) |
-                     (static_cast<int32_t>(rd) << kRdShift) |
-                     ad.encoding();
-  Emit(encoding);
-}
-
-
-void ArmAssembler::EmitMemOpAddressMode3(Condition cond,
-                                         int32_t mode,
-                                         Register rd,
-                                         Address ad) {
-  CHECK_NE(rd, kNoRegister);
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
-                     B22  |
-                     mode |
-                     (static_cast<int32_t>(rd) << kRdShift) |
-                     ad.encoding3();
-  Emit(encoding);
-}
-
-
-void ArmAssembler::EmitMultiMemOp(Condition cond,
-                                  BlockAddressMode am,
-                                  bool load,
-                                  Register base,
-                                  RegList regs) {
-  CHECK_NE(base, kNoRegister);
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
-                     B27 |
-                     am |
-                     (load ? L : 0) |
-                     (static_cast<int32_t>(base) << kRnShift) |
-                     regs;
-  Emit(encoding);
-}
-
-
-void ArmAssembler::EmitShiftImmediate(Condition cond,
-                                      Shift opcode,
-                                      Register rd,
-                                      Register rm,
-                                      ShifterOperand so) {
-  CHECK_NE(cond, kNoCondition);
-  CHECK_EQ(so.type(), 1U);
-  int32_t encoding = static_cast<int32_t>(cond) << kConditionShift |
-                     static_cast<int32_t>(MOV) << kOpcodeShift |
-                     static_cast<int32_t>(rd) << kRdShift |
-                     so.encoding() << kShiftImmShift |
-                     static_cast<int32_t>(opcode) << kShiftShift |
-                     static_cast<int32_t>(rm);
-  Emit(encoding);
-}
-
-
-void ArmAssembler::EmitShiftRegister(Condition cond,
-                                     Shift opcode,
-                                     Register rd,
-                                     Register rm,
-                                     ShifterOperand so) {
-  CHECK_NE(cond, kNoCondition);
-  CHECK_EQ(so.type(), 0U);
-  int32_t encoding = static_cast<int32_t>(cond) << kConditionShift |
-                     static_cast<int32_t>(MOV) << kOpcodeShift |
-                     static_cast<int32_t>(rd) << kRdShift |
-                     so.encoding() << kShiftRegisterShift |
-                     static_cast<int32_t>(opcode) << kShiftShift |
-                     B4 |
-                     static_cast<int32_t>(rm);
-  Emit(encoding);
-}
-
-
-void ArmAssembler::EmitBranch(Condition cond, Label* label, bool link) {
-  if (label->IsBound()) {
-    EmitType5(cond, label->Position() - buffer_.Size(), link);
-  } else {
-    int position = buffer_.Size();
-    // Use the offset field of the branch instruction for linking the sites.
-    EmitType5(cond, label->position_, link);
-    label->LinkTo(position);
-  }
-}
-
-void ArmAssembler::and_(Register rd, Register rn, ShifterOperand so,
-                        Condition cond) {
-  EmitType01(cond, so.type(), AND, 0, rn, rd, so);
-}
-
-
-void ArmAssembler::eor(Register rd, Register rn, ShifterOperand so,
-                       Condition cond) {
-  EmitType01(cond, so.type(), EOR, 0, rn, rd, so);
-}
-
-
-void ArmAssembler::sub(Register rd, Register rn, ShifterOperand so,
-                       Condition cond) {
-  EmitType01(cond, so.type(), SUB, 0, rn, rd, so);
-}
-
-void ArmAssembler::rsb(Register rd, Register rn, ShifterOperand so,
-                       Condition cond) {
-  EmitType01(cond, so.type(), RSB, 0, rn, rd, so);
-}
-
-void ArmAssembler::rsbs(Register rd, Register rn, ShifterOperand so,
-                        Condition cond) {
-  EmitType01(cond, so.type(), RSB, 1, rn, rd, so);
-}
-
-
-void ArmAssembler::add(Register rd, Register rn, ShifterOperand so,
-                       Condition cond) {
-  EmitType01(cond, so.type(), ADD, 0, rn, rd, so);
-}
-
-
-void ArmAssembler::adds(Register rd, Register rn, ShifterOperand so,
-                        Condition cond) {
-  EmitType01(cond, so.type(), ADD, 1, rn, rd, so);
-}
-
-
-void ArmAssembler::subs(Register rd, Register rn, ShifterOperand so,
-                        Condition cond) {
-  EmitType01(cond, so.type(), SUB, 1, rn, rd, so);
-}
-
-
-void ArmAssembler::adc(Register rd, Register rn, ShifterOperand so,
-                       Condition cond) {
-  EmitType01(cond, so.type(), ADC, 0, rn, rd, so);
-}
-
-
-void ArmAssembler::sbc(Register rd, Register rn, ShifterOperand so,
-                       Condition cond) {
-  EmitType01(cond, so.type(), SBC, 0, rn, rd, so);
-}
-
-
-void ArmAssembler::rsc(Register rd, Register rn, ShifterOperand so,
-                       Condition cond) {
-  EmitType01(cond, so.type(), RSC, 0, rn, rd, so);
-}
-
-
-void ArmAssembler::tst(Register rn, ShifterOperand so, Condition cond) {
-  CHECK_NE(rn, PC);  // Reserve tst pc instruction for exception handler marker.
-  EmitType01(cond, so.type(), TST, 1, rn, R0, so);
-}
-
-
-void ArmAssembler::teq(Register rn, ShifterOperand so, Condition cond) {
-  CHECK_NE(rn, PC);  // Reserve teq pc instruction for exception handler marker.
-  EmitType01(cond, so.type(), TEQ, 1, rn, R0, so);
-}
-
-
-void ArmAssembler::cmp(Register rn, ShifterOperand so, Condition cond) {
-  EmitType01(cond, so.type(), CMP, 1, rn, R0, so);
-}
-
-
-void ArmAssembler::cmn(Register rn, ShifterOperand so, Condition cond) {
-  EmitType01(cond, so.type(), CMN, 1, rn, R0, so);
-}
-
-
-void ArmAssembler::orr(Register rd, Register rn,
-                    ShifterOperand so, Condition cond) {
-  EmitType01(cond, so.type(), ORR, 0, rn, rd, so);
-}
-
-
-void ArmAssembler::orrs(Register rd, Register rn,
-                        ShifterOperand so, Condition cond) {
-  EmitType01(cond, so.type(), ORR, 1, rn, rd, so);
-}
-
-
-void ArmAssembler::mov(Register rd, ShifterOperand so, Condition cond) {
-  EmitType01(cond, so.type(), MOV, 0, R0, rd, so);
-}
-
-
-void ArmAssembler::movs(Register rd, ShifterOperand so, Condition cond) {
-  EmitType01(cond, so.type(), MOV, 1, R0, rd, so);
-}
-
-
-void ArmAssembler::bic(Register rd, Register rn, ShifterOperand so,
-                       Condition cond) {
-  EmitType01(cond, so.type(), BIC, 0, rn, rd, so);
-}
-
-
-void ArmAssembler::mvn(Register rd, ShifterOperand so, Condition cond) {
-  EmitType01(cond, so.type(), MVN, 0, R0, rd, so);
-}
-
-
-void ArmAssembler::mvns(Register rd, ShifterOperand so, Condition cond) {
-  EmitType01(cond, so.type(), MVN, 1, R0, rd, so);
-}
-
-
-void ArmAssembler::clz(Register rd, Register rm, Condition cond) {
-  CHECK_NE(rd, kNoRegister);
-  CHECK_NE(rm, kNoRegister);
-  CHECK_NE(cond, kNoCondition);
-  CHECK_NE(rd, PC);
-  CHECK_NE(rm, PC);
-  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
-                     B24 | B22 | B21 | (0xf << 16) |
-                     (static_cast<int32_t>(rd) << kRdShift) |
-                     (0xf << 8) | B4 | static_cast<int32_t>(rm);
-  Emit(encoding);
-}
-
-
-void ArmAssembler::movw(Register rd, uint16_t imm16, Condition cond) {
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = static_cast<int32_t>(cond) << kConditionShift |
-                     B25 | B24 | ((imm16 >> 12) << 16) |
-                     static_cast<int32_t>(rd) << kRdShift | (imm16 & 0xfff);
-  Emit(encoding);
-}
-
-
-void ArmAssembler::movt(Register rd, uint16_t imm16, Condition cond) {
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = static_cast<int32_t>(cond) << kConditionShift |
-                     B25 | B24 | B22 | ((imm16 >> 12) << 16) |
-                     static_cast<int32_t>(rd) << kRdShift | (imm16 & 0xfff);
-  Emit(encoding);
-}
-
-
-void ArmAssembler::EmitMulOp(Condition cond, int32_t opcode,
-                             Register rd, Register rn,
-                             Register rm, Register rs) {
-  CHECK_NE(rd, kNoRegister);
-  CHECK_NE(rn, kNoRegister);
-  CHECK_NE(rm, kNoRegister);
-  CHECK_NE(rs, kNoRegister);
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = opcode |
-      (static_cast<int32_t>(cond) << kConditionShift) |
-      (static_cast<int32_t>(rn) << kRnShift) |
-      (static_cast<int32_t>(rd) << kRdShift) |
-      (static_cast<int32_t>(rs) << kRsShift) |
-      B7 | B4 |
-      (static_cast<int32_t>(rm) << kRmShift);
-  Emit(encoding);
-}
-
-
-void ArmAssembler::mul(Register rd, Register rn, Register rm, Condition cond) {
-  // Assembler registers rd, rn, rm are encoded as rn, rm, rs.
-  EmitMulOp(cond, 0, R0, rd, rn, rm);
-}
-
-
-void ArmAssembler::mla(Register rd, Register rn, Register rm, Register ra,
-                       Condition cond) {
-  // Assembler registers rd, rn, rm, ra are encoded as rn, rm, rs, rd.
-  EmitMulOp(cond, B21, ra, rd, rn, rm);
-}
-
-
-void ArmAssembler::mls(Register rd, Register rn, Register rm, Register ra,
-                       Condition cond) {
-  // Assembler registers rd, rn, rm, ra are encoded as rn, rm, rs, rd.
-  EmitMulOp(cond, B22 | B21, ra, rd, rn, rm);
-}
-
-
-void ArmAssembler::umull(Register rd_lo, Register rd_hi, Register rn,
-                         Register rm, Condition cond) {
-  // Assembler registers rd_lo, rd_hi, rn, rm are encoded as rd, rn, rm, rs.
-  EmitMulOp(cond, B23, rd_lo, rd_hi, rn, rm);
-}
-
-
-void ArmAssembler::ldr(Register rd, Address ad, Condition cond) {
-  EmitMemOp(cond, true, false, rd, ad);
-}
-
-
-void ArmAssembler::str(Register rd, Address ad, Condition cond) {
-  EmitMemOp(cond, false, false, rd, ad);
-}
-
-
-void ArmAssembler::ldrb(Register rd, Address ad, Condition cond) {
-  EmitMemOp(cond, true, true, rd, ad);
-}
-
-
-void ArmAssembler::strb(Register rd, Address ad, Condition cond) {
-  EmitMemOp(cond, false, true, rd, ad);
-}
-
-
-void ArmAssembler::ldrh(Register rd, Address ad, Condition cond) {
-  EmitMemOpAddressMode3(cond, L | B7 | H | B4, rd, ad);
-}
-
-
-void ArmAssembler::strh(Register rd, Address ad, Condition cond) {
-  EmitMemOpAddressMode3(cond, B7 | H | B4, rd, ad);
-}
-
-
-void ArmAssembler::ldrsb(Register rd, Address ad, Condition cond) {
-  EmitMemOpAddressMode3(cond, L | B7 | B6 | B4, rd, ad);
-}
-
-
-void ArmAssembler::ldrsh(Register rd, Address ad, Condition cond) {
-  EmitMemOpAddressMode3(cond, L | B7 | B6 | H | B4, rd, ad);
-}
-
-
-void ArmAssembler::ldrd(Register rd, Address ad, Condition cond) {
-  CHECK_EQ(rd % 2, 0);
-  EmitMemOpAddressMode3(cond, B7 | B6 | B4, rd, ad);
-}
-
-
-void ArmAssembler::strd(Register rd, Address ad, Condition cond) {
-  CHECK_EQ(rd % 2, 0);
-  EmitMemOpAddressMode3(cond, B7 | B6 | B5 | B4, rd, ad);
-}
-
-
-void ArmAssembler::ldm(BlockAddressMode am,
-                       Register base,
-                       RegList regs,
-                       Condition cond) {
-  EmitMultiMemOp(cond, am, true, base, regs);
-}
-
-
-void ArmAssembler::stm(BlockAddressMode am,
-                       Register base,
-                       RegList regs,
-                       Condition cond) {
-  EmitMultiMemOp(cond, am, false, base, regs);
-}
-
-
-void ArmAssembler::ldrex(Register rt, Register rn, Condition cond) {
-  CHECK_NE(rn, kNoRegister);
-  CHECK_NE(rt, kNoRegister);
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
-                     B24 |
-                     B23 |
-                     L   |
-                     (static_cast<int32_t>(rn) << kLdExRnShift) |
-                     (static_cast<int32_t>(rt) << kLdExRtShift) |
-                     B11 | B10 | B9 | B8 | B7 | B4 | B3 | B2 | B1 | B0;
-  Emit(encoding);
-}
-
-
-void ArmAssembler::strex(Register rd,
-                         Register rt,
-                         Register rn,
-                         Condition cond) {
-  CHECK_NE(rn, kNoRegister);
-  CHECK_NE(rd, kNoRegister);
-  CHECK_NE(rt, kNoRegister);
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
-                     B24 |
-                     B23 |
-                     (static_cast<int32_t>(rn) << kStrExRnShift) |
-                     (static_cast<int32_t>(rd) << kStrExRdShift) |
-                     B11 | B10 | B9 | B8 | B7 | B4 |
-                     (static_cast<int32_t>(rt) << kStrExRtShift);
-  Emit(encoding);
-}
-
-
-void ArmAssembler::clrex() {
-  int32_t encoding = (kSpecialCondition << kConditionShift) |
-                     B26 | B24 | B22 | B21 | B20 | (0xff << 12) | B4 | 0xf;
-  Emit(encoding);
-}
-
-
-void ArmAssembler::nop(Condition cond) {
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
-                     B25 | B24 | B21 | (0xf << 12);
-  Emit(encoding);
-}
-
-
-void ArmAssembler::vmovsr(SRegister sn, Register rt, Condition cond) {
-  CHECK_NE(sn, kNoSRegister);
-  CHECK_NE(rt, kNoRegister);
-  CHECK_NE(rt, SP);
-  CHECK_NE(rt, PC);
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
-                     B27 | B26 | B25 |
-                     ((static_cast<int32_t>(sn) >> 1)*B16) |
-                     (static_cast<int32_t>(rt)*B12) | B11 | B9 |
-                     ((static_cast<int32_t>(sn) & 1)*B7) | B4;
-  Emit(encoding);
-}
-
-
-void ArmAssembler::vmovrs(Register rt, SRegister sn, Condition cond) {
-  CHECK_NE(sn, kNoSRegister);
-  CHECK_NE(rt, kNoRegister);
-  CHECK_NE(rt, SP);
-  CHECK_NE(rt, PC);
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
-                     B27 | B26 | B25 | B20 |
-                     ((static_cast<int32_t>(sn) >> 1)*B16) |
-                     (static_cast<int32_t>(rt)*B12) | B11 | B9 |
-                     ((static_cast<int32_t>(sn) & 1)*B7) | B4;
-  Emit(encoding);
-}
-
-
-void ArmAssembler::vmovsrr(SRegister sm, Register rt, Register rt2,
-                           Condition cond) {
-  CHECK_NE(sm, kNoSRegister);
-  CHECK_NE(sm, S31);
-  CHECK_NE(rt, kNoRegister);
-  CHECK_NE(rt, SP);
-  CHECK_NE(rt, PC);
-  CHECK_NE(rt2, kNoRegister);
-  CHECK_NE(rt2, SP);
-  CHECK_NE(rt2, PC);
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
-                     B27 | B26 | B22 |
-                     (static_cast<int32_t>(rt2)*B16) |
-                     (static_cast<int32_t>(rt)*B12) | B11 | B9 |
-                     ((static_cast<int32_t>(sm) & 1)*B5) | B4 |
-                     (static_cast<int32_t>(sm) >> 1);
-  Emit(encoding);
-}
-
-
-void ArmAssembler::vmovrrs(Register rt, Register rt2, SRegister sm,
-                           Condition cond) {
-  CHECK_NE(sm, kNoSRegister);
-  CHECK_NE(sm, S31);
-  CHECK_NE(rt, kNoRegister);
-  CHECK_NE(rt, SP);
-  CHECK_NE(rt, PC);
-  CHECK_NE(rt2, kNoRegister);
-  CHECK_NE(rt2, SP);
-  CHECK_NE(rt2, PC);
-  CHECK_NE(rt, rt2);
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
-                     B27 | B26 | B22 | B20 |
-                     (static_cast<int32_t>(rt2)*B16) |
-                     (static_cast<int32_t>(rt)*B12) | B11 | B9 |
-                     ((static_cast<int32_t>(sm) & 1)*B5) | B4 |
-                     (static_cast<int32_t>(sm) >> 1);
-  Emit(encoding);
-}
-
-
-void ArmAssembler::vmovdrr(DRegister dm, Register rt, Register rt2,
-                           Condition cond) {
-  CHECK_NE(dm, kNoDRegister);
-  CHECK_NE(rt, kNoRegister);
-  CHECK_NE(rt, SP);
-  CHECK_NE(rt, PC);
-  CHECK_NE(rt2, kNoRegister);
-  CHECK_NE(rt2, SP);
-  CHECK_NE(rt2, PC);
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
-                     B27 | B26 | B22 |
-                     (static_cast<int32_t>(rt2)*B16) |
-                     (static_cast<int32_t>(rt)*B12) | B11 | B9 | B8 |
-                     ((static_cast<int32_t>(dm) >> 4)*B5) | B4 |
-                     (static_cast<int32_t>(dm) & 0xf);
-  Emit(encoding);
-}
-
-
-void ArmAssembler::vmovrrd(Register rt, Register rt2, DRegister dm,
-                           Condition cond) {
-  CHECK_NE(dm, kNoDRegister);
-  CHECK_NE(rt, kNoRegister);
-  CHECK_NE(rt, SP);
-  CHECK_NE(rt, PC);
-  CHECK_NE(rt2, kNoRegister);
-  CHECK_NE(rt2, SP);
-  CHECK_NE(rt2, PC);
-  CHECK_NE(rt, rt2);
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
-                     B27 | B26 | B22 | B20 |
-                     (static_cast<int32_t>(rt2)*B16) |
-                     (static_cast<int32_t>(rt)*B12) | B11 | B9 | B8 |
-                     ((static_cast<int32_t>(dm) >> 4)*B5) | B4 |
-                     (static_cast<int32_t>(dm) & 0xf);
-  Emit(encoding);
-}
-
-
-void ArmAssembler::vldrs(SRegister sd, Address ad, Condition cond) {
-  CHECK_NE(sd, kNoSRegister);
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
-                     B27 | B26 | B24 | B20 |
-                     ((static_cast<int32_t>(sd) & 1)*B22) |
-                     ((static_cast<int32_t>(sd) >> 1)*B12) |
-                     B11 | B9 | ad.vencoding();
-  Emit(encoding);
-}
-
-
-void ArmAssembler::vstrs(SRegister sd, Address ad, Condition cond) {
-  CHECK_NE(static_cast<Register>(ad.encoding_ & (0xf << kRnShift)), PC);
-  CHECK_NE(sd, kNoSRegister);
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
-                     B27 | B26 | B24 |
-                     ((static_cast<int32_t>(sd) & 1)*B22) |
-                     ((static_cast<int32_t>(sd) >> 1)*B12) |
-                     B11 | B9 | ad.vencoding();
-  Emit(encoding);
-}
-
-
-void ArmAssembler::vldrd(DRegister dd, Address ad, Condition cond) {
-  CHECK_NE(dd, kNoDRegister);
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
-                     B27 | B26 | B24 | B20 |
-                     ((static_cast<int32_t>(dd) >> 4)*B22) |
-                     ((static_cast<int32_t>(dd) & 0xf)*B12) |
-                     B11 | B9 | B8 | ad.vencoding();
-  Emit(encoding);
-}
-
-
-void ArmAssembler::vstrd(DRegister dd, Address ad, Condition cond) {
-  CHECK_NE(static_cast<Register>(ad.encoding_ & (0xf << kRnShift)), PC);
-  CHECK_NE(dd, kNoDRegister);
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
-                     B27 | B26 | B24 |
-                     ((static_cast<int32_t>(dd) >> 4)*B22) |
-                     ((static_cast<int32_t>(dd) & 0xf)*B12) |
-                     B11 | B9 | B8 | ad.vencoding();
-  Emit(encoding);
-}
-
-
-void ArmAssembler::EmitVFPsss(Condition cond, int32_t opcode,
-                              SRegister sd, SRegister sn, SRegister sm) {
-  CHECK_NE(sd, kNoSRegister);
-  CHECK_NE(sn, kNoSRegister);
-  CHECK_NE(sm, kNoSRegister);
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
-                     B27 | B26 | B25 | B11 | B9 | opcode |
-                     ((static_cast<int32_t>(sd) & 1)*B22) |
-                     ((static_cast<int32_t>(sn) >> 1)*B16) |
-                     ((static_cast<int32_t>(sd) >> 1)*B12) |
-                     ((static_cast<int32_t>(sn) & 1)*B7) |
-                     ((static_cast<int32_t>(sm) & 1)*B5) |
-                     (static_cast<int32_t>(sm) >> 1);
-  Emit(encoding);
-}
-
-
-void ArmAssembler::EmitVFPddd(Condition cond, int32_t opcode,
-                              DRegister dd, DRegister dn, DRegister dm) {
-  CHECK_NE(dd, kNoDRegister);
-  CHECK_NE(dn, kNoDRegister);
-  CHECK_NE(dm, kNoDRegister);
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
-                     B27 | B26 | B25 | B11 | B9 | B8 | opcode |
-                     ((static_cast<int32_t>(dd) >> 4)*B22) |
-                     ((static_cast<int32_t>(dn) & 0xf)*B16) |
-                     ((static_cast<int32_t>(dd) & 0xf)*B12) |
-                     ((static_cast<int32_t>(dn) >> 4)*B7) |
-                     ((static_cast<int32_t>(dm) >> 4)*B5) |
-                     (static_cast<int32_t>(dm) & 0xf);
-  Emit(encoding);
-}
-
-
-void ArmAssembler::vmovs(SRegister sd, SRegister sm, Condition cond) {
-  EmitVFPsss(cond, B23 | B21 | B20 | B6, sd, S0, sm);
-}
-
-
-void ArmAssembler::vmovd(DRegister dd, DRegister dm, Condition cond) {
-  EmitVFPddd(cond, B23 | B21 | B20 | B6, dd, D0, dm);
-}
-
-
-bool ArmAssembler::vmovs(SRegister sd, float s_imm, Condition cond) {
-  uint32_t imm32 = bit_cast<uint32_t, float>(s_imm);
-  if (((imm32 & ((1 << 19) - 1)) == 0) &&
-      ((((imm32 >> 25) & ((1 << 6) - 1)) == (1 << 5)) ||
-       (((imm32 >> 25) & ((1 << 6) - 1)) == ((1 << 5) -1)))) {
-    uint8_t imm8 = ((imm32 >> 31) << 7) | (((imm32 >> 29) & 1) << 6) |
-        ((imm32 >> 19) & ((1 << 6) -1));
-    EmitVFPsss(cond, B23 | B21 | B20 | ((imm8 >> 4)*B16) | (imm8 & 0xf),
-               sd, S0, S0);
-    return true;
-  }
-  return false;
-}
-
-
-bool ArmAssembler::vmovd(DRegister dd, double d_imm, Condition cond) {
-  uint64_t imm64 = bit_cast<uint64_t, double>(d_imm);
-  if (((imm64 & ((1LL << 48) - 1)) == 0) &&
-      ((((imm64 >> 54) & ((1 << 9) - 1)) == (1 << 8)) ||
-       (((imm64 >> 54) & ((1 << 9) - 1)) == ((1 << 8) -1)))) {
-    uint8_t imm8 = ((imm64 >> 63) << 7) | (((imm64 >> 61) & 1) << 6) |
-        ((imm64 >> 48) & ((1 << 6) -1));
-    EmitVFPddd(cond, B23 | B21 | B20 | ((imm8 >> 4)*B16) | B8 | (imm8 & 0xf),
-               dd, D0, D0);
-    return true;
-  }
-  return false;
-}
-
-
-void ArmAssembler::vadds(SRegister sd, SRegister sn, SRegister sm,
-                         Condition cond) {
-  EmitVFPsss(cond, B21 | B20, sd, sn, sm);
-}
-
-
-void ArmAssembler::vaddd(DRegister dd, DRegister dn, DRegister dm,
-                         Condition cond) {
-  EmitVFPddd(cond, B21 | B20, dd, dn, dm);
-}
-
-
-void ArmAssembler::vsubs(SRegister sd, SRegister sn, SRegister sm,
-                         Condition cond) {
-  EmitVFPsss(cond, B21 | B20 | B6, sd, sn, sm);
-}
-
-
-void ArmAssembler::vsubd(DRegister dd, DRegister dn, DRegister dm,
-                         Condition cond) {
-  EmitVFPddd(cond, B21 | B20 | B6, dd, dn, dm);
-}
-
-
-void ArmAssembler::vmuls(SRegister sd, SRegister sn, SRegister sm,
-                         Condition cond) {
-  EmitVFPsss(cond, B21, sd, sn, sm);
-}
-
-
-void ArmAssembler::vmuld(DRegister dd, DRegister dn, DRegister dm,
-                         Condition cond) {
-  EmitVFPddd(cond, B21, dd, dn, dm);
-}
-
-
-void ArmAssembler::vmlas(SRegister sd, SRegister sn, SRegister sm,
-                         Condition cond) {
-  EmitVFPsss(cond, 0, sd, sn, sm);
-}
-
-
-void ArmAssembler::vmlad(DRegister dd, DRegister dn, DRegister dm,
-                         Condition cond) {
-  EmitVFPddd(cond, 0, dd, dn, dm);
-}
-
-
-void ArmAssembler::vmlss(SRegister sd, SRegister sn, SRegister sm,
-                         Condition cond) {
-  EmitVFPsss(cond, B6, sd, sn, sm);
-}
-
-
-void ArmAssembler::vmlsd(DRegister dd, DRegister dn, DRegister dm,
-                         Condition cond) {
-  EmitVFPddd(cond, B6, dd, dn, dm);
-}
-
-
-void ArmAssembler::vdivs(SRegister sd, SRegister sn, SRegister sm,
-                         Condition cond) {
-  EmitVFPsss(cond, B23, sd, sn, sm);
-}
-
-
-void ArmAssembler::vdivd(DRegister dd, DRegister dn, DRegister dm,
-                         Condition cond) {
-  EmitVFPddd(cond, B23, dd, dn, dm);
-}
-
-
-void ArmAssembler::vabss(SRegister sd, SRegister sm, Condition cond) {
-  EmitVFPsss(cond, B23 | B21 | B20 | B7 | B6, sd, S0, sm);
-}
-
-
-void ArmAssembler::vabsd(DRegister dd, DRegister dm, Condition cond) {
-  EmitVFPddd(cond, B23 | B21 | B20 | B7 | B6, dd, D0, dm);
-}
-
-
-void ArmAssembler::vnegs(SRegister sd, SRegister sm, Condition cond) {
-  EmitVFPsss(cond, B23 | B21 | B20 | B16 | B6, sd, S0, sm);
-}
-
-
-void ArmAssembler::vnegd(DRegister dd, DRegister dm, Condition cond) {
-  EmitVFPddd(cond, B23 | B21 | B20 | B16 | B6, dd, D0, dm);
-}
-
-
-void ArmAssembler::vsqrts(SRegister sd, SRegister sm, Condition cond) {
-  EmitVFPsss(cond, B23 | B21 | B20 | B16 | B7 | B6, sd, S0, sm);
-}
-
-void ArmAssembler::vsqrtd(DRegister dd, DRegister dm, Condition cond) {
-  EmitVFPddd(cond, B23 | B21 | B20 | B16 | B7 | B6, dd, D0, dm);
-}
-
-
-void ArmAssembler::EmitVFPsd(Condition cond, int32_t opcode,
-                             SRegister sd, DRegister dm) {
-  CHECK_NE(sd, kNoSRegister);
-  CHECK_NE(dm, kNoDRegister);
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
-                     B27 | B26 | B25 | B11 | B9 | opcode |
-                     ((static_cast<int32_t>(sd) & 1)*B22) |
-                     ((static_cast<int32_t>(sd) >> 1)*B12) |
-                     ((static_cast<int32_t>(dm) >> 4)*B5) |
-                     (static_cast<int32_t>(dm) & 0xf);
-  Emit(encoding);
-}
-
-
-void ArmAssembler::EmitVFPds(Condition cond, int32_t opcode,
-                             DRegister dd, SRegister sm) {
-  CHECK_NE(dd, kNoDRegister);
-  CHECK_NE(sm, kNoSRegister);
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
-                     B27 | B26 | B25 | B11 | B9 | opcode |
-                     ((static_cast<int32_t>(dd) >> 4)*B22) |
-                     ((static_cast<int32_t>(dd) & 0xf)*B12) |
-                     ((static_cast<int32_t>(sm) & 1)*B5) |
-                     (static_cast<int32_t>(sm) >> 1);
-  Emit(encoding);
-}
-
-
-void ArmAssembler::vcvtsd(SRegister sd, DRegister dm, Condition cond) {
-  EmitVFPsd(cond, B23 | B21 | B20 | B18 | B17 | B16 | B8 | B7 | B6, sd, dm);
-}
-
-
-void ArmAssembler::vcvtds(DRegister dd, SRegister sm, Condition cond) {
-  EmitVFPds(cond, B23 | B21 | B20 | B18 | B17 | B16 | B7 | B6, dd, sm);
-}
-
-
-void ArmAssembler::vcvtis(SRegister sd, SRegister sm, Condition cond) {
-  EmitVFPsss(cond, B23 | B21 | B20 | B19 | B18 | B16 | B7 | B6, sd, S0, sm);
-}
-
-
-void ArmAssembler::vcvtid(SRegister sd, DRegister dm, Condition cond) {
-  EmitVFPsd(cond, B23 | B21 | B20 | B19 | B18 | B16 | B8 | B7 | B6, sd, dm);
-}
-
-
-void ArmAssembler::vcvtsi(SRegister sd, SRegister sm, Condition cond) {
-  EmitVFPsss(cond, B23 | B21 | B20 | B19 | B7 | B6, sd, S0, sm);
-}
-
-
-void ArmAssembler::vcvtdi(DRegister dd, SRegister sm, Condition cond) {
-  EmitVFPds(cond, B23 | B21 | B20 | B19 | B8 | B7 | B6, dd, sm);
-}
-
-
-void ArmAssembler::vcvtus(SRegister sd, SRegister sm, Condition cond) {
-  EmitVFPsss(cond, B23 | B21 | B20 | B19 | B18 | B7 | B6, sd, S0, sm);
-}
-
-
-void ArmAssembler::vcvtud(SRegister sd, DRegister dm, Condition cond) {
-  EmitVFPsd(cond, B23 | B21 | B20 | B19 | B18 | B8 | B7 | B6, sd, dm);
-}
-
-
-void ArmAssembler::vcvtsu(SRegister sd, SRegister sm, Condition cond) {
-  EmitVFPsss(cond, B23 | B21 | B20 | B19 | B6, sd, S0, sm);
-}
-
-
-void ArmAssembler::vcvtdu(DRegister dd, SRegister sm, Condition cond) {
-  EmitVFPds(cond, B23 | B21 | B20 | B19 | B8 | B6, dd, sm);
-}
-
-
-void ArmAssembler::vcmps(SRegister sd, SRegister sm, Condition cond) {
-  EmitVFPsss(cond, B23 | B21 | B20 | B18 | B6, sd, S0, sm);
-}
-
-
-void ArmAssembler::vcmpd(DRegister dd, DRegister dm, Condition cond) {
-  EmitVFPddd(cond, B23 | B21 | B20 | B18 | B6, dd, D0, dm);
-}
-
-
-void ArmAssembler::vcmpsz(SRegister sd, Condition cond) {
-  EmitVFPsss(cond, B23 | B21 | B20 | B18 | B16 | B6, sd, S0, S0);
-}
-
-
-void ArmAssembler::vcmpdz(DRegister dd, Condition cond) {
-  EmitVFPddd(cond, B23 | B21 | B20 | B18 | B16 | B6, dd, D0, D0);
-}
-
-
-void ArmAssembler::vmstat(Condition cond) {  // VMRS APSR_nzcv, FPSCR
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
-                     B27 | B26 | B25 | B23 | B22 | B21 | B20 | B16 |
-                     (static_cast<int32_t>(PC)*B12) |
-                     B11 | B9 | B4;
-  Emit(encoding);
-}
-
-
-void ArmAssembler::svc(uint32_t imm24) {
-  CHECK(IsUint(24, imm24)) << imm24;
-  int32_t encoding = (AL << kConditionShift) | B27 | B26 | B25 | B24 | imm24;
-  Emit(encoding);
-}
-
-
-void ArmAssembler::bkpt(uint16_t imm16) {
-  int32_t encoding = (AL << kConditionShift) | B24 | B21 |
-                     ((imm16 >> 4) << 8) | B6 | B5 | B4 | (imm16 & 0xf);
-  Emit(encoding);
-}
-
-
-void ArmAssembler::b(Label* label, Condition cond) {
-  EmitBranch(cond, label, false);
-}
-
-
-void ArmAssembler::bl(Label* label, Condition cond) {
-  EmitBranch(cond, label, true);
-}
-
-
-void ArmAssembler::blx(Register rm, Condition cond) {
-  CHECK_NE(rm, kNoRegister);
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
-                     B24 | B21 | (0xfff << 8) | B5 | B4 |
-                     (static_cast<int32_t>(rm) << kRmShift);
-  Emit(encoding);
-}
-
-void ArmAssembler::bx(Register rm, Condition cond) {
-  CHECK_NE(rm, kNoRegister);
-  CHECK_NE(cond, kNoCondition);
-  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
-                     B24 | B21 | (0xfff << 8) | B4 |
-                     (static_cast<int32_t>(rm) << kRmShift);
-  Emit(encoding);
-}
-
-void ArmAssembler::MarkExceptionHandler(Label* label) {
-  EmitType01(AL, 1, TST, 1, PC, R0, ShifterOperand(0));
-  Label l;
-  b(&l);
-  EmitBranch(AL, label, false);
-  Bind(&l);
-}
-
-
-void ArmAssembler::Bind(Label* label) {
-  CHECK(!label->IsBound());
-  int bound_pc = buffer_.Size();
-  while (label->IsLinked()) {
-    int32_t position = label->Position();
-    int32_t next = buffer_.Load<int32_t>(position);
-    int32_t encoded = ArmAssembler::EncodeBranchOffset(bound_pc - position, next);
-    buffer_.Store<int32_t>(position, encoded);
-    label->position_ = ArmAssembler::DecodeBranchOffset(next);
-  }
-  label->BindTo(bound_pc);
-}
-
-
-void ArmAssembler::EncodeUint32InTstInstructions(uint32_t data) {
-  // TODO: Consider using movw ip, <16 bits>.
-  while (!IsUint(8, data)) {
-    tst(R0, ShifterOperand(data & 0xFF), VS);
-    data >>= 8;
-  }
-  tst(R0, ShifterOperand(data), MI);
-}
-
-
-int32_t ArmAssembler::EncodeBranchOffset(int offset, int32_t inst) {
-  // The offset is off by 8 due to the way the ARM CPUs read PC.
-  offset -= 8;
-  CHECK_ALIGNED(offset, 4);
-  CHECK(IsInt(POPCOUNT(kBranchOffsetMask), offset)) << offset;
-
-  // Properly preserve only the bits supported in the instruction.
-  offset >>= 2;
-  offset &= kBranchOffsetMask;
-  return (inst & ~kBranchOffsetMask) | offset;
-}
-
-
-int ArmAssembler::DecodeBranchOffset(int32_t inst) {
-  // Sign-extend, left-shift by 2, then add 8.
-  return ((((inst & kBranchOffsetMask) << 8) >> 6) + 8);
-}
-
-void ArmAssembler::AddConstant(Register rd, int32_t value, Condition cond) {
-  AddConstant(rd, rd, value, cond);
-}
-
-
-void ArmAssembler::AddConstant(Register rd, Register rn, int32_t value,
-                               Condition cond) {
-  if (value == 0) {
-    if (rd != rn) {
-      mov(rd, ShifterOperand(rn), cond);
-    }
-    return;
-  }
-  // We prefer to select the shorter code sequence rather than selecting add for
-  // positive values and sub for negatives ones, which would slightly improve
-  // the readability of generated code for some constants.
-  ShifterOperand shifter_op;
-  if (ShifterOperand::CanHold(value, &shifter_op)) {
-    add(rd, rn, shifter_op, cond);
-  } else if (ShifterOperand::CanHold(-value, &shifter_op)) {
-    sub(rd, rn, shifter_op, cond);
-  } else {
-    CHECK(rn != IP);
-    if (ShifterOperand::CanHold(~value, &shifter_op)) {
-      mvn(IP, shifter_op, cond);
-      add(rd, rn, ShifterOperand(IP), cond);
-    } else if (ShifterOperand::CanHold(~(-value), &shifter_op)) {
-      mvn(IP, shifter_op, cond);
-      sub(rd, rn, ShifterOperand(IP), cond);
-    } else {
-      movw(IP, Low16Bits(value), cond);
-      uint16_t value_high = High16Bits(value);
-      if (value_high != 0) {
-        movt(IP, value_high, cond);
+uint32_t ShifterOperand::encodingArm() const {
+  CHECK(is_valid());
+  switch (type_) {
+    case kImmediate:
+      if (is_rotate_) {
+        return (rotate_ << kRotateShift) | (immed_ << kImmed8Shift);
+      } else {
+        return immed_;
       }
-      add(rd, rn, ShifterOperand(IP), cond);
-    }
-  }
-}
-
-
-void ArmAssembler::AddConstantSetFlags(Register rd, Register rn, int32_t value,
-                                       Condition cond) {
-  ShifterOperand shifter_op;
-  if (ShifterOperand::CanHold(value, &shifter_op)) {
-    adds(rd, rn, shifter_op, cond);
-  } else if (ShifterOperand::CanHold(-value, &shifter_op)) {
-    subs(rd, rn, shifter_op, cond);
-  } else {
-    CHECK(rn != IP);
-    if (ShifterOperand::CanHold(~value, &shifter_op)) {
-      mvn(IP, shifter_op, cond);
-      adds(rd, rn, ShifterOperand(IP), cond);
-    } else if (ShifterOperand::CanHold(~(-value), &shifter_op)) {
-      mvn(IP, shifter_op, cond);
-      subs(rd, rn, ShifterOperand(IP), cond);
-    } else {
-      movw(IP, Low16Bits(value), cond);
-      uint16_t value_high = High16Bits(value);
-      if (value_high != 0) {
-        movt(IP, value_high, cond);
+      break;
+    case kRegister:
+      if (is_shift_) {
+        // Shifted immediate or register.
+        if (rs_ == kNoRegister) {
+          // Immediate shift.
+          return immed_ << kShiftImmShift |
+                          static_cast<uint32_t>(shift_) << kShiftShift |
+                          static_cast<uint32_t>(rm_);
+        } else {
+          // Register shift.
+          return static_cast<uint32_t>(rs_) << kShiftRegisterShift |
+              static_cast<uint32_t>(shift_) << kShiftShift | (1 << 4) |
+              static_cast<uint32_t>(rm_);
+        }
+      } else {
+        // Simple register
+        return static_cast<uint32_t>(rm_);
       }
-      adds(rd, rn, ShifterOperand(IP), cond);
-    }
+      break;
+    default:
+      // Can't get here.
+      LOG(FATAL) << "Invalid shifter operand for ARM";
+      return 0;
   }
 }
 
-
-void ArmAssembler::LoadImmediate(Register rd, int32_t value, Condition cond) {
-  ShifterOperand shifter_op;
-  if (ShifterOperand::CanHold(value, &shifter_op)) {
-    mov(rd, shifter_op, cond);
-  } else if (ShifterOperand::CanHold(~value, &shifter_op)) {
-    mvn(rd, shifter_op, cond);
+uint32_t ShifterOperand::encodingThumb(int version) const {
+  CHECK(version == 1 || version == 2);
+  if (version == 1) {
+    LOG(FATAL) << "Invalid use of encodingThumb with version 1";
   } else {
-    movw(rd, Low16Bits(value), cond);
-    uint16_t value_high = High16Bits(value);
-    if (value_high != 0) {
-      movt(rd, value_high, cond);
+    switch (type_) {
+      case kImmediate:
+        return immed_;
+      case kRegister:
+        if (is_shift_) {
+          // Shifted immediate or register.
+          if (rs_ == kNoRegister) {
+            // Immediate shift.
+            if (shift_ == RRX) {
+              // RRX is encoded as a ROR with imm 0.
+              return ROR << 4 | static_cast<uint32_t>(rm_);
+            } else {
+              uint32_t imm3 = immed_ >> 2;
+              uint32_t imm2 = immed_ & 0b11;
+
+              return imm3 << 12 | imm2 << 6 | shift_ << 4 |
+                  static_cast<uint32_t>(rm_);
+            }
+          } else {
+            LOG(FATAL) << "No register-shifted register instruction available in thumb";
+            return 0;
+          }
+        } else {
+          // Simple register
+          return static_cast<uint32_t>(rm_);
+        }
+        break;
+      default:
+        // Can't get here.
+        LOG(FATAL) << "Invalid shifter operand for thumb";
+        return 0;
     }
   }
+  return 0;
+}
+
+bool ShifterOperand::CanHoldThumb(Register rd, Register rn, Opcode opcode,
+                                  uint32_t immediate, ShifterOperand* shifter_op) {
+  shifter_op->type_ = kImmediate;
+  shifter_op->immed_ = immediate;
+  shifter_op->is_shift_ = false;
+  shifter_op->is_rotate_ = false;
+  switch (opcode) {
+    case ADD:
+    case SUB:
+      if (rn == SP) {
+        if (rd == SP) {
+          return immediate < (1 << 9);    // 9 bits allowed.
+        } else {
+          return immediate < (1 << 12);   // 12 bits.
+        }
+      }
+      if (immediate < (1 << 12)) {    // Less than (or equal to) 12 bits can always be done.
+        return true;
+      }
+      return ArmAssembler::ModifiedImmediate(immediate) != kInvalidModifiedImmediate;
+
+    case MOV:
+      if (immediate < (1 << 12)) {    // Less than (or equal to) 12 bits can always be done.
+        return true;
+      }
+      return ArmAssembler::ModifiedImmediate(immediate) != kInvalidModifiedImmediate;
+    case MVN:
+    default:
+      return ArmAssembler::ModifiedImmediate(immediate) != kInvalidModifiedImmediate;
+  }
 }
 
+uint32_t Address::encodingArm() const {
+  CHECK(IsAbsoluteUint(12, offset_));
+  uint32_t encoding;
+  if (offset_ < 0) {
+    encoding = (am_ ^ (1 << kUShift)) | -offset_;  // Flip U to adjust sign.
+  } else {
+    encoding =  am_ | offset_;
+  }
+  encoding |= static_cast<uint32_t>(rn_) << kRnShift;
+  return encoding;
+}
 
-bool Address::CanHoldLoadOffset(LoadOperandType type, int offset) {
+
+uint32_t Address::encodingThumb(int version) const {
+  CHECK(version == 1 || version == 2);
+  uint32_t encoding = 0;
+  if (version == 2) {
+      encoding = static_cast<uint32_t>(rn_) << 16;
+      // Check for the T3/T4 encoding.
+      // PUW must be Offset for T3.
+      // Convert ARM PU0W to PUW
+      // The Mode is in ARM encoding format which is:
+      // |P|U|0|W|
+      // we need this in thumb2 mode:
+      // |P|U|W|
+
+      uint32_t am = am_;
+      int32_t offset = offset_;
+      if (offset < 0) {
+        am ^= 1 << kUShift;
+        offset = -offset;
+      }
+      if (offset_ < 0 || (offset >= 0 && offset < 256 &&
+        am_ != Mode::Offset)) {
+          // T4 encoding.
+        uint32_t PUW = am >> 21;   // Move down to bottom of word.
+        PUW = (PUW >> 1) | (PUW & 1);   // Bits 3, 2 and 0.
+        // If P is 0 then W must be 1 (Different from ARM).
+        if ((PUW & 0b100) == 0) {
+          PUW |= 0b1;
+        }
+        encoding |= B11 | PUW << 8 | offset;
+      } else {
+        // T3 encoding (also sets op1 to 0b01).
+        encoding |= B23 | offset_;
+      }
+  } else {
+    LOG(FATAL) << "Invalid use of encodingThumb for version 1";
+  }
+  return encoding;
+}
+
+// This is very like the ARM encoding except the offset is 10 bits.
+uint32_t Address::encodingThumbLdrdStrd() const {
+  uint32_t encoding;
+  uint32_t am = am_;
+  // If P is 0 then W must be 1 (Different from ARM).
+  uint32_t PU1W = am_ >> 21;   // Move down to bottom of word.
+  if ((PU1W & 0b1000) == 0) {
+    am |= 1 << 21;      // Set W bit.
+  }
+  if (offset_ < 0) {
+    int32_t off = -offset_;
+    CHECK_LT(off, 1024);
+    CHECK_EQ((off & 0b11), 0);    // Must be multiple of 4.
+    encoding = (am ^ (1 << kUShift)) | off >> 2;  // Flip U to adjust sign.
+  } else {
+    CHECK_LT(offset_, 1024);
+    CHECK_EQ((offset_ & 0b11), 0);    // Must be multiple of 4.
+    encoding =  am | offset_ >> 2;
+  }
+  encoding |= static_cast<uint32_t>(rn_) << 16;
+  return encoding;
+}
+
+// Encoding for ARM addressing mode 3.
+uint32_t Address::encoding3() const {
+  const uint32_t offset_mask = (1 << 12) - 1;
+  uint32_t encoding = encodingArm();
+  uint32_t offset = encoding & offset_mask;
+  CHECK_LT(offset, 256u);
+  return (encoding & ~offset_mask) | ((offset & 0xf0) << 4) | (offset & 0xf);
+}
+
+// Encoding for vfp load/store addressing.
+uint32_t Address::vencoding() const {
+  const uint32_t offset_mask = (1 << 12) - 1;
+  uint32_t encoding = encodingArm();
+  uint32_t offset = encoding & offset_mask;
+  CHECK(IsAbsoluteUint(10, offset));  // In the range -1020 to +1020.
+  CHECK_ALIGNED(offset, 2);  // Offset should be a multiple of 4; NOTE: CHECK only enforces 2-byte alignment.
+  CHECK((am_ == Offset) || (am_ == NegOffset));
+  uint32_t vencoding = (encoding & (0xf << kRnShift)) | (offset >> 2);
+  if (am_ == Offset) {
+    vencoding |= 1 << 23;
+  }
+  return vencoding;
+}
+
+
+bool Address::CanHoldLoadOffsetArm(LoadOperandType type, int offset) {
   switch (type) {
     case kLoadSignedByte:
     case kLoadSignedHalfword:
@@ -1225,7 +304,7 @@
 }
 
 
-bool Address::CanHoldStoreOffset(StoreOperandType type, int offset) {
+bool Address::CanHoldStoreOffsetArm(StoreOperandType type, int offset) {
   switch (type) {
     case kStoreHalfword:
     case kStoreWordPair:
@@ -1242,198 +321,48 @@
   }
 }
 
-
-// Implementation note: this method must emit at most one instruction when
-// Address::CanHoldLoadOffset.
-void ArmAssembler::LoadFromOffset(LoadOperandType type,
-                                  Register reg,
-                                  Register base,
-                                  int32_t offset,
-                                  Condition cond) {
-  if (!Address::CanHoldLoadOffset(type, offset)) {
-    CHECK(base != IP);
-    LoadImmediate(IP, offset, cond);
-    add(IP, IP, ShifterOperand(base), cond);
-    base = IP;
-    offset = 0;
-  }
-  CHECK(Address::CanHoldLoadOffset(type, offset));
+bool Address::CanHoldLoadOffsetThumb(LoadOperandType type, int offset) {
   switch (type) {
     case kLoadSignedByte:
-      ldrsb(reg, Address(base, offset), cond);
-      break;
-    case kLoadUnsignedByte:
-      ldrb(reg, Address(base, offset), cond);
-      break;
     case kLoadSignedHalfword:
-      ldrsh(reg, Address(base, offset), cond);
-      break;
     case kLoadUnsignedHalfword:
-      ldrh(reg, Address(base, offset), cond);
-      break;
+    case kLoadUnsignedByte:
     case kLoadWord:
-      ldr(reg, Address(base, offset), cond);
-      break;
+      return IsAbsoluteUint(12, offset);
+    case kLoadSWord:
+    case kLoadDWord:
+      return IsAbsoluteUint(10, offset);  // VFP addressing mode.
     case kLoadWordPair:
-      ldrd(reg, Address(base, offset), cond);
-      break;
-    default:
+      return IsAbsoluteUint(10, offset);
+  default:
       LOG(FATAL) << "UNREACHABLE";
+      return false;
   }
 }
 
-// Implementation note: this method must emit at most one instruction when
-// Address::CanHoldLoadOffset, as expected by JIT::GuardedLoadFromOffset.
-void ArmAssembler::LoadSFromOffset(SRegister reg,
-                                   Register base,
-                                   int32_t offset,
-                                   Condition cond) {
-  if (!Address::CanHoldLoadOffset(kLoadSWord, offset)) {
-    CHECK_NE(base, IP);
-    LoadImmediate(IP, offset, cond);
-    add(IP, IP, ShifterOperand(base), cond);
-    base = IP;
-    offset = 0;
-  }
-  CHECK(Address::CanHoldLoadOffset(kLoadSWord, offset));
-  vldrs(reg, Address(base, offset), cond);
-}
 
-// Implementation note: this method must emit at most one instruction when
-// Address::CanHoldLoadOffset, as expected by JIT::GuardedLoadFromOffset.
-void ArmAssembler::LoadDFromOffset(DRegister reg,
-                                   Register base,
-                                   int32_t offset,
-                                   Condition cond) {
-  if (!Address::CanHoldLoadOffset(kLoadDWord, offset)) {
-    CHECK_NE(base, IP);
-    LoadImmediate(IP, offset, cond);
-    add(IP, IP, ShifterOperand(base), cond);
-    base = IP;
-    offset = 0;
-  }
-  CHECK(Address::CanHoldLoadOffset(kLoadDWord, offset));
-  vldrd(reg, Address(base, offset), cond);
-}
-
-// Implementation note: this method must emit at most one instruction when
-// Address::CanHoldStoreOffset.
-void ArmAssembler::StoreToOffset(StoreOperandType type,
-                                 Register reg,
-                                 Register base,
-                                 int32_t offset,
-                                 Condition cond) {
-  if (!Address::CanHoldStoreOffset(type, offset)) {
-    CHECK(reg != IP);
-    CHECK(base != IP);
-    LoadImmediate(IP, offset, cond);
-    add(IP, IP, ShifterOperand(base), cond);
-    base = IP;
-    offset = 0;
-  }
-  CHECK(Address::CanHoldStoreOffset(type, offset));
+bool Address::CanHoldStoreOffsetThumb(StoreOperandType type, int offset) {
   switch (type) {
-    case kStoreByte:
-      strb(reg, Address(base, offset), cond);
-      break;
     case kStoreHalfword:
-      strh(reg, Address(base, offset), cond);
-      break;
+    case kStoreByte:
     case kStoreWord:
-      str(reg, Address(base, offset), cond);
-      break;
+      return IsAbsoluteUint(12, offset);
+    case kStoreSWord:
+    case kStoreDWord:
+      return IsAbsoluteUint(10, offset);  // VFP addressing mode.
     case kStoreWordPair:
-      strd(reg, Address(base, offset), cond);
-      break;
-    default:
+      return IsAbsoluteUint(10, offset);
+  default:
       LOG(FATAL) << "UNREACHABLE";
+      return false;
   }
 }
 
-// Implementation note: this method must emit at most one instruction when
-// Address::CanHoldStoreOffset, as expected by JIT::GuardedStoreToOffset.
-void ArmAssembler::StoreSToOffset(SRegister reg,
-                                  Register base,
-                                  int32_t offset,
-                                  Condition cond) {
-  if (!Address::CanHoldStoreOffset(kStoreSWord, offset)) {
-    CHECK_NE(base, IP);
-    LoadImmediate(IP, offset, cond);
-    add(IP, IP, ShifterOperand(base), cond);
-    base = IP;
-    offset = 0;
+void ArmAssembler::Pad(uint32_t bytes) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  for (uint32_t i = 0; i < bytes; ++i) {
+    buffer_.Emit<byte>(0);
   }
-  CHECK(Address::CanHoldStoreOffset(kStoreSWord, offset));
-  vstrs(reg, Address(base, offset), cond);
-}
-
-// Implementation note: this method must emit at most one instruction when
-// Address::CanHoldStoreOffset, as expected by JIT::GuardedStoreSToOffset.
-void ArmAssembler::StoreDToOffset(DRegister reg,
-                                  Register base,
-                                  int32_t offset,
-                                  Condition cond) {
-  if (!Address::CanHoldStoreOffset(kStoreDWord, offset)) {
-    CHECK_NE(base, IP);
-    LoadImmediate(IP, offset, cond);
-    add(IP, IP, ShifterOperand(base), cond);
-    base = IP;
-    offset = 0;
-  }
-  CHECK(Address::CanHoldStoreOffset(kStoreDWord, offset));
-  vstrd(reg, Address(base, offset), cond);
-}
-
-void ArmAssembler::Push(Register rd, Condition cond) {
-  str(rd, Address(SP, -kRegisterSize, Address::PreIndex), cond);
-}
-
-void ArmAssembler::Pop(Register rd, Condition cond) {
-  ldr(rd, Address(SP, kRegisterSize, Address::PostIndex), cond);
-}
-
-void ArmAssembler::PushList(RegList regs, Condition cond) {
-  stm(DB_W, SP, regs, cond);
-}
-
-void ArmAssembler::PopList(RegList regs, Condition cond) {
-  ldm(IA_W, SP, regs, cond);
-}
-
-void ArmAssembler::Mov(Register rd, Register rm, Condition cond) {
-  if (rd != rm) {
-    mov(rd, ShifterOperand(rm), cond);
-  }
-}
-
-void ArmAssembler::Lsl(Register rd, Register rm, uint32_t shift_imm,
-                       Condition cond) {
-  CHECK_NE(shift_imm, 0u);  // Do not use Lsl if no shift is wanted.
-  mov(rd, ShifterOperand(rm, LSL, shift_imm), cond);
-}
-
-void ArmAssembler::Lsr(Register rd, Register rm, uint32_t shift_imm,
-                       Condition cond) {
-  CHECK_NE(shift_imm, 0u);  // Do not use Lsr if no shift is wanted.
-  if (shift_imm == 32) shift_imm = 0;  // Comply to UAL syntax.
-  mov(rd, ShifterOperand(rm, LSR, shift_imm), cond);
-}
-
-void ArmAssembler::Asr(Register rd, Register rm, uint32_t shift_imm,
-                       Condition cond) {
-  CHECK_NE(shift_imm, 0u);  // Do not use Asr if no shift is wanted.
-  if (shift_imm == 32) shift_imm = 0;  // Comply to UAL syntax.
-  mov(rd, ShifterOperand(rm, ASR, shift_imm), cond);
-}
-
-void ArmAssembler::Ror(Register rd, Register rm, uint32_t shift_imm,
-                       Condition cond) {
-  CHECK_NE(shift_imm, 0u);  // Use Rrx instruction.
-  mov(rd, ShifterOperand(rm, ROR, shift_imm), cond);
-}
-
-void ArmAssembler::Rrx(Register rd, Register rm, Condition cond) {
-  mov(rd, ShifterOperand(rm, ROR, 0), cond);
 }
 
 constexpr size_t kFramePointerSize = 4;
@@ -1472,7 +401,7 @@
 void ArmAssembler::RemoveFrame(size_t frame_size,
                               const std::vector<ManagedRegister>& callee_save_regs) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
-  // Compute callee saves to pop and PC
+  // Compute callee saves to pop and PC.
   RegList pop_list = 1 << PC;
   size_t pop_values = 1;
   for (size_t i = 0; i < callee_save_regs.size(); i++) {
@@ -1481,12 +410,12 @@
     pop_values++;
   }
 
-  // Decrease frame to start of callee saves
+  // Decrease frame to start of callee saves.
   CHECK_GT(frame_size, pop_values * kFramePointerSize);
   size_t adjust = frame_size - (pop_values * kFramePointerSize);
   DecreaseFrameSize(adjust);
 
-  // Pop callee saves and PC
+  // Pop callee saves and PC.
   PopList(pop_list);
 }
 
@@ -1681,7 +610,7 @@
     } else {
       CHECK(dst.IsRegisterPair()) << dst;
       CHECK(src.IsRegisterPair()) << src;
-      // Ensure that the first move doesn't clobber the input of the second
+      // Ensure that the first move doesn't clobber the input of the second.
       if (src.AsRegisterPairHigh() != dst.AsRegisterPairLow()) {
         mov(dst.AsRegisterPairLow(), ShifterOperand(src.AsRegisterPairLow()));
         mov(dst.AsRegisterPairHigh(), ShifterOperand(src.AsRegisterPairHigh()));
@@ -1743,15 +672,6 @@
   UNIMPLEMENTED(FATAL);
 }
 
-
-void ArmAssembler::MemoryBarrier(ManagedRegister mscratch) {
-  CHECK_EQ(mscratch.AsArm().AsCoreRegister(), R12);
-#if ANDROID_SMP != 0
-  int32_t encoding = 0xf57ff05f;  // dmb
-  Emit(encoding);
-#endif
-}
-
 void ArmAssembler::CreateHandleScopeEntry(ManagedRegister mout_reg,
                                    FrameOffset handle_scope_offset,
                                    ManagedRegister min_reg, bool null_allowed) {
@@ -1770,7 +690,10 @@
     }
     cmp(in_reg.AsCoreRegister(), ShifterOperand(0));
     if (!out_reg.Equals(in_reg)) {
+      it(EQ, kItElse);
       LoadImmediate(out_reg.AsCoreRegister(), 0, EQ);
+    } else {
+      it(NE);
     }
     AddConstant(out_reg.AsCoreRegister(), SP, handle_scope_offset.Int32Value(), NE);
   } else {
@@ -1791,6 +714,7 @@
     // the address in the handle scope holding the reference.
     // e.g. scratch = (scratch == 0) ? 0 : (SP+handle_scope_offset)
     cmp(scratch.AsCoreRegister(), ShifterOperand(0));
+    it(NE);
     AddConstant(scratch.AsCoreRegister(), SP, handle_scope_offset.Int32Value(), NE);
   } else {
     AddConstant(scratch.AsCoreRegister(), SP, handle_scope_offset.Int32Value(), AL);
@@ -1806,19 +730,20 @@
   CHECK(in_reg.IsCoreRegister()) << in_reg;
   Label null_arg;
   if (!out_reg.Equals(in_reg)) {
-    LoadImmediate(out_reg.AsCoreRegister(), 0, EQ);
+    LoadImmediate(out_reg.AsCoreRegister(), 0, EQ);     // TODO: why EQ?
   }
   cmp(in_reg.AsCoreRegister(), ShifterOperand(0));
+  it(NE);
   LoadFromOffset(kLoadWord, out_reg.AsCoreRegister(),
                  in_reg.AsCoreRegister(), 0, NE);
 }
 
 void ArmAssembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) {
-  // TODO: not validating references
+  // TODO: not validating references.
 }
 
 void ArmAssembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_null*/) {
-  // TODO: not validating references
+  // TODO: not validating references.
 }
 
 void ArmAssembler::Call(ManagedRegister mbase, Offset offset,
@@ -1830,7 +755,7 @@
   LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
                  base.AsCoreRegister(), offset.Int32Value());
   blx(scratch.AsCoreRegister());
-  // TODO: place reference map on call
+  // TODO: place reference map on call.
 }
 
 void ArmAssembler::Call(FrameOffset base, Offset offset,
@@ -1876,16 +801,71 @@
   if (stack_adjust_ != 0) {  // Fix up the frame.
     __ DecreaseFrameSize(stack_adjust_);
   }
-  // Pass exception object as argument
-  // Don't care about preserving R0 as this call won't return
+  // Pass exception object as argument.
+  // Don't care about preserving R0 as this call won't return.
   __ mov(R0, ShifterOperand(scratch_.AsCoreRegister()));
-  // Set up call to Thread::Current()->pDeliverException
+  // Set up call to Thread::Current()->pDeliverException.
   __ LoadFromOffset(kLoadWord, R12, TR, QUICK_ENTRYPOINT_OFFSET(4, pDeliverException).Int32Value());
   __ blx(R12);
-  // Call never returns
+  // Call never returns.
   __ bkpt(0);
 #undef __
 }
 
+
+static int LeadingZeros(uint32_t val) {
+  uint32_t alt;
+  int32_t n;
+  int32_t count;
+
+  count = 16;
+  n = 32;
+  do {
+    alt = val >> count;
+    if (alt != 0) {
+      n = n - count;
+      val = alt;
+    }
+    count >>= 1;
+  } while (count);
+  return n - val;
+}
+
+
+uint32_t ArmAssembler::ModifiedImmediate(uint32_t value) {
+  int32_t z_leading;
+  int32_t z_trailing;
+  uint32_t b0 = value & 0xff;
+
+  /* Note: case of value==0 must use 0:000:0:0000000 encoding */
+  if (value <= 0xFF)
+    return b0;  // 0:000:a:bcdefgh.
+  if (value == ((b0 << 16) | b0))
+    return (0x1 << 12) | b0; /* 0:001:a:bcdefgh */
+  if (value == ((b0 << 24) | (b0 << 16) | (b0 << 8) | b0))
+    return (0x3 << 12) | b0; /* 0:011:a:bcdefgh */
+  b0 = (value >> 8) & 0xff;
+  if (value == ((b0 << 24) | (b0 << 8)))
+    return (0x2 << 12) | b0; /* 0:010:a:bcdefgh */
+  /* Can we do it with rotation? */
+  z_leading = LeadingZeros(value);
+  z_trailing = 32 - LeadingZeros(~value & (value - 1));
+  /* A run of eight or fewer active bits? */
+  if ((z_leading + z_trailing) < 24)
+    return kInvalidModifiedImmediate;  /* No - bail */
+  /* left-justify the constant, discarding msb (known to be 1) */
+  value <<= z_leading + 1;
+  /* Create bcdefgh */
+  value >>= 25;
+
+  /* Put it all together */
+  uint32_t v = 8 + z_leading;
+
+  uint32_t i = (v & 0b10000) >> 4;
+  uint32_t imm3 = (v >> 1) & 0b111;
+  uint32_t a = v & 1;
+  return value | i << 26 | imm3 << 12 | a << 7;
+}
+
 }  // namespace arm
 }  // namespace art
diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h
index 396e603..7b662e1 100644
--- a/compiler/utils/arm/assembler_arm.h
+++ b/compiler/utils/arm/assembler_arm.h
@@ -29,88 +29,118 @@
 namespace art {
 namespace arm {
 
-// Encodes Addressing Mode 1 - Data-processing operands defined in Section 5.1.
 class ShifterOperand {
  public:
-  // Data-processing operands - Uninitialized
-  ShifterOperand() {
-    type_ = -1;
-    encoding_ = 0;
+  ShifterOperand() : type_(kUnknown), rm_(kNoRegister), rs_(kNoRegister),
+      is_rotate_(false), is_shift_(false), shift_(kNoShift), rotate_(0), immed_(0) {
   }
 
-  // Data-processing operands - Immediate
-  explicit ShifterOperand(uint32_t immediate) {
-    CHECK(immediate < (1 << kImmed8Bits));
-    type_ = 1;
-    encoding_ = immediate;
-  }
-
-  // Data-processing operands - Rotated immediate
-  ShifterOperand(uint32_t rotate, uint32_t immed8) {
-    CHECK((rotate < (1 << kRotateBits)) && (immed8 < (1 << kImmed8Bits)));
-    type_ = 1;
-    encoding_ = (rotate << kRotateShift) | (immed8 << kImmed8Shift);
+  explicit ShifterOperand(uint32_t immed) : type_(kImmediate), rm_(kNoRegister), rs_(kNoRegister),
+      is_rotate_(false), is_shift_(false), shift_(kNoShift), rotate_(0), immed_(immed) {
   }
 
   // Data-processing operands - Register
-  explicit ShifterOperand(Register rm) {
-    type_ = 0;
-    encoding_ = static_cast<uint32_t>(rm);
+  explicit ShifterOperand(Register rm) : type_(kRegister), rm_(rm), rs_(kNoRegister),
+      is_rotate_(false), is_shift_(false), shift_(kNoShift), rotate_(0), immed_(0) {
   }
 
-  // Data-processing operands - Logical shift/rotate by immediate
-  ShifterOperand(Register rm, Shift shift, uint32_t shift_imm) {
-    CHECK(shift_imm < (1 << kShiftImmBits));
-    type_ = 0;
-    encoding_ = shift_imm << kShiftImmShift |
-                static_cast<uint32_t>(shift) << kShiftShift |
-                static_cast<uint32_t>(rm);
+  ShifterOperand(uint32_t rotate, uint32_t immed8) : type_(kImmediate), rm_(kNoRegister),
+      rs_(kNoRegister),
+      is_rotate_(true), is_shift_(false), shift_(kNoShift), rotate_(rotate), immed_(immed8) {
+  }
+
+  ShifterOperand(Register rm, Shift shift, uint32_t shift_imm = 0) : type_(kRegister), rm_(rm),
+      rs_(kNoRegister),
+      is_rotate_(false), is_shift_(true), shift_(shift), rotate_(0), immed_(shift_imm) {
   }
 
   // Data-processing operands - Logical shift/rotate by register
-  ShifterOperand(Register rm, Shift shift, Register rs) {
-    type_ = 0;
-    encoding_ = static_cast<uint32_t>(rs) << kShiftRegisterShift |
-                static_cast<uint32_t>(shift) << kShiftShift | (1 << 4) |
-                static_cast<uint32_t>(rm);
+  ShifterOperand(Register rm, Shift shift, Register rs)  : type_(kRegister), rm_(rm),
+      rs_(rs),
+      is_rotate_(false), is_shift_(true), shift_(shift), rotate_(0), immed_(0) {
   }
 
-  static bool CanHold(uint32_t immediate, ShifterOperand* shifter_op) {
-    // Avoid the more expensive test for frequent small immediate values.
-    if (immediate < (1 << kImmed8Bits)) {
-      shifter_op->type_ = 1;
-      shifter_op->encoding_ = (0 << kRotateShift) | (immediate << kImmed8Shift);
-      return true;
-    }
-    // Note that immediate must be unsigned for the test to work correctly.
-    for (int rot = 0; rot < 16; rot++) {
-      uint32_t imm8 = (immediate << 2*rot) | (immediate >> (32 - 2*rot));
-      if (imm8 < (1 << kImmed8Bits)) {
-        shifter_op->type_ = 1;
-        shifter_op->encoding_ = (rot << kRotateShift) | (imm8 << kImmed8Shift);
-        return true;
-      }
-    }
-    return false;
-  }
-
- private:
-  bool is_valid() const { return (type_ == 0) || (type_ == 1); }
+  bool is_valid() const { return (type_ == kImmediate) || (type_ == kRegister); }
 
   uint32_t type() const {
     CHECK(is_valid());
     return type_;
   }
 
-  uint32_t encoding() const {
-    CHECK(is_valid());
-    return encoding_;
+  uint32_t encodingArm() const;
+  uint32_t encodingThumb(int version) const;
+
+  bool IsEmpty() const {
+    return type_ == kUnknown;
   }
 
-  uint32_t type_;  // Encodes the type field (bits 27-25) in the instruction.
-  uint32_t encoding_;
+  bool IsImmediate() const {
+    return type_ == kImmediate;
+  }
 
-  friend class ArmAssembler;
+  bool IsRegister() const {
+    return type_ == kRegister;
+  }
+
+  bool IsShift() const {
+    return is_shift_;
+  }
+
+  uint32_t GetImmediate() const {
+    return immed_;
+  }
+
+  Shift GetShift() const {
+    return shift_;
+  }
+
+  Register GetRegister() const {
+    return rm_;
+  }
+
+  enum Type {
+    kUnknown = -1,
+    kRegister,
+    kImmediate
+  };
+
+  static bool CanHoldArm(uint32_t immediate, ShifterOperand* shifter_op) {
+    // Avoid the more expensive test for frequent small immediate values.
+    if (immediate < (1 << kImmed8Bits)) {
+      shifter_op->type_ = kImmediate;
+      shifter_op->is_rotate_ = true;
+      shifter_op->rotate_ = 0;
+      shifter_op->immed_ = immediate;
+      return true;
+    }
+    // Note that immediate must be unsigned for the test to work correctly.
+    for (int rot = 0; rot < 16; rot++) {
+      uint32_t imm8 = (immediate << 2*rot) | (immediate >> (32 - 2*rot));
+      if (imm8 < (1 << kImmed8Bits)) {
+        shifter_op->type_ = kImmediate;
+        shifter_op->is_rotate_ = true;
+        shifter_op->rotate_ = rot;
+        shifter_op->immed_ = imm8;
+        return true;
+      }
+    }
+    return false;
+  }
+
+  static bool CanHoldThumb(Register rd, Register rn, Opcode opcode,
+                           uint32_t immediate, ShifterOperand* shifter_op);
+
+
+ private:
+  Type type_;
+  Register rm_;
+  Register rs_;
+  bool is_rotate_;
+  bool is_shift_;
+  Shift shift_;
+  uint32_t rotate_;
+  uint32_t immed_;
+
 #ifdef SOURCE_ASSEMBLER_SUPPORT
   friend class BinaryAssembler;
 #endif
@@ -152,10 +182,10 @@
   IB_W         = (8|4|1) << 21   // increment before with writeback to base
 };
 
-
 class Address {
  public:
-  // Memory operand addressing mode
+  // Memory operand addressing mode (in ARM encoding form.  For others we need
+  // to adjust)
   enum Mode {
     // bit encoding P U W
     Offset       = (8|4|0) << 21,  // offset (w/o writeback to base)
@@ -166,273 +196,366 @@
     NegPostIndex = (0|0|0) << 21   // negative post-indexed with writeback
   };
 
-  explicit Address(Register rn, int32_t offset = 0, Mode am = Offset) {
-    CHECK(IsAbsoluteUint(12, offset));
-    if (offset < 0) {
-      encoding_ = (am ^ (1 << kUShift)) | -offset;  // Flip U to adjust sign.
-    } else {
-      encoding_ = am | offset;
-    }
-    encoding_ |= static_cast<uint32_t>(rn) << kRnShift;
+  explicit Address(Register rn, int32_t offset = 0, Mode am = Offset) : rn_(rn), offset_(offset),
+      am_(am) {
   }
 
-  static bool CanHoldLoadOffset(LoadOperandType type, int offset);
-  static bool CanHoldStoreOffset(StoreOperandType type, int offset);
+  static bool CanHoldLoadOffsetArm(LoadOperandType type, int offset);
+  static bool CanHoldStoreOffsetArm(StoreOperandType type, int offset);
+
+  static bool CanHoldLoadOffsetThumb(LoadOperandType type, int offset);
+  static bool CanHoldStoreOffsetThumb(StoreOperandType type, int offset);
+
+  uint32_t encodingArm() const;
+  uint32_t encodingThumb(int version) const;
+
+  uint32_t encoding3() const;
+  uint32_t vencoding() const;
+
+  uint32_t encodingThumbLdrdStrd() const;
+
+  Register GetRegister() const {
+    return rn_;
+  }
+
+  int32_t GetOffset() const {
+    return offset_;
+  }
+
+  Mode GetMode() const {
+    return am_;
+  }
 
  private:
-  uint32_t encoding() const { return encoding_; }
-
-  // Encoding for addressing mode 3.
-  uint32_t encoding3() const {
-    const uint32_t offset_mask = (1 << 12) - 1;
-    uint32_t offset = encoding_ & offset_mask;
-    CHECK_LT(offset, 256u);
-    return (encoding_ & ~offset_mask) | ((offset & 0xf0) << 4) | (offset & 0xf);
-  }
-
-  // Encoding for vfp load/store addressing.
-  uint32_t vencoding() const {
-    const uint32_t offset_mask = (1 << 12) - 1;
-    uint32_t offset = encoding_ & offset_mask;
-    CHECK(IsAbsoluteUint(10, offset));  // In the range -1020 to +1020.
-    CHECK_ALIGNED(offset, 2);  // Multiple of 4.
-    int mode = encoding_ & ((8|4|1) << 21);
-    CHECK((mode == Offset) || (mode == NegOffset));
-    uint32_t vencoding = (encoding_ & (0xf << kRnShift)) | (offset >> 2);
-    if (mode == Offset) {
-      vencoding |= 1 << 23;
-    }
-    return vencoding;
-  }
-
-  uint32_t encoding_;
-
-  friend class ArmAssembler;
+  Register rn_;
+  int32_t offset_;
+  Mode am_;
 };
 
+// Instruction encoding bits.
+enum {
+  H   = 1 << 5,   // halfword (or byte)
+  L   = 1 << 20,  // load (or store)
+  S   = 1 << 20,  // set condition code (or leave unchanged)
+  W   = 1 << 21,  // writeback base register (or leave unchanged)
+  A   = 1 << 21,  // accumulate in multiply instruction (or not)
+  B   = 1 << 22,  // unsigned byte (or word)
+  N   = 1 << 22,  // long (or short)
+  U   = 1 << 23,  // positive (or negative) offset/index
+  P   = 1 << 24,  // offset/pre-indexed addressing (or post-indexed addressing)
+  I   = 1 << 25,  // immediate shifter operand (or not)
 
-class ArmAssembler FINAL : public Assembler {
+  B0 = 1,
+  B1 = 1 << 1,
+  B2 = 1 << 2,
+  B3 = 1 << 3,
+  B4 = 1 << 4,
+  B5 = 1 << 5,
+  B6 = 1 << 6,
+  B7 = 1 << 7,
+  B8 = 1 << 8,
+  B9 = 1 << 9,
+  B10 = 1 << 10,
+  B11 = 1 << 11,
+  B12 = 1 << 12,
+  B13 = 1 << 13,
+  B14 = 1 << 14,
+  B15 = 1 << 15,
+  B16 = 1 << 16,
+  B17 = 1 << 17,
+  B18 = 1 << 18,
+  B19 = 1 << 19,
+  B20 = 1 << 20,
+  B21 = 1 << 21,
+  B22 = 1 << 22,
+  B23 = 1 << 23,
+  B24 = 1 << 24,
+  B25 = 1 << 25,
+  B26 = 1 << 26,
+  B27 = 1 << 27,
+  B28 = 1 << 28,
+  B29 = 1 << 29,
+  B30 = 1 << 30,
+  B31 = 1 << 31,
+
+  // Instruction bit masks.
+  RdMask = 15 << 12,  // in str instruction
+  CondMask = 15 << 28,
+  CoprocessorMask = 15 << 8,
+  OpCodeMask = 15 << 21,  // in data-processing instructions
+  Imm24Mask = (1 << 24) - 1,
+  Off12Mask = (1 << 12) - 1,
+
+  // ldrex/strex register field encodings.
+  kLdExRnShift = 16,
+  kLdExRtShift = 12,
+  kStrExRnShift = 16,
+  kStrExRdShift = 12,
+  kStrExRtShift = 0,
+};
+
+// IfThen state for IT instructions.
+enum ItState {
+  kItOmitted,
+  kItThen,
+  kItT = kItThen,
+  kItElse,
+  kItE = kItElse
+};
+
+constexpr uint32_t kNoItCondition = 3;
+constexpr uint32_t kInvalidModifiedImmediate = -1;
+
+extern const char* kRegisterNames[];
+extern const char* kConditionNames[];
+extern std::ostream& operator<<(std::ostream& os, const Register& rhs);
+extern std::ostream& operator<<(std::ostream& os, const SRegister& rhs);
+extern std::ostream& operator<<(std::ostream& os, const DRegister& rhs);
+extern std::ostream& operator<<(std::ostream& os, const Condition& rhs);
+
+// This is an abstract ARM assembler.  Subclasses provide assemblers for the individual
+// instruction sets (ARM32, Thumb2, etc.)
+//
+class ArmAssembler : public Assembler {
  public:
-  ArmAssembler() {}
   virtual ~ArmAssembler() {}
 
+  // Is this assembler for the thumb instruction set?
+  virtual bool IsThumb() const = 0;
+
   // Data-processing instructions.
-  void and_(Register rd, Register rn, ShifterOperand so, Condition cond = AL);
+  virtual void and_(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
 
-  void eor(Register rd, Register rn, ShifterOperand so, Condition cond = AL);
+  virtual void eor(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
 
-  void sub(Register rd, Register rn, ShifterOperand so, Condition cond = AL);
-  void subs(Register rd, Register rn, ShifterOperand so, Condition cond = AL);
+  virtual void sub(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
+  virtual void subs(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
 
-  void rsb(Register rd, Register rn, ShifterOperand so, Condition cond = AL);
-  void rsbs(Register rd, Register rn, ShifterOperand so, Condition cond = AL);
+  virtual void rsb(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
+  virtual void rsbs(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
 
-  void add(Register rd, Register rn, ShifterOperand so, Condition cond = AL);
+  virtual void add(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
 
-  void adds(Register rd, Register rn, ShifterOperand so, Condition cond = AL);
+  virtual void adds(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
 
-  void adc(Register rd, Register rn, ShifterOperand so, Condition cond = AL);
+  virtual void adc(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
 
-  void sbc(Register rd, Register rn, ShifterOperand so, Condition cond = AL);
+  virtual void sbc(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
 
-  void rsc(Register rd, Register rn, ShifterOperand so, Condition cond = AL);
+  virtual void rsc(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
 
-  void tst(Register rn, ShifterOperand so, Condition cond = AL);
+  virtual void tst(Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
 
-  void teq(Register rn, ShifterOperand so, Condition cond = AL);
+  virtual void teq(Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
 
-  void cmp(Register rn, ShifterOperand so, Condition cond = AL);
+  virtual void cmp(Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
 
-  void cmn(Register rn, ShifterOperand so, Condition cond = AL);
+  virtual void cmn(Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
 
-  void orr(Register rd, Register rn, ShifterOperand so, Condition cond = AL);
-  void orrs(Register rd, Register rn, ShifterOperand so, Condition cond = AL);
+  virtual void orr(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
+  virtual void orrs(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
 
-  void mov(Register rd, ShifterOperand so, Condition cond = AL);
-  void movs(Register rd, ShifterOperand so, Condition cond = AL);
+  virtual void mov(Register rd, const ShifterOperand& so, Condition cond = AL) = 0;
+  virtual void movs(Register rd, const ShifterOperand& so, Condition cond = AL) = 0;
 
-  void bic(Register rd, Register rn, ShifterOperand so, Condition cond = AL);
+  virtual void bic(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) = 0;
 
-  void mvn(Register rd, ShifterOperand so, Condition cond = AL);
-  void mvns(Register rd, ShifterOperand so, Condition cond = AL);
+  virtual void mvn(Register rd, const ShifterOperand& so, Condition cond = AL) = 0;
+  virtual void mvns(Register rd, const ShifterOperand& so, Condition cond = AL) = 0;
 
   // Miscellaneous data-processing instructions.
-  void clz(Register rd, Register rm, Condition cond = AL);
-  void movw(Register rd, uint16_t imm16, Condition cond = AL);
-  void movt(Register rd, uint16_t imm16, Condition cond = AL);
+  virtual void clz(Register rd, Register rm, Condition cond = AL) = 0;
+  virtual void movw(Register rd, uint16_t imm16, Condition cond = AL) = 0;
+  virtual void movt(Register rd, uint16_t imm16, Condition cond = AL) = 0;
 
   // Multiply instructions.
-  void mul(Register rd, Register rn, Register rm, Condition cond = AL);
-  void mla(Register rd, Register rn, Register rm, Register ra,
-           Condition cond = AL);
-  void mls(Register rd, Register rn, Register rm, Register ra,
-           Condition cond = AL);
-  void umull(Register rd_lo, Register rd_hi, Register rn, Register rm,
-             Condition cond = AL);
+  virtual void mul(Register rd, Register rn, Register rm, Condition cond = AL) = 0;
+  virtual void mla(Register rd, Register rn, Register rm, Register ra,
+                   Condition cond = AL) = 0;
+  virtual void mls(Register rd, Register rn, Register rm, Register ra,
+                   Condition cond = AL) = 0;
+  virtual void umull(Register rd_lo, Register rd_hi, Register rn, Register rm,
+                     Condition cond = AL) = 0;
+
+  virtual void sdiv(Register rd, Register rn, Register rm, Condition cond = AL) = 0;
+  virtual void udiv(Register rd, Register rn, Register rm, Condition cond = AL) = 0;
 
   // Load/store instructions.
-  void ldr(Register rd, Address ad, Condition cond = AL);
-  void str(Register rd, Address ad, Condition cond = AL);
+  virtual void ldr(Register rd, const Address& ad, Condition cond = AL) = 0;
+  virtual void str(Register rd, const Address& ad, Condition cond = AL) = 0;
 
-  void ldrb(Register rd, Address ad, Condition cond = AL);
-  void strb(Register rd, Address ad, Condition cond = AL);
+  virtual void ldrb(Register rd, const Address& ad, Condition cond = AL) = 0;
+  virtual void strb(Register rd, const Address& ad, Condition cond = AL) = 0;
 
-  void ldrh(Register rd, Address ad, Condition cond = AL);
-  void strh(Register rd, Address ad, Condition cond = AL);
+  virtual void ldrh(Register rd, const Address& ad, Condition cond = AL) = 0;
+  virtual void strh(Register rd, const Address& ad, Condition cond = AL) = 0;
 
-  void ldrsb(Register rd, Address ad, Condition cond = AL);
-  void ldrsh(Register rd, Address ad, Condition cond = AL);
+  virtual void ldrsb(Register rd, const Address& ad, Condition cond = AL) = 0;
+  virtual void ldrsh(Register rd, const Address& ad, Condition cond = AL) = 0;
 
-  void ldrd(Register rd, Address ad, Condition cond = AL);
-  void strd(Register rd, Address ad, Condition cond = AL);
+  virtual void ldrd(Register rd, const Address& ad, Condition cond = AL) = 0;
+  virtual void strd(Register rd, const Address& ad, Condition cond = AL) = 0;
 
-  void ldm(BlockAddressMode am, Register base,
-           RegList regs, Condition cond = AL);
-  void stm(BlockAddressMode am, Register base,
-           RegList regs, Condition cond = AL);
+  virtual void ldm(BlockAddressMode am, Register base,
+                   RegList regs, Condition cond = AL) = 0;
+  virtual void stm(BlockAddressMode am, Register base,
+                   RegList regs, Condition cond = AL) = 0;
 
-  void ldrex(Register rd, Register rn, Condition cond = AL);
-  void strex(Register rd, Register rt, Register rn, Condition cond = AL);
+  virtual void ldrex(Register rd, Register rn, Condition cond = AL) = 0;
+  virtual void strex(Register rd, Register rt, Register rn, Condition cond = AL) = 0;
 
   // Miscellaneous instructions.
-  void clrex();
-  void nop(Condition cond = AL);
+  virtual void clrex(Condition cond = AL) = 0;
+  virtual void nop(Condition cond = AL) = 0;
 
   // Note that gdb sets breakpoints using the undefined instruction 0xe7f001f0.
-  void bkpt(uint16_t imm16);
-  void svc(uint32_t imm24);
+  virtual void bkpt(uint16_t imm16) = 0;
+  virtual void svc(uint32_t imm24) = 0;
+
+  virtual void it(Condition firstcond, ItState i1 = kItOmitted,
+                  ItState i2 = kItOmitted, ItState i3 = kItOmitted) {
+    // Ignored if not supported.
+  }
+
+  virtual void cbz(Register rn, Label* target) = 0;
+  virtual void cbnz(Register rn, Label* target) = 0;
 
   // Floating point instructions (VFPv3-D16 and VFPv3-D32 profiles).
-  void vmovsr(SRegister sn, Register rt, Condition cond = AL);
-  void vmovrs(Register rt, SRegister sn, Condition cond = AL);
-  void vmovsrr(SRegister sm, Register rt, Register rt2, Condition cond = AL);
-  void vmovrrs(Register rt, Register rt2, SRegister sm, Condition cond = AL);
-  void vmovdrr(DRegister dm, Register rt, Register rt2, Condition cond = AL);
-  void vmovrrd(Register rt, Register rt2, DRegister dm, Condition cond = AL);
-  void vmovs(SRegister sd, SRegister sm, Condition cond = AL);
-  void vmovd(DRegister dd, DRegister dm, Condition cond = AL);
+  virtual void vmovsr(SRegister sn, Register rt, Condition cond = AL) = 0;
+  virtual void vmovrs(Register rt, SRegister sn, Condition cond = AL) = 0;
+  virtual void vmovsrr(SRegister sm, Register rt, Register rt2, Condition cond = AL) = 0;
+  virtual void vmovrrs(Register rt, Register rt2, SRegister sm, Condition cond = AL) = 0;
+  virtual void vmovdrr(DRegister dm, Register rt, Register rt2, Condition cond = AL) = 0;
+  virtual void vmovrrd(Register rt, Register rt2, DRegister dm, Condition cond = AL) = 0;
+  virtual void vmovs(SRegister sd, SRegister sm, Condition cond = AL) = 0;
+  virtual void vmovd(DRegister dd, DRegister dm, Condition cond = AL) = 0;
 
   // Returns false if the immediate cannot be encoded.
-  bool vmovs(SRegister sd, float s_imm, Condition cond = AL);
-  bool vmovd(DRegister dd, double d_imm, Condition cond = AL);
+  virtual bool vmovs(SRegister sd, float s_imm, Condition cond = AL) = 0;
+  virtual bool vmovd(DRegister dd, double d_imm, Condition cond = AL) = 0;
 
-  void vldrs(SRegister sd, Address ad, Condition cond = AL);
-  void vstrs(SRegister sd, Address ad, Condition cond = AL);
-  void vldrd(DRegister dd, Address ad, Condition cond = AL);
-  void vstrd(DRegister dd, Address ad, Condition cond = AL);
+  virtual void vldrs(SRegister sd, const Address& ad, Condition cond = AL) = 0;
+  virtual void vstrs(SRegister sd, const Address& ad, Condition cond = AL) = 0;
+  virtual void vldrd(DRegister dd, const Address& ad, Condition cond = AL) = 0;
+  virtual void vstrd(DRegister dd, const Address& ad, Condition cond = AL) = 0;
 
-  void vadds(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL);
-  void vaddd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL);
-  void vsubs(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL);
-  void vsubd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL);
-  void vmuls(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL);
-  void vmuld(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL);
-  void vmlas(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL);
-  void vmlad(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL);
-  void vmlss(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL);
-  void vmlsd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL);
-  void vdivs(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL);
-  void vdivd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL);
+  virtual void vadds(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) = 0;
+  virtual void vaddd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) = 0;
+  virtual void vsubs(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) = 0;
+  virtual void vsubd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) = 0;
+  virtual void vmuls(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) = 0;
+  virtual void vmuld(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) = 0;
+  virtual void vmlas(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) = 0;
+  virtual void vmlad(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) = 0;
+  virtual void vmlss(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) = 0;
+  virtual void vmlsd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) = 0;
+  virtual void vdivs(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) = 0;
+  virtual void vdivd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) = 0;
 
-  void vabss(SRegister sd, SRegister sm, Condition cond = AL);
-  void vabsd(DRegister dd, DRegister dm, Condition cond = AL);
-  void vnegs(SRegister sd, SRegister sm, Condition cond = AL);
-  void vnegd(DRegister dd, DRegister dm, Condition cond = AL);
-  void vsqrts(SRegister sd, SRegister sm, Condition cond = AL);
-  void vsqrtd(DRegister dd, DRegister dm, Condition cond = AL);
+  virtual void vabss(SRegister sd, SRegister sm, Condition cond = AL) = 0;
+  virtual void vabsd(DRegister dd, DRegister dm, Condition cond = AL) = 0;
+  virtual void vnegs(SRegister sd, SRegister sm, Condition cond = AL) = 0;
+  virtual void vnegd(DRegister dd, DRegister dm, Condition cond = AL) = 0;
+  virtual void vsqrts(SRegister sd, SRegister sm, Condition cond = AL) = 0;
+  virtual void vsqrtd(DRegister dd, DRegister dm, Condition cond = AL) = 0;
 
-  void vcvtsd(SRegister sd, DRegister dm, Condition cond = AL);
-  void vcvtds(DRegister dd, SRegister sm, Condition cond = AL);
-  void vcvtis(SRegister sd, SRegister sm, Condition cond = AL);
-  void vcvtid(SRegister sd, DRegister dm, Condition cond = AL);
-  void vcvtsi(SRegister sd, SRegister sm, Condition cond = AL);
-  void vcvtdi(DRegister dd, SRegister sm, Condition cond = AL);
-  void vcvtus(SRegister sd, SRegister sm, Condition cond = AL);
-  void vcvtud(SRegister sd, DRegister dm, Condition cond = AL);
-  void vcvtsu(SRegister sd, SRegister sm, Condition cond = AL);
-  void vcvtdu(DRegister dd, SRegister sm, Condition cond = AL);
+  virtual void vcvtsd(SRegister sd, DRegister dm, Condition cond = AL) = 0;
+  virtual void vcvtds(DRegister dd, SRegister sm, Condition cond = AL) = 0;
+  virtual void vcvtis(SRegister sd, SRegister sm, Condition cond = AL) = 0;
+  virtual void vcvtid(SRegister sd, DRegister dm, Condition cond = AL) = 0;
+  virtual void vcvtsi(SRegister sd, SRegister sm, Condition cond = AL) = 0;
+  virtual void vcvtdi(DRegister dd, SRegister sm, Condition cond = AL) = 0;
+  virtual void vcvtus(SRegister sd, SRegister sm, Condition cond = AL) = 0;
+  virtual void vcvtud(SRegister sd, DRegister dm, Condition cond = AL) = 0;
+  virtual void vcvtsu(SRegister sd, SRegister sm, Condition cond = AL) = 0;
+  virtual void vcvtdu(DRegister dd, SRegister sm, Condition cond = AL) = 0;
 
-  void vcmps(SRegister sd, SRegister sm, Condition cond = AL);
-  void vcmpd(DRegister dd, DRegister dm, Condition cond = AL);
-  void vcmpsz(SRegister sd, Condition cond = AL);
-  void vcmpdz(DRegister dd, Condition cond = AL);
-  void vmstat(Condition cond = AL);  // VMRS APSR_nzcv, FPSCR
+  virtual void vcmps(SRegister sd, SRegister sm, Condition cond = AL) = 0;
+  virtual void vcmpd(DRegister dd, DRegister dm, Condition cond = AL) = 0;
+  virtual void vcmpsz(SRegister sd, Condition cond = AL) = 0;
+  virtual void vcmpdz(DRegister dd, Condition cond = AL) = 0;
+  virtual void vmstat(Condition cond = AL) = 0;  // VMRS APSR_nzcv, FPSCR
+
+  virtual void vpushs(SRegister reg, int nregs, Condition cond = AL) = 0;
+  virtual void vpushd(DRegister reg, int nregs, Condition cond = AL) = 0;
+  virtual void vpops(SRegister reg, int nregs, Condition cond = AL) = 0;
+  virtual void vpopd(DRegister reg, int nregs, Condition cond = AL) = 0;
 
   // Branch instructions.
-  void b(Label* label, Condition cond = AL);
-  void bl(Label* label, Condition cond = AL);
-  void blx(Register rm, Condition cond = AL);
-  void bx(Register rm, Condition cond = AL);
+  virtual void b(Label* label, Condition cond = AL) = 0;
+  virtual void bl(Label* label, Condition cond = AL) = 0;
+  virtual void blx(Register rm, Condition cond = AL) = 0;
+  virtual void bx(Register rm, Condition cond = AL) = 0;
+
+  void Pad(uint32_t bytes);
 
   // Macros.
+  // Most of these are pure virtual as they need to be implemented per instruction set.
+
   // Add signed constant value to rd. May clobber IP.
-  void AddConstant(Register rd, int32_t value, Condition cond = AL);
-  void AddConstant(Register rd, Register rn, int32_t value,
-                   Condition cond = AL);
-  void AddConstantSetFlags(Register rd, Register rn, int32_t value,
-                           Condition cond = AL);
-  void AddConstantWithCarry(Register rd, Register rn, int32_t value,
-                            Condition cond = AL);
+  virtual void AddConstant(Register rd, int32_t value, Condition cond = AL) = 0;
+  virtual void AddConstant(Register rd, Register rn, int32_t value,
+                           Condition cond = AL) = 0;
+  virtual void AddConstantSetFlags(Register rd, Register rn, int32_t value,
+                                   Condition cond = AL) = 0;
+  virtual void AddConstantWithCarry(Register rd, Register rn, int32_t value,
+                                    Condition cond = AL) = 0;
 
   // Load and Store. May clobber IP.
-  void LoadImmediate(Register rd, int32_t value, Condition cond = AL);
-  void LoadSImmediate(SRegister sd, float value, Condition cond = AL);
-  void LoadDImmediate(DRegister dd, double value,
-                      Register scratch, Condition cond = AL);
-  void MarkExceptionHandler(Label* label);
-  void LoadFromOffset(LoadOperandType type,
-                      Register reg,
-                      Register base,
-                      int32_t offset,
-                      Condition cond = AL);
-  void StoreToOffset(StoreOperandType type,
-                     Register reg,
-                     Register base,
-                     int32_t offset,
-                     Condition cond = AL);
-  void LoadSFromOffset(SRegister reg,
-                       Register base,
-                       int32_t offset,
-                       Condition cond = AL);
-  void StoreSToOffset(SRegister reg,
-                      Register base,
-                      int32_t offset,
-                      Condition cond = AL);
-  void LoadDFromOffset(DRegister reg,
-                       Register base,
-                       int32_t offset,
-                       Condition cond = AL);
-  void StoreDToOffset(DRegister reg,
-                      Register base,
-                      int32_t offset,
-                      Condition cond = AL);
+  virtual void LoadImmediate(Register rd, int32_t value, Condition cond = AL) = 0;
+  virtual void LoadSImmediate(SRegister sd, float value, Condition cond = AL) = 0;
+  virtual void LoadDImmediate(DRegister dd, double value,
+                              Register scratch, Condition cond = AL) = 0;
+  virtual void MarkExceptionHandler(Label* label) = 0;
+  virtual void LoadFromOffset(LoadOperandType type,
+                              Register reg,
+                              Register base,
+                              int32_t offset,
+                              Condition cond = AL) = 0;
+  virtual void StoreToOffset(StoreOperandType type,
+                             Register reg,
+                             Register base,
+                             int32_t offset,
+                             Condition cond = AL) = 0;
+  virtual void LoadSFromOffset(SRegister reg,
+                               Register base,
+                               int32_t offset,
+                               Condition cond = AL) = 0;
+  virtual void StoreSToOffset(SRegister reg,
+                              Register base,
+                              int32_t offset,
+                              Condition cond = AL) = 0;
+  virtual void LoadDFromOffset(DRegister reg,
+                               Register base,
+                               int32_t offset,
+                               Condition cond = AL) = 0;
+  virtual void StoreDToOffset(DRegister reg,
+                              Register base,
+                              int32_t offset,
+                              Condition cond = AL) = 0;
 
-  void Push(Register rd, Condition cond = AL);
-  void Pop(Register rd, Condition cond = AL);
+  virtual void Push(Register rd, Condition cond = AL) = 0;
+  virtual void Pop(Register rd, Condition cond = AL) = 0;
 
-  void PushList(RegList regs, Condition cond = AL);
-  void PopList(RegList regs, Condition cond = AL);
+  virtual void PushList(RegList regs, Condition cond = AL) = 0;
+  virtual void PopList(RegList regs, Condition cond = AL) = 0;
 
-  void Mov(Register rd, Register rm, Condition cond = AL);
+  virtual void Mov(Register rd, Register rm, Condition cond = AL) = 0;
 
   // Convenience shift instructions. Use mov instruction with shifter operand
   // for variants setting the status flags or using a register shift count.
-  void Lsl(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL);
-  void Lsr(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL);
-  void Asr(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL);
-  void Ror(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL);
-  void Rrx(Register rd, Register rm, Condition cond = AL);
+  virtual void Lsl(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL) = 0;
+  virtual void Lsr(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL) = 0;
+  virtual void Asr(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL) = 0;
+  virtual void Ror(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL) = 0;
+  virtual void Rrx(Register rd, Register rm, Condition cond = AL) = 0;
 
-  // Encode a signed constant in tst instructions, only affecting the flags.
-  void EncodeUint32InTstInstructions(uint32_t data);
-  // ... and decode from a pc pointing to the start of encoding instructions.
-  static uint32_t DecodeUint32FromTstInstructions(uword pc);
   static bool IsInstructionForExceptionHandling(uword pc);
 
-  // Emit data (e.g. encoded instruction or immediate) to the
-  // instruction stream.
-  void Emit(int32_t value);
-  void Bind(Label* label);
+  virtual void Bind(Label* label) = 0;
+
+  virtual void CompareAndBranchIfZero(Register r, Label* label) = 0;
+  virtual void CompareAndBranchIfNonZero(Register r, Label* label) = 0;
 
   //
   // Overridden common assembler high-level functionality
@@ -445,7 +568,7 @@
 
   // Emit code that will remove an activation from the stack
   void RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs)
-      OVERRIDE;
+    OVERRIDE;
 
   void IncreaseFrameSize(size_t adjust) OVERRIDE;
   void DecreaseFrameSize(size_t adjust) OVERRIDE;
@@ -509,8 +632,6 @@
   void Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset,
             ManagedRegister scratch, size_t size) OVERRIDE;
 
-  void MemoryBarrier(ManagedRegister scratch) OVERRIDE;
-
   // Sign extension
   void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE;
 
@@ -550,81 +671,9 @@
   // and branch to a ExceptionSlowPath if it is.
   void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE;
 
- private:
-  void EmitType01(Condition cond,
-                  int type,
-                  Opcode opcode,
-                  int set_cc,
-                  Register rn,
-                  Register rd,
-                  ShifterOperand so);
+  static uint32_t ModifiedImmediate(uint32_t value);
 
-  void EmitType5(Condition cond, int offset, bool link);
-
-  void EmitMemOp(Condition cond,
-                 bool load,
-                 bool byte,
-                 Register rd,
-                 Address ad);
-
-  void EmitMemOpAddressMode3(Condition cond,
-                             int32_t mode,
-                             Register rd,
-                             Address ad);
-
-  void EmitMultiMemOp(Condition cond,
-                      BlockAddressMode am,
-                      bool load,
-                      Register base,
-                      RegList regs);
-
-  void EmitShiftImmediate(Condition cond,
-                          Shift opcode,
-                          Register rd,
-                          Register rm,
-                          ShifterOperand so);
-
-  void EmitShiftRegister(Condition cond,
-                         Shift opcode,
-                         Register rd,
-                         Register rm,
-                         ShifterOperand so);
-
-  void EmitMulOp(Condition cond,
-                 int32_t opcode,
-                 Register rd,
-                 Register rn,
-                 Register rm,
-                 Register rs);
-
-  void EmitVFPsss(Condition cond,
-                  int32_t opcode,
-                  SRegister sd,
-                  SRegister sn,
-                  SRegister sm);
-
-  void EmitVFPddd(Condition cond,
-                  int32_t opcode,
-                  DRegister dd,
-                  DRegister dn,
-                  DRegister dm);
-
-  void EmitVFPsd(Condition cond,
-                 int32_t opcode,
-                 SRegister sd,
-                 DRegister dm);
-
-  void EmitVFPds(Condition cond,
-                 int32_t opcode,
-                 DRegister dd,
-                 SRegister sm);
-
-  void EmitBranch(Condition cond, Label* label, bool link);
-  static int32_t EncodeBranchOffset(int offset, int32_t inst);
-  static int DecodeBranchOffset(int32_t inst);
-  int32_t EncodeTstOffset(int offset, int32_t inst);
-  int DecodeTstOffset(int32_t inst);
-
+ protected:
   // Returns whether or not the given register is used for passing parameters.
   static int RegisterCompare(const Register* reg1, const Register* reg2) {
     return *reg1 - *reg2;
diff --git a/compiler/utils/arm/assembler_arm32.cc b/compiler/utils/arm/assembler_arm32.cc
new file mode 100644
index 0000000..b2bb20f
--- /dev/null
+++ b/compiler/utils/arm/assembler_arm32.cc
@@ -0,0 +1,1423 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "assembler_arm32.h"
+
+#include "base/logging.h"
+#include "entrypoints/quick/quick_entrypoints.h"
+#include "offsets.h"
+#include "thread.h"
+#include "utils.h"
+
+namespace art {
+namespace arm {
+
+// ARM data-processing (ALU) instructions.  Each wrapper forwards to
+// EmitType01 with the ARM opcode and the S bit (0 = leave condition
+// flags unchanged, 1 = set flags from the result).
+void Arm32Assembler::and_(Register rd, Register rn, const ShifterOperand& so,
+                        Condition cond) {
+  EmitType01(cond, so.type(), AND, 0, rn, rd, so);
+}
+
+
+void Arm32Assembler::eor(Register rd, Register rn, const ShifterOperand& so,
+                       Condition cond) {
+  EmitType01(cond, so.type(), EOR, 0, rn, rd, so);
+}
+
+
+void Arm32Assembler::sub(Register rd, Register rn, const ShifterOperand& so,
+                       Condition cond) {
+  EmitType01(cond, so.type(), SUB, 0, rn, rd, so);
+}
+
+void Arm32Assembler::rsb(Register rd, Register rn, const ShifterOperand& so,
+                       Condition cond) {
+  EmitType01(cond, so.type(), RSB, 0, rn, rd, so);
+}
+
+void Arm32Assembler::rsbs(Register rd, Register rn, const ShifterOperand& so,
+                        Condition cond) {
+  EmitType01(cond, so.type(), RSB, 1, rn, rd, so);
+}
+
+
+void Arm32Assembler::add(Register rd, Register rn, const ShifterOperand& so,
+                       Condition cond) {
+  EmitType01(cond, so.type(), ADD, 0, rn, rd, so);
+}
+
+
+void Arm32Assembler::adds(Register rd, Register rn, const ShifterOperand& so,
+                        Condition cond) {
+  EmitType01(cond, so.type(), ADD, 1, rn, rd, so);
+}
+
+
+void Arm32Assembler::subs(Register rd, Register rn, const ShifterOperand& so,
+                        Condition cond) {
+  EmitType01(cond, so.type(), SUB, 1, rn, rd, so);
+}
+
+
+void Arm32Assembler::adc(Register rd, Register rn, const ShifterOperand& so,
+                       Condition cond) {
+  EmitType01(cond, so.type(), ADC, 0, rn, rd, so);
+}
+
+
+void Arm32Assembler::sbc(Register rd, Register rn, const ShifterOperand& so,
+                       Condition cond) {
+  EmitType01(cond, so.type(), SBC, 0, rn, rd, so);
+}
+
+
+void Arm32Assembler::rsc(Register rd, Register rn, const ShifterOperand& so,
+                       Condition cond) {
+  EmitType01(cond, so.type(), RSC, 0, rn, rd, so);
+}
+
+
+// Comparison instructions always set flags (S = 1) and have no
+// destination register (R0 is encoded in the unused Rd field).
+void Arm32Assembler::tst(Register rn, const ShifterOperand& so, Condition cond) {
+  CHECK_NE(rn, PC);  // Reserve tst pc instruction for exception handler marker.
+  EmitType01(cond, so.type(), TST, 1, rn, R0, so);
+}
+
+
+void Arm32Assembler::teq(Register rn, const ShifterOperand& so, Condition cond) {
+  CHECK_NE(rn, PC);  // Reserve teq pc instruction for exception handler marker.
+  EmitType01(cond, so.type(), TEQ, 1, rn, R0, so);
+}
+
+
+void Arm32Assembler::cmp(Register rn, const ShifterOperand& so, Condition cond) {
+  EmitType01(cond, so.type(), CMP, 1, rn, R0, so);
+}
+
+
+void Arm32Assembler::cmn(Register rn, const ShifterOperand& so, Condition cond) {
+  EmitType01(cond, so.type(), CMN, 1, rn, R0, so);
+}
+
+
+void Arm32Assembler::orr(Register rd, Register rn,
+                    const ShifterOperand& so, Condition cond) {
+  EmitType01(cond, so.type(), ORR, 0, rn, rd, so);
+}
+
+
+void Arm32Assembler::orrs(Register rd, Register rn,
+                        const ShifterOperand& so, Condition cond) {
+  EmitType01(cond, so.type(), ORR, 1, rn, rd, so);
+}
+
+
+// mov/mvn take no first operand; R0 is encoded in the unused Rn field.
+void Arm32Assembler::mov(Register rd, const ShifterOperand& so, Condition cond) {
+  EmitType01(cond, so.type(), MOV, 0, R0, rd, so);
+}
+
+
+void Arm32Assembler::movs(Register rd, const ShifterOperand& so, Condition cond) {
+  EmitType01(cond, so.type(), MOV, 1, R0, rd, so);
+}
+
+
+void Arm32Assembler::bic(Register rd, Register rn, const ShifterOperand& so,
+                       Condition cond) {
+  EmitType01(cond, so.type(), BIC, 0, rn, rd, so);
+}
+
+
+void Arm32Assembler::mvn(Register rd, const ShifterOperand& so, Condition cond) {
+  EmitType01(cond, so.type(), MVN, 0, R0, rd, so);
+}
+
+
+void Arm32Assembler::mvns(Register rd, const ShifterOperand& so, Condition cond) {
+  EmitType01(cond, so.type(), MVN, 1, R0, rd, so);
+}
+
+
+// Multiply instructions.  The ARM multiply encoding names its register
+// fields (Rd, Rn, Rm, Rs) differently from the assembler's argument
+// order, hence the remapping comments on each call.
+void Arm32Assembler::mul(Register rd, Register rn, Register rm, Condition cond) {
+  // Assembler registers rd, rn, rm are encoded as rn, rm, rs.
+  EmitMulOp(cond, 0, R0, rd, rn, rm);
+}
+
+
+void Arm32Assembler::mla(Register rd, Register rn, Register rm, Register ra,
+                         Condition cond) {
+  // Assembler registers rd, rn, rm, ra are encoded as rn, rm, rs, rd.
+  EmitMulOp(cond, B21, ra, rd, rn, rm);
+}
+
+
+void Arm32Assembler::mls(Register rd, Register rn, Register rm, Register ra,
+                         Condition cond) {
+  // Assembler registers rd, rn, rm, ra are encoded as rn, rm, rs, rd.
+  EmitMulOp(cond, B22 | B21, ra, rd, rn, rm);
+}
+
+
+void Arm32Assembler::umull(Register rd_lo, Register rd_hi, Register rn,
+                           Register rm, Condition cond) {
+  // Assembler registers rd_lo, rd_hi, rn, rm are encoded as rd, rn, rm, rs.
+  EmitMulOp(cond, B23, rd_lo, rd_hi, rn, rm);
+}
+
+
+// Signed divide: rd = rn / rm.  Encoded directly (sdiv/udiv have no
+// shared emitter); bits B15-B12 are the required all-ones Ra field.
+void Arm32Assembler::sdiv(Register rd, Register rn, Register rm, Condition cond) {
+  CHECK_NE(rd, kNoRegister);
+  CHECK_NE(rn, kNoRegister);
+  CHECK_NE(rm, kNoRegister);
+  CHECK_NE(cond, kNoCondition);
+  int32_t encoding = B26 | B25 | B24 | B20 |
+      B15 | B14 | B13 | B12 |
+      (static_cast<int32_t>(cond) << kConditionShift) |
+      (static_cast<int32_t>(rn) << 0) |
+      (static_cast<int32_t>(rd) << 16) |
+      (static_cast<int32_t>(rm) << 8) |
+      B4;
+  Emit(encoding);
+}
+
+
+// Unsigned divide: rd = rn / rm.  Differs from sdiv only by B21.
+void Arm32Assembler::udiv(Register rd, Register rn, Register rm, Condition cond) {
+  CHECK_NE(rd, kNoRegister);
+  CHECK_NE(rn, kNoRegister);
+  CHECK_NE(rm, kNoRegister);
+  CHECK_NE(cond, kNoCondition);
+  int32_t encoding = B26 | B25 | B24 | B21 | B20 |
+      B15 | B14 | B13 | B12 |
+      (static_cast<int32_t>(cond) << kConditionShift) |
+      (static_cast<int32_t>(rn) << 0) |
+      (static_cast<int32_t>(rd) << 16) |
+      (static_cast<int32_t>(rm) << 8) |
+      B4;
+  Emit(encoding);
+}
+
+
+// Single-register load/store.  Word and byte forms go through EmitMemOp
+// (load flag, byte flag); halfword, signed and dual forms use
+// addressing-mode-3 encodings via EmitMemOpAddressMode3.
+void Arm32Assembler::ldr(Register rd, const Address& ad, Condition cond) {
+  EmitMemOp(cond, true, false, rd, ad);
+}
+
+
+void Arm32Assembler::str(Register rd, const Address& ad, Condition cond) {
+  EmitMemOp(cond, false, false, rd, ad);
+}
+
+
+void Arm32Assembler::ldrb(Register rd, const Address& ad, Condition cond) {
+  EmitMemOp(cond, true, true, rd, ad);
+}
+
+
+void Arm32Assembler::strb(Register rd, const Address& ad, Condition cond) {
+  EmitMemOp(cond, false, true, rd, ad);
+}
+
+
+void Arm32Assembler::ldrh(Register rd, const Address& ad, Condition cond) {
+  EmitMemOpAddressMode3(cond, L | B7 | H | B4, rd, ad);
+}
+
+
+void Arm32Assembler::strh(Register rd, const Address& ad, Condition cond) {
+  EmitMemOpAddressMode3(cond, B7 | H | B4, rd, ad);
+}
+
+
+void Arm32Assembler::ldrsb(Register rd, const Address& ad, Condition cond) {
+  EmitMemOpAddressMode3(cond, L | B7 | B6 | B4, rd, ad);
+}
+
+
+void Arm32Assembler::ldrsh(Register rd, const Address& ad, Condition cond) {
+  EmitMemOpAddressMode3(cond, L | B7 | B6 | H | B4, rd, ad);
+}
+
+
+// Dual-register transfers require an even first register (rd, rd+1).
+void Arm32Assembler::ldrd(Register rd, const Address& ad, Condition cond) {
+  CHECK_EQ(rd % 2, 0);
+  EmitMemOpAddressMode3(cond, B7 | B6 | B4, rd, ad);
+}
+
+
+void Arm32Assembler::strd(Register rd, const Address& ad, Condition cond) {
+  CHECK_EQ(rd % 2, 0);
+  EmitMemOpAddressMode3(cond, B7 | B6 | B5 | B4, rd, ad);
+}
+
+
+// Load/store multiple registers (bitmask in regs) to/from [base],
+// with the block addressing mode (IA/IB/DA/DB, writeback) in am.
+void Arm32Assembler::ldm(BlockAddressMode am,
+                       Register base,
+                       RegList regs,
+                       Condition cond) {
+  EmitMultiMemOp(cond, am, true, base, regs);
+}
+
+
+void Arm32Assembler::stm(BlockAddressMode am,
+                       Register base,
+                       RegList regs,
+                       Condition cond) {
+  EmitMultiMemOp(cond, am, false, base, regs);
+}
+
+
+// VFP register-to-register moves.
+void Arm32Assembler::vmovs(SRegister sd, SRegister sm, Condition cond) {
+  EmitVFPsss(cond, B23 | B21 | B20 | B6, sd, S0, sm);
+}
+
+
+void Arm32Assembler::vmovd(DRegister dd, DRegister dm, Condition cond) {
+  EmitVFPddd(cond, B23 | B21 | B20 | B6, dd, D0, dm);
+}
+
+
+// VFP immediate moves.  Only values representable in the 8-bit VFP
+// modified-immediate format can be encoded; returns false (emitting
+// nothing) when the constant does not fit, so the caller must provide
+// a fallback.
+bool Arm32Assembler::vmovs(SRegister sd, float s_imm, Condition cond) {
+  uint32_t imm32 = bit_cast<uint32_t, float>(s_imm);
+  if (((imm32 & ((1 << 19) - 1)) == 0) &&
+      ((((imm32 >> 25) & ((1 << 6) - 1)) == (1 << 5)) ||
+       (((imm32 >> 25) & ((1 << 6) - 1)) == ((1 << 5) -1)))) {
+    uint8_t imm8 = ((imm32 >> 31) << 7) | (((imm32 >> 29) & 1) << 6) |
+        ((imm32 >> 19) & ((1 << 6) -1));
+    EmitVFPsss(cond, B23 | B21 | B20 | ((imm8 >> 4)*B16) | (imm8 & 0xf),
+               sd, S0, S0);
+    return true;
+  }
+  return false;
+}
+
+
+bool Arm32Assembler::vmovd(DRegister dd, double d_imm, Condition cond) {
+  uint64_t imm64 = bit_cast<uint64_t, double>(d_imm);
+  if (((imm64 & ((1LL << 48) - 1)) == 0) &&
+      ((((imm64 >> 54) & ((1 << 9) - 1)) == (1 << 8)) ||
+       (((imm64 >> 54) & ((1 << 9) - 1)) == ((1 << 8) -1)))) {
+    uint8_t imm8 = ((imm64 >> 63) << 7) | (((imm64 >> 61) & 1) << 6) |
+        ((imm64 >> 48) & ((1 << 6) -1));
+    EmitVFPddd(cond, B23 | B21 | B20 | ((imm8 >> 4)*B16) | B8 | (imm8 & 0xf),
+               dd, D0, D0);
+    return true;
+  }
+  return false;
+}
+
+
+// VFP arithmetic, compare and convert instructions.  Each wrapper
+// supplies the instruction-specific opcode bits to the shared emitters
+// (EmitVFPsss: three S regs, EmitVFPddd: three D regs, EmitVFPsd /
+// EmitVFPds: mixed single/double operands).  Single-operand forms pass
+// S0/D0 in the unused register slots.
+void Arm32Assembler::vadds(SRegister sd, SRegister sn, SRegister sm,
+                           Condition cond) {
+  EmitVFPsss(cond, B21 | B20, sd, sn, sm);
+}
+
+
+void Arm32Assembler::vaddd(DRegister dd, DRegister dn, DRegister dm,
+                           Condition cond) {
+  EmitVFPddd(cond, B21 | B20, dd, dn, dm);
+}
+
+
+void Arm32Assembler::vsubs(SRegister sd, SRegister sn, SRegister sm,
+                           Condition cond) {
+  EmitVFPsss(cond, B21 | B20 | B6, sd, sn, sm);
+}
+
+
+void Arm32Assembler::vsubd(DRegister dd, DRegister dn, DRegister dm,
+                           Condition cond) {
+  EmitVFPddd(cond, B21 | B20 | B6, dd, dn, dm);
+}
+
+
+void Arm32Assembler::vmuls(SRegister sd, SRegister sn, SRegister sm,
+                           Condition cond) {
+  EmitVFPsss(cond, B21, sd, sn, sm);
+}
+
+
+void Arm32Assembler::vmuld(DRegister dd, DRegister dn, DRegister dm,
+                           Condition cond) {
+  EmitVFPddd(cond, B21, dd, dn, dm);
+}
+
+
+void Arm32Assembler::vmlas(SRegister sd, SRegister sn, SRegister sm,
+                           Condition cond) {
+  EmitVFPsss(cond, 0, sd, sn, sm);
+}
+
+
+void Arm32Assembler::vmlad(DRegister dd, DRegister dn, DRegister dm,
+                           Condition cond) {
+  EmitVFPddd(cond, 0, dd, dn, dm);
+}
+
+
+void Arm32Assembler::vmlss(SRegister sd, SRegister sn, SRegister sm,
+                           Condition cond) {
+  EmitVFPsss(cond, B6, sd, sn, sm);
+}
+
+
+void Arm32Assembler::vmlsd(DRegister dd, DRegister dn, DRegister dm,
+                           Condition cond) {
+  EmitVFPddd(cond, B6, dd, dn, dm);
+}
+
+
+void Arm32Assembler::vdivs(SRegister sd, SRegister sn, SRegister sm,
+                           Condition cond) {
+  EmitVFPsss(cond, B23, sd, sn, sm);
+}
+
+
+void Arm32Assembler::vdivd(DRegister dd, DRegister dn, DRegister dm,
+                           Condition cond) {
+  EmitVFPddd(cond, B23, dd, dn, dm);
+}
+
+
+void Arm32Assembler::vabss(SRegister sd, SRegister sm, Condition cond) {
+  EmitVFPsss(cond, B23 | B21 | B20 | B7 | B6, sd, S0, sm);
+}
+
+
+void Arm32Assembler::vabsd(DRegister dd, DRegister dm, Condition cond) {
+  EmitVFPddd(cond, B23 | B21 | B20 | B7 | B6, dd, D0, dm);
+}
+
+
+void Arm32Assembler::vnegs(SRegister sd, SRegister sm, Condition cond) {
+  EmitVFPsss(cond, B23 | B21 | B20 | B16 | B6, sd, S0, sm);
+}
+
+
+void Arm32Assembler::vnegd(DRegister dd, DRegister dm, Condition cond) {
+  EmitVFPddd(cond, B23 | B21 | B20 | B16 | B6, dd, D0, dm);
+}
+
+
+void Arm32Assembler::vsqrts(SRegister sd, SRegister sm, Condition cond) {
+  EmitVFPsss(cond, B23 | B21 | B20 | B16 | B7 | B6, sd, S0, sm);
+}
+
+void Arm32Assembler::vsqrtd(DRegister dd, DRegister dm, Condition cond) {
+  EmitVFPddd(cond, B23 | B21 | B20 | B16 | B7 | B6, dd, D0, dm);
+}
+
+
+void Arm32Assembler::vcvtsd(SRegister sd, DRegister dm, Condition cond) {
+  EmitVFPsd(cond, B23 | B21 | B20 | B18 | B17 | B16 | B8 | B7 | B6, sd, dm);
+}
+
+
+void Arm32Assembler::vcvtds(DRegister dd, SRegister sm, Condition cond) {
+  EmitVFPds(cond, B23 | B21 | B20 | B18 | B17 | B16 | B7 | B6, dd, sm);
+}
+
+
+void Arm32Assembler::vcvtis(SRegister sd, SRegister sm, Condition cond) {
+  EmitVFPsss(cond, B23 | B21 | B20 | B19 | B18 | B16 | B7 | B6, sd, S0, sm);
+}
+
+
+void Arm32Assembler::vcvtid(SRegister sd, DRegister dm, Condition cond) {
+  EmitVFPsd(cond, B23 | B21 | B20 | B19 | B18 | B16 | B8 | B7 | B6, sd, dm);
+}
+
+
+void Arm32Assembler::vcvtsi(SRegister sd, SRegister sm, Condition cond) {
+  EmitVFPsss(cond, B23 | B21 | B20 | B19 | B7 | B6, sd, S0, sm);
+}
+
+
+void Arm32Assembler::vcvtdi(DRegister dd, SRegister sm, Condition cond) {
+  EmitVFPds(cond, B23 | B21 | B20 | B19 | B8 | B7 | B6, dd, sm);
+}
+
+
+void Arm32Assembler::vcvtus(SRegister sd, SRegister sm, Condition cond) {
+  EmitVFPsss(cond, B23 | B21 | B20 | B19 | B18 | B7 | B6, sd, S0, sm);
+}
+
+
+void Arm32Assembler::vcvtud(SRegister sd, DRegister dm, Condition cond) {
+  EmitVFPsd(cond, B23 | B21 | B20 | B19 | B18 | B8 | B7 | B6, sd, dm);
+}
+
+
+void Arm32Assembler::vcvtsu(SRegister sd, SRegister sm, Condition cond) {
+  EmitVFPsss(cond, B23 | B21 | B20 | B19 | B6, sd, S0, sm);
+}
+
+
+void Arm32Assembler::vcvtdu(DRegister dd, SRegister sm, Condition cond) {
+  EmitVFPds(cond, B23 | B21 | B20 | B19 | B8 | B6, dd, sm);
+}
+
+
+void Arm32Assembler::vcmps(SRegister sd, SRegister sm, Condition cond) {
+  EmitVFPsss(cond, B23 | B21 | B20 | B18 | B6, sd, S0, sm);
+}
+
+
+void Arm32Assembler::vcmpd(DRegister dd, DRegister dm, Condition cond) {
+  EmitVFPddd(cond, B23 | B21 | B20 | B18 | B6, dd, D0, dm);
+}
+
+
+// Compare against zero (no second source register).
+void Arm32Assembler::vcmpsz(SRegister sd, Condition cond) {
+  EmitVFPsss(cond, B23 | B21 | B20 | B18 | B16 | B6, sd, S0, S0);
+}
+
+
+void Arm32Assembler::vcmpdz(DRegister dd, Condition cond) {
+  EmitVFPddd(cond, B23 | B21 | B20 | B18 | B16 | B6, dd, D0, D0);
+}
+
+// Branch (b) and branch-with-link (bl) to a label; unresolved labels
+// are chained through the instruction's offset field (see EmitBranch).
+void Arm32Assembler::b(Label* label, Condition cond) {
+  EmitBranch(cond, label, false);
+}
+
+
+void Arm32Assembler::bl(Label* label, Condition cond) {
+  EmitBranch(cond, label, true);
+}
+
+
+// Emits the reserved "tst pc" marker instruction followed by a branch
+// to the handler, jumping over the marker pair for normal execution.
+void Arm32Assembler::MarkExceptionHandler(Label* label) {
+  EmitType01(AL, 1, TST, 1, PC, R0, ShifterOperand(0));
+  Label l;
+  b(&l);
+  EmitBranch(AL, label, false);
+  Bind(&l);
+}
+
+
+// Appends one 32-bit instruction word to the assembler buffer.
+void Arm32Assembler::Emit(int32_t value) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  buffer_.Emit<int32_t>(value);
+}
+
+
+// Emits a type 0/1 (data-processing) instruction: condition, operand
+// type, opcode, S bit, Rn, Rd and the shifter operand encoding.
+void Arm32Assembler::EmitType01(Condition cond,
+                                int type,
+                                Opcode opcode,
+                                int set_cc,
+                                Register rn,
+                                Register rd,
+                                const ShifterOperand& so) {
+  CHECK_NE(rd, kNoRegister);
+  CHECK_NE(cond, kNoCondition);
+  int32_t encoding = static_cast<int32_t>(cond) << kConditionShift |
+                     type << kTypeShift |
+                     static_cast<int32_t>(opcode) << kOpcodeShift |
+                     set_cc << kSShift |
+                     static_cast<int32_t>(rn) << kRnShift |
+                     static_cast<int32_t>(rd) << kRdShift |
+                     so.encodingArm();
+  Emit(encoding);
+}
+
+
+// Emits a type 5 (branch) instruction with the given byte offset
+// folded into the offset field by EncodeBranchOffset.
+void Arm32Assembler::EmitType5(Condition cond, int offset, bool link) {
+  CHECK_NE(cond, kNoCondition);
+  int32_t encoding = static_cast<int32_t>(cond) << kConditionShift |
+                     5 << kTypeShift |
+                     (link ? 1 : 0) << kLinkShift;
+  Emit(Arm32Assembler::EncodeBranchOffset(offset, encoding));
+}
+
+
+// Emits a single-register word/byte load or store using the Address's
+// addressing-mode-2 encoding.
+void Arm32Assembler::EmitMemOp(Condition cond,
+                             bool load,
+                             bool byte,
+                             Register rd,
+                             const Address& ad) {
+  CHECK_NE(rd, kNoRegister);
+  CHECK_NE(cond, kNoCondition);
+  const Address& addr = static_cast<const Address&>(ad);
+
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B26 |
+                     (load ? L : 0) |
+                     (byte ? B : 0) |
+                     (static_cast<int32_t>(rd) << kRdShift) |
+                     addr.encodingArm();
+  Emit(encoding);
+}
+
+
+// Emits a halfword/signed/dual transfer using the addressing-mode-3
+// encoding; mode carries the instruction-specific L/S/H bits.
+void Arm32Assembler::EmitMemOpAddressMode3(Condition cond,
+                                           int32_t mode,
+                                           Register rd,
+                                           const Address& ad) {
+  CHECK_NE(rd, kNoRegister);
+  CHECK_NE(cond, kNoCondition);
+  const Address& addr = static_cast<const Address&>(ad);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B22  |
+                     mode |
+                     (static_cast<int32_t>(rd) << kRdShift) |
+                     addr.encoding3();
+  Emit(encoding);
+}
+
+
+// Emits an ldm/stm instruction; regs is the register bitmask occupying
+// the low 16 bits of the encoding.
+void Arm32Assembler::EmitMultiMemOp(Condition cond,
+                                    BlockAddressMode am,
+                                    bool load,
+                                    Register base,
+                                    RegList regs) {
+  CHECK_NE(base, kNoRegister);
+  CHECK_NE(cond, kNoCondition);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B27 |
+                     am |
+                     (load ? L : 0) |
+                     (static_cast<int32_t>(base) << kRnShift) |
+                     regs;
+  Emit(encoding);
+}
+
+
+// Emits a shift-by-immediate as a MOV with the shift encoded in the
+// shifter-operand fields (the ARM way of expressing lsl/lsr/asr/ror).
+void Arm32Assembler::EmitShiftImmediate(Condition cond,
+                                        Shift opcode,
+                                        Register rd,
+                                        Register rm,
+                                        const ShifterOperand& so) {
+  CHECK_NE(cond, kNoCondition);
+  CHECK(so.IsImmediate());
+  int32_t encoding = static_cast<int32_t>(cond) << kConditionShift |
+                     static_cast<int32_t>(MOV) << kOpcodeShift |
+                     static_cast<int32_t>(rd) << kRdShift |
+                     so.encodingArm() << kShiftImmShift |
+                     static_cast<int32_t>(opcode) << kShiftShift |
+                     static_cast<int32_t>(rm);
+  Emit(encoding);
+}
+
+
+// Register-controlled shift variant; B4 distinguishes it from the
+// immediate form.
+void Arm32Assembler::EmitShiftRegister(Condition cond,
+                                       Shift opcode,
+                                       Register rd,
+                                       Register rm,
+                                       const ShifterOperand& so) {
+  CHECK_NE(cond, kNoCondition);
+  CHECK(so.IsRegister());
+  int32_t encoding = static_cast<int32_t>(cond) << kConditionShift |
+                     static_cast<int32_t>(MOV) << kOpcodeShift |
+                     static_cast<int32_t>(rd) << kRdShift |
+                     so.encodingArm() << kShiftRegisterShift |
+                     static_cast<int32_t>(opcode) << kShiftShift |
+                     B4 |
+                     static_cast<int32_t>(rm);
+  Emit(encoding);
+}
+
+
+// Emits a branch to label.  Bound labels get the final PC-relative
+// offset; unbound labels are linked into a chain threaded through the
+// offset fields of the pending branch instructions, resolved at Bind.
+void Arm32Assembler::EmitBranch(Condition cond, Label* label, bool link) {
+  if (label->IsBound()) {
+    EmitType5(cond, label->Position() - buffer_.Size(), link);
+  } else {
+    int position = buffer_.Size();
+    // Use the offset field of the branch instruction for linking the sites.
+    EmitType5(cond, label->position_, link);
+    label->LinkTo(position);
+  }
+}
+
+
+void Arm32Assembler::clz(Register rd, Register rm, Condition cond) {
+  CHECK_NE(rd, kNoRegister);
+  CHECK_NE(rm, kNoRegister);
+  CHECK_NE(cond, kNoCondition);
+  CHECK_NE(rd, PC);
+  CHECK_NE(rm, PC);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B24 | B22 | B21 | (0xf << 16) |
+                     (static_cast<int32_t>(rd) << kRdShift) |
+                     (0xf << 8) | B4 | static_cast<int32_t>(rm);
+  Emit(encoding);
+}
+
+
+void Arm32Assembler::movw(Register rd, uint16_t imm16, Condition cond) {
+  CHECK_NE(cond, kNoCondition);
+  int32_t encoding = static_cast<int32_t>(cond) << kConditionShift |
+                     B25 | B24 | ((imm16 >> 12) << 16) |
+                     static_cast<int32_t>(rd) << kRdShift | (imm16 & 0xfff);
+  Emit(encoding);
+}
+
+
+void Arm32Assembler::movt(Register rd, uint16_t imm16, Condition cond) {
+  CHECK_NE(cond, kNoCondition);
+  int32_t encoding = static_cast<int32_t>(cond) << kConditionShift |
+                     B25 | B24 | B22 | ((imm16 >> 12) << 16) |
+                     static_cast<int32_t>(rd) << kRdShift | (imm16 & 0xfff);
+  Emit(encoding);
+}
+
+
+// Shared emitter for the multiply family.  The caller supplies the
+// opcode bits and registers already remapped into the encoding's
+// Rd/Rn/Rm/Rs fields (see the remapping notes at the mul/mla/umull
+// call sites); B7|B4 marks the multiply instruction class.
+void Arm32Assembler::EmitMulOp(Condition cond, int32_t opcode,
+                               Register rd, Register rn,
+                               Register rm, Register rs) {
+  CHECK_NE(rd, kNoRegister);
+  CHECK_NE(rn, kNoRegister);
+  CHECK_NE(rm, kNoRegister);
+  CHECK_NE(rs, kNoRegister);
+  CHECK_NE(cond, kNoCondition);
+  int32_t encoding = opcode |
+      (static_cast<int32_t>(cond) << kConditionShift) |
+      (static_cast<int32_t>(rn) << kRnShift) |
+      (static_cast<int32_t>(rd) << kRdShift) |
+      (static_cast<int32_t>(rs) << kRsShift) |
+      B7 | B4 |
+      (static_cast<int32_t>(rm) << kRmShift);
+  Emit(encoding);
+}
+
+// Load-exclusive: rt = [rn], marking the address for a later strex.
+void Arm32Assembler::ldrex(Register rt, Register rn, Condition cond) {
+  CHECK_NE(rn, kNoRegister);
+  CHECK_NE(rt, kNoRegister);
+  CHECK_NE(cond, kNoCondition);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B24 |
+                     B23 |
+                     L   |
+                     (static_cast<int32_t>(rn) << kLdExRnShift) |
+                     (static_cast<int32_t>(rt) << kLdExRtShift) |
+                     B11 | B10 | B9 | B8 | B7 | B4 | B3 | B2 | B1 | B0;
+  Emit(encoding);
+}
+
+
+// Store-exclusive: [rn] = rt, with rd receiving the success/fail
+// status (0 on success).
+void Arm32Assembler::strex(Register rd,
+                           Register rt,
+                           Register rn,
+                           Condition cond) {
+  CHECK_NE(rn, kNoRegister);
+  CHECK_NE(rd, kNoRegister);
+  CHECK_NE(rt, kNoRegister);
+  CHECK_NE(cond, kNoCondition);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B24 |
+                     B23 |
+                     (static_cast<int32_t>(rn) << kStrExRnShift) |
+                     (static_cast<int32_t>(rd) << kStrExRdShift) |
+                     B11 | B10 | B9 | B8 | B7 | B4 |
+                     (static_cast<int32_t>(rt) << kStrExRtShift);
+  Emit(encoding);
+}
+
+
+// Clears any outstanding exclusive monitor state; unconditional by
+// architecture, hence the special (0xF) condition encoding.
+void Arm32Assembler::clrex(Condition cond) {
+  CHECK_EQ(cond, AL);   // This cannot be conditional on ARM.
+  int32_t encoding = (kSpecialCondition << kConditionShift) |
+                     B26 | B24 | B22 | B21 | B20 | (0xff << 12) | B4 | 0xf;
+  Emit(encoding);
+}
+
+
+// No-operation hint instruction (may be conditional).
+void Arm32Assembler::nop(Condition cond) {
+  CHECK_NE(cond, kNoCondition);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B25 | B24 | B21 | (0xf << 12);
+  Emit(encoding);
+}
+
+
+// Transfers between core registers and VFP registers.  Single-register
+// forms (vmovsr/vmovrs) move one word; the *rr forms move a 64-bit
+// value through a register pair (rt = low word, rt2 = high word).
+// SP and PC are not valid core-register operands.
+void Arm32Assembler::vmovsr(SRegister sn, Register rt, Condition cond) {
+  CHECK_NE(sn, kNoSRegister);
+  CHECK_NE(rt, kNoRegister);
+  CHECK_NE(rt, SP);
+  CHECK_NE(rt, PC);
+  CHECK_NE(cond, kNoCondition);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B27 | B26 | B25 |
+                     ((static_cast<int32_t>(sn) >> 1)*B16) |
+                     (static_cast<int32_t>(rt)*B12) | B11 | B9 |
+                     ((static_cast<int32_t>(sn) & 1)*B7) | B4;
+  Emit(encoding);
+}
+
+
+void Arm32Assembler::vmovrs(Register rt, SRegister sn, Condition cond) {
+  CHECK_NE(sn, kNoSRegister);
+  CHECK_NE(rt, kNoRegister);
+  CHECK_NE(rt, SP);
+  CHECK_NE(rt, PC);
+  CHECK_NE(cond, kNoCondition);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B27 | B26 | B25 | B20 |
+                     ((static_cast<int32_t>(sn) >> 1)*B16) |
+                     (static_cast<int32_t>(rt)*B12) | B11 | B9 |
+                     ((static_cast<int32_t>(sn) & 1)*B7) | B4;
+  Emit(encoding);
+}
+
+
+// Uses the S-register pair sm, sm+1, so sm may not be S31.
+void Arm32Assembler::vmovsrr(SRegister sm, Register rt, Register rt2,
+                             Condition cond) {
+  CHECK_NE(sm, kNoSRegister);
+  CHECK_NE(sm, S31);
+  CHECK_NE(rt, kNoRegister);
+  CHECK_NE(rt, SP);
+  CHECK_NE(rt, PC);
+  CHECK_NE(rt2, kNoRegister);
+  CHECK_NE(rt2, SP);
+  CHECK_NE(rt2, PC);
+  CHECK_NE(cond, kNoCondition);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B27 | B26 | B22 |
+                     (static_cast<int32_t>(rt2)*B16) |
+                     (static_cast<int32_t>(rt)*B12) | B11 | B9 |
+                     ((static_cast<int32_t>(sm) & 1)*B5) | B4 |
+                     (static_cast<int32_t>(sm) >> 1);
+  Emit(encoding);
+}
+
+
+void Arm32Assembler::vmovrrs(Register rt, Register rt2, SRegister sm,
+                             Condition cond) {
+  CHECK_NE(sm, kNoSRegister);
+  CHECK_NE(sm, S31);
+  CHECK_NE(rt, kNoRegister);
+  CHECK_NE(rt, SP);
+  CHECK_NE(rt, PC);
+  CHECK_NE(rt2, kNoRegister);
+  CHECK_NE(rt2, SP);
+  CHECK_NE(rt2, PC);
+  CHECK_NE(rt, rt2);
+  CHECK_NE(cond, kNoCondition);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B27 | B26 | B22 | B20 |
+                     (static_cast<int32_t>(rt2)*B16) |
+                     (static_cast<int32_t>(rt)*B12) | B11 | B9 |
+                     ((static_cast<int32_t>(sm) & 1)*B5) | B4 |
+                     (static_cast<int32_t>(sm) >> 1);
+  Emit(encoding);
+}
+
+
+void Arm32Assembler::vmovdrr(DRegister dm, Register rt, Register rt2,
+                             Condition cond) {
+  CHECK_NE(dm, kNoDRegister);
+  CHECK_NE(rt, kNoRegister);
+  CHECK_NE(rt, SP);
+  CHECK_NE(rt, PC);
+  CHECK_NE(rt2, kNoRegister);
+  CHECK_NE(rt2, SP);
+  CHECK_NE(rt2, PC);
+  CHECK_NE(cond, kNoCondition);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B27 | B26 | B22 |
+                     (static_cast<int32_t>(rt2)*B16) |
+                     (static_cast<int32_t>(rt)*B12) | B11 | B9 | B8 |
+                     ((static_cast<int32_t>(dm) >> 4)*B5) | B4 |
+                     (static_cast<int32_t>(dm) & 0xf);
+  Emit(encoding);
+}
+
+
+void Arm32Assembler::vmovrrd(Register rt, Register rt2, DRegister dm,
+                             Condition cond) {
+  CHECK_NE(dm, kNoDRegister);
+  CHECK_NE(rt, kNoRegister);
+  CHECK_NE(rt, SP);
+  CHECK_NE(rt, PC);
+  CHECK_NE(rt2, kNoRegister);
+  CHECK_NE(rt2, SP);
+  CHECK_NE(rt2, PC);
+  CHECK_NE(rt, rt2);
+  CHECK_NE(cond, kNoCondition);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B27 | B26 | B22 | B20 |
+                     (static_cast<int32_t>(rt2)*B16) |
+                     (static_cast<int32_t>(rt)*B12) | B11 | B9 | B8 |
+                     ((static_cast<int32_t>(dm) >> 4)*B5) | B4 |
+                     (static_cast<int32_t>(dm) & 0xf);
+  Emit(encoding);
+}
+
+
+// VFP load/store of single (S) and double (D) registers; the Address
+// supplies the VFP (mode 5) offset encoding via vencoding().  Stores
+// additionally reject a PC base register.
+void Arm32Assembler::vldrs(SRegister sd, const Address& ad, Condition cond) {
+  const Address& addr = static_cast<const Address&>(ad);
+  CHECK_NE(sd, kNoSRegister);
+  CHECK_NE(cond, kNoCondition);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B27 | B26 | B24 | B20 |
+                     ((static_cast<int32_t>(sd) & 1)*B22) |
+                     ((static_cast<int32_t>(sd) >> 1)*B12) |
+                     B11 | B9 | addr.vencoding();
+  Emit(encoding);
+}
+
+
+void Arm32Assembler::vstrs(SRegister sd, const Address& ad, Condition cond) {
+  const Address& addr = static_cast<const Address&>(ad);
+  CHECK_NE(static_cast<Register>(addr.encodingArm() & (0xf << kRnShift)), PC);
+  CHECK_NE(sd, kNoSRegister);
+  CHECK_NE(cond, kNoCondition);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B27 | B26 | B24 |
+                     ((static_cast<int32_t>(sd) & 1)*B22) |
+                     ((static_cast<int32_t>(sd) >> 1)*B12) |
+                     B11 | B9 | addr.vencoding();
+  Emit(encoding);
+}
+
+
+void Arm32Assembler::vldrd(DRegister dd, const Address& ad, Condition cond) {
+  const Address& addr = static_cast<const Address&>(ad);
+  CHECK_NE(dd, kNoDRegister);
+  CHECK_NE(cond, kNoCondition);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B27 | B26 | B24 | B20 |
+                     ((static_cast<int32_t>(dd) >> 4)*B22) |
+                     ((static_cast<int32_t>(dd) & 0xf)*B12) |
+                     B11 | B9 | B8 | addr.vencoding();
+  Emit(encoding);
+}
+
+
+void Arm32Assembler::vstrd(DRegister dd, const Address& ad, Condition cond) {
+  const Address& addr = static_cast<const Address&>(ad);
+  CHECK_NE(static_cast<Register>(addr.encodingArm() & (0xf << kRnShift)), PC);
+  CHECK_NE(dd, kNoDRegister);
+  CHECK_NE(cond, kNoCondition);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B27 | B26 | B24 |
+                     ((static_cast<int32_t>(dd) >> 4)*B22) |
+                     ((static_cast<int32_t>(dd) & 0xf)*B12) |
+                     B11 | B9 | B8 | addr.vencoding();
+  Emit(encoding);
+}
+
+
+// Push/pop nregs consecutive VFP registers starting at reg, via the
+// shared EmitVPushPop emitter.
+void Arm32Assembler::vpushs(SRegister reg, int nregs, Condition cond) {
+  EmitVPushPop(static_cast<uint32_t>(reg), nregs, true, false, cond);
+}
+
+
+void Arm32Assembler::vpushd(DRegister reg, int nregs, Condition cond) {
+  EmitVPushPop(static_cast<uint32_t>(reg), nregs, true, true, cond);
+}
+
+
+void Arm32Assembler::vpops(SRegister reg, int nregs, Condition cond) {
+  EmitVPushPop(static_cast<uint32_t>(reg), nregs, false, false, cond);
+}
+
+
+void Arm32Assembler::vpopd(DRegister reg, int nregs, Condition cond) {
+  EmitVPushPop(static_cast<uint32_t>(reg), nregs, false, true, cond);
+}
+
+
+// Emits vpush/vpop for single (dbl = false) or double (dbl = true)
+// registers.  The 5-bit register number splits into a 4-bit Vd field
+// and a 1-bit D extension, with the bit order depending on register
+// width; the imm8 count field holds nregs (doubled for D registers).
+void Arm32Assembler::EmitVPushPop(uint32_t reg, int nregs, bool push, bool dbl, Condition cond) {
+  CHECK_NE(cond, kNoCondition);
+  CHECK_GT(nregs, 0);
+  uint32_t D;
+  uint32_t Vd;
+  if (dbl) {
+    // Encoded as D:Vd.
+    D = (reg >> 4) & 1;
+    Vd = reg & 0b1111;
+  } else {
+    // Encoded as Vd:D.
+    D = reg & 1;
+    Vd = (reg >> 1) & 0b1111;
+  }
+  int32_t encoding = B27 | B26 | B21 | B19 | B18 | B16 |
+                    B11 | B9 |
+        (dbl ? B8 : 0) |
+        (push ? B24 : (B23 | B20)) |
+        static_cast<int32_t>(cond) << kConditionShift |
+        nregs << (dbl ? 1 : 0) |
+        D << 22 |
+        Vd << 12;
+  Emit(encoding);
+}
+
+
// Emits a three-operand single-precision VFP instruction.
// An S register splits into a 4-bit field plus a 1-bit extension:
// sd -> Vd:D (bits 15-12, 22), sn -> Vn:N (bits 19-16, 7), sm -> Vm:M (bits 3-0, 5).
void Arm32Assembler::EmitVFPsss(Condition cond, int32_t opcode,
                                SRegister sd, SRegister sn, SRegister sm) {
  CHECK_NE(sd, kNoSRegister);
  CHECK_NE(sn, kNoSRegister);
  CHECK_NE(sm, kNoSRegister);
  CHECK_NE(cond, kNoCondition);
  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
                     B27 | B26 | B25 | B11 | B9 | opcode |
                     ((static_cast<int32_t>(sd) & 1)*B22) |
                     ((static_cast<int32_t>(sn) >> 1)*B16) |
                     ((static_cast<int32_t>(sd) >> 1)*B12) |
                     ((static_cast<int32_t>(sn) & 1)*B7) |
                     ((static_cast<int32_t>(sm) & 1)*B5) |
                     (static_cast<int32_t>(sm) >> 1);
  Emit(encoding);
}
+
+
// Emits a three-operand double-precision VFP instruction.
// A D register splits into a 4-bit field plus a 1-bit extension:
// dd -> Vd:D (bits 15-12, 22), dn -> Vn:N (bits 19-16, 7), dm -> Vm:M (bits 3-0, 5).
void Arm32Assembler::EmitVFPddd(Condition cond, int32_t opcode,
                                DRegister dd, DRegister dn, DRegister dm) {
  CHECK_NE(dd, kNoDRegister);
  CHECK_NE(dn, kNoDRegister);
  CHECK_NE(dm, kNoDRegister);
  CHECK_NE(cond, kNoCondition);
  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
                     B27 | B26 | B25 | B11 | B9 | B8 | opcode |
                     ((static_cast<int32_t>(dd) >> 4)*B22) |
                     ((static_cast<int32_t>(dn) & 0xf)*B16) |
                     ((static_cast<int32_t>(dd) & 0xf)*B12) |
                     ((static_cast<int32_t>(dn) >> 4)*B7) |
                     ((static_cast<int32_t>(dm) >> 4)*B5) |
                     (static_cast<int32_t>(dm) & 0xf);
  Emit(encoding);
}
+
+
// Emits a two-operand VFP instruction with a single-precision destination
// and a double-precision source (e.g. conversions).
void Arm32Assembler::EmitVFPsd(Condition cond, int32_t opcode,
                               SRegister sd, DRegister dm) {
  CHECK_NE(sd, kNoSRegister);
  CHECK_NE(dm, kNoDRegister);
  CHECK_NE(cond, kNoCondition);
  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
                     B27 | B26 | B25 | B11 | B9 | opcode |
                     ((static_cast<int32_t>(sd) & 1)*B22) |
                     ((static_cast<int32_t>(sd) >> 1)*B12) |
                     ((static_cast<int32_t>(dm) >> 4)*B5) |
                     (static_cast<int32_t>(dm) & 0xf);
  Emit(encoding);
}
+
+
// Emits a two-operand VFP instruction with a double-precision destination
// and a single-precision source (e.g. conversions).
void Arm32Assembler::EmitVFPds(Condition cond, int32_t opcode,
                             DRegister dd, SRegister sm) {
  CHECK_NE(dd, kNoDRegister);
  CHECK_NE(sm, kNoSRegister);
  CHECK_NE(cond, kNoCondition);
  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
                     B27 | B26 | B25 | B11 | B9 | opcode |
                     ((static_cast<int32_t>(dd) >> 4)*B22) |
                     ((static_cast<int32_t>(dd) & 0xf)*B12) |
                     ((static_cast<int32_t>(sm) & 1)*B5) |
                     (static_cast<int32_t>(sm) >> 1);
  Emit(encoding);
}
+
+
// Logical shift left by immediate; implemented as MOV with a shifted operand.
void Arm32Assembler::Lsl(Register rd, Register rm, uint32_t shift_imm,
                         Condition cond) {
  CHECK_NE(shift_imm, 0u);  // Do not use Lsl if no shift is wanted.
  mov(rd, ShifterOperand(rm, LSL, shift_imm), cond);
}
+
+
+void Arm32Assembler::Lsr(Register rd, Register rm, uint32_t shift_imm,
+                         Condition cond) {
+  CHECK_NE(shift_imm, 0u);  // Do not use Lsr if no shift is wanted.
+  if (shift_imm == 32) shift_imm = 0;  // Comply to UAL syntax.
+  mov(rd, ShifterOperand(rm, LSR, shift_imm), cond);
+}
+
+
+void Arm32Assembler::Asr(Register rd, Register rm, uint32_t shift_imm,
+                         Condition cond) {
+  CHECK_NE(shift_imm, 0u);  // Do not use Asr if no shift is wanted.
+  if (shift_imm == 32) shift_imm = 0;  // Comply to UAL syntax.
+  mov(rd, ShifterOperand(rm, ASR, shift_imm), cond);
+}
+
+
// Rotate right by immediate; implemented as MOV with a rotated operand.
void Arm32Assembler::Ror(Register rd, Register rm, uint32_t shift_imm,
                         Condition cond) {
  CHECK_NE(shift_imm, 0u);  // Use Rrx instruction.
  mov(rd, ShifterOperand(rm, ROR, shift_imm), cond);
}
+
// Rotate right with extend: ROR with a shift amount of 0 encodes RRX.
void Arm32Assembler::Rrx(Register rd, Register rm, Condition cond) {
  mov(rd, ShifterOperand(rm, ROR, 0), cond);
}
+
+
// Copies the FPSCR condition flags into the APSR (VMRS APSR_nzcv, FPSCR);
// encoding the PC in the Rt field (bits 15-12) selects the flags transfer.
void Arm32Assembler::vmstat(Condition cond) {  // VMRS APSR_nzcv, FPSCR
  CHECK_NE(cond, kNoCondition);
  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
      B27 | B26 | B25 | B23 | B22 | B21 | B20 | B16 |
      (static_cast<int32_t>(PC)*B12) |
      B11 | B9 | B4;
  Emit(encoding);
}
+
+
// Supervisor call with a 24-bit immediate; always emitted unconditionally (AL).
void Arm32Assembler::svc(uint32_t imm24) {
  CHECK(IsUint(24, imm24)) << imm24;
  int32_t encoding = (AL << kConditionShift) | B27 | B26 | B25 | B24 | imm24;
  Emit(encoding);
}
+
+
// Breakpoint.  The 16-bit immediate is split across the encoding:
// bits [15:4] go to instruction bits [19:8], bits [3:0] to bits [3:0].
void Arm32Assembler::bkpt(uint16_t imm16) {
  int32_t encoding = (AL << kConditionShift) | B24 | B21 |
                     ((imm16 >> 4) << 8) | B6 | B5 | B4 | (imm16 & 0xf);
  Emit(encoding);
}
+
+
// Branch with link and exchange to the address in `rm`.
void Arm32Assembler::blx(Register rm, Condition cond) {
  CHECK_NE(rm, kNoRegister);
  CHECK_NE(cond, kNoCondition);
  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
                     B24 | B21 | (0xfff << 8) | B5 | B4 |
                     (static_cast<int32_t>(rm) << kRmShift);
  Emit(encoding);
}
+
+
// Branch and exchange to the address in `rm`; differs from blx only in
// the absence of B5 (the link bit of the BX/BLX encoding pair).
void Arm32Assembler::bx(Register rm, Condition cond) {
  CHECK_NE(rm, kNoRegister);
  CHECK_NE(cond, kNoCondition);
  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
                     B24 | B21 | (0xfff << 8) | B4 |
                     (static_cast<int32_t>(rm) << kRmShift);
  Emit(encoding);
}
+
+
// Pushes a single register: store with pre-decrement of SP.
void Arm32Assembler::Push(Register rd, Condition cond) {
  str(rd, Address(SP, -kRegisterSize, Address::PreIndex), cond);
}
+
+
// Pops a single register: load with post-increment of SP.
void Arm32Assembler::Pop(Register rd, Condition cond) {
  ldr(rd, Address(SP, kRegisterSize, Address::PostIndex), cond);
}
+
+
// Pushes a register list: STMDB SP!, {regs}.
void Arm32Assembler::PushList(RegList regs, Condition cond) {
  stm(DB_W, SP, regs, cond);
}
+
+
// Pops a register list: LDMIA SP!, {regs}.
void Arm32Assembler::PopList(RegList regs, Condition cond) {
  ldm(IA_W, SP, regs, cond);
}
+
+
+void Arm32Assembler::Mov(Register rd, Register rm, Condition cond) {
+  if (rd != rm) {
+    mov(rd, ShifterOperand(rm), cond);
+  }
+}
+
+
// Binds `label` to the current buffer position and back-patches every
// branch that was linked to it.  While unbound, the label heads a chain of
// branch instructions whose offset fields store the position of the next
// link; each iteration rewrites one branch and follows the chain.
void Arm32Assembler::Bind(Label* label) {
  CHECK(!label->IsBound());
  int bound_pc = buffer_.Size();
  while (label->IsLinked()) {
    int32_t position = label->Position();
    int32_t next = buffer_.Load<int32_t>(position);
    int32_t encoded = Arm32Assembler::EncodeBranchOffset(bound_pc - position, next);
    buffer_.Store<int32_t>(position, encoded);
    // The previous offset field held the position of the next linked branch.
    label->position_ = Arm32Assembler::DecodeBranchOffset(next);
  }
  label->BindTo(bound_pc);
}
+
+
// Packs a byte offset into the 24-bit signed word-offset field of a branch
// instruction, preserving the instruction's other bits.
int32_t Arm32Assembler::EncodeBranchOffset(int offset, int32_t inst) {
  // The offset is off by 8 due to the way the ARM CPUs read PC.
  offset -= 8;
  CHECK_ALIGNED(offset, 4);
  CHECK(IsInt(POPCOUNT(kBranchOffsetMask), offset)) << offset;

  // Properly preserve only the bits supported in the instruction.
  offset >>= 2;  // Byte offset to word offset.
  offset &= kBranchOffsetMask;
  return (inst & ~kBranchOffsetMask) | offset;
}
+
+
// Recovers the byte offset from a branch instruction's 24-bit field.
int Arm32Assembler::DecodeBranchOffset(int32_t inst) {
  // Sign-extend, left-shift by 2, then add 8.
  // The <<8 moves the field's sign bit to bit 31 so the arithmetic >>6
  // both sign-extends and leaves a net left shift of 2.
  return ((((inst & kBranchOffsetMask) << 8) >> 6) + 8);
}
+
+
// rd := rd + value.  May clobber IP (see the three-argument overload).
void Arm32Assembler::AddConstant(Register rd, int32_t value, Condition cond) {
  AddConstant(rd, rd, value, cond);
}
+
+
+void Arm32Assembler::AddConstant(Register rd, Register rn, int32_t value,
+                                 Condition cond) {
+  if (value == 0) {
+    if (rd != rn) {
+      mov(rd, ShifterOperand(rn), cond);
+    }
+    return;
+  }
+  // We prefer to select the shorter code sequence rather than selecting add for
+  // positive values and sub for negatives ones, which would slightly improve
+  // the readability of generated code for some constants.
+  ShifterOperand shifter_op;
+  if (ShifterOperand::CanHoldArm(value, &shifter_op)) {
+    add(rd, rn, shifter_op, cond);
+  } else if (ShifterOperand::CanHoldArm(-value, &shifter_op)) {
+    sub(rd, rn, shifter_op, cond);
+  } else {
+    CHECK(rn != IP);
+    if (ShifterOperand::CanHoldArm(~value, &shifter_op)) {
+      mvn(IP, shifter_op, cond);
+      add(rd, rn, ShifterOperand(IP), cond);
+    } else if (ShifterOperand::CanHoldArm(~(-value), &shifter_op)) {
+      mvn(IP, shifter_op, cond);
+      sub(rd, rn, ShifterOperand(IP), cond);
+    } else {
+      movw(IP, Low16Bits(value), cond);
+      uint16_t value_high = High16Bits(value);
+      if (value_high != 0) {
+        movt(IP, value_high, cond);
+      }
+      add(rd, rn, ShifterOperand(IP), cond);
+    }
+  }
+}
+
+
+void Arm32Assembler::AddConstantSetFlags(Register rd, Register rn, int32_t value,
+                                         Condition cond) {
+  ShifterOperand shifter_op;
+  if (ShifterOperand::CanHoldArm(value, &shifter_op)) {
+    adds(rd, rn, shifter_op, cond);
+  } else if (ShifterOperand::CanHoldArm(-value, &shifter_op)) {
+    subs(rd, rn, shifter_op, cond);
+  } else {
+    CHECK(rn != IP);
+    if (ShifterOperand::CanHoldArm(~value, &shifter_op)) {
+      mvn(IP, shifter_op, cond);
+      adds(rd, rn, ShifterOperand(IP), cond);
+    } else if (ShifterOperand::CanHoldArm(~(-value), &shifter_op)) {
+      mvn(IP, shifter_op, cond);
+      subs(rd, rn, ShifterOperand(IP), cond);
+    } else {
+      movw(IP, Low16Bits(value), cond);
+      uint16_t value_high = High16Bits(value);
+      if (value_high != 0) {
+        movt(IP, value_high, cond);
+      }
+      adds(rd, rn, ShifterOperand(IP), cond);
+    }
+  }
+}
+
+
+void Arm32Assembler::LoadImmediate(Register rd, int32_t value, Condition cond) {
+  ShifterOperand shifter_op;
+  if (ShifterOperand::CanHoldArm(value, &shifter_op)) {
+    mov(rd, shifter_op, cond);
+  } else if (ShifterOperand::CanHoldArm(~value, &shifter_op)) {
+    mvn(rd, shifter_op, cond);
+  } else {
+    movw(rd, Low16Bits(value), cond);
+    uint16_t value_high = High16Bits(value);
+    if (value_high != 0) {
+      movt(rd, value_high, cond);
+    }
+  }
+}
+
+
+// Implementation note: this method must emit at most one instruction when
+// Address::CanHoldLoadOffsetArm.
+void Arm32Assembler::LoadFromOffset(LoadOperandType type,
+                                    Register reg,
+                                    Register base,
+                                    int32_t offset,
+                                    Condition cond) {
+  if (!Address::CanHoldLoadOffsetArm(type, offset)) {
+    CHECK(base != IP);
+    LoadImmediate(IP, offset, cond);
+    add(IP, IP, ShifterOperand(base), cond);
+    base = IP;
+    offset = 0;
+  }
+  CHECK(Address::CanHoldLoadOffsetArm(type, offset));
+  switch (type) {
+    case kLoadSignedByte:
+      ldrsb(reg, Address(base, offset), cond);
+      break;
+    case kLoadUnsignedByte:
+      ldrb(reg, Address(base, offset), cond);
+      break;
+    case kLoadSignedHalfword:
+      ldrsh(reg, Address(base, offset), cond);
+      break;
+    case kLoadUnsignedHalfword:
+      ldrh(reg, Address(base, offset), cond);
+      break;
+    case kLoadWord:
+      ldr(reg, Address(base, offset), cond);
+      break;
+    case kLoadWordPair:
+      ldrd(reg, Address(base, offset), cond);
+      break;
+    default:
+      LOG(FATAL) << "UNREACHABLE";
+  }
+}
+
+
// Loads an S register from [base + offset].
// Implementation note: this method must emit at most one instruction when
// Address::CanHoldLoadOffsetArm, as expected by JIT::GuardedLoadFromOffset.
void Arm32Assembler::LoadSFromOffset(SRegister reg,
                                     Register base,
                                     int32_t offset,
                                     Condition cond) {
  if (!Address::CanHoldLoadOffsetArm(kLoadSWord, offset)) {
    CHECK_NE(base, IP);  // IP is used as the scratch register below.
    LoadImmediate(IP, offset, cond);
    add(IP, IP, ShifterOperand(base), cond);
    base = IP;
    offset = 0;
  }
  CHECK(Address::CanHoldLoadOffsetArm(kLoadSWord, offset));
  vldrs(reg, Address(base, offset), cond);
}
+
+
// Loads a D register from [base + offset].
// Implementation note: this method must emit at most one instruction when
// Address::CanHoldLoadOffsetArm, as expected by JIT::GuardedLoadFromOffset.
void Arm32Assembler::LoadDFromOffset(DRegister reg,
                                     Register base,
                                     int32_t offset,
                                     Condition cond) {
  if (!Address::CanHoldLoadOffsetArm(kLoadDWord, offset)) {
    CHECK_NE(base, IP);  // IP is used as the scratch register below.
    LoadImmediate(IP, offset, cond);
    add(IP, IP, ShifterOperand(base), cond);
    base = IP;
    offset = 0;
  }
  CHECK(Address::CanHoldLoadOffsetArm(kLoadDWord, offset));
  vldrd(reg, Address(base, offset), cond);
}
+
+
+// Implementation note: this method must emit at most one instruction when
+// Address::CanHoldStoreOffsetArm.
+void Arm32Assembler::StoreToOffset(StoreOperandType type,
+                                   Register reg,
+                                   Register base,
+                                   int32_t offset,
+                                   Condition cond) {
+  if (!Address::CanHoldStoreOffsetArm(type, offset)) {
+    CHECK(reg != IP);
+    CHECK(base != IP);
+    LoadImmediate(IP, offset, cond);
+    add(IP, IP, ShifterOperand(base), cond);
+    base = IP;
+    offset = 0;
+  }
+  CHECK(Address::CanHoldStoreOffsetArm(type, offset));
+  switch (type) {
+    case kStoreByte:
+      strb(reg, Address(base, offset), cond);
+      break;
+    case kStoreHalfword:
+      strh(reg, Address(base, offset), cond);
+      break;
+    case kStoreWord:
+      str(reg, Address(base, offset), cond);
+      break;
+    case kStoreWordPair:
+      strd(reg, Address(base, offset), cond);
+      break;
+    default:
+      LOG(FATAL) << "UNREACHABLE";
+  }
+}
+
+
// Stores an S register to [base + offset].
// Implementation note: this method must emit at most one instruction when
// Address::CanHoldStoreOffsetArm, as expected by JIT::GuardedStoreToOffset.
void Arm32Assembler::StoreSToOffset(SRegister reg,
                                    Register base,
                                    int32_t offset,
                                    Condition cond) {
  if (!Address::CanHoldStoreOffsetArm(kStoreSWord, offset)) {
    CHECK_NE(base, IP);  // IP is used as the scratch register below.
    LoadImmediate(IP, offset, cond);
    add(IP, IP, ShifterOperand(base), cond);
    base = IP;
    offset = 0;
  }
  CHECK(Address::CanHoldStoreOffsetArm(kStoreSWord, offset));
  vstrs(reg, Address(base, offset), cond);
}
+
+
// Stores a D register to [base + offset].
// Implementation note: this method must emit at most one instruction when
// Address::CanHoldStoreOffsetArm, as expected by JIT::GuardedStoreSToOffset.
// NOTE(review): "GuardedStoreSToOffset" looks copy-pasted from the S-register
// variant above — confirm against the JIT; probably GuardedStoreDToOffset.
void Arm32Assembler::StoreDToOffset(DRegister reg,
                                    Register base,
                                    int32_t offset,
                                    Condition cond) {
  if (!Address::CanHoldStoreOffsetArm(kStoreDWord, offset)) {
    CHECK_NE(base, IP);  // IP is used as the scratch register below.
    LoadImmediate(IP, offset, cond);
    add(IP, IP, ShifterOperand(base), cond);
    base = IP;
    offset = 0;
  }
  CHECK(Address::CanHoldStoreOffsetArm(kStoreDWord, offset));
  vstrd(reg, Address(base, offset), cond);
}
+
+
// Emits a data memory barrier on SMP builds; a no-op on uniprocessor builds.
// The scratch register must be R12 by convention (it is not actually used).
void Arm32Assembler::MemoryBarrier(ManagedRegister mscratch) {
  CHECK_EQ(mscratch.AsArm().AsCoreRegister(), R12);
#if ANDROID_SMP != 0
  int32_t encoding = 0xf57ff05f;  // dmb
  Emit(encoding);
#endif
}
+
+
// CBZ is a Thumb-only instruction; callers must use CompareAndBranchIfZero
// instead, which synthesizes the same effect with cmp + b.
void Arm32Assembler::cbz(Register rn, Label* target) {
  LOG(FATAL) << "cbz is not supported on ARM32";
}
+
+
// CBNZ is a Thumb-only instruction; callers must use
// CompareAndBranchIfNonZero instead, which synthesizes it with cmp + b.
void Arm32Assembler::cbnz(Register rn, Label* target) {
  LOG(FATAL) << "cbnz is not supported on ARM32";
}
+
+
+void Arm32Assembler::CompareAndBranchIfZero(Register r, Label* label) {
+  cmp(r, ShifterOperand(0));
+  b(label, EQ);
+}
+
+
+void Arm32Assembler::CompareAndBranchIfNonZero(Register r, Label* label) {
+  cmp(r, ShifterOperand(0));
+  b(label, NE);
+}
+
+
+}  // namespace arm
+}  // namespace art
diff --git a/compiler/utils/arm/assembler_arm32.h b/compiler/utils/arm/assembler_arm32.h
new file mode 100644
index 0000000..7a0fce2
--- /dev/null
+++ b/compiler/utils/arm/assembler_arm32.h
@@ -0,0 +1,352 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_ARM_ASSEMBLER_ARM32_H_
+#define ART_COMPILER_UTILS_ARM_ASSEMBLER_ARM32_H_
+
+#include <vector>
+
+#include "base/logging.h"
+#include "constants_arm.h"
+#include "utils/arm/managed_register_arm.h"
+#include "utils/arm/assembler_arm.h"
+#include "offsets.h"
+#include "utils.h"
+
+namespace art {
+namespace arm {
+
// Concrete ArmAssembler that emits the classic 32-bit ARM (A32) encoding.
// Each instruction method appends one (or, for macros, several) 4-byte
// encodings to the underlying buffer.
class Arm32Assembler FINAL : public ArmAssembler {
 public:
  Arm32Assembler() {
  }
  virtual ~Arm32Assembler() {}

  // A32, not Thumb.
  bool IsThumb() const OVERRIDE {
    return false;
  }

  // Data-processing instructions.
  void and_(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;

  void eor(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;

  void sub(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
  void subs(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;

  void rsb(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
  void rsbs(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;

  void add(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;

  void adds(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;

  void adc(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;

  void sbc(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;

  void rsc(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;

  void tst(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;

  void teq(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;

  void cmp(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;

  void cmn(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;

  void orr(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
  void orrs(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;

  void mov(Register rd, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
  void movs(Register rd, const ShifterOperand& so, Condition cond = AL) OVERRIDE;

  void bic(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;

  void mvn(Register rd, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
  void mvns(Register rd, const ShifterOperand& so, Condition cond = AL) OVERRIDE;

  // Miscellaneous data-processing instructions.
  void clz(Register rd, Register rm, Condition cond = AL) OVERRIDE;
  void movw(Register rd, uint16_t imm16, Condition cond = AL) OVERRIDE;
  void movt(Register rd, uint16_t imm16, Condition cond = AL) OVERRIDE;

  // Multiply instructions.
  void mul(Register rd, Register rn, Register rm, Condition cond = AL) OVERRIDE;
  void mla(Register rd, Register rn, Register rm, Register ra,
           Condition cond = AL) OVERRIDE;
  void mls(Register rd, Register rn, Register rm, Register ra,
           Condition cond = AL) OVERRIDE;
  void umull(Register rd_lo, Register rd_hi, Register rn, Register rm,
             Condition cond = AL) OVERRIDE;

  void sdiv(Register rd, Register rn, Register rm, Condition cond = AL) OVERRIDE;
  void udiv(Register rd, Register rn, Register rm, Condition cond = AL) OVERRIDE;

  // Load/store instructions.
  void ldr(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
  void str(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;

  void ldrb(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
  void strb(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;

  void ldrh(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
  void strh(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;

  void ldrsb(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
  void ldrsh(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;

  void ldrd(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
  void strd(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;

  void ldm(BlockAddressMode am, Register base,
           RegList regs, Condition cond = AL) OVERRIDE;
  void stm(BlockAddressMode am, Register base,
           RegList regs, Condition cond = AL) OVERRIDE;

  void ldrex(Register rd, Register rn, Condition cond = AL) OVERRIDE;
  void strex(Register rd, Register rt, Register rn, Condition cond = AL) OVERRIDE;

  // Miscellaneous instructions.
  void clrex(Condition cond = AL) OVERRIDE;
  void nop(Condition cond = AL) OVERRIDE;

  // Note that gdb sets breakpoints using the undefined instruction 0xe7f001f0.
  void bkpt(uint16_t imm16) OVERRIDE;
  void svc(uint32_t imm24) OVERRIDE;

  // Thumb-only instructions; these LOG(FATAL) on A32.
  void cbz(Register rn, Label* target) OVERRIDE;
  void cbnz(Register rn, Label* target) OVERRIDE;

  // Floating point instructions (VFPv3-D16 and VFPv3-D32 profiles).
  void vmovsr(SRegister sn, Register rt, Condition cond = AL) OVERRIDE;
  void vmovrs(Register rt, SRegister sn, Condition cond = AL) OVERRIDE;
  void vmovsrr(SRegister sm, Register rt, Register rt2, Condition cond = AL) OVERRIDE;
  void vmovrrs(Register rt, Register rt2, SRegister sm, Condition cond = AL) OVERRIDE;
  void vmovdrr(DRegister dm, Register rt, Register rt2, Condition cond = AL) OVERRIDE;
  void vmovrrd(Register rt, Register rt2, DRegister dm, Condition cond = AL) OVERRIDE;
  void vmovs(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
  void vmovd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE;

  // Returns false if the immediate cannot be encoded.
  bool vmovs(SRegister sd, float s_imm, Condition cond = AL) OVERRIDE;
  bool vmovd(DRegister dd, double d_imm, Condition cond = AL) OVERRIDE;

  void vldrs(SRegister sd, const Address& ad, Condition cond = AL) OVERRIDE;
  void vstrs(SRegister sd, const Address& ad, Condition cond = AL) OVERRIDE;
  void vldrd(DRegister dd, const Address& ad, Condition cond = AL) OVERRIDE;
  void vstrd(DRegister dd, const Address& ad, Condition cond = AL) OVERRIDE;

  void vadds(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
  void vaddd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
  void vsubs(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
  void vsubd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
  void vmuls(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
  void vmuld(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
  void vmlas(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
  void vmlad(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
  void vmlss(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
  void vmlsd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
  void vdivs(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
  void vdivd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;

  void vabss(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
  void vabsd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE;
  void vnegs(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
  void vnegd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE;
  void vsqrts(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
  void vsqrtd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE;

  void vcvtsd(SRegister sd, DRegister dm, Condition cond = AL) OVERRIDE;
  void vcvtds(DRegister dd, SRegister sm, Condition cond = AL) OVERRIDE;
  void vcvtis(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
  void vcvtid(SRegister sd, DRegister dm, Condition cond = AL) OVERRIDE;
  void vcvtsi(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
  void vcvtdi(DRegister dd, SRegister sm, Condition cond = AL) OVERRIDE;
  void vcvtus(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
  void vcvtud(SRegister sd, DRegister dm, Condition cond = AL) OVERRIDE;
  void vcvtsu(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
  void vcvtdu(DRegister dd, SRegister sm, Condition cond = AL) OVERRIDE;

  void vcmps(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
  void vcmpd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE;
  void vcmpsz(SRegister sd, Condition cond = AL) OVERRIDE;
  void vcmpdz(DRegister dd, Condition cond = AL) OVERRIDE;
  void vmstat(Condition cond = AL) OVERRIDE;  // VMRS APSR_nzcv, FPSCR

  void vpushs(SRegister reg, int nregs, Condition cond = AL) OVERRIDE;
  void vpushd(DRegister reg, int nregs, Condition cond = AL) OVERRIDE;
  void vpops(SRegister reg, int nregs, Condition cond = AL) OVERRIDE;
  void vpopd(DRegister reg, int nregs, Condition cond = AL) OVERRIDE;

  // Branch instructions.
  void b(Label* label, Condition cond = AL);
  void bl(Label* label, Condition cond = AL);
  void blx(Register rm, Condition cond = AL) OVERRIDE;
  void bx(Register rm, Condition cond = AL) OVERRIDE;
  // Shift macros, implemented as MOV with a shifted operand.
  void Lsl(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL);
  void Lsr(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL);
  void Asr(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL);
  void Ror(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL);
  void Rrx(Register rd, Register rm, Condition cond = AL);

  void Push(Register rd, Condition cond = AL) OVERRIDE;
  void Pop(Register rd, Condition cond = AL) OVERRIDE;

  void PushList(RegList regs, Condition cond = AL) OVERRIDE;
  void PopList(RegList regs, Condition cond = AL) OVERRIDE;

  void Mov(Register rd, Register rm, Condition cond = AL) OVERRIDE;

  // Synthesized cbz/cbnz replacements (cmp + conditional branch).
  void CompareAndBranchIfZero(Register r, Label* label) OVERRIDE;
  void CompareAndBranchIfNonZero(Register r, Label* label) OVERRIDE;


  // Macros.
  // Add signed constant value to rd. May clobber IP.
  void AddConstant(Register rd, int32_t value, Condition cond = AL) OVERRIDE;
  void AddConstant(Register rd, Register rn, int32_t value,
                   Condition cond = AL) OVERRIDE;
  void AddConstantSetFlags(Register rd, Register rn, int32_t value,
                           Condition cond = AL) OVERRIDE;
  void AddConstantWithCarry(Register rd, Register rn, int32_t value,
                            Condition cond = AL) {}

  // Load and Store. May clobber IP.
  void LoadImmediate(Register rd, int32_t value, Condition cond = AL) OVERRIDE;
  void LoadSImmediate(SRegister sd, float value, Condition cond = AL) {}
  void LoadDImmediate(DRegister dd, double value,
                      Register scratch, Condition cond = AL) {}
  void MarkExceptionHandler(Label* label) OVERRIDE;
  void LoadFromOffset(LoadOperandType type,
                      Register reg,
                      Register base,
                      int32_t offset,
                      Condition cond = AL) OVERRIDE;
  void StoreToOffset(StoreOperandType type,
                     Register reg,
                     Register base,
                     int32_t offset,
                     Condition cond = AL) OVERRIDE;
  void LoadSFromOffset(SRegister reg,
                       Register base,
                       int32_t offset,
                       Condition cond = AL) OVERRIDE;
  void StoreSToOffset(SRegister reg,
                      Register base,
                      int32_t offset,
                      Condition cond = AL) OVERRIDE;
  void LoadDFromOffset(DRegister reg,
                       Register base,
                       int32_t offset,
                       Condition cond = AL) OVERRIDE;
  void StoreDToOffset(DRegister reg,
                      Register base,
                      int32_t offset,
                      Condition cond = AL) OVERRIDE;


  static bool IsInstructionForExceptionHandling(uword pc);

  // Emit data (e.g. encoded instruction or immediate) to the
  // instruction stream.
  void Emit(int32_t value);
  void Bind(Label* label) OVERRIDE;

  void MemoryBarrier(ManagedRegister scratch) OVERRIDE;

 private:
  // Encoding helpers; each emits one 32-bit instruction word.
  void EmitType01(Condition cond,
                  int type,
                  Opcode opcode,
                  int set_cc,
                  Register rn,
                  Register rd,
                  const ShifterOperand& so);

  void EmitType5(Condition cond, int offset, bool link);

  void EmitMemOp(Condition cond,
                 bool load,
                 bool byte,
                 Register rd,
                 const Address& ad);

  void EmitMemOpAddressMode3(Condition cond,
                             int32_t mode,
                             Register rd,
                             const Address& ad);

  void EmitMultiMemOp(Condition cond,
                      BlockAddressMode am,
                      bool load,
                      Register base,
                      RegList regs);

  void EmitShiftImmediate(Condition cond,
                          Shift opcode,
                          Register rd,
                          Register rm,
                          const ShifterOperand& so);

  void EmitShiftRegister(Condition cond,
                         Shift opcode,
                         Register rd,
                         Register rm,
                         const ShifterOperand& so);

  void EmitMulOp(Condition cond,
                 int32_t opcode,
                 Register rd,
                 Register rn,
                 Register rm,
                 Register rs);

  void EmitVFPsss(Condition cond,
                  int32_t opcode,
                  SRegister sd,
                  SRegister sn,
                  SRegister sm);

  void EmitVFPddd(Condition cond,
                  int32_t opcode,
                  DRegister dd,
                  DRegister dn,
                  DRegister dm);

  void EmitVFPsd(Condition cond,
                 int32_t opcode,
                 SRegister sd,
                 DRegister dm);

  void EmitVFPds(Condition cond,
                 int32_t opcode,
                 DRegister dd,
                 SRegister sm);

  void EmitVPushPop(uint32_t reg, int nregs, bool push, bool dbl, Condition cond);

  void EmitBranch(Condition cond, Label* label, bool link);
  // Branch-offset packing/unpacking used by Bind() to walk the fix-up chain.
  static int32_t EncodeBranchOffset(int offset, int32_t inst);
  static int DecodeBranchOffset(int32_t inst);
  int32_t EncodeTstOffset(int offset, int32_t inst);
  int DecodeTstOffset(int32_t inst);
};
+
+}  // namespace arm
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_ARM_ASSEMBLER_ARM32_H_
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
new file mode 100644
index 0000000..703d68e
--- /dev/null
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -0,0 +1,2363 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "assembler_thumb2.h"
+
+#include "base/logging.h"
+#include "entrypoints/quick/quick_entrypoints.h"
+#include "offsets.h"
+#include "thread.h"
+#include "utils.h"
+
+namespace art {
+namespace arm {
+
+// ----------------------------------------------------------------------------
+// Data-processing front ends.  Each forwards to EmitDataProcessing, which
+// selects a 16-bit (Thumb1) or 32-bit (Thumb2) encoding.  The integer
+// argument following the opcode is the set_cc flag: 1 emits the
+// flag-setting ("s") form of the instruction.
+// ----------------------------------------------------------------------------
+void Thumb2Assembler::and_(Register rd, Register rn, const ShifterOperand& so,
+                           Condition cond) {
+  EmitDataProcessing(cond, AND, 0, rn, rd, so);
+}
+
+
+void Thumb2Assembler::eor(Register rd, Register rn, const ShifterOperand& so,
+                          Condition cond) {
+  EmitDataProcessing(cond, EOR, 0, rn, rd, so);
+}
+
+
+void Thumb2Assembler::sub(Register rd, Register rn, const ShifterOperand& so,
+                          Condition cond) {
+  EmitDataProcessing(cond, SUB, 0, rn, rd, so);
+}
+
+
+void Thumb2Assembler::rsb(Register rd, Register rn, const ShifterOperand& so,
+                          Condition cond) {
+  EmitDataProcessing(cond, RSB, 0, rn, rd, so);
+}
+
+
+void Thumb2Assembler::rsbs(Register rd, Register rn, const ShifterOperand& so,
+                           Condition cond) {
+  EmitDataProcessing(cond, RSB, 1, rn, rd, so);
+}
+
+
+void Thumb2Assembler::add(Register rd, Register rn, const ShifterOperand& so,
+                          Condition cond) {
+  EmitDataProcessing(cond, ADD, 0, rn, rd, so);
+}
+
+
+void Thumb2Assembler::adds(Register rd, Register rn, const ShifterOperand& so,
+                           Condition cond) {
+  EmitDataProcessing(cond, ADD, 1, rn, rd, so);
+}
+
+
+void Thumb2Assembler::subs(Register rd, Register rn, const ShifterOperand& so,
+                           Condition cond) {
+  EmitDataProcessing(cond, SUB, 1, rn, rd, so);
+}
+
+
+void Thumb2Assembler::adc(Register rd, Register rn, const ShifterOperand& so,
+                          Condition cond) {
+  EmitDataProcessing(cond, ADC, 0, rn, rd, so);
+}
+
+
+void Thumb2Assembler::sbc(Register rd, Register rn, const ShifterOperand& so,
+                          Condition cond) {
+  EmitDataProcessing(cond, SBC, 0, rn, rd, so);
+}
+
+
+void Thumb2Assembler::rsc(Register rd, Register rn, const ShifterOperand& so,
+                          Condition cond) {
+  EmitDataProcessing(cond, RSC, 0, rn, rd, so);
+}
+
+
+// Compare/test forms always set the flags; rd is not encoded for them, so
+// R0 is passed purely as a placeholder.
+void Thumb2Assembler::tst(Register rn, const ShifterOperand& so, Condition cond) {
+  CHECK_NE(rn, PC);  // Reserve tst pc instruction for exception handler marker.
+  EmitDataProcessing(cond, TST, 1, rn, R0, so);
+}
+
+
+void Thumb2Assembler::teq(Register rn, const ShifterOperand& so, Condition cond) {
+  CHECK_NE(rn, PC);  // Reserve teq pc instruction for exception handler marker.
+  EmitDataProcessing(cond, TEQ, 1, rn, R0, so);
+}
+
+
+void Thumb2Assembler::cmp(Register rn, const ShifterOperand& so, Condition cond) {
+  EmitDataProcessing(cond, CMP, 1, rn, R0, so);
+}
+
+
+void Thumb2Assembler::cmn(Register rn, const ShifterOperand& so, Condition cond) {
+  EmitDataProcessing(cond, CMN, 1, rn, R0, so);
+}
+
+
+void Thumb2Assembler::orr(Register rd, Register rn,
+                          const ShifterOperand& so, Condition cond) {
+  EmitDataProcessing(cond, ORR, 0, rn, rd, so);
+}
+
+
+void Thumb2Assembler::orrs(Register rd, Register rn,
+                           const ShifterOperand& so, Condition cond) {
+  EmitDataProcessing(cond, ORR, 1, rn, rd, so);
+}
+
+
+// MOV and MVN have no first operand; R0 is passed as a placeholder for rn.
+void Thumb2Assembler::mov(Register rd, const ShifterOperand& so, Condition cond) {
+  EmitDataProcessing(cond, MOV, 0, R0, rd, so);
+}
+
+
+void Thumb2Assembler::movs(Register rd, const ShifterOperand& so, Condition cond) {
+  EmitDataProcessing(cond, MOV, 1, R0, rd, so);
+}
+
+
+void Thumb2Assembler::bic(Register rd, Register rn, const ShifterOperand& so,
+                       Condition cond) {
+  EmitDataProcessing(cond, BIC, 0, rn, rd, so);
+}
+
+
+void Thumb2Assembler::mvn(Register rd, const ShifterOperand& so, Condition cond) {
+  EmitDataProcessing(cond, MVN, 0, R0, rd, so);
+}
+
+
+void Thumb2Assembler::mvns(Register rd, const ShifterOperand& so, Condition cond) {
+  EmitDataProcessing(cond, MVN, 1, R0, rd, so);
+}
+
+
+// Multiply: uses the 16 bit MULS encoding when rd == rm, both operands are
+// low registers and 32 bit encodings are not being forced; otherwise emits
+// the 32 bit MUL.
+// NOTE(review): cond is not used when choosing or building the encoding
+// here; conditional execution presumably relies on a surrounding IT block --
+// confirm against the callers/CheckCondition handling.
+void Thumb2Assembler::mul(Register rd, Register rn, Register rm, Condition cond) {
+  if (rd == rm && !IsHighRegister(rd) && !IsHighRegister(rn) && !force_32bit_) {
+    // 16 bit.
+    int16_t encoding = B14 | B9 | B8 | B6 |
+        rn << 3 | rd;
+    Emit16(encoding);
+  } else {
+    // 32 bit.
+    uint32_t op1 = 0b000;
+    uint32_t op2 = 0b00;
+    int32_t encoding = B31 | B30 | B29 | B28 | B27 | B25 | B24 |
+        op1 << 20 |
+        B15 | B14 | B13 | B12 |
+        op2 << 4 |
+        static_cast<uint32_t>(rd) << 8 |
+        static_cast<uint32_t>(rn) << 16 |
+        static_cast<uint32_t>(rm);
+
+    Emit32(encoding);
+  }
+}
+
+
+// Multiply-accumulate: rd = ra + rn * rm (32 bit encoding only).
+void Thumb2Assembler::mla(Register rd, Register rn, Register rm, Register ra,
+                          Condition cond) {
+  uint32_t op1 = 0b000;
+  uint32_t op2 = 0b00;
+  int32_t encoding = B31 | B30 | B29 | B28 | B27 | B25 | B24 |
+      op1 << 20 |
+      op2 << 4 |
+      static_cast<uint32_t>(rd) << 8 |
+      static_cast<uint32_t>(ra) << 12 |
+      static_cast<uint32_t>(rn) << 16 |
+      static_cast<uint32_t>(rm);
+
+  Emit32(encoding);
+}
+
+
+// Multiply-subtract: rd = ra - rn * rm.  Differs from MLA only in op2.
+void Thumb2Assembler::mls(Register rd, Register rn, Register rm, Register ra,
+                          Condition cond) {
+  uint32_t op1 = 0b000;
+  uint32_t op2 = 0b01;
+  int32_t encoding = B31 | B30 | B29 | B28 | B27 | B25 | B24 |
+      op1 << 20 |
+      op2 << 4 |
+      static_cast<uint32_t>(rd) << 8 |
+      static_cast<uint32_t>(ra) << 12 |
+      static_cast<uint32_t>(rn) << 16 |
+      static_cast<uint32_t>(rm);
+
+  Emit32(encoding);
+}
+
+
+// Unsigned long multiply: rd_hi:rd_lo = rn * rm.
+void Thumb2Assembler::umull(Register rd_lo, Register rd_hi, Register rn,
+                            Register rm, Condition cond) {
+  uint32_t op1 = 0b010;
+  uint32_t op2 = 0b0000;
+  int32_t encoding = B31 | B30 | B29 | B28 | B27 | B25 | B24 | B23 |
+      op1 << 20 |
+      op2 << 4 |
+      static_cast<uint32_t>(rd_lo) << 12 |
+      static_cast<uint32_t>(rd_hi) << 8 |
+      static_cast<uint32_t>(rn) << 16 |
+      static_cast<uint32_t>(rm);
+
+  Emit32(encoding);
+}
+
+
+// Signed divide.  The 0xf << 12 field is the mandatory all-ones Rd' slot.
+// Note the explicit B20 is redundant: op1 == 0b001 shifted left 20 sets the
+// same bit.
+void Thumb2Assembler::sdiv(Register rd, Register rn, Register rm, Condition cond) {
+  uint32_t op1 = 0b001;
+  uint32_t op2 = 0b1111;
+  int32_t encoding = B31 | B30 | B29 | B28 | B27 | B25 | B24 | B23 | B20 |
+      op1 << 20 |
+      op2 << 4 |
+      0xf << 12 |
+      static_cast<uint32_t>(rd) << 8 |
+      static_cast<uint32_t>(rn) << 16 |
+      static_cast<uint32_t>(rm);
+
+  Emit32(encoding);
+}
+
+
+// Unsigned divide.  B21 distinguishes UDIV from SDIV; B20 is redundant with
+// op1 << 20 as in sdiv above.
+void Thumb2Assembler::udiv(Register rd, Register rn, Register rm, Condition cond) {
+  uint32_t op1 = 0b001;
+  uint32_t op2 = 0b1111;
+  int32_t encoding = B31 | B30 | B29 | B28 | B27 | B25 | B24 | B23 | B21 | B20 |
+      op1 << 20 |
+      op2 << 4 |
+      0xf << 12 |
+      static_cast<uint32_t>(rd) << 8 |
+      static_cast<uint32_t>(rn) << 16 |
+      static_cast<uint32_t>(rm);
+
+  Emit32(encoding);
+}
+
+
+// ----------------------------------------------------------------------------
+// Load/store front ends.  All forward to EmitLoadStore; the boolean
+// arguments are, in order: load, byte, half, is_signed.
+// ----------------------------------------------------------------------------
+void Thumb2Assembler::ldr(Register rd, const Address& ad, Condition cond) {
+  EmitLoadStore(cond, true, false, false, false, rd, ad);
+}
+
+
+void Thumb2Assembler::str(Register rd, const Address& ad, Condition cond) {
+  EmitLoadStore(cond, false, false, false, false, rd, ad);
+}
+
+
+void Thumb2Assembler::ldrb(Register rd, const Address& ad, Condition cond) {
+  EmitLoadStore(cond, true, true, false, false, rd, ad);
+}
+
+
+void Thumb2Assembler::strb(Register rd, const Address& ad, Condition cond) {
+  EmitLoadStore(cond, false, true, false, false, rd, ad);
+}
+
+
+void Thumb2Assembler::ldrh(Register rd, const Address& ad, Condition cond) {
+  EmitLoadStore(cond, true, false, true, false, rd, ad);
+}
+
+
+void Thumb2Assembler::strh(Register rd, const Address& ad, Condition cond) {
+  EmitLoadStore(cond, false, false, true, false, rd, ad);
+}
+
+
+void Thumb2Assembler::ldrsb(Register rd, const Address& ad, Condition cond) {
+  EmitLoadStore(cond, true, true, false, true, rd, ad);
+}
+
+
+void Thumb2Assembler::ldrsh(Register rd, const Address& ad, Condition cond) {
+  EmitLoadStore(cond, true, false, true, true, rd, ad);
+}
+
+
+// Load doubleword into the even/odd register pair rd, rd+1.  rd must be an
+// even register.  Unlike the other loads this builds the 32 bit encoding
+// directly (the Thumb2 LDRD encoding mirrors the ARM one).
+void Thumb2Assembler::ldrd(Register rd, const Address& ad, Condition cond) {
+  CHECK_EQ(rd % 2, 0);
+  // This is different from other loads.  The encoding is like ARM.
+  int32_t encoding = B31 | B30 | B29 | B27 | B22 | B20 |
+      static_cast<int32_t>(rd) << 12 |
+      (static_cast<int32_t>(rd) + 1) << 8 |
+      ad.encodingThumbLdrdStrd();
+  Emit32(encoding);
+}
+
+
+// Store doubleword from the even/odd register pair rd, rd+1.  Identical to
+// ldrd except that B20 (the load bit) is clear.
+void Thumb2Assembler::strd(Register rd, const Address& ad, Condition cond) {
+  CHECK_EQ(rd % 2, 0);
+  // This is different from other loads.  The encoding is like ARM.
+  int32_t encoding = B31 | B30 | B29 | B27 | B22 |
+      static_cast<int32_t>(rd) << 12 |
+      (static_cast<int32_t>(rd) + 1) << 8 |
+      ad.encodingThumbLdrdStrd();
+  Emit32(encoding);
+}
+
+
+// Load multiple.  Thumb2 LDM requires at least two registers in the list,
+// so a single-register list is rewritten as an equivalent LDR with
+// writeback.
+void Thumb2Assembler::ldm(BlockAddressMode am,
+                          Register base,
+                          RegList regs,
+                          Condition cond) {
+  if (__builtin_popcount(regs) == 1) {
+    // Thumb doesn't support one reg in the list.
+    // Find the register number.
+    int reg = 0;
+    while (reg < 16) {
+      if ((regs & (1 << reg)) != 0) {
+         break;
+      }
+      ++reg;
+    }
+    CHECK_LT(reg, 16);
+    CHECK(am == DB_W);      // Only writeback is supported.
+    // NOTE(review): the substituted instruction is a post-indexed LDR that
+    // adds kRegisterSize after the load, i.e. LDMIA-with-writeback
+    // semantics, while the CHECK above accepts only DB_W -- confirm this
+    // matches what the callers expect.
+    ldr(static_cast<Register>(reg), Address(base, kRegisterSize, Address::PostIndex), cond);
+  } else {
+    EmitMultiMemOp(cond, am, true, base, regs);
+  }
+}
+
+
+// Store multiple.  As with ldm, a single-register list is rewritten as a
+// single STR because Thumb2 STM needs at least two registers.
+void Thumb2Assembler::stm(BlockAddressMode am,
+                          Register base,
+                          RegList regs,
+                          Condition cond) {
+  if (__builtin_popcount(regs) == 1) {
+    // Thumb doesn't support one reg in the list.
+    // Find the register number.
+    int reg = 0;
+    while (reg < 16) {
+      if ((regs & (1 << reg)) != 0) {
+         break;
+      }
+      ++reg;
+    }
+    CHECK_LT(reg, 16);
+    CHECK(am == IA || am == IA_W);
+    // NOTE(review): the replacement stores at [base - 4] (pre-indexed for
+    // IA, plain offset otherwise), which decrements rather than increments
+    // the address implied by IA/IA_W -- verify against the block-mode
+    // semantics expected by callers.
+    Address::Mode strmode = am == IA ? Address::PreIndex : Address::Offset;
+    str(static_cast<Register>(reg), Address(base, -kRegisterSize, strmode), cond);
+  } else {
+    EmitMultiMemOp(cond, am, false, base, regs);
+  }
+}
+
+
+// VMOV.F32 sd, #imm.  Returns true and emits the instruction only if the
+// float is representable as a VFP 8-bit modified immediate (low 19 mantissa
+// bits zero and the exponent in the narrow range tested below); otherwise
+// returns false and the caller must load the constant another way.
+bool Thumb2Assembler::vmovs(SRegister sd, float s_imm, Condition cond) {
+  uint32_t imm32 = bit_cast<uint32_t, float>(s_imm);
+  if (((imm32 & ((1 << 19) - 1)) == 0) &&
+      ((((imm32 >> 25) & ((1 << 6) - 1)) == (1 << 5)) ||
+       (((imm32 >> 25) & ((1 << 6) - 1)) == ((1 << 5) -1)))) {
+    // Pack sign, inverted-exponent bit and top mantissa bits into imm8.
+    uint8_t imm8 = ((imm32 >> 31) << 7) | (((imm32 >> 29) & 1) << 6) |
+        ((imm32 >> 19) & ((1 << 6) -1));
+    EmitVFPsss(cond, B23 | B21 | B20 | ((imm8 >> 4)*B16) | (imm8 & 0xf),
+               sd, S0, S0);
+    return true;
+  }
+  return false;
+}
+
+
+// VMOV.F64 dd, #imm.  Double-precision analogue of vmovs above: emits only
+// if the value fits the VFP 8-bit immediate form, else returns false.
+bool Thumb2Assembler::vmovd(DRegister dd, double d_imm, Condition cond) {
+  uint64_t imm64 = bit_cast<uint64_t, double>(d_imm);
+  if (((imm64 & ((1LL << 48) - 1)) == 0) &&
+      ((((imm64 >> 54) & ((1 << 9) - 1)) == (1 << 8)) ||
+       (((imm64 >> 54) & ((1 << 9) - 1)) == ((1 << 8) -1)))) {
+    uint8_t imm8 = ((imm64 >> 63) << 7) | (((imm64 >> 61) & 1) << 6) |
+        ((imm64 >> 48) & ((1 << 6) -1));
+    EmitVFPddd(cond, B23 | B21 | B20 | ((imm8 >> 4)*B16) | B8 | (imm8 & 0xf),
+               dd, D0, D0);
+    return true;
+  }
+  return false;
+}
+
+
+// ----------------------------------------------------------------------------
+// VFP data operations.  Each wrapper supplies the opcode bit pattern to the
+// shared emitters: EmitVFPsss (single, 3 operands), EmitVFPddd (double, 3
+// operands), EmitVFPsd/EmitVFPds (mixed single/double).  Unused source
+// operands are passed as S0/D0 placeholders.
+// ----------------------------------------------------------------------------
+void Thumb2Assembler::vmovs(SRegister sd, SRegister sm, Condition cond) {
+  EmitVFPsss(cond, B23 | B21 | B20 | B6, sd, S0, sm);
+}
+
+
+void Thumb2Assembler::vmovd(DRegister dd, DRegister dm, Condition cond) {
+  EmitVFPddd(cond, B23 | B21 | B20 | B6, dd, D0, dm);
+}
+
+
+void Thumb2Assembler::vadds(SRegister sd, SRegister sn, SRegister sm,
+                            Condition cond) {
+  EmitVFPsss(cond, B21 | B20, sd, sn, sm);
+}
+
+
+void Thumb2Assembler::vaddd(DRegister dd, DRegister dn, DRegister dm,
+                            Condition cond) {
+  EmitVFPddd(cond, B21 | B20, dd, dn, dm);
+}
+
+
+void Thumb2Assembler::vsubs(SRegister sd, SRegister sn, SRegister sm,
+                            Condition cond) {
+  EmitVFPsss(cond, B21 | B20 | B6, sd, sn, sm);
+}
+
+
+void Thumb2Assembler::vsubd(DRegister dd, DRegister dn, DRegister dm,
+                            Condition cond) {
+  EmitVFPddd(cond, B21 | B20 | B6, dd, dn, dm);
+}
+
+
+void Thumb2Assembler::vmuls(SRegister sd, SRegister sn, SRegister sm,
+                            Condition cond) {
+  EmitVFPsss(cond, B21, sd, sn, sm);
+}
+
+
+void Thumb2Assembler::vmuld(DRegister dd, DRegister dn, DRegister dm,
+                            Condition cond) {
+  EmitVFPddd(cond, B21, dd, dn, dm);
+}
+
+
+// Multiply-accumulate: the all-zero opcode field is VMLA.
+void Thumb2Assembler::vmlas(SRegister sd, SRegister sn, SRegister sm,
+                            Condition cond) {
+  EmitVFPsss(cond, 0, sd, sn, sm);
+}
+
+
+void Thumb2Assembler::vmlad(DRegister dd, DRegister dn, DRegister dm,
+                            Condition cond) {
+  EmitVFPddd(cond, 0, dd, dn, dm);
+}
+
+
+void Thumb2Assembler::vmlss(SRegister sd, SRegister sn, SRegister sm,
+                            Condition cond) {
+  EmitVFPsss(cond, B6, sd, sn, sm);
+}
+
+
+void Thumb2Assembler::vmlsd(DRegister dd, DRegister dn, DRegister dm,
+                            Condition cond) {
+  EmitVFPddd(cond, B6, dd, dn, dm);
+}
+
+
+void Thumb2Assembler::vdivs(SRegister sd, SRegister sn, SRegister sm,
+                            Condition cond) {
+  EmitVFPsss(cond, B23, sd, sn, sm);
+}
+
+
+void Thumb2Assembler::vdivd(DRegister dd, DRegister dn, DRegister dm,
+                            Condition cond) {
+  EmitVFPddd(cond, B23, dd, dn, dm);
+}
+
+
+void Thumb2Assembler::vabss(SRegister sd, SRegister sm, Condition cond) {
+  EmitVFPsss(cond, B23 | B21 | B20 | B7 | B6, sd, S0, sm);
+}
+
+
+void Thumb2Assembler::vabsd(DRegister dd, DRegister dm, Condition cond) {
+  EmitVFPddd(cond, B23 | B21 | B20 | B7 | B6, dd, D0, dm);
+}
+
+
+void Thumb2Assembler::vnegs(SRegister sd, SRegister sm, Condition cond) {
+  EmitVFPsss(cond, B23 | B21 | B20 | B16 | B6, sd, S0, sm);
+}
+
+
+void Thumb2Assembler::vnegd(DRegister dd, DRegister dm, Condition cond) {
+  EmitVFPddd(cond, B23 | B21 | B20 | B16 | B6, dd, D0, dm);
+}
+
+
+void Thumb2Assembler::vsqrts(SRegister sd, SRegister sm, Condition cond) {
+  EmitVFPsss(cond, B23 | B21 | B20 | B16 | B7 | B6, sd, S0, sm);
+}
+
+void Thumb2Assembler::vsqrtd(DRegister dd, DRegister dm, Condition cond) {
+  EmitVFPddd(cond, B23 | B21 | B20 | B16 | B7 | B6, dd, D0, dm);
+}
+
+
+// Conversions.  Naming: vcvt<dst><src> where i = signed int, u = unsigned
+// int, s = single float, d = double float.
+void Thumb2Assembler::vcvtsd(SRegister sd, DRegister dm, Condition cond) {
+  EmitVFPsd(cond, B23 | B21 | B20 | B18 | B17 | B16 | B8 | B7 | B6, sd, dm);
+}
+
+
+void Thumb2Assembler::vcvtds(DRegister dd, SRegister sm, Condition cond) {
+  EmitVFPds(cond, B23 | B21 | B20 | B18 | B17 | B16 | B7 | B6, dd, sm);
+}
+
+
+void Thumb2Assembler::vcvtis(SRegister sd, SRegister sm, Condition cond) {
+  EmitVFPsss(cond, B23 | B21 | B20 | B19 | B18 | B16 | B7 | B6, sd, S0, sm);
+}
+
+
+void Thumb2Assembler::vcvtid(SRegister sd, DRegister dm, Condition cond) {
+  EmitVFPsd(cond, B23 | B21 | B20 | B19 | B18 | B16 | B8 | B7 | B6, sd, dm);
+}
+
+
+void Thumb2Assembler::vcvtsi(SRegister sd, SRegister sm, Condition cond) {
+  EmitVFPsss(cond, B23 | B21 | B20 | B19 | B7 | B6, sd, S0, sm);
+}
+
+
+void Thumb2Assembler::vcvtdi(DRegister dd, SRegister sm, Condition cond) {
+  EmitVFPds(cond, B23 | B21 | B20 | B19 | B8 | B7 | B6, dd, sm);
+}
+
+
+void Thumb2Assembler::vcvtus(SRegister sd, SRegister sm, Condition cond) {
+  EmitVFPsss(cond, B23 | B21 | B20 | B19 | B18 | B7 | B6, sd, S0, sm);
+}
+
+
+void Thumb2Assembler::vcvtud(SRegister sd, DRegister dm, Condition cond) {
+  EmitVFPsd(cond, B23 | B21 | B20 | B19 | B18 | B8 | B7 | B6, sd, dm);
+}
+
+
+void Thumb2Assembler::vcvtsu(SRegister sd, SRegister sm, Condition cond) {
+  EmitVFPsss(cond, B23 | B21 | B20 | B19 | B6, sd, S0, sm);
+}
+
+
+void Thumb2Assembler::vcvtdu(DRegister dd, SRegister sm, Condition cond) {
+  EmitVFPds(cond, B23 | B21 | B20 | B19 | B8 | B6, dd, sm);
+}
+
+
+void Thumb2Assembler::vcmps(SRegister sd, SRegister sm, Condition cond) {
+  EmitVFPsss(cond, B23 | B21 | B20 | B18 | B6, sd, S0, sm);
+}
+
+
+void Thumb2Assembler::vcmpd(DRegister dd, DRegister dm, Condition cond) {
+  EmitVFPddd(cond, B23 | B21 | B20 | B18 | B6, dd, D0, dm);
+}
+
+
+// Compare against zero (the z suffix): second operand is implicit.
+void Thumb2Assembler::vcmpsz(SRegister sd, Condition cond) {
+  EmitVFPsss(cond, B23 | B21 | B20 | B18 | B16 | B6, sd, S0, S0);
+}
+
+
+void Thumb2Assembler::vcmpdz(DRegister dd, Condition cond) {
+  EmitVFPddd(cond, B23 | B21 | B20 | B18 | B16 | B6, dd, D0, D0);
+}
+
+// Branch front ends.  EmitBranch's boolean arguments are (link, x):
+// b -> neither, bl -> link, blx -> link and exchange.
+void Thumb2Assembler::b(Label* label, Condition cond) {
+  EmitBranch(cond, label, false, false);
+}
+
+
+void Thumb2Assembler::bl(Label* label, Condition cond) {
+  CheckCondition(cond);
+  EmitBranch(cond, label, true, false);
+}
+
+
+// blx to a label is always unconditional.
+void Thumb2Assembler::blx(Label* label) {
+  EmitBranch(AL, label, true, true);
+}
+
+
+// Emits the exception-handler marker: a TST pc instruction (reserved for
+// this purpose -- see the CHECK_NE(rn, PC) in tst()), then a branch over an
+// always-skipped branch to 'label'.  The skipped branch embeds the handler
+// address in the instruction stream without ever being executed.
+void Thumb2Assembler::MarkExceptionHandler(Label* label) {
+  EmitDataProcessing(AL, TST, 1, PC, R0, ShifterOperand(0));
+  Label l;
+  b(&l);
+  EmitBranch(AL, label, false, false);
+  Bind(&l);
+}
+
+
+// Emits a 32 bit Thumb2 instruction as two halfwords, most significant
+// halfword first (the order required by the Thumb instruction stream).
+void Thumb2Assembler::Emit32(int32_t value) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  buffer_.Emit<int16_t>(value >> 16);
+  buffer_.Emit<int16_t>(value & 0xffff);
+}
+
+
+// Emits a single 16 bit Thumb instruction.
+void Thumb2Assembler::Emit16(int16_t value) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  buffer_.Emit<int16_t>(value);
+}
+
+
+// Returns true if this data-processing operation must use a 32 bit (Thumb2)
+// encoding, false if a 16 bit (Thumb1) encoding is available.  The checks
+// mirror the encodings produced by Emit16BitDataProcessing and
+// Emit16BitAddSub: high registers, RRX/rotated operands and out-of-range
+// immediates all force the wide form.
+bool Thumb2Assembler::Is32BitDataProcessing(Condition cond,
+                                            Opcode opcode,
+                                            int set_cc,
+                                            Register rn,
+                                            Register rd,
+                                            const ShifterOperand& so) {
+  if (force_32bit_) {
+    return true;
+  }
+
+  bool can_contain_high_register = opcode == MOV || opcode == ADD || opcode == SUB;
+
+  if (IsHighRegister(rd) || IsHighRegister(rn)) {
+    if (can_contain_high_register) {
+      // There are high register instructions available for this opcode.
+      // However, there is no RRX available.
+      if (so.IsShift() && so.GetShift() == RRX) {
+        return true;
+      }
+
+      // Check special case for SP relative ADD and SUB immediate.
+      if ((opcode == ADD || opcode == SUB) && so.IsImmediate()) {
+        // If rn is SP and rd is a high register we need to use a 32 bit encoding.
+         if (rn == SP && rd != SP && IsHighRegister(rd)) {
+           return true;
+         }
+
+         uint32_t imm = so.GetImmediate();
+         // If the immediates are out of range use 32 bit.
+         if (rd == SP && rn == SP) {
+           // ADD/SUB sp, sp, #imm encodes imm7:'00', so the largest
+           // encodable word-aligned value is (1 << 9) - 4.  Use >= here:
+           // the previous '>' let imm == 1 << 9 reach the 16 bit emitter
+           // even though it cannot be encoded there.
+           if (imm >= (1 << 9)) {    // 9 bit immediate.
+             return true;
+           }
+         } else if (opcode == ADD && rd != SP && rn == SP) {   // 10 bit immediate.
+           // ADD rd, sp, #imm encodes imm8:'00'; largest word-aligned
+           // value is (1 << 10) - 4, so imm == 1 << 10 needs 32 bits.
+           if (imm >= (1 << 10)) {
+             return true;
+           }
+         } else if (opcode == SUB && rd != SP && rn == SP) {
+           // SUB rd, SP, #imm is always 32 bit.
+           return true;
+         }
+      }
+    }
+
+    // The ADD,SUB and MOV instructions that work with high registers don't have
+    // immediate variants.
+    if (so.IsImmediate()) {
+      return true;
+    }
+  }
+
+  if (so.IsRegister() && IsHighRegister(so.GetRegister()) && !can_contain_high_register) {
+    return true;
+  }
+
+  // Check for MOV with an ROR.  Only a rotation of 0 (i.e. no rotation) has
+  // a 16 bit form.
+  if (opcode == MOV && so.IsRegister() && so.IsShift() && so.GetShift() == ROR) {
+    if (so.GetImmediate() != 0) {
+      return true;
+    }
+  }
+
+  bool rn_is_valid = true;
+
+  // Check for single operand instructions and ADD/SUB.
+  switch (opcode) {
+    case CMP:
+    case MOV:
+    case TST:
+    case MVN:
+      rn_is_valid = false;      // There is no Rn for these instructions.
+      break;
+    case TEQ:
+      // TEQ has no 16 bit form at all.
+      return true;
+      break;
+    case ADD:
+    case SUB:
+      break;
+    default:
+      // The remaining two-register opcodes are destructive in Thumb1:
+      // they require rd == rn.
+      if (so.IsRegister() && rd != rn) {
+        return true;
+      }
+  }
+
+  if (so.IsImmediate()) {
+    if (rn_is_valid && rn != rd) {
+      // The only thumb1 instruction with a register and an immediate are ADD and SUB.  The
+      // immediate must be 3 bits.
+      if (opcode != ADD && opcode != SUB) {
+        return true;
+      } else {
+        // Check that the immediate is 3 bits for ADD and SUB.
+        if (so.GetImmediate() >= 8) {
+          return true;
+        }
+      }
+    } else {
+      // ADD, SUB, CMP and MOV may be thumb1 only if the immediate is 8 bits.
+      if (!(opcode == ADD || opcode == SUB || opcode == MOV || opcode == CMP)) {
+        return true;
+      } else {
+        if (so.GetImmediate() > 255) {
+          return true;
+        }
+      }
+    }
+  }
+
+  // The instruction can be encoded in 16 bits.
+  return false;
+}
+
+
+// Emits the 32 bit (Thumb2) encoding of a data-processing instruction.
+// Compare/test forms are expressed by forcing set_cc and rd = PC (0b1111);
+// MOV/MVN have no first operand and are expressed with rn = PC.
+void Thumb2Assembler::Emit32BitDataProcessing(Condition cond,
+                                              Opcode opcode,
+                                              int set_cc,
+                                              Register rn,
+                                              Register rd,
+                                              const ShifterOperand& so) {
+  uint8_t thumb_opcode = 0b11111111;  // Sentinel: "no Thumb2 mapping".
+  switch (opcode) {
+    case AND: thumb_opcode = 0b0000; break;
+    case EOR: thumb_opcode = 0b0100; break;
+    case SUB: thumb_opcode = 0b1101; break;
+    case RSB: thumb_opcode = 0b1110; break;
+    case ADD: thumb_opcode = 0b1000; break;
+    case ADC: thumb_opcode = 0b1010; break;
+    case SBC: thumb_opcode = 0b1011; break;
+    case RSC: break;
+    case TST: thumb_opcode = 0b0000; set_cc = true; rd = PC; break;
+    case TEQ: thumb_opcode = 0b0100; set_cc = true; rd = PC; break;
+    case CMP: thumb_opcode = 0b1101; set_cc = true; rd = PC; break;
+    case CMN: thumb_opcode = 0b1000; set_cc = true; rd = PC; break;
+    case ORR: thumb_opcode = 0b0010; break;
+    case MOV: thumb_opcode = 0b0010; rn = PC; break;
+    case BIC: thumb_opcode = 0b0001; break;
+    case MVN: thumb_opcode = 0b0011; rn = PC; break;
+    default:
+      break;
+  }
+
+  if (thumb_opcode == 0b11111111) {
+    // RSC (and anything unhandled) has no Thumb2 encoding.
+    LOG(FATAL) << "Invalid thumb2 opcode " << opcode;
+  }
+
+  int32_t encoding = 0;
+  if (so.IsImmediate()) {
+    // Check special cases.
+    if ((opcode == SUB || opcode == ADD) && rn == SP) {
+      // There are special ADD/SUB rd, SP, #imm12 instructions.
+      if (opcode == SUB) {
+        thumb_opcode = 0b0101;
+      } else {
+        thumb_opcode = 0;
+      }
+      uint32_t imm = so.GetImmediate();
+      CHECK_LT(imm, (1u << 12));
+
+      // Split the 12 bit immediate into the i:imm3:imm8 fields.
+      uint32_t i = (imm >> 11) & 1;
+      uint32_t imm3 = (imm >> 8) & 0b111;
+      uint32_t imm8 = imm & 0xff;
+
+      encoding = B31 | B30 | B29 | B28 | B25 |
+           B19 | B18 | B16 |
+           thumb_opcode << 21 |
+           rd << 8 |
+           i << 26 |
+           imm3 << 12 |
+           imm8;
+    } else {
+      // Modified immediate.
+      uint32_t imm = ModifiedImmediate(so.encodingThumb(2));
+      if (imm == kInvalidModifiedImmediate) {
+        LOG(FATAL) << "Immediate value cannot fit in thumb2 modified immediate";
+      }
+      encoding = B31 | B30 | B29 | B28 |
+          thumb_opcode << 21 |
+          set_cc << 20 |
+          rn << 16 |
+          rd << 8 |
+          imm;
+    }
+  } else if (so.IsRegister()) {
+     // Register (possibly shifted)
+     encoding = B31 | B30 | B29 | B27 | B25 |
+         thumb_opcode << 21 |
+         set_cc << 20 |
+         rn << 16 |
+         rd << 8 |
+         so.encodingThumb(2);
+  }
+  Emit32(encoding);
+}
+
+
+// Emits the 16 bit (Thumb1) encoding of a data-processing instruction.
+// ADD/SUB are delegated to Emit16BitAddSub.  The remaining opcodes are
+// assembled from a set of field variables (dp_opcode, thumb_opcode and the
+// shift amounts) that each case adjusts before the final encoding is built.
+// Callers must already have established via Is32BitDataProcessing that a
+// 16 bit form exists for the given operands.
+void Thumb2Assembler::Emit16BitDataProcessing(Condition cond,
+                                              Opcode opcode,
+                                              int set_cc,
+                                              Register rn,
+                                              Register rd,
+                                              const ShifterOperand& so) {
+  if (opcode == ADD || opcode == SUB) {
+    Emit16BitAddSub(cond, opcode, set_cc, rn, rd, so);
+    return;
+  }
+  uint8_t thumb_opcode = 0b11111111;  // Sentinel: "no 16 bit mapping".
+  // Thumb1.
+  uint8_t dp_opcode = 0b01;
+  uint8_t opcode_shift = 6;
+  uint8_t rd_shift = 0;
+  uint8_t rn_shift = 3;
+  uint8_t immediate_shift = 0;
+  bool use_immediate = false;
+  uint8_t immediate = 0;
+
+  if (opcode == MOV && so.IsRegister() && so.IsShift()) {
+    // Convert shifted mov operand2 into 16 bit opcodes.
+    dp_opcode = 0;
+    opcode_shift = 11;
+
+    use_immediate = true;
+    immediate = so.GetImmediate();
+    immediate_shift = 6;
+
+    rn = so.GetRegister();
+
+    switch (so.GetShift()) {
+    case LSL: thumb_opcode = 0b00; break;
+    case LSR: thumb_opcode = 0b01; break;
+    case ASR: thumb_opcode = 0b10; break;
+    case ROR:
+      // ROR doesn't allow immediates.
+      thumb_opcode = 0b111;
+      dp_opcode = 0b01;
+      opcode_shift = 6;
+      use_immediate = false;
+      break;
+    case RRX: break;
+    default:
+     break;
+    }
+  } else {
+    if (so.IsImmediate()) {
+      use_immediate = true;
+      immediate = so.GetImmediate();
+    }
+
+    // For the one-operand forms the operand register arrives in so, so it
+    // is moved into rn (or merged into the opcode) per case below.
+    switch (opcode) {
+      case AND: thumb_opcode = 0b0000; break;
+      case EOR: thumb_opcode = 0b0001; break;
+      case SUB: break;
+      case RSB: thumb_opcode = 0b1001; break;
+      case ADD: break;
+      case ADC: thumb_opcode = 0b0101; break;
+      case SBC: thumb_opcode = 0b0110; break;
+      case RSC: break;
+      case TST: thumb_opcode = 0b1000; rn = so.GetRegister(); break;
+      case TEQ: break;
+      case CMP:
+        if (use_immediate) {
+          // T2 encoding.
+           dp_opcode = 0;
+           opcode_shift = 11;
+           thumb_opcode = 0b101;
+           rd_shift = 8;
+           rn_shift = 8;
+        } else {
+          thumb_opcode = 0b1010;
+          rn = so.GetRegister();
+        }
+
+        break;
+      case CMN: thumb_opcode = 0b1011; rn = so.GetRegister(); break;
+      case ORR: thumb_opcode = 0b1100; break;
+      case MOV:
+        dp_opcode = 0;
+        if (use_immediate) {
+          // T2 encoding.
+          opcode_shift = 11;
+          thumb_opcode = 0b100;
+          rd_shift = 8;
+          rn_shift = 8;
+        } else {
+          rn = so.GetRegister();
+          if (IsHighRegister(rn) || IsHighRegister(rd)) {
+            // Special mov for high registers.
+            dp_opcode = 0b01;
+            opcode_shift = 7;
+            // Put the top bit of rd into the bottom bit of the opcode.
+            thumb_opcode = 0b0001100 | static_cast<uint32_t>(rd) >> 3;
+            rd = static_cast<Register>(static_cast<uint32_t>(rd) & 0b111);
+          } else {
+            thumb_opcode = 0;
+          }
+        }
+        break;
+      case BIC: thumb_opcode = 0b1110; break;
+      case MVN: thumb_opcode = 0b1111; rn = so.GetRegister(); break;
+      default:
+        break;
+    }
+  }
+
+  if (thumb_opcode == 0b11111111) {
+    // SUB/ADD are handled above; RSC/TEQ have no 16 bit form.
+    LOG(FATAL) << "Invalid thumb1 opcode " << opcode;
+  }
+
+  int16_t encoding = dp_opcode << 14 |
+      (thumb_opcode << opcode_shift) |
+      rd << rd_shift |
+      rn << rn_shift |
+      (use_immediate ? (immediate << immediate_shift) : 0);
+
+  Emit16(encoding);
+}
+
+
+// ADD and SUB are complex enough to warrant their own emitter.
+// Selects among the Thumb1 T1/T2 encodings and the SP-relative forms, then
+// assembles the 16 bit instruction from the computed fields.  For register
+// operands the second source register travels in the 'immediate' slot.
+void Thumb2Assembler::Emit16BitAddSub(Condition cond,
+                                      Opcode opcode,
+                                      int set_cc,
+                                      Register rn,
+                                      Register rd,
+                                      const ShifterOperand& so) {
+  uint8_t dp_opcode = 0;
+  uint8_t opcode_shift = 6;
+  uint8_t rd_shift = 0;
+  uint8_t rn_shift = 3;
+  uint8_t immediate_shift = 0;
+  bool use_immediate = false;
+  // Must be wide enough for the SP-relative immediates (up to 1020 before
+  // the >> 2 below).  A uint8_t here silently truncated values >= 256,
+  // e.g. encoding ADD sp, sp, #256 as ADD sp, sp, #0, and made the
+  // CHECK_LT(immediate, (1 << 9)) below vacuously true.
+  uint32_t immediate = 0;
+  uint8_t thumb_opcode = 0;  // Always assigned below; the default case aborts.
+
+  if (so.IsImmediate()) {
+    use_immediate = true;
+    immediate = so.GetImmediate();
+  }
+
+  switch (opcode) {
+    case ADD:
+      if (so.IsRegister()) {
+        Register rm = so.GetRegister();
+        if (rn == rd) {
+          // Can use T2 encoding (allows 4 bit registers)
+          dp_opcode = 0b01;
+          opcode_shift = 10;
+          thumb_opcode = 0b0001;
+          // Make Rn also contain the top bit of rd.
+          rn = static_cast<Register>(static_cast<uint32_t>(rm) |
+                                     (static_cast<uint32_t>(rd) & 0b1000) << 1);
+          rd = static_cast<Register>(static_cast<uint32_t>(rd) & 0b111);
+        } else {
+          // T1.
+          opcode_shift = 9;
+          thumb_opcode = 0b01100;
+          immediate = static_cast<uint32_t>(so.GetRegister());
+          use_immediate = true;
+          immediate_shift = 6;
+        }
+      } else {
+        // Immediate.
+        if (rd == SP && rn == SP) {
+          // ADD sp, sp, #imm
+          dp_opcode = 0b10;
+          thumb_opcode = 0b11;
+          opcode_shift = 12;
+          CHECK_LT(immediate, (1u << 9));
+          CHECK_EQ((immediate & 0b11), 0u);
+
+          // Remove rd and rn from instruction by orring it with immed and clearing bits.
+          rn = R0;
+          rd = R0;
+          rd_shift = 0;
+          rn_shift = 0;
+          // Encoded as imm7: the word-aligned immediate divided by 4.
+          immediate >>= 2;
+        } else if (rd != SP && rn == SP) {
+          // ADD rd, SP, #imm
+          dp_opcode = 0b10;
+          thumb_opcode = 0b101;
+          opcode_shift = 11;
+          CHECK_LT(immediate, (1u << 10));
+          CHECK_EQ((immediate & 0b11), 0u);
+
+          // Remove rn from instruction.
+          rn = R0;
+          rn_shift = 0;
+          rd_shift = 8;
+          // Encoded as imm8: the word-aligned immediate divided by 4.
+          immediate >>= 2;
+        } else if (rn != rd) {
+          // Must use T1.
+          opcode_shift = 9;
+          thumb_opcode = 0b01110;
+          immediate_shift = 6;
+        } else {
+          // T2 encoding.
+          opcode_shift = 11;
+          thumb_opcode = 0b110;
+          rd_shift = 8;
+          rn_shift = 8;
+        }
+      }
+      break;
+
+    case SUB:
+      if (so.IsRegister()) {
+         // T1.
+         opcode_shift = 9;
+         thumb_opcode = 0b01101;
+         immediate = static_cast<uint32_t>(so.GetRegister());
+         use_immediate = true;
+         immediate_shift = 6;
+       } else {
+         if (rd == SP && rn == SP) {
+           // SUB sp, sp, #imm
+           dp_opcode = 0b10;
+           thumb_opcode = 0b1100001;
+           opcode_shift = 7;
+           CHECK_LT(immediate, (1u << 9));
+           CHECK_EQ((immediate & 0b11), 0u);
+
+           // Remove rd and rn from instruction by orring it with immed and clearing bits.
+           rn = R0;
+           rd = R0;
+           rd_shift = 0;
+           rn_shift = 0;
+           // Encoded as imm7: the word-aligned immediate divided by 4.
+           immediate >>= 2;
+         } else if (rn != rd) {
+           // Must use T1.
+           opcode_shift = 9;
+           thumb_opcode = 0b01111;
+           immediate_shift = 6;
+         } else {
+           // T2 encoding.
+           opcode_shift = 11;
+           thumb_opcode = 0b111;
+           rd_shift = 8;
+           rn_shift = 8;
+         }
+       }
+      break;
+    default:
+      LOG(FATAL) << "This opcode is not an ADD or SUB: " << opcode;
+      return;
+  }
+
+  int16_t encoding = dp_opcode << 14 |
+      (thumb_opcode << opcode_shift) |
+      rd << rd_shift |
+      rn << rn_shift |
+      (use_immediate ? (immediate << immediate_shift) : 0);
+
+  Emit16(encoding);
+}
+
+
+// Emit a data processing instruction, selecting between the narrow (16 bit)
+// and wide (32 bit) Thumb2 encodings.
+void Thumb2Assembler::EmitDataProcessing(Condition cond,
+                                         Opcode opcode,
+                                         int set_cc,
+                                         Register rn,
+                                         Register rd,
+                                         const ShifterOperand& so) {
+  CHECK_NE(rd, kNoRegister);
+  CheckCondition(cond);
+
+  // Decide once whether the operands force the wide encoding, then dispatch.
+  const bool wide = Is32BitDataProcessing(cond, opcode, set_cc, rn, rd, so);
+  if (wide) {
+    Emit32BitDataProcessing(cond, opcode, set_cc, rn, rd, so);
+  } else {
+    Emit16BitDataProcessing(cond, opcode, set_cc, rn, rd, so);
+  }
+}
+
+
+// Write the final encoding of this branch into the buffer at its location.
+// The branch must already be resolved (target_ known) and its size_ fixed.
+void Thumb2Assembler::Branch::Emit(AssemblerBuffer* buffer) const {
+  bool link = type_ == kUnconditionalLinkX || type_ == kUnconditionalLink;
+  bool x = type_ == kUnconditionalX || type_ == kUnconditionalLinkX;
+  int32_t offset = target_ - location_;
+
+  if (size_ == k32Bit) {
+    // 32 bit branch: B (T3/T4) or BL/BLX, emitted as two halfwords.
+    int32_t encoding = B31 | B30 | B29 | B28 | B15;
+    if (link) {
+      // BL or BLX immediate.
+      encoding |= B14;
+      if (!x) {
+        encoding |= B12;
+      } else {
+        // Bottom bit of offset must be 0.
+        CHECK_EQ((offset & 1), 0);
+      }
+    } else {
+      if (x) {
+        LOG(FATAL) << "Invalid use of BX";
+      } else {
+        if (cond_ == AL) {
+          // Can use the T4 encoding allowing a 24 bit offset.
+          if (!x) {  // Always true here; x was rejected above.
+            encoding |= B12;
+          }
+        } else {
+          // Must be T3 encoding with a 20 bit offset.
+          encoding |= cond_ << 22;
+        }
+      }
+    }
+    encoding = Thumb2Assembler::EncodeBranchOffset(offset, encoding);
+    // Most significant halfword is stored first.
+    buffer->Store<int16_t>(location_, static_cast<int16_t>(encoding >> 16));
+    buffer->Store<int16_t>(location_+2, static_cast<int16_t>(encoding & 0xffff));
+  } else {
+    if (IsCompareAndBranch()) {
+      // CBZ/CBNZ: the offset is encoded as i:imm5:'0'.
+      offset -= 4;  // Account for PC read-ahead.
+      uint16_t i = (offset >> 6) & 1;
+      uint16_t imm5 = (offset >> 1) & 0b11111;
+      int16_t encoding = B15 | B13 | B12 |
+            (type_ ==  kCompareAndBranchNonZero ? B11 : 0) |
+            static_cast<uint32_t>(rn_) |
+            B8 |
+            i << 9 |
+            imm5 << 3;
+      buffer->Store<int16_t>(location_, encoding);
+    } else {
+      offset -= 4;    // Account for PC offset.
+      int16_t encoding;
+      // 16 bit.
+      if (cond_ == AL) {
+        // T2 unconditional branch: 11 bit signed halfword offset.
+        encoding = B15 | B14 | B13 |
+            ((offset >> 1) & 0x7ff);
+      } else {
+        // T1 conditional branch: condition field plus 8 bit signed halfword offset.
+        encoding = B15 | B14 | B12 |
+            cond_ << 8 | ((offset >> 1) & 0xff);
+      }
+      buffer->Store<int16_t>(location_, encoding);
+    }
+  }
+}
+
+
+// Emit a placeholder halfword for a cbz/cbnz and record an unresolved
+// compare-and-branch at the current location.  'prev' is the previous link
+// in the label's chain and is stored in the placeholder so the chain can be
+// walked when the label is bound.  Returns the id of the new branch.
+uint16_t Thumb2Assembler::EmitCompareAndBranch(Register rn, uint16_t prev, bool n) {
+  uint32_t location = buffer_.Size();
+
+  // This is always unresolved as it must be a forward branch.
+  Emit16(prev);      // Previous link.
+  return AddBranch(n ? Branch::kCompareAndBranchNonZero : Branch::kCompareAndBranchZero,
+      location, rn);
+}
+
+
+// NOTE: this only supports immediate offsets, not [rx,ry].
+// TODO: support [rx,ry] instructions.
+//
+// Emit a load or store, selecting between the 16 bit Thumb1 forms and the
+// 32 bit Thumb2 forms depending on the registers, offset range and
+// addressing mode.
+void Thumb2Assembler::EmitLoadStore(Condition cond,
+                                    bool load,
+                                    bool byte,
+                                    bool half,
+                                    bool is_signed,
+                                    Register rd,
+                                    const Address& ad) {
+  CHECK_NE(rd, kNoRegister);
+  CheckCondition(cond);
+  bool must_be_32bit = force_32bit_;
+  // The 16 bit encodings only cover the low registers r0-r7.
+  if (IsHighRegister(rd)) {
+    must_be_32bit = true;
+  }
+
+  Register rn = ad.GetRegister();
+  if (IsHighRegister(rn) && rn != SP) {
+    must_be_32bit = true;
+  }
+
+  // Only unsigned, non-negative, plain-offset addressing has 16 bit forms.
+  if (is_signed || ad.GetOffset() < 0 || ad.GetMode() != Address::Offset) {
+    must_be_32bit = true;
+  }
+
+  int32_t offset = ad.GetOffset();
+
+  // The 16 bit SP relative instruction can only have a 10 bit offset.
+  // Note >=: an offset of exactly 1024 does not fit either.
+  if (rn == SP && offset >= 1024) {
+    must_be_32bit = true;
+  }
+
+  // Only the word form of the 16 bit encoding has an SP relative variant;
+  // byte and halfword accesses with an SP base must use the 32 bit form
+  // (SP does not fit the 3 bit base register field).
+  if (rn == SP && (byte || half)) {
+    must_be_32bit = true;
+  }
+
+  // The offset field limits are exclusive: a 5 bit field encodes 0-31, so
+  // the boundary value itself already needs the 32 bit form.
+  if (byte) {
+    // 5 bit offset, no shift.
+    if (offset >= 32) {
+      must_be_32bit = true;
+    }
+  } else if (half) {
+    // 6 bit offset, shifted by 1.
+    if (offset >= 64) {
+      must_be_32bit = true;
+    }
+  } else {
+    // 7 bit offset, shifted by 2.
+    if (offset >= 128) {
+      must_be_32bit = true;
+    }
+  }
+
+  if (must_be_32bit) {
+    // 32 bit Thumb2 encoding.
+    int32_t encoding = B31 | B30 | B29 | B28 | B27 |
+                  (load ? B20 : 0) |
+                  (is_signed ? B24 : 0) |
+                  static_cast<uint32_t>(rd) << 12 |
+                  ad.encodingThumb(2) |
+                  (byte ? 0 : half ? B21 : B22);
+    Emit32(encoding);
+  } else {
+    // 16 bit thumb1.
+    uint8_t opA = 0;
+    bool sp_relative = false;
+
+    if (byte) {
+      opA = 0b0111;
+    } else if (half) {
+      opA = 0b1000;
+    } else {
+      if (rn == SP) {
+        opA = 0b1001;
+        sp_relative = true;
+      } else {
+        opA = 0b0110;
+      }
+    }
+    int16_t encoding = opA << 12 |
+                (load ? B11 : 0);
+
+    CHECK_GE(offset, 0);
+    if (sp_relative) {
+      // SP relative, 10 bit offset.
+      CHECK_LT(offset, 1024);
+      CHECK_EQ((offset & 0b11), 0);
+      encoding |= rd << 8 | offset >> 2;
+    } else {
+      // No SP relative.  The offset is shifted right depending on
+      // the size of the load/store.
+      encoding |= static_cast<uint32_t>(rd);
+
+      if (byte) {
+        // 5 bit offset, no shift.
+        CHECK_LT(offset, 32);
+      } else if (half) {
+        // 6 bit offset, shifted by 1.
+        CHECK_LT(offset, 64);
+        CHECK_EQ((offset & 0b1), 0);
+        offset >>= 1;
+      } else {
+        // 7 bit offset, shifted by 2.
+        CHECK_LT(offset, 128);
+        CHECK_EQ((offset & 0b11), 0);
+        offset >>= 2;
+      }
+      encoding |= rn << 3 | offset << 6;
+    }
+
+    Emit16(encoding);
+  }
+}
+
+
+// Emit an LDM/STM (multiple register load/store).  Thumb2 only encodes the
+// IA and DB addressing modes; the 16 bit forms are LDMIA!/STMIA! only.
+void Thumb2Assembler::EmitMultiMemOp(Condition cond,
+                                     BlockAddressMode am,
+                                     bool load,
+                                     Register base,
+                                     RegList regs) {
+  CHECK_NE(base, kNoRegister);
+  CheckCondition(cond);
+  bool must_be_32bit = force_32bit_;
+
+  // The 16 bit register list only covers the low registers r0-r7.
+  if ((regs & 0xff00) != 0) {
+    must_be_32bit = true;
+  }
+
+  // The 16 bit encoding only has a 3 bit base register field; a high base
+  // register (including SP, as used by PushList/PopList) would overflow
+  // into the opcode bits.
+  if (IsHighRegister(base)) {
+    must_be_32bit = true;
+  }
+
+  uint32_t w_bit = am == IA_W || am == DB_W || am == DA_W || am == IB_W;
+  // The 16 bit forms are increment-after with writeback only; any other
+  // addressing mode (including DB_W) needs the 32 bit encoding.
+  if (am != IA_W) {
+    must_be_32bit = true;
+  }
+
+  if (must_be_32bit) {
+    uint32_t op = 0;
+    switch (am) {
+      case IA:
+      case IA_W:
+        op = 0b01;
+        break;
+      case DB:
+      case DB_W:
+        op = 0b10;
+        break;
+      case DA:
+      case IB:
+      case DA_W:
+      case IB_W:
+        LOG(FATAL) << "LDM/STM mode not supported on thumb: " << am;
+    }
+    if (load) {
+      // Cannot have SP in the list.
+      CHECK_EQ((regs & (1 << SP)), 0);
+    } else {
+      // Cannot have PC or SP in the list.
+      CHECK_EQ((regs & (1 << PC | 1 << SP)), 0);
+    }
+    int32_t encoding = B31 | B30 | B29 | B27 |
+                    (op << 23) |
+                    (load ? B20 : 0) |
+                    base << 16 |
+                    regs |
+                    (w_bit << 21);
+    Emit32(encoding);
+  } else {
+    // 16 bit LDMIA!/STMIA! (encoding T1); writeback is implicit.
+    int16_t encoding = B15 | B14 |
+                    (load ? B11 : 0) |
+                    base << 8 |
+                    regs;
+    Emit16(encoding);
+  }
+}
+
+
+// Emit a branch (B, BL, BLX or B<cond>) to 'label'.  If the label is bound
+// this is a resolved (backward) branch; otherwise space is reserved and the
+// branch is chained onto the label for later resolution in Bind().
+void Thumb2Assembler::EmitBranch(Condition cond, Label* label, bool link, bool x) {
+  uint32_t pc = buffer_.Size();
+  Branch::Type branch_type;
+  if (cond == AL) {
+    if (link) {
+      if (x) {
+        branch_type = Branch::kUnconditionalLinkX;      // BLX.
+      } else {
+        branch_type = Branch::kUnconditionalLink;       // BL.
+      }
+    } else {
+      branch_type = Branch::kUnconditional;             // B.
+    }
+  } else {
+    branch_type = Branch::kConditional;                 // B<cond>.
+  }
+
+  if (label->IsBound()) {
+    Branch::Size size = AddBranch(branch_type, pc, label->Position(), cond);  // Resolved branch.
+
+    // The branch is to a bound label which means that it's a backwards branch.  We know the
+    // current size of it so we can emit the appropriate space.  Note that if it's a 16 bit
+    // branch the size may change if it so happens that other branches change size that change
+    // the distance to the target and that distance puts this branch over the limit for 16 bits.
+    if (size == Branch::k16Bit) {
+      Emit16(0);          // Space for a 16 bit branch.
+    } else {
+      Emit32(0);            // Space for a 32 bit branch.
+    }
+  } else {
+    // Branch is to an unbound label.  Emit space for it.
+    // The first halfword holds the previous link in the label's chain.
+    uint16_t branch_id = AddBranch(branch_type, pc, cond);    // Unresolved branch.
+    if (force_32bit_) {
+      Emit16(static_cast<uint16_t>(label->position_));    // Emit current label link.
+      Emit16(0);                   // another 16 bits.
+    } else {
+      Emit16(static_cast<uint16_t>(label->position_));    // Emit current label link.
+    }
+    label->LinkTo(branch_id);           // Link to the branch ID.
+  }
+}
+
+
+// Count leading zeros: CLZ rd, rm (encoding T1).  Note that rm is encoded
+// twice, in bits 19..16 and bits 3..0, as the encoding requires.
+void Thumb2Assembler::clz(Register rd, Register rm, Condition cond) {
+  CHECK_NE(rd, kNoRegister);
+  CHECK_NE(rm, kNoRegister);
+  CheckCondition(cond);
+  CHECK_NE(rd, PC);
+  CHECK_NE(rm, PC);
+  int32_t encoding = B31 | B30 | B29 | B28 | B27 |
+      B25 | B23 | B21 | B20 |
+      static_cast<uint32_t>(rm) << 16 |
+      0xf << 12 |
+      static_cast<uint32_t>(rd) << 8 |
+      B7 |
+      static_cast<uint32_t>(rm);
+  Emit32(encoding);
+}
+
+
+// Move a 16 bit immediate into rd, zeroing the upper halfword (MOVW).
+void Thumb2Assembler::movw(Register rd, uint16_t imm16, Condition cond) {
+  CheckCondition(cond);
+  bool must_be_32bit = force_32bit_;
+  // The 16 bit encoding only supports low registers and 8 bit immediates.
+  if (IsHighRegister(rd)|| imm16 >= 256u) {
+    must_be_32bit = true;
+  }
+
+  if (must_be_32bit) {
+    // Use encoding T3.
+    // The immediate is split over the imm4:i:imm3:imm8 fields.
+    uint32_t imm4 = (imm16 >> 12) & 0b1111;
+    uint32_t i = (imm16 >> 11) & 0b1;
+    uint32_t imm3 = (imm16 >> 8) & 0b111;
+    uint32_t imm8 = imm16 & 0xff;
+    int32_t encoding = B31 | B30 | B29 | B28 |
+                    B25 | B22 |
+                    static_cast<uint32_t>(rd) << 8 |
+                    i << 26 |
+                    imm4 << 16 |
+                    imm3 << 12 |
+                    imm8;
+    Emit32(encoding);
+  } else {
+    // 16 bit MOV immediate (encoding T1).
+    // NOTE(review): T1 is the flag-setting MOVS form outside an IT block --
+    // confirm callers do not rely on the flags being preserved here.
+    int16_t encoding = B13 | static_cast<uint16_t>(rd) << 8 |
+                imm16;
+    Emit16(encoding);
+  }
+}
+
+
+// Move a 16 bit immediate into the top halfword of rd, preserving the
+// bottom halfword (MOVT, encoding T1 -- always 32 bits).
+void Thumb2Assembler::movt(Register rd, uint16_t imm16, Condition cond) {
+  CheckCondition(cond);
+  // Always 32 bits.
+  // The immediate is split over the imm4:i:imm3:imm8 fields.
+  uint32_t imm4 = (imm16 >> 12) & 0b1111;
+  uint32_t i = (imm16 >> 11) & 0b1;
+  uint32_t imm3 = (imm16 >> 8) & 0b111;
+  uint32_t imm8 = imm16 & 0xff;
+  int32_t encoding = B31 | B30 | B29 | B28 |
+                  B25 | B23 | B22 |
+                  static_cast<uint32_t>(rd) << 8 |
+                  i << 26 |
+                  imm4 << 16 |
+                  imm3 << 12 |
+                  imm8;
+  Emit32(encoding);
+}
+
+
+// Load exclusive: LDREX rt, [rn, #imm] (encoding T1).  The offset is
+// encoded as imm8*4, so imm must be word aligned and less than 1024.
+void Thumb2Assembler::ldrex(Register rt, Register rn, uint16_t imm, Condition cond) {
+  CHECK_NE(rn, kNoRegister);
+  CHECK_NE(rt, kNoRegister);
+  CheckCondition(cond);
+  CHECK_LT(imm, (1u << 10));
+  CHECK_EQ((imm & 0b11), 0);  // Encoded shifted right by 2; reject silent truncation.
+
+  int32_t encoding = B31 | B30 | B29 | B27 | B22 | B20 |
+      static_cast<uint32_t>(rn) << 16 |
+      static_cast<uint32_t>(rt) << 12 |
+      0xf << 8 |
+      imm >> 2;
+  Emit32(encoding);
+}
+
+
+// Load exclusive with a zero offset: LDREX rt, [rn].
+void Thumb2Assembler::ldrex(Register rt, Register rn, Condition cond) {
+  ldrex(rt, rn, 0, cond);
+}
+
+
+// Store exclusive: STREX rd, rt, [rn, #imm] (encoding T1).  rd receives the
+// status result.  The offset is encoded as imm8*4, so imm must be word
+// aligned and less than 1024.
+void Thumb2Assembler::strex(Register rd,
+                            Register rt,
+                            Register rn,
+                            uint16_t imm,
+                            Condition cond) {
+  CHECK_NE(rn, kNoRegister);
+  CHECK_NE(rd, kNoRegister);
+  CHECK_NE(rt, kNoRegister);
+  // The status register must differ from both the base and data registers,
+  // otherwise the instruction is UNPREDICTABLE.
+  CHECK_NE(rd, rn);
+  CHECK_NE(rd, rt);
+  CheckCondition(cond);
+  CHECK_LT(imm, (1u << 10));
+  CHECK_EQ((imm & 0b11), 0);  // Encoded shifted right by 2; reject silent truncation.
+
+  int32_t encoding = B31 | B30 | B29 | B27 | B22 |
+      static_cast<uint32_t>(rn) << 16 |
+      static_cast<uint32_t>(rt) << 12 |
+      static_cast<uint32_t>(rd) << 8 |
+      imm >> 2;
+  Emit32(encoding);
+}
+
+
+// Store exclusive with a zero offset: STREX rd, rt, [rn].
+void Thumb2Assembler::strex(Register rd,
+                            Register rt,
+                            Register rn,
+                            Condition cond) {
+  strex(rd, rt, rn, 0, cond);
+}
+
+
+// Clear exclusive monitor: CLREX (encoding T1, 0xf3bf8f2f).
+void Thumb2Assembler::clrex(Condition cond) {
+  CheckCondition(cond);
+  // Note that bit 27 must be CLEAR: CLREX lives in the 0xf3 (branches and
+  // miscellaneous control) opcode space; setting B27 as well would produce
+  // 0xfbbf8f2f, which is not CLREX.
+  int32_t encoding = B31 | B30 | B29 | B28 | B25 | B24 | B23 |
+      B21 | B20 |
+      0xf << 16 |
+      B15 |
+      0xf << 8 |
+      B5 |
+      0xf;
+  Emit32(encoding);
+}
+
+
+// No operation: NOP (encoding T1, 0xbf00).
+void Thumb2Assembler::nop(Condition cond) {
+  CheckCondition(cond);
+  int16_t encoding = B15 | B13 | B12 |
+      B11 | B10 | B9 | B8;
+  Emit16(encoding);
+}
+
+
+// Move a core register to a single precision register: VMOV sn, rt.
+// NOTE(review): the condition is placed ARM-style in bits 31..28; in Thumb2
+// this field must be 0b1110 (AL) -- confirm CheckCondition enforces that
+// outside an IT block.
+void Thumb2Assembler::vmovsr(SRegister sn, Register rt, Condition cond) {
+  CHECK_NE(sn, kNoSRegister);
+  CHECK_NE(rt, kNoRegister);
+  CHECK_NE(rt, SP);
+  CHECK_NE(rt, PC);
+  CheckCondition(cond);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B27 | B26 | B25 |
+                     ((static_cast<int32_t>(sn) >> 1)*B16) |
+                     (static_cast<int32_t>(rt)*B12) | B11 | B9 |
+                     ((static_cast<int32_t>(sn) & 1)*B7) | B4;
+  Emit32(encoding);
+}
+
+
+// Move a single precision register to a core register: VMOV rt, sn.
+void Thumb2Assembler::vmovrs(Register rt, SRegister sn, Condition cond) {
+  CHECK_NE(sn, kNoSRegister);
+  CHECK_NE(rt, kNoRegister);
+  CHECK_NE(rt, SP);
+  CHECK_NE(rt, PC);
+  CheckCondition(cond);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B27 | B26 | B25 | B20 |
+                     ((static_cast<int32_t>(sn) >> 1)*B16) |
+                     (static_cast<int32_t>(rt)*B12) | B11 | B9 |
+                     ((static_cast<int32_t>(sn) & 1)*B7) | B4;
+  Emit32(encoding);
+}
+
+
+// Move two core registers to a consecutive pair of single precision
+// registers: VMOV sm, sm+1, rt, rt2.  sm may not be S31 because sm+1 must
+// also exist.
+void Thumb2Assembler::vmovsrr(SRegister sm, Register rt, Register rt2,
+                              Condition cond) {
+  CHECK_NE(sm, kNoSRegister);
+  CHECK_NE(sm, S31);
+  CHECK_NE(rt, kNoRegister);
+  CHECK_NE(rt, SP);
+  CHECK_NE(rt, PC);
+  CHECK_NE(rt2, kNoRegister);
+  CHECK_NE(rt2, SP);
+  CHECK_NE(rt2, PC);
+  CheckCondition(cond);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B27 | B26 | B22 |
+                     (static_cast<int32_t>(rt2)*B16) |
+                     (static_cast<int32_t>(rt)*B12) | B11 | B9 |
+                     ((static_cast<int32_t>(sm) & 1)*B5) | B4 |
+                     (static_cast<int32_t>(sm) >> 1);
+  Emit32(encoding);
+}
+
+
+// Move a consecutive pair of single precision registers to two core
+// registers: VMOV rt, rt2, sm, sm+1.  The destinations must be distinct.
+void Thumb2Assembler::vmovrrs(Register rt, Register rt2, SRegister sm,
+                              Condition cond) {
+  CHECK_NE(sm, kNoSRegister);
+  CHECK_NE(sm, S31);
+  CHECK_NE(rt, kNoRegister);
+  CHECK_NE(rt, SP);
+  CHECK_NE(rt, PC);
+  CHECK_NE(rt2, kNoRegister);
+  CHECK_NE(rt2, SP);
+  CHECK_NE(rt2, PC);
+  CHECK_NE(rt, rt2);
+  CheckCondition(cond);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B27 | B26 | B22 | B20 |
+                     (static_cast<int32_t>(rt2)*B16) |
+                     (static_cast<int32_t>(rt)*B12) | B11 | B9 |
+                     ((static_cast<int32_t>(sm) & 1)*B5) | B4 |
+                     (static_cast<int32_t>(sm) >> 1);
+  Emit32(encoding);
+}
+
+
+// Move two core registers to a double precision register: VMOV dm, rt, rt2.
+void Thumb2Assembler::vmovdrr(DRegister dm, Register rt, Register rt2,
+                              Condition cond) {
+  CHECK_NE(dm, kNoDRegister);
+  CHECK_NE(rt, kNoRegister);
+  CHECK_NE(rt, SP);
+  CHECK_NE(rt, PC);
+  CHECK_NE(rt2, kNoRegister);
+  CHECK_NE(rt2, SP);
+  CHECK_NE(rt2, PC);
+  CheckCondition(cond);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B27 | B26 | B22 |
+                     (static_cast<int32_t>(rt2)*B16) |
+                     (static_cast<int32_t>(rt)*B12) | B11 | B9 | B8 |
+                     ((static_cast<int32_t>(dm) >> 4)*B5) | B4 |
+                     (static_cast<int32_t>(dm) & 0xf);
+  Emit32(encoding);
+}
+
+
+// Move a double precision register to two core registers: VMOV rt, rt2, dm.
+// The destinations must be distinct.
+void Thumb2Assembler::vmovrrd(Register rt, Register rt2, DRegister dm,
+                              Condition cond) {
+  CHECK_NE(dm, kNoDRegister);
+  CHECK_NE(rt, kNoRegister);
+  CHECK_NE(rt, SP);
+  CHECK_NE(rt, PC);
+  CHECK_NE(rt2, kNoRegister);
+  CHECK_NE(rt2, SP);
+  CHECK_NE(rt2, PC);
+  CHECK_NE(rt, rt2);
+  CheckCondition(cond);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B27 | B26 | B22 | B20 |
+                     (static_cast<int32_t>(rt2)*B16) |
+                     (static_cast<int32_t>(rt)*B12) | B11 | B9 | B8 |
+                     ((static_cast<int32_t>(dm) >> 4)*B5) | B4 |
+                     (static_cast<int32_t>(dm) & 0xf);
+  Emit32(encoding);
+}
+
+
+// Load a single precision register from memory: VLDR.32 sd, [...].
+void Thumb2Assembler::vldrs(SRegister sd, const Address& ad, Condition cond) {
+  // (The previous self-cast of 'ad' to its own type was a no-op and has
+  // been removed.)
+  CHECK_NE(sd, kNoSRegister);
+  CheckCondition(cond);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B27 | B26 | B24 | B20 |
+                     ((static_cast<int32_t>(sd) & 1)*B22) |
+                     ((static_cast<int32_t>(sd) >> 1)*B12) |
+                     B11 | B9 | ad.vencoding();
+  Emit32(encoding);
+}
+
+
+// Store a single precision register to memory: VSTR.32 sd, [...].  The base
+// register must not be PC.
+void Thumb2Assembler::vstrs(SRegister sd, const Address& ad, Condition cond) {
+  // Shift the base register field (bits 19..16 of the ARM encoding) down
+  // before comparing against PC; masking alone leaves the field shifted and
+  // the check could never fire.
+  CHECK_NE(static_cast<Register>((ad.encodingArm() >> kRnShift) & 0xf), PC);
+  CHECK_NE(sd, kNoSRegister);
+  CheckCondition(cond);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B27 | B26 | B24 |
+                     ((static_cast<int32_t>(sd) & 1)*B22) |
+                     ((static_cast<int32_t>(sd) >> 1)*B12) |
+                     B11 | B9 | ad.vencoding();
+  Emit32(encoding);
+}
+
+
+// Load a double precision register from memory: VLDR.64 dd, [...].
+void Thumb2Assembler::vldrd(DRegister dd, const Address& ad, Condition cond) {
+  // (The previous self-cast of 'ad' to its own type was a no-op and has
+  // been removed.)
+  CHECK_NE(dd, kNoDRegister);
+  CheckCondition(cond);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B27 | B26 | B24 | B20 |
+                     ((static_cast<int32_t>(dd) >> 4)*B22) |
+                     ((static_cast<int32_t>(dd) & 0xf)*B12) |
+                     B11 | B9 | B8 | ad.vencoding();
+  Emit32(encoding);
+}
+
+
+// Store a double precision register to memory: VSTR.64 dd, [...].  The base
+// register must not be PC.
+void Thumb2Assembler::vstrd(DRegister dd, const Address& ad, Condition cond) {
+  // Shift the base register field (bits 19..16 of the ARM encoding) down
+  // before comparing against PC; masking alone leaves the field shifted and
+  // the check could never fire.
+  CHECK_NE(static_cast<Register>((ad.encodingArm() >> kRnShift) & 0xf), PC);
+  CHECK_NE(dd, kNoDRegister);
+  CheckCondition(cond);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B27 | B26 | B24 |
+                     ((static_cast<int32_t>(dd) >> 4)*B22) |
+                     ((static_cast<int32_t>(dd) & 0xf)*B12) |
+                     B11 | B9 | B8 | ad.vencoding();
+  Emit32(encoding);
+}
+
+
+// Push 'nregs' consecutive single precision registers starting at 'reg'.
+void Thumb2Assembler::vpushs(SRegister reg, int nregs, Condition cond) {
+  EmitVPushPop(static_cast<uint32_t>(reg), nregs, true, false, cond);
+}
+
+
+// Push 'nregs' consecutive double precision registers starting at 'reg'.
+void Thumb2Assembler::vpushd(DRegister reg, int nregs, Condition cond) {
+  EmitVPushPop(static_cast<uint32_t>(reg), nregs, true, true, cond);
+}
+
+
+// Pop 'nregs' consecutive single precision registers starting at 'reg'.
+void Thumb2Assembler::vpops(SRegister reg, int nregs, Condition cond) {
+  EmitVPushPop(static_cast<uint32_t>(reg), nregs, false, false, cond);
+}
+
+
+// Pop 'nregs' consecutive double precision registers starting at 'reg'.
+void Thumb2Assembler::vpopd(DRegister reg, int nregs, Condition cond) {
+  EmitVPushPop(static_cast<uint32_t>(reg), nregs, false, true, cond);
+}
+
+
+// Emit a VPUSH/VPOP of 'nregs' consecutive registers starting at 'reg'.
+// 'dbl' selects double precision (D registers) vs single precision
+// (S registers); the register number is split into a 4 bit Vd field and a
+// single D bit whose placement differs between the two.
+void Thumb2Assembler::EmitVPushPop(uint32_t reg, int nregs, bool push, bool dbl, Condition cond) {
+  CheckCondition(cond);
+
+  uint32_t D;
+  uint32_t Vd;
+  if (dbl) {
+    // Encoded as D:Vd.
+    D = (reg >> 4) & 1;
+    Vd = reg & 0b1111;
+  } else {
+    // Encoded as Vd:D.
+    D = reg & 1;
+    Vd = (reg >> 1) & 0b1111;
+  }
+  int32_t encoding = B27 | B26 | B21 | B19 | B18 | B16 |
+                    B11 | B9 |
+        (dbl ? B8 : 0) |
+        (push ? B24 : (B23 | B20)) |    // Push is a decrement-before store, pop an increment-after load.
+        0b1110 << 28 |                  // Thumb2 always uses the AL condition field here.
+        nregs << (dbl ? 1 : 0) |        // The imm8 field counts words: nregs*2 for doubles.
+        D << 22 |
+        Vd << 12;
+  Emit32(encoding);
+}
+
+
+// Emit a three operand single precision VFP instruction; 'opcode' supplies
+// the operation specific bits.  Each S register is split into a 4 bit field
+// plus a separate low bit.
+void Thumb2Assembler::EmitVFPsss(Condition cond, int32_t opcode,
+                                 SRegister sd, SRegister sn, SRegister sm) {
+  CHECK_NE(sd, kNoSRegister);
+  CHECK_NE(sn, kNoSRegister);
+  CHECK_NE(sm, kNoSRegister);
+  CheckCondition(cond);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B27 | B26 | B25 | B11 | B9 | opcode |
+                     ((static_cast<int32_t>(sd) & 1)*B22) |
+                     ((static_cast<int32_t>(sn) >> 1)*B16) |
+                     ((static_cast<int32_t>(sd) >> 1)*B12) |
+                     ((static_cast<int32_t>(sn) & 1)*B7) |
+                     ((static_cast<int32_t>(sm) & 1)*B5) |
+                     (static_cast<int32_t>(sm) >> 1);
+  Emit32(encoding);
+}
+
+
+// Emit a three operand double precision VFP instruction; 'opcode' supplies
+// the operation specific bits.  Each D register is split into a 4 bit field
+// plus a separate high bit.
+void Thumb2Assembler::EmitVFPddd(Condition cond, int32_t opcode,
+                                 DRegister dd, DRegister dn, DRegister dm) {
+  CHECK_NE(dd, kNoDRegister);
+  CHECK_NE(dn, kNoDRegister);
+  CHECK_NE(dm, kNoDRegister);
+  CheckCondition(cond);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B27 | B26 | B25 | B11 | B9 | B8 | opcode |
+                     ((static_cast<int32_t>(dd) >> 4)*B22) |
+                     ((static_cast<int32_t>(dn) & 0xf)*B16) |
+                     ((static_cast<int32_t>(dd) & 0xf)*B12) |
+                     ((static_cast<int32_t>(dn) >> 4)*B7) |
+                     ((static_cast<int32_t>(dm) >> 4)*B5) |
+                     (static_cast<int32_t>(dm) & 0xf);
+  Emit32(encoding);
+}
+
+
+// Emit a VFP instruction with a single precision destination and a double
+// precision source (e.g. conversions); 'opcode' supplies the operation bits.
+void Thumb2Assembler::EmitVFPsd(Condition cond, int32_t opcode,
+                                SRegister sd, DRegister dm) {
+  CHECK_NE(sd, kNoSRegister);
+  CHECK_NE(dm, kNoDRegister);
+  CheckCondition(cond);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B27 | B26 | B25 | B11 | B9 | opcode |
+                     ((static_cast<int32_t>(sd) & 1)*B22) |
+                     ((static_cast<int32_t>(sd) >> 1)*B12) |
+                     ((static_cast<int32_t>(dm) >> 4)*B5) |
+                     (static_cast<int32_t>(dm) & 0xf);
+  Emit32(encoding);
+}
+
+
+// Emit a VFP instruction with a double precision destination and a single
+// precision source (e.g. conversions); 'opcode' supplies the operation bits.
+void Thumb2Assembler::EmitVFPds(Condition cond, int32_t opcode,
+                                DRegister dd, SRegister sm) {
+  CHECK_NE(dd, kNoDRegister);
+  CHECK_NE(sm, kNoSRegister);
+  CheckCondition(cond);
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+                     B27 | B26 | B25 | B11 | B9 | opcode |
+                     ((static_cast<int32_t>(dd) >> 4)*B22) |
+                     ((static_cast<int32_t>(dd) & 0xf)*B12) |
+                     ((static_cast<int32_t>(sm) & 1)*B5) |
+                     (static_cast<int32_t>(sm) >> 1);
+  Emit32(encoding);
+}
+
+
+// Copy the VFP status flags to the APSR: VMRS APSR_nzcv, FPSCR.
+// Not yet implemented for Thumb2; aborts at runtime if reached.
+void Thumb2Assembler::vmstat(Condition cond) {  // VMRS APSR_nzcv, FPSCR.
+  CheckCondition(cond);
+  UNIMPLEMENTED(FATAL) << "Unimplemented thumb instruction";
+}
+
+
+// Supervisor call with an 8 bit immediate: SVC #imm8 (encoding T1).
+void Thumb2Assembler::svc(uint32_t imm8) {
+  CHECK(IsUint(8, imm8)) << imm8;
+  int16_t encoding = B15 | B14 | B12 |
+       B11 | B10 | B9 | B8 |
+       imm8;
+  Emit16(encoding);
+}
+
+
+// Breakpoint with an 8 bit immediate: BKPT #imm8 (encoding T1).
+void Thumb2Assembler::bkpt(uint16_t imm8) {
+  CHECK(IsUint(8, imm8)) << imm8;
+  int16_t encoding = B15 | B13 | B12 |
+      B11 | B10 | B9 |
+      imm8;
+  Emit16(encoding);
+}
+
+// Convert the given IT state to a mask bit given bit 0 of the first
+// condition and a shift position.
+// Convert the given IT state to a mask bit given bit 0 of the first
+// condition and a shift position.  Then-slots copy firstcond0, else-slots
+// invert it, and an omitted slot contributes the terminating 1 bit.
+static uint8_t ToItMask(ItState s, uint8_t firstcond0, uint8_t shift) {
+  uint8_t bit;
+  switch (s) {
+    case kItOmitted:
+      bit = 1;
+      break;
+    case kItThen:
+      bit = firstcond0;
+      break;
+    case kItElse:
+      bit = !firstcond0;
+      break;
+    default:
+      return 0;
+  }
+  return bit << shift;
+}
+
+
+// Set the IT condition in the given position for the given state.  This is
+// used to check that conditional instructions match the preceding IT
+// statement.
+void Thumb2Assembler::SetItCondition(ItState s, Condition cond, uint8_t index) {
+  if (s == kItOmitted) {
+    it_conditions_[index] = AL;
+  } else if (s == kItThen) {
+    it_conditions_[index] = cond;
+  } else if (s == kItElse) {
+    // The else condition is the then condition with its low bit inverted.
+    it_conditions_[index] = static_cast<Condition>(static_cast<uint8_t>(cond) ^ 1);
+  }
+}
+
+
+// Emit an IT (if-then) instruction covering up to four following
+// conditional instructions.  i1..i3 give the then/else/omitted state of the
+// second through fourth slots.  The per-slot conditions are also recorded
+// so later conditional instructions can be validated against the block.
+void Thumb2Assembler::it(Condition firstcond, ItState i1, ItState i2, ItState i3) {
+  CheckCondition(AL);       // Not allowed in IT block.
+  uint8_t firstcond0 = static_cast<uint8_t>(firstcond) & 1;
+
+  // All conditions to AL.
+  for (uint8_t i = 0; i < 4; ++i) {
+    it_conditions_[i] = AL;
+  }
+
+  SetItCondition(kItThen, firstcond, 0);
+  uint8_t mask = ToItMask(i1, firstcond0, 3);
+  SetItCondition(i1, firstcond, 1);
+
+  // An omitted slot terminates the block; only continue building the mask
+  // while slots are present.
+  if (i1 != kItOmitted) {
+    mask |= ToItMask(i2, firstcond0, 2);
+    SetItCondition(i2, firstcond, 2);
+    if (i2 != kItOmitted) {
+      mask |= ToItMask(i3, firstcond0, 1);
+      SetItCondition(i3, firstcond, 3);
+      if (i3 != kItOmitted) {
+        mask |= 0b0001;   // Terminating bit for a full four instruction block.
+      }
+    }
+  }
+
+  // Start at first condition.
+  it_cond_index_ = 0;
+  next_condition_ = it_conditions_[0];
+  uint16_t encoding = B15 | B13 | B12 |
+        B11 | B10 | B9 | B8 |
+        firstcond << 4 |
+        mask;
+  Emit16(encoding);
+}
+
+
+// Compare and branch on zero: CBZ rn, label.  The architecture only allows
+// forward branches, so the label must still be unbound.
+void Thumb2Assembler::cbz(Register rn, Label* label) {
+  CheckCondition(AL);
+  if (!label->IsBound()) {
+    // Chain this cbz onto the label's list of unresolved branches.
+    uint16_t branchid = EmitCompareAndBranch(rn, static_cast<uint16_t>(label->position_), false);
+    label->LinkTo(branchid);
+  } else {
+    LOG(FATAL) << "cbz can only be used to branch forwards";
+  }
+}
+
+
+// Compare and branch on non-zero: CBNZ rn, label.  The architecture only
+// allows forward branches, so the label must still be unbound.
+void Thumb2Assembler::cbnz(Register rn, Label* label) {
+  CheckCondition(AL);
+  if (!label->IsBound()) {
+    // Chain this cbnz onto the label's list of unresolved branches.
+    uint16_t branchid = EmitCompareAndBranch(rn, static_cast<uint16_t>(label->position_), true);
+    label->LinkTo(branchid);
+  } else {
+    LOG(FATAL) << "cbnz can only be used to branch forwards";
+  }
+}
+
+
+// Branch with link and exchange to the address in rm: BLX rm (encoding T1).
+void Thumb2Assembler::blx(Register rm, Condition cond) {
+  CHECK_NE(rm, kNoRegister);
+  CheckCondition(cond);
+  int16_t encoding = B14 | B10 | B9 | B8 | B7 | static_cast<int16_t>(rm) << 3;
+  Emit16(encoding);
+}
+
+
+// Branch and exchange to the address in rm: BX rm (encoding T1).
+void Thumb2Assembler::bx(Register rm, Condition cond) {
+  CHECK_NE(rm, kNoRegister);
+  CheckCondition(cond);
+  int16_t encoding = B14 | B10 | B9 | B8 | static_cast<int16_t>(rm) << 3;
+  Emit16(encoding);
+}
+
+
+// Push a single register on the stack: STR rd, [sp, #-kRegisterSize]!.
+void Thumb2Assembler::Push(Register rd, Condition cond) {
+  str(rd, Address(SP, -kRegisterSize, Address::PreIndex), cond);
+}
+
+
+// Pop a single register from the stack: LDR rd, [sp], #kRegisterSize.
+void Thumb2Assembler::Pop(Register rd, Condition cond) {
+  ldr(rd, Address(SP, kRegisterSize, Address::PostIndex), cond);
+}
+
+
+// Push a register list on the stack: STMDB sp!, {regs}.
+void Thumb2Assembler::PushList(RegList regs, Condition cond) {
+  stm(DB_W, SP, regs, cond);
+}
+
+
+// Pop a register list from the stack: LDMIA sp!, {regs}.
+void Thumb2Assembler::PopList(RegList regs, Condition cond) {
+  ldm(IA_W, SP, regs, cond);
+}
+
+
+// Register to register move, eliding an unconditional move to self (which
+// would be a no-op).
+void Thumb2Assembler::Mov(Register rd, Register rm, Condition cond) {
+  if (rd == rm && cond == AL) {
+    return;   // Nothing to do.
+  }
+  mov(rd, ShifterOperand(rm), cond);
+}
+
+
+// A branch has changed size.  Make a hole for it.
+void Thumb2Assembler::MakeHoleForBranch(uint32_t location, uint32_t delta) {
+  // Move the contents of the buffer using: Move(newposition, oldposition)
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  buffer_.Move(location + delta, location);
+}
+
+
+// Bind 'label' to the current buffer position, resolving every branch that
+// was linked to it.  Resolving a branch may grow it from 16 to 32 bits (or
+// split a cbz/cbnz into a cmp + b<cond> pair), which opens a hole in the
+// code and may in turn push other branches out of range; those are
+// relocated iteratively until a fixed point is reached.
+void Thumb2Assembler::Bind(Label* label) {
+  CHECK(!label->IsBound());
+  uint32_t bound_pc = buffer_.Size();
+  std::vector<Branch*> changed_branches;
+
+  while (label->IsLinked()) {
+    uint16_t position = label->Position();                  // Branch id for linked branch.
+    Branch* branch = GetBranch(position);                   // Get the branch at this id.
+    bool changed = branch->Resolve(bound_pc);               // Branch can be resolved now.
+    uint32_t branch_location = branch->GetLocation();
+    uint16_t next = buffer_.Load<uint16_t>(branch_location);       // Get next in chain.
+    if (changed) {
+      // The branch grew; open a 2 byte hole for the larger encoding.
+      MakeHoleForBranch(branch->GetLocation(), 2);
+      if (branch->IsCompareAndBranch()) {
+        // A cbz/cbnz instruction has changed size.  There is no valid encoding for
+        // a 32 bit cbz/cbnz so we need to change this to an instruction pair:
+        // cmp rn, #0
+        // b<eq|ne> target
+        bool n = branch->GetType() == Branch::kCompareAndBranchNonZero;
+        Condition cond = n ? NE : EQ;
+        branch->Move(2);      // Move the branch forward by 2 bytes.
+        branch->ResetTypeAndCondition(Branch::kConditional, cond);
+        branch->ResetSize(Branch::k16Bit);
+
+        // Now add a compare instruction in the place the branch was.
+        int16_t cmp = B13 | B11 | static_cast<int16_t>(branch->GetRegister()) << 8;
+        buffer_.Store<int16_t>(branch_location, cmp);
+
+        // Since we have made a hole in the code we need to reload the
+        // current pc.
+        bound_pc = buffer_.Size();
+
+        // Now resolve the newly added branch.
+        changed = branch->Resolve(bound_pc);
+        if (changed) {
+          MakeHoleForBranch(branch->GetLocation(), 2);
+          changed_branches.push_back(branch);
+        }
+      } else {
+        changed_branches.push_back(branch);
+      }
+    }
+    label->position_ = next;                                // Move to next.
+  }
+  label->BindTo(bound_pc);
+
+  // Now relocate any changed branches.  Do this until there are no more changes.
+  std::vector<Branch*> branches_to_process = changed_branches;
+  while (branches_to_process.size() != 0) {
+    changed_branches.clear();
+    for (auto& changed_branch : branches_to_process) {
+      for (auto& branch : branches_) {
+        bool changed = branch->Relocate(changed_branch->GetLocation(), 2);
+        if (changed) {
+          changed_branches.push_back(branch);
+        }
+      }
+    }
+    // Assign after the loop: reassigning branches_to_process while the
+    // range-for is iterating over it would invalidate its iterators
+    // (undefined behavior).
+    branches_to_process = changed_branches;
+  }
+}
+
+
+// Write the final encoding of every recorded branch into the buffer.
+// Called once all branches have been resolved and relocated.
+void Thumb2Assembler::EmitBranches() {
+  for (size_t i = 0; i < branches_.size(); ++i) {
+    branches_[i]->Emit(&buffer_);
+  }
+}
+
+
+// rd = rm << shift_imm (logical shift left by immediate).
+void Thumb2Assembler::Lsl(Register rd, Register rm, uint32_t shift_imm,
+                          Condition cond) {
+  CHECK_NE(shift_imm, 0u);  // Do not use Lsl if no shift is wanted.
+  mov(rd, ShifterOperand(rm, LSL, shift_imm), cond);
+}
+
+
+// rd = rm >> shift_imm (logical shift right by immediate, 1-32).
+void Thumb2Assembler::Lsr(Register rd, Register rm, uint32_t shift_imm,
+                          Condition cond) {
+  CHECK_NE(shift_imm, 0u);  // Do not use Lsr if no shift is wanted.
+  if (shift_imm == 32) shift_imm = 0;  // Comply to UAL syntax.
+  mov(rd, ShifterOperand(rm, LSR, shift_imm), cond);
+}
+
+
+// rd = rm >> shift_imm (arithmetic shift right by immediate, 1-32).
+void Thumb2Assembler::Asr(Register rd, Register rm, uint32_t shift_imm,
+                          Condition cond) {
+  CHECK_NE(shift_imm, 0u);  // Do not use Asr if no shift is wanted.
+  if (shift_imm == 32) shift_imm = 0;  // Comply to UAL syntax.
+  mov(rd, ShifterOperand(rm, ASR, shift_imm), cond);
+}
+
+
+// rd = rm rotated right by shift_imm (1-31).
+void Thumb2Assembler::Ror(Register rd, Register rm, uint32_t shift_imm,
+                          Condition cond) {
+  CHECK_NE(shift_imm, 0u);  // Use Rrx instruction.
+  mov(rd, ShifterOperand(rm, ROR, shift_imm), cond);
+}
+
+
+// rd = rm rotated right by one bit through the carry flag (RRX, encoded as
+// ROR with a zero immediate).
+void Thumb2Assembler::Rrx(Register rd, Register rm, Condition cond) {
+  mov(rd, ShifterOperand(rm, ROR, 0), cond);
+}
+
+
+// Insert 'offset' into the 32 bit branch instruction 'inst' and return the
+// result.  Bit 12 of 'inst' selects the layout: set means encoding T4
+// (24 bit offset, J bits derived from the sign bit), clear means encoding
+// T3 (20 bit offset for conditional branches).  The inverse operation is
+// DecodeBranchOffset.
+int32_t Thumb2Assembler::EncodeBranchOffset(int32_t offset, int32_t inst) {
+  // The offset is off by 4 due to the way the ARM CPUs read PC.
+  offset -= 4;
+  offset >>= 1;   // All Thumb branch offsets are in units of halfwords.
+
+  uint32_t value = 0;
+  // There are two different encodings depending on the value of bit 12.  In one case
+  // intermediate values are calculated using the sign bit.
+  if ((inst & B12) == B12) {
+    // 25 bits of offset.
+    uint32_t signbit = (offset >> 31) & 0x1;
+    uint32_t i1 = (offset >> 22) & 0x1;
+    uint32_t i2 = (offset >> 21) & 0x1;
+    uint32_t imm10 = (offset >> 11) & 0x03ff;
+    uint32_t imm11 = offset & 0x07ff;
+    uint32_t j1 = (i1 ^ signbit) ? 0 : 1;   // J1 = NOT(I1 EOR S).
+    uint32_t j2 = (i2 ^ signbit) ? 0 : 1;   // J2 = NOT(I2 EOR S).
+    value = (signbit << 26) | (j1 << 13) | (j2 << 11) | (imm10 << 16) |
+                      imm11;
+    // Remove the offset from the current encoding.
+    inst &= ~(0x3ff << 16 | 0x7ff);
+  } else {
+    // The T3 offset layout is S:J2:J1:imm6:imm11 with no sign-bit XOR, so
+    // J1 is bit 17 and J2 is bit 18 of the halfword offset.  This must be
+    // the exact inverse of DecodeBranchOffset.
+    uint32_t signbit = (offset >> 31) & 0x1;
+    uint32_t imm6 = (offset >> 11) & 0x03f;
+    uint32_t imm11 = offset & 0x07ff;
+    uint32_t j1 = (offset >> 17) & 1;
+    uint32_t j2 = (offset >> 18) & 1;
+    value = (signbit << 26) | (j1 << 13) | (j2 << 11) | (imm6 << 16) |
+        imm11;
+    // Remove the offset from the current encoding.
+    inst &= ~(0x3f << 16 | 0x7ff);
+  }
+  // Mask out offset bits in current instruction.
+  inst &= ~(B26 | B13 | B11);
+  inst |= value;
+  return inst;
+}
+
+
+// Inverse of EncodeBranchOffset: recover the byte branch offset (including
+// the PC read-ahead bias of 4) from an encoded 32 bit branch instruction.
+// Relies on arithmetic right shift of signed int for sign extension
+// (implementation-defined in C++, but true for the targeted compilers).
+int Thumb2Assembler::DecodeBranchOffset(int32_t instr) {
+  int32_t imm32;
+  if ((instr & B12) == B12) {
+    // T4 encoding (B/BL): imm32 = SignExtend(S:I1:I2:imm10:imm11:'0') with
+    // I1 = NOT(J1 EOR S), I2 = NOT(J2 EOR S) per the ARM ARM.
+    uint32_t S = (instr >> 26) & 1;
+    uint32_t J2 = (instr >> 11) & 1;
+    uint32_t J1 = (instr >> 13) & 1;
+    uint32_t imm10 = (instr >> 16) & 0x3FF;
+    uint32_t imm11 = instr & 0x7FF;
+
+    uint32_t I1 = ~(J1 ^ S) & 1;
+    uint32_t I2 = ~(J2 ^ S) & 1;
+    imm32 = (S << 24) | (I1 << 23) | (I2 << 22) | (imm10 << 12) | (imm11 << 1);
+    imm32 = (imm32 << 8) >> 8;  // sign extend 24 bit immediate.
+  } else {
+    // T3 encoding (conditional B): imm32 = SignExtend(S:J2:J1:imm6:imm11:'0').
+    uint32_t S = (instr >> 26) & 1;
+    uint32_t J2 = (instr >> 11) & 1;
+    uint32_t J1 = (instr >> 13) & 1;
+    uint32_t imm6 = (instr >> 16) & 0x3F;
+    uint32_t imm11 = instr & 0x7FF;
+
+    imm32 = (S << 20) | (J2 << 19) | (J1 << 18) | (imm6 << 12) | (imm11 << 1);
+    imm32 = (imm32 << 11) >> 11;  // sign extend 21 bit immediate.
+  }
+  imm32 += 4;  // Compensate for the PC read-ahead subtracted during encoding.
+  return imm32;
+}
+
+
+// Add a signed constant to rd in place.  May clobber IP.
+void Thumb2Assembler::AddConstant(Register rd, int32_t value, Condition cond) {
+  AddConstant(rd, rd, value, cond);
+}
+
+
+// rd := rn + value (rd may equal rn).  Tries progressively longer sequences:
+//   1. a single add/sub with a directly encodable immediate,
+//   2. mvn of the inverted constant into IP, then add/sub,
+//   3. movw (+ movt if needed) of the full constant into IP, then add.
+// May clobber IP in the fallback cases, so rn must not be IP there.
+void Thumb2Assembler::AddConstant(Register rd, Register rn, int32_t value,
+                                  Condition cond) {
+  if (value == 0) {
+    if (rd != rn) {
+      mov(rd, ShifterOperand(rn), cond);
+    }
+    return;
+  }
+  // We prefer to select the shorter code sequence rather than selecting add for
+  // positive values and sub for negatives ones, which would slightly improve
+  // the readability of generated code for some constants.
+  ShifterOperand shifter_op;
+  if (ShifterOperand::CanHoldThumb(rd, rn, ADD, value, &shifter_op)) {
+    add(rd, rn, shifter_op, cond);
+  } else if (ShifterOperand::CanHoldThumb(rd, rn, SUB, -value, &shifter_op)) {
+    sub(rd, rn, shifter_op, cond);
+  } else {
+    CHECK(rn != IP);  // IP is used as scratch from here on.
+    if (ShifterOperand::CanHoldThumb(rd, rn, MVN, ~value, &shifter_op)) {
+      mvn(IP, shifter_op, cond);
+      add(rd, rn, ShifterOperand(IP), cond);
+    } else if (ShifterOperand::CanHoldThumb(rd, rn, MVN, ~(-value), &shifter_op)) {
+      mvn(IP, shifter_op, cond);
+      sub(rd, rn, ShifterOperand(IP), cond);
+    } else {
+      movw(IP, Low16Bits(value), cond);
+      uint16_t value_high = High16Bits(value);
+      if (value_high != 0) {
+        movt(IP, value_high, cond);
+      }
+      add(rd, rn, ShifterOperand(IP), cond);
+    }
+  }
+}
+
+
+// As AddConstant, but emits the flag-setting forms (adds/subs).  There is
+// deliberately no early-out for value == 0: the caller wants the condition
+// flags updated.  May clobber IP, so rn must not be IP when the constant is
+// not directly encodable.
+void Thumb2Assembler::AddConstantSetFlags(Register rd, Register rn, int32_t value,
+                                          Condition cond) {
+  ShifterOperand shifter_op;
+  if (ShifterOperand::CanHoldThumb(rd, rn, ADD, value, &shifter_op)) {
+    adds(rd, rn, shifter_op, cond);
+  } else if (ShifterOperand::CanHoldThumb(rd, rn, SUB, -value, &shifter_op)) {
+    // Query encodability with SUB since a subs is what gets emitted; this
+    // matches the equivalent path in AddConstant above.
+    subs(rd, rn, shifter_op, cond);
+  } else {
+    CHECK(rn != IP);  // IP is used as scratch from here on.
+    if (ShifterOperand::CanHoldThumb(rd, rn, MVN, ~value, &shifter_op)) {
+      mvn(IP, shifter_op, cond);
+      adds(rd, rn, ShifterOperand(IP), cond);
+    } else if (ShifterOperand::CanHoldThumb(rd, rn, MVN, ~(-value), &shifter_op)) {
+      mvn(IP, shifter_op, cond);
+      subs(rd, rn, ShifterOperand(IP), cond);
+    } else {
+      movw(IP, Low16Bits(value), cond);
+      uint16_t value_high = High16Bits(value);
+      if (value_high != 0) {
+        movt(IP, value_high, cond);
+      }
+      adds(rd, rn, ShifterOperand(IP), cond);
+    }
+  }
+}
+
+
+// Load a 32 bit constant into rd using the shortest available sequence:
+// a single mov, a single mvn of the inverted constant, or movw (plus movt
+// when the high half is non-zero).
+void Thumb2Assembler::LoadImmediate(Register rd, int32_t value, Condition cond) {
+  ShifterOperand shifter_op;
+  if (ShifterOperand::CanHoldThumb(rd, R0, MOV, value, &shifter_op)) {
+    mov(rd, shifter_op, cond);
+  } else if (ShifterOperand::CanHoldThumb(rd, R0, MVN, ~value, &shifter_op)) {
+    mvn(rd, shifter_op, cond);
+  } else {
+    movw(rd, Low16Bits(value), cond);
+    uint16_t value_high = High16Bits(value);
+    if (value_high != 0) {
+      movt(rd, value_high, cond);
+    }
+  }
+}
+
+// Load reg from [base + offset].  If the offset is not directly encodable,
+// the effective address is first materialized in IP.
+// Implementation note: this method must emit at most one instruction when
+// Address::CanHoldLoadOffsetThumb.
+void Thumb2Assembler::LoadFromOffset(LoadOperandType type,
+                                     Register reg,
+                                     Register base,
+                                     int32_t offset,
+                                     Condition cond) {
+  if (!Address::CanHoldLoadOffsetThumb(type, offset)) {
+    CHECK(base != IP);  // IP is about to be clobbered as scratch.
+    LoadImmediate(IP, offset, cond);
+    add(IP, IP, ShifterOperand(base), cond);
+    base = IP;
+    offset = 0;  // Full address is now in IP; load with a zero offset.
+  }
+  CHECK(Address::CanHoldLoadOffsetThumb(type, offset));
+  switch (type) {
+    case kLoadSignedByte:
+      ldrsb(reg, Address(base, offset), cond);
+      break;
+    case kLoadUnsignedByte:
+      ldrb(reg, Address(base, offset), cond);
+      break;
+    case kLoadSignedHalfword:
+      ldrsh(reg, Address(base, offset), cond);
+      break;
+    case kLoadUnsignedHalfword:
+      ldrh(reg, Address(base, offset), cond);
+      break;
+    case kLoadWord:
+      ldr(reg, Address(base, offset), cond);
+      break;
+    case kLoadWordPair:
+      ldrd(reg, Address(base, offset), cond);
+      break;
+    default:
+      LOG(FATAL) << "UNREACHABLE";
+  }
+}
+
+
+// Load an S (single-precision) register from [base + offset], materializing
+// an unencodable offset in IP first.  Emits exactly one instruction when
+// Address::CanHoldLoadOffsetThumb holds for the offset.
+void Thumb2Assembler::LoadSFromOffset(SRegister reg,
+                                      Register base,
+                                      int32_t offset,
+                                      Condition cond) {
+  const bool needs_scratch = !Address::CanHoldLoadOffsetThumb(kLoadSWord, offset);
+  if (needs_scratch) {
+    // Compute base + offset into IP and load relative to that.
+    CHECK_NE(base, IP);
+    LoadImmediate(IP, offset, cond);
+    add(IP, IP, ShifterOperand(base), cond);
+    base = IP;
+    offset = 0;
+  }
+  CHECK(Address::CanHoldLoadOffsetThumb(kLoadSWord, offset));
+  vldrs(reg, Address(base, offset), cond);
+}
+
+
+// Load a D (double-precision) register from [base + offset], materializing
+// an unencodable offset in IP first.  Emits exactly one instruction when
+// Address::CanHoldLoadOffsetThumb holds for the offset.
+void Thumb2Assembler::LoadDFromOffset(DRegister reg,
+                                      Register base,
+                                      int32_t offset,
+                                      Condition cond) {
+  const bool needs_scratch = !Address::CanHoldLoadOffsetThumb(kLoadDWord, offset);
+  if (needs_scratch) {
+    // Compute base + offset into IP and load relative to that.
+    CHECK_NE(base, IP);
+    LoadImmediate(IP, offset, cond);
+    add(IP, IP, ShifterOperand(base), cond);
+    base = IP;
+    offset = 0;
+  }
+  CHECK(Address::CanHoldLoadOffsetThumb(kLoadDWord, offset));
+  vldrd(reg, Address(base, offset), cond);
+}
+
+
+// Store reg to [base + offset].  If the offset is not directly encodable,
+// the effective address is first materialized in IP.
+// Implementation note: this method must emit at most one instruction when
+// Address::CanHoldStoreOffsetThumb.
+void Thumb2Assembler::StoreToOffset(StoreOperandType type,
+                                    Register reg,
+                                    Register base,
+                                    int32_t offset,
+                                    Condition cond) {
+  if (!Address::CanHoldStoreOffsetThumb(type, offset)) {
+    CHECK(reg != IP);  // reg must survive the IP-clobbering sequence below.
+    CHECK(base != IP);
+    LoadImmediate(IP, offset, cond);
+    add(IP, IP, ShifterOperand(base), cond);
+    base = IP;
+    offset = 0;  // Full address is now in IP; store with a zero offset.
+  }
+  CHECK(Address::CanHoldStoreOffsetThumb(type, offset));
+  switch (type) {
+    case kStoreByte:
+      strb(reg, Address(base, offset), cond);
+      break;
+    case kStoreHalfword:
+      strh(reg, Address(base, offset), cond);
+      break;
+    case kStoreWord:
+      str(reg, Address(base, offset), cond);
+      break;
+    case kStoreWordPair:
+      strd(reg, Address(base, offset), cond);
+      break;
+    default:
+      LOG(FATAL) << "UNREACHABLE";
+  }
+}
+
+
+// Store an S (single-precision) register to [base + offset], materializing
+// an unencodable offset in IP first.  Emits exactly one instruction when
+// Address::CanHoldStoreOffsetThumb holds for the offset.
+void Thumb2Assembler::StoreSToOffset(SRegister reg,
+                                     Register base,
+                                     int32_t offset,
+                                     Condition cond) {
+  const bool needs_scratch = !Address::CanHoldStoreOffsetThumb(kStoreSWord, offset);
+  if (needs_scratch) {
+    // Compute base + offset into IP and store relative to that.
+    CHECK_NE(base, IP);
+    LoadImmediate(IP, offset, cond);
+    add(IP, IP, ShifterOperand(base), cond);
+    base = IP;
+    offset = 0;
+  }
+  CHECK(Address::CanHoldStoreOffsetThumb(kStoreSWord, offset));
+  vstrs(reg, Address(base, offset), cond);
+}
+
+
+// Store a D (double-precision) register to [base + offset], materializing
+// an unencodable offset in IP first.  Emits exactly one instruction when
+// Address::CanHoldStoreOffsetThumb holds for the offset.
+void Thumb2Assembler::StoreDToOffset(DRegister reg,
+                                     Register base,
+                                     int32_t offset,
+                                     Condition cond) {
+  const bool needs_scratch = !Address::CanHoldStoreOffsetThumb(kStoreDWord, offset);
+  if (needs_scratch) {
+    // Compute base + offset into IP and store relative to that.
+    CHECK_NE(base, IP);
+    LoadImmediate(IP, offset, cond);
+    add(IP, IP, ShifterOperand(base), cond);
+    base = IP;
+    offset = 0;
+  }
+  CHECK(Address::CanHoldStoreOffsetThumb(kStoreDWord, offset));
+  vstrd(reg, Address(base, offset), cond);
+}
+
+
+// Emit a data memory barrier on SMP builds; a no-op on non-SMP builds.
+// The scratch register is only asserted, not used: dmb needs no scratch.
+void Thumb2Assembler::MemoryBarrier(ManagedRegister mscratch) {
+  CHECK_EQ(mscratch.AsArm().AsCoreRegister(), R12);
+#if ANDROID_SMP != 0
+  int32_t encoding = 0xf3bf8f5f;  // dmb in T1 encoding.
+  Emit32(encoding);
+#endif
+}
+
+
+// Branch to label if r is zero using the dedicated thumb2 cbz instruction.
+// cbz has a limited forward-only range; the branch machinery rewrites it to
+// a cmp/branch pair if the target turns out to be out of range.
+void Thumb2Assembler::CompareAndBranchIfZero(Register r, Label* label) {
+  cbz(r, label);
+}
+
+
+// Branch to label if r is non-zero using the thumb2 cbnz instruction.  As
+// with cbz, an out-of-range target is rewritten to a cmp/branch pair.
+void Thumb2Assembler::CompareAndBranchIfNonZero(Register r, Label* label) {
+  cbnz(r, label);
+}
+}  // namespace arm
+}  // namespace art
diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h
new file mode 100644
index 0000000..60b9384
--- /dev/null
+++ b/compiler/utils/arm/assembler_thumb2.h
@@ -0,0 +1,685 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_ARM_ASSEMBLER_THUMB2_H_
+#define ART_COMPILER_UTILS_ARM_ASSEMBLER_THUMB2_H_
+
+#include <vector>
+
+#include "base/logging.h"
+#include "constants_arm.h"
+#include "utils/arm/managed_register_arm.h"
+#include "utils/arm/assembler_arm.h"
+#include "offsets.h"
+#include "utils.h"
+
+namespace art {
+namespace arm {
+
+
+class Thumb2Assembler FINAL : public ArmAssembler {
+ public:
+  // By default 16 bit encodings are allowed and no IT block is active.
+  // NOTE(review): it_conditions_ is not initialized here; it appears to be
+  // populated by it() before CheckCondition() reads it -- confirm.
+  Thumb2Assembler() : force_32bit_(false), it_cond_index_(kNoItCondition), next_condition_(AL) {
+  }
+
+  // The assembler owns the Branch objects recorded in branches_.
+  virtual ~Thumb2Assembler() {
+    for (auto& branch : branches_) {
+      delete branch;
+    }
+  }
+
+  // This assembler always generates thumb code.
+  bool IsThumb() const OVERRIDE {
+    return true;
+  }
+
+  // Whether Force32Bit() has been called to suppress 16 bit encodings.
+  bool IsForced32Bit() const {
+    return force_32bit_;
+  }
+
+  // Encode all pending branches (their sizes and targets are final by now)
+  // before the base class copies the instruction buffer out to 'region'.
+  void FinalizeInstructions(const MemoryRegion& region) OVERRIDE {
+    EmitBranches();
+    Assembler::FinalizeInstructions(region);
+  }
+
+  // Data-processing instructions.
+  void and_(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+
+  void eor(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+
+  void sub(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  void subs(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+
+  void rsb(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  void rsbs(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+
+  void add(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+
+  void adds(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+
+  void adc(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+
+  void sbc(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+
+  void rsc(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+
+  void tst(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+
+  void teq(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+
+  void cmp(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+
+  void cmn(Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+
+  void orr(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  void orrs(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+
+  void mov(Register rd, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  void movs(Register rd, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+
+  void bic(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+
+  void mvn(Register rd, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+  void mvns(Register rd, const ShifterOperand& so, Condition cond = AL) OVERRIDE;
+
+  // Miscellaneous data-processing instructions.
+  void clz(Register rd, Register rm, Condition cond = AL) OVERRIDE;
+  void movw(Register rd, uint16_t imm16, Condition cond = AL) OVERRIDE;
+  void movt(Register rd, uint16_t imm16, Condition cond = AL) OVERRIDE;
+
+  // Multiply instructions.
+  void mul(Register rd, Register rn, Register rm, Condition cond = AL) OVERRIDE;
+  void mla(Register rd, Register rn, Register rm, Register ra,
+           Condition cond = AL) OVERRIDE;
+  void mls(Register rd, Register rn, Register rm, Register ra,
+           Condition cond = AL) OVERRIDE;
+  void umull(Register rd_lo, Register rd_hi, Register rn, Register rm,
+             Condition cond = AL) OVERRIDE;
+
+  void sdiv(Register rd, Register rn, Register rm, Condition cond = AL) OVERRIDE;
+  void udiv(Register rd, Register rn, Register rm, Condition cond = AL) OVERRIDE;
+
+  // Load/store instructions.
+  void ldr(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
+  void str(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
+
+  void ldrb(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
+  void strb(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
+
+  void ldrh(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
+  void strh(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
+
+  void ldrsb(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
+  void ldrsh(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
+
+  void ldrd(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
+  void strd(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
+
+  void ldm(BlockAddressMode am, Register base,
+           RegList regs, Condition cond = AL) OVERRIDE;
+  void stm(BlockAddressMode am, Register base,
+           RegList regs, Condition cond = AL) OVERRIDE;
+
+  void ldrex(Register rd, Register rn, Condition cond = AL) OVERRIDE;
+  void strex(Register rd, Register rt, Register rn, Condition cond = AL) OVERRIDE;
+
+  void ldrex(Register rd, Register rn, uint16_t imm, Condition cond = AL);
+  void strex(Register rd, Register rt, Register rn, uint16_t imm, Condition cond = AL);
+
+
+  // Miscellaneous instructions.
+  void clrex(Condition cond = AL) OVERRIDE;
+  void nop(Condition cond = AL) OVERRIDE;
+
+  void bkpt(uint16_t imm16) OVERRIDE;
+  void svc(uint32_t imm24) OVERRIDE;
+
+  // If-then
+  void it(Condition firstcond, ItState i1 = kItOmitted,
+        ItState i2 = kItOmitted, ItState i3 = kItOmitted) OVERRIDE;
+
+  void cbz(Register rn, Label* target) OVERRIDE;
+  void cbnz(Register rn, Label* target) OVERRIDE;
+
+  // Floating point instructions (VFPv3-D16 and VFPv3-D32 profiles).
+  void vmovsr(SRegister sn, Register rt, Condition cond = AL) OVERRIDE;
+  void vmovrs(Register rt, SRegister sn, Condition cond = AL) OVERRIDE;
+  void vmovsrr(SRegister sm, Register rt, Register rt2, Condition cond = AL) OVERRIDE;
+  void vmovrrs(Register rt, Register rt2, SRegister sm, Condition cond = AL) OVERRIDE;
+  void vmovdrr(DRegister dm, Register rt, Register rt2, Condition cond = AL) OVERRIDE;
+  void vmovrrd(Register rt, Register rt2, DRegister dm, Condition cond = AL) OVERRIDE;
+  void vmovs(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
+  void vmovd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE;
+
+  // Returns false if the immediate cannot be encoded.
+  bool vmovs(SRegister sd, float s_imm, Condition cond = AL) OVERRIDE;
+  bool vmovd(DRegister dd, double d_imm, Condition cond = AL) OVERRIDE;
+
+  void vldrs(SRegister sd, const Address& ad, Condition cond = AL) OVERRIDE;
+  void vstrs(SRegister sd, const Address& ad, Condition cond = AL) OVERRIDE;
+  void vldrd(DRegister dd, const Address& ad, Condition cond = AL) OVERRIDE;
+  void vstrd(DRegister dd, const Address& ad, Condition cond = AL) OVERRIDE;
+
+  void vadds(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
+  void vaddd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
+  void vsubs(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
+  void vsubd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
+  void vmuls(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
+  void vmuld(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
+  void vmlas(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
+  void vmlad(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
+  void vmlss(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
+  void vmlsd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
+  void vdivs(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL) OVERRIDE;
+  void vdivd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL) OVERRIDE;
+
+  void vabss(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
+  void vabsd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE;
+  void vnegs(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
+  void vnegd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE;
+  void vsqrts(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
+  void vsqrtd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE;
+
+  void vcvtsd(SRegister sd, DRegister dm, Condition cond = AL) OVERRIDE;
+  void vcvtds(DRegister dd, SRegister sm, Condition cond = AL) OVERRIDE;
+  void vcvtis(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
+  void vcvtid(SRegister sd, DRegister dm, Condition cond = AL) OVERRIDE;
+  void vcvtsi(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
+  void vcvtdi(DRegister dd, SRegister sm, Condition cond = AL) OVERRIDE;
+  void vcvtus(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
+  void vcvtud(SRegister sd, DRegister dm, Condition cond = AL) OVERRIDE;
+  void vcvtsu(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
+  void vcvtdu(DRegister dd, SRegister sm, Condition cond = AL) OVERRIDE;
+
+  void vcmps(SRegister sd, SRegister sm, Condition cond = AL) OVERRIDE;
+  void vcmpd(DRegister dd, DRegister dm, Condition cond = AL) OVERRIDE;
+  void vcmpsz(SRegister sd, Condition cond = AL) OVERRIDE;
+  void vcmpdz(DRegister dd, Condition cond = AL) OVERRIDE;
+  void vmstat(Condition cond = AL) OVERRIDE;  // VMRS APSR_nzcv, FPSCR
+
+  void vpushs(SRegister reg, int nregs, Condition cond = AL) OVERRIDE;
+  void vpushd(DRegister reg, int nregs, Condition cond = AL) OVERRIDE;
+  void vpops(SRegister reg, int nregs, Condition cond = AL) OVERRIDE;
+  void vpopd(DRegister reg, int nregs, Condition cond = AL) OVERRIDE;
+
+  // Branch instructions.
+  void b(Label* label, Condition cond = AL);
+  void bl(Label* label, Condition cond = AL);
+  void blx(Label* label);
+  void blx(Register rm, Condition cond = AL) OVERRIDE;
+  void bx(Register rm, Condition cond = AL) OVERRIDE;
+
+  void Lsl(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL);
+  void Lsr(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL);
+  void Asr(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL);
+  void Ror(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL);
+  void Rrx(Register rd, Register rm, Condition cond = AL);
+
+  void Push(Register rd, Condition cond = AL) OVERRIDE;
+  void Pop(Register rd, Condition cond = AL) OVERRIDE;
+
+  void PushList(RegList regs, Condition cond = AL) OVERRIDE;
+  void PopList(RegList regs, Condition cond = AL) OVERRIDE;
+
+  void Mov(Register rd, Register rm, Condition cond = AL) OVERRIDE;
+
+  void CompareAndBranchIfZero(Register r, Label* label) OVERRIDE;
+  void CompareAndBranchIfNonZero(Register r, Label* label) OVERRIDE;
+
+  // Macros.
+  // Add signed constant value to rd. May clobber IP.
+  void AddConstant(Register rd, int32_t value, Condition cond = AL) OVERRIDE;
+  void AddConstant(Register rd, Register rn, int32_t value,
+                   Condition cond = AL) OVERRIDE;
+  void AddConstantSetFlags(Register rd, Register rn, int32_t value,
+                           Condition cond = AL) OVERRIDE;
+  void AddConstantWithCarry(Register rd, Register rn, int32_t value,
+                            Condition cond = AL) {}
+
+  // Load and Store. May clobber IP.
+  void LoadImmediate(Register rd, int32_t value, Condition cond = AL) OVERRIDE;
+  void LoadSImmediate(SRegister sd, float value, Condition cond = AL) {}
+  void LoadDImmediate(DRegister dd, double value,
+                      Register scratch, Condition cond = AL) {}
+  void MarkExceptionHandler(Label* label) OVERRIDE;
+  void LoadFromOffset(LoadOperandType type,
+                      Register reg,
+                      Register base,
+                      int32_t offset,
+                      Condition cond = AL) OVERRIDE;
+  void StoreToOffset(StoreOperandType type,
+                     Register reg,
+                     Register base,
+                     int32_t offset,
+                     Condition cond = AL) OVERRIDE;
+  void LoadSFromOffset(SRegister reg,
+                       Register base,
+                       int32_t offset,
+                       Condition cond = AL) OVERRIDE;
+  void StoreSToOffset(SRegister reg,
+                      Register base,
+                      int32_t offset,
+                      Condition cond = AL) OVERRIDE;
+  void LoadDFromOffset(DRegister reg,
+                       Register base,
+                       int32_t offset,
+                       Condition cond = AL) OVERRIDE;
+  void StoreDToOffset(DRegister reg,
+                      Register base,
+                      int32_t offset,
+                      Condition cond = AL) OVERRIDE;
+
+
+  static bool IsInstructionForExceptionHandling(uword pc);
+
+  // Emit data (e.g. encoded instruction or immediate) to the.
+  // instruction stream.
+  void Emit32(int32_t value);     // Emit a 32 bit instruction in thumb format.
+  void Emit16(int16_t value);     // Emit a 16 bit instruction in little endian format.
+  void Bind(Label* label) OVERRIDE;
+
+  void MemoryBarrier(ManagedRegister scratch) OVERRIDE;
+
+  // Force the assembler to generate only 32 bit instructions (16 bit
+  // encodings are suppressed from this point on; the flag is never reset).
+  void Force32Bit() {
+    force_32bit_ = true;
+  }
+
+ private:
+  // Emit a single 32 or 16 bit data processing instruction.
+  void EmitDataProcessing(Condition cond,
+                  Opcode opcode,
+                  int set_cc,
+                  Register rn,
+                  Register rd,
+                  const ShifterOperand& so);
+
+  // Must the instruction be 32 bits or can it possibly be encoded
+  // in 16 bits?
+  bool Is32BitDataProcessing(Condition cond,
+                  Opcode opcode,
+                  int set_cc,
+                  Register rn,
+                  Register rd,
+                  const ShifterOperand& so);
+
+  // Emit a 32 bit data processing instruction.
+  void Emit32BitDataProcessing(Condition cond,
+                  Opcode opcode,
+                  int set_cc,
+                  Register rn,
+                  Register rd,
+                  const ShifterOperand& so);
+
+  // Emit a 16 bit data processing instruction.
+  void Emit16BitDataProcessing(Condition cond,
+                  Opcode opcode,
+                  int set_cc,
+                  Register rn,
+                  Register rd,
+                  const ShifterOperand& so);
+
+  void Emit16BitAddSub(Condition cond,
+                       Opcode opcode,
+                       int set_cc,
+                       Register rn,
+                       Register rd,
+                       const ShifterOperand& so);
+
+  uint16_t EmitCompareAndBranch(Register rn, uint16_t prev, bool n);
+
+  void EmitLoadStore(Condition cond,
+                 bool load,
+                 bool byte,
+                 bool half,
+                 bool is_signed,
+                 Register rd,
+                 const Address& ad);
+
+  void EmitMemOpAddressMode3(Condition cond,
+                             int32_t mode,
+                             Register rd,
+                             const Address& ad);
+
+  void EmitMultiMemOp(Condition cond,
+                      BlockAddressMode am,
+                      bool load,
+                      Register base,
+                      RegList regs);
+
+  void EmitMulOp(Condition cond,
+                 int32_t opcode,
+                 Register rd,
+                 Register rn,
+                 Register rm,
+                 Register rs);
+
+  void EmitVFPsss(Condition cond,
+                  int32_t opcode,
+                  SRegister sd,
+                  SRegister sn,
+                  SRegister sm);
+
+  void EmitVFPddd(Condition cond,
+                  int32_t opcode,
+                  DRegister dd,
+                  DRegister dn,
+                  DRegister dm);
+
+  void EmitVFPsd(Condition cond,
+                 int32_t opcode,
+                 SRegister sd,
+                 DRegister dm);
+
+  void EmitVFPds(Condition cond,
+                 int32_t opcode,
+                 DRegister dd,
+                 SRegister sm);
+
+  void EmitVPushPop(uint32_t reg, int nregs, bool push, bool dbl, Condition cond);
+
+  void EmitBranch(Condition cond, Label* label, bool link, bool x);
+  static int32_t EncodeBranchOffset(int32_t offset, int32_t inst);
+  static int DecodeBranchOffset(int32_t inst);
+  int32_t EncodeTstOffset(int offset, int32_t inst);
+  int DecodeTstOffset(int32_t inst);
+
+  // R0-R7 are the "low" registers, generally the only ones addressable by
+  // 16 bit thumb encodings.
+  bool IsLowRegister(Register r) {
+    return r < R8;
+  }
+
+  // R8 and above are the "high" registers.
+  bool IsHighRegister(Register r) {
+    return !IsLowRegister(r);
+  }
+
+  bool force_32bit_;      // Force the assembler to use 32 bit thumb2 instructions.
+
+  // IfThen conditions.  Used to check that conditional instructions match the preceding IT.
+  Condition it_conditions_[4];
+  uint8_t it_cond_index_;
+  Condition next_condition_;
+
+  void SetItCondition(ItState s, Condition cond, uint8_t index);
+
+  // Check that the condition of the current instruction matches the expected
+  // condition slot of the active IT block, then advance to the next slot
+  // (or back to AL once the block is exhausted).
+  void CheckCondition(Condition cond) {
+    CHECK_EQ(cond, next_condition_);
+
+    // Move to the next condition if there is one.
+    if (it_cond_index_ < 3) {
+      ++it_cond_index_;
+      next_condition_ = it_conditions_[it_cond_index_];
+    } else {
+      next_condition_ = AL;
+    }
+  }
+
+  // As CheckCondition, but additionally requires that this instruction is
+  // the last one of its IT block (the following slot must be AL).
+  void CheckConditionLastIt(Condition cond) {
+    if (it_cond_index_ < 3) {
+      // Check that the next condition is AL.  This means that the
+      // current condition is the last in the IT block.
+      CHECK_EQ(it_conditions_[it_cond_index_ + 1], AL);
+    }
+    CheckCondition(cond);
+  }
+
+  // Branches.
+  //
+  // The thumb2 architecture allows branches to be either 16 or 32 bit instructions.  This
+  // depends on both the type of branch and the offset to which it is branching.  When
+  // generating code for branches we don't know the size before hand (if the branch is
+  // going forward, because we haven't seen the target address yet), so we need to assume
+  // that it is going to be one of 16 or 32 bits.  When we know the target (the label is 'bound')
+  // we can determine the actual size of the branch.  However, if we had guessed wrong before
+  // we knew the target there will be no room in the instruction sequence for the new
+  // instruction (assume that we never decrease the size of a branch).
+  //
+  // To handle this, we keep a record of every branch in the program.  The actual instruction
+  // encoding for these is delayed until we know the final size of every branch.  When we
+  // bind a label to a branch (we then know the target address) we determine if the branch
+  // has changed size.  If it has we need to move all the instructions in the buffer after
+  // the branch point forward by the change in size of the branch.  This will create a gap
+  // in the code big enough for the new branch encoding.  However, since we have moved
+  // a chunk of code we need to relocate the branches in that code to their new address.
+  //
+  // Creating a hole in the code for the new branch encoding might cause another branch that was
+  // 16 bits to become 32 bits, so we need to find this in another pass.
+  //
+  // We also need to deal with a cbz/cbnz instruction that becomes too big for its offset
+  // range.  We do this by converting it to two instructions:
+  //     cmp Rn, #0
+  //     b<cond> target
+  // But we also need to handle the case where the conditional branch is out of range and
+  // becomes a 32 bit conditional branch.
+  //
+  // All branches have a 'branch id' which is a 16 bit unsigned number used to identify
+  // the branch.  Unresolved labels use the branch id to link to the next unresolved branch.
+
+  class Branch {
+   public:
+    // Branch type.
+    enum Type {
+      kUnconditional,             // B.
+      kConditional,               // B<cond>.
+      kCompareAndBranchZero,      // cbz.
+      kCompareAndBranchNonZero,   // cbnz.
+      kUnconditionalLink,         // BL.
+      kUnconditionalLinkX,        // BLX.
+      kUnconditionalX             // BX.
+    };
+
+    // Calculated size of branch instruction based on type and offset.
+    enum Size {
+      k16Bit,
+      k32Bit
+    };
+
+    // Unresolved branch possibly with a condition.  The size is provisionally
+    // calculated from the type alone; it may grow once the target is known.
+    Branch(const Thumb2Assembler* assembler, Type type, uint32_t location, Condition cond = AL) :
+        assembler_(assembler), type_(type), location_(location),
+        target_(kUnresolved),
+        cond_(cond), rn_(R0) {
+      CHECK(!IsCompareAndBranch());  // cbz/cbnz must use the register constructor.
+      size_ = CalculateSize();
+    }
+
+    // Unresolved compare-and-branch (cbz/cbnz) instruction with a register.
+    // These are always unconditional (cond_ is AL).
+    Branch(const Thumb2Assembler* assembler, Type type, uint32_t location, Register rn) :
+        assembler_(assembler), type_(type), location_(location),
+        target_(kUnresolved), cond_(AL), rn_(rn) {
+      CHECK(IsCompareAndBranch());
+      size_ = CalculateSize();
+    }
+
+    // Resolved branch (can't be compare-and-branch) with a target and possibly a condition.
+    Branch(const Thumb2Assembler* assembler, Type type, uint32_t location, uint32_t target,
+           Condition cond = AL) :
+           assembler_(assembler), type_(type), location_(location),
+           target_(target), cond_(cond), rn_(R0) {
+      CHECK(!IsCompareAndBranch());
+      // Resolved branch.
+      size_ = CalculateSize();
+    }
+
+    bool IsCompareAndBranch() const {
+      return type_ == kCompareAndBranchNonZero || type_ == kCompareAndBranchZero;
+    }
+
+    // Resolve a branch when the target is known.  If this causes the
+    // size of the branch to change return true.  Otherwise return false.
+    bool Resolve(uint32_t target) {
+      target_ = target;
+      Size newsize = CalculateSize();
+      if (size_ != newsize) {
+        size_ = newsize;
+        return true;
+      }
+      return false;
+    }
+
+    // Move a cbz/cbnz branch.  This is always forward.
+    void Move(int32_t delta) {
+      CHECK(IsCompareAndBranch());
+      CHECK_GT(delta, 0);
+      location_ += delta;
+      target_ += delta;
+    }
+
+    // Relocate a branch by a given delta.  This changed the location and
+    // target if they need to be changed.  It also recalculates the
+    // size of the branch instruction.  It returns true if the branch
+    // has changed size.
+    bool Relocate(uint32_t oldlocation, int32_t delta) {
+      if (location_ > oldlocation) {
+        location_ += delta;
+      }
+      if (target_ != kUnresolved) {
+        if (target_ > oldlocation) {
+          target_ += delta;
+        }
+      } else {
+        return false;       // Don't know the size yet.
+      }
+
+      // Calculate the new size.
+      Size newsize = CalculateSize();
+      if (size_ != newsize) {
+        size_ = newsize;
+        return true;
+      }
+      return false;
+    }
+
+    Size GetSize() const {
+      return size_;
+    }
+
+    Type GetType() const {
+      return type_;
+    }
+
+    uint32_t GetLocation() const {
+      return location_;
+    }
+
+    // Emit the branch instruction into the assembler buffer.  This does the
+    // encoding into the thumb instruction.
+    void Emit(AssemblerBuffer* buffer) const;
+
+    // Reset the type and condition to those given.  This used for
+    // cbz/cbnz instructions when they are converted to cmp/b<cond>
+    void ResetTypeAndCondition(Type type, Condition cond) {
+      CHECK(IsCompareAndBranch());
+      CHECK(cond == EQ || cond == NE);
+      type_ = type;
+      cond_ = cond;
+    }
+
+    Register GetRegister() const {
+      return rn_;
+    }
+
+    void ResetSize(Size size) {
+      size_ = size;
+    }
+
+   private:
+    // Calculate the size of the branch instruction based on its type and offset.
+    Size CalculateSize() const {
+      if (target_ == kUnresolved) {
+        if (assembler_->IsForced32Bit() && (type_ == kUnconditional || type_ == kConditional)) {
+          return k32Bit;
+        }
+        return k16Bit;
+      }
+      int32_t delta = target_ - location_ - 4;
+      if (delta < 0) {
+        delta = -delta;
+      }
+      switch (type_) {
+        case kUnconditional:
+          if (assembler_->IsForced32Bit() || delta >= (1 << 11)) {
+            return k32Bit;
+          } else {
+            return k16Bit;
+          }
+        case kConditional:
+          if (assembler_->IsForced32Bit() || delta >= (1 << 8)) {
+            return k32Bit;
+          } else {
+            return k16Bit;
+          }
+        case kCompareAndBranchZero:
+        case kCompareAndBranchNonZero:
+          if (delta >= (1 << 7)) {
+            return k32Bit;      // Will cause this branch to become invalid.
+          }
+          return k16Bit;
+
+        case kUnconditionalX:
+        case kUnconditionalLinkX:
+          return k16Bit;
+        case kUnconditionalLink:
+          return k32Bit;
+      }
+      LOG(FATAL) << "Cannot reach";
+      return k16Bit;
+    }
+
+    static constexpr uint32_t kUnresolved = 0xffffffff;     // Value for target_ for unresolved.
+    const Thumb2Assembler* assembler_;
+    Type type_;
+    uint32_t location_;     // Offset into assembler buffer in bytes.
+    uint32_t target_;       // Offset into assembler buffer in bytes.
+    Size size_;
+    Condition cond_;
+    const Register rn_;
+  };
+
+  // All branches in the buffer, indexed by branch id.  Elements are raw
+  // pointers allocated with new; no delete is visible in this header --
+  // presumably they live for the lifetime of the assembler (TODO confirm
+  // ownership/cleanup).
+  std::vector<Branch*> branches_;
+
+  // Add a resolved branch (target already known) and return the size
+  // computed for its encoding (16 or 32 bit).
+  Branch::Size AddBranch(Branch::Type type, uint32_t location, uint32_t target,
+                         Condition cond = AL) {
+    Branch* branch = new Branch(this, type, location, target, cond);
+    branches_.push_back(branch);
+    return branch->GetSize();
+  }
+
+  // Add a compare and branch (cbz/cbnz with a register) and return its id.
+  // Branch ids are 16 bit unsigned values (see the comment above), so guard
+  // against silently truncating the index: a wrapped id would make GetBranch
+  // return the wrong branch later.
+  uint16_t AddBranch(Branch::Type type, uint32_t location, Register rn) {
+    branches_.push_back(new Branch(this, type, location, rn));
+    CHECK_LT(branches_.size() - 1, 65536u);   // Id must fit in 16 bits.
+    return branches_.size() - 1;
+  }
+
+  // Add an unresolved branch and return its id.
+  uint16_t AddBranch(Branch::Type type, uint32_t location, Condition cond = AL) {
+    branches_.push_back(new Branch(this, type, location, cond));
+    return branches_.size() - 1;
+  }
+
+  // Look up a branch by id; yields nullptr for an out of range id.
+  Branch* GetBranch(uint16_t branchid) {
+    return branchid < branches_.size() ? branches_[branchid] : nullptr;
+  }
+
+  // Emit all recorded branches (their final encodings) into the buffer.
+  void EmitBranches();
+  // Open a hole of 'size' bytes at 'location' so a branch can grow from a
+  // 16 bit to a 32 bit encoding (see the relocation comments above).
+  void MakeHoleForBranch(uint32_t location, uint32_t size);
+};
+
+}  // namespace arm
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_ARM_ASSEMBLER_THUMB2_H_
diff --git a/compiler/utils/arm/constants_arm.h b/compiler/utils/arm/constants_arm.h
index 058f945..3e4cd43 100644
--- a/compiler/utils/arm/constants_arm.h
+++ b/compiler/utils/arm/constants_arm.h
@@ -155,7 +155,8 @@
   LSR = 1,  // Logical shift right
   ASR = 2,  // Arithmetic shift right
   ROR = 3,  // Rotate right
-  kMaxShift = 4
+  RRX = 4,  // Rotate right with extend.
+  kMaxShift
 };
 
 
@@ -210,7 +211,6 @@
   kBranchOffsetMask = 0x00ffffff
 };
 
-
 // Size (in bytes) of registers.
 const int kRegisterSize = 4;