Use the thumb2 assembler for the optimizing compiler.

Change-Id: I2b058f4433504dc3299c06f5cb0b5ab12f34aa82
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 73c2d48..9d17fb1 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -97,7 +97,8 @@
     : CodeGenerator(graph, kNumberOfRegIds),
       location_builder_(graph, this),
       instruction_visitor_(graph, this),
-      move_resolver_(graph->GetArena(), this) {}
+      move_resolver_(graph->GetArena(), this),
+      assembler_(true) {}
 
 size_t CodeGeneratorARM::FrameEntrySpillSize() const {
   return kNumberOfPushedRegistersAtEntry * kArmWordSize;
@@ -948,9 +949,11 @@
              ShifterOperand(right.AsRegisterPairHigh()));  // Signed compare.
       __ b(&less, LT);
       __ b(&greater, GT);
+      // Do LoadImmediate before the `cmp` below, as LoadImmediate might affect
+      // the status flags that the following branches rely on.
+      __ LoadImmediate(output, 0);
       __ cmp(left.AsRegisterPairLow(),
              ShifterOperand(right.AsRegisterPairLow()));  // Unsigned compare.
-      __ LoadImmediate(output, 0);
       __ b(&done, EQ);
       __ b(&less, CC);
 
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 1b5974f..3cc16aa 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -20,7 +20,7 @@
 #include "code_generator.h"
 #include "nodes.h"
 #include "parallel_move_resolver.h"
-#include "utils/arm/assembler_arm32.h"
+#include "utils/arm/assembler_thumb2.h"
 
 namespace art {
 namespace arm {
@@ -172,7 +172,7 @@
   }
 
   virtual InstructionSet GetInstructionSet() const OVERRIDE {
-    return InstructionSet::kArm;
+    return InstructionSet::kThumb2;
   }
 
  private:
@@ -184,7 +184,7 @@
   LocationsBuilderARM location_builder_;
   InstructionCodeGeneratorARM instruction_visitor_;
   ParallelMoveResolverARM move_resolver_;
-  Arm32Assembler assembler_;
+  Thumb2Assembler assembler_;
 
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM);
 };
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index 7ec0c84..bfdc30f 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -48,10 +48,17 @@
 };
 
 #if defined(__i386__) || defined(__arm__) || defined(__x86_64__)
-static void Run(const InternalCodeAllocator& allocator, bool has_result, int32_t expected) {
+static void Run(const InternalCodeAllocator& allocator,
+                const CodeGenerator& codegen,
+                bool has_result,
+                int32_t expected) {
   typedef int32_t (*fptr)();
   CommonCompilerTest::MakeExecutable(allocator.GetMemory(), allocator.GetSize());
   fptr f = reinterpret_cast<fptr>(allocator.GetMemory());
+  if (codegen.GetInstructionSet() == kThumb2) {
+    // For Thumb2 code, the bottom bit of the entry point address must be set.
+    f = reinterpret_cast<fptr>(reinterpret_cast<uintptr_t>(f) + 1);
+  }
   int32_t result = f();
   if (has_result) {
     CHECK_EQ(result, expected);
@@ -71,19 +78,19 @@
   CodeGenerator* codegen = CodeGenerator::Create(&arena, graph, kX86);
   codegen->CompileBaseline(&allocator);
 #if defined(__i386__)
-  Run(allocator, has_result, expected);
+  Run(allocator, *codegen, has_result, expected);
 #endif
 
   codegen = CodeGenerator::Create(&arena, graph, kArm);
   codegen->CompileBaseline(&allocator);
 #if defined(__arm__)
-  Run(allocator, has_result, expected);
+  Run(allocator, *codegen, has_result, expected);
 #endif
 
   codegen = CodeGenerator::Create(&arena, graph, kX86_64);
   codegen->CompileBaseline(&allocator);
 #if defined(__x86_64__)
-  Run(allocator, has_result, expected);
+  Run(allocator, *codegen, has_result, expected);
 #endif
 }
 
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index b621e51..8a5077b 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -79,13 +79,14 @@
                                                jobject class_loader,
                                                const DexFile& dex_file) const {
   InstructionSet instruction_set = GetCompilerDriver()->GetInstructionSet();
-  // The optimizing compiler currently does not have a Thumb2 assembler.
-  if (instruction_set == kThumb2) {
-    instruction_set = kArm;
+  // Always use the thumb2 assembler: some runtime functionality (like implicit stack
+  // overflow checks) assumes thumb2.
+  if (instruction_set == kArm) {
+    instruction_set = kThumb2;
   }
 
   // Do not attempt to compile on architectures we do not support.
-  if (instruction_set != kX86 && instruction_set != kX86_64 && instruction_set != kArm) {
+  if (instruction_set != kX86 && instruction_set != kX86_64 && instruction_set != kThumb2) {
     return nullptr;
   }