Merge "ART: Allow to compile interpret-only mips64 files"
diff --git a/compiler/Android.mk b/compiler/Android.mk
index ad9b266..f2f4550 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -98,6 +98,8 @@
 	optimizing/gvn.cc \
 	optimizing/inliner.cc \
 	optimizing/instruction_simplifier.cc \
+	optimizing/intrinsics.cc \
+	optimizing/intrinsics_x86_64.cc \
 	optimizing/locations.cc \
 	optimizing/nodes.cc \
 	optimizing/optimization.cc \
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
index 7df71f5..96f8e0c 100644
--- a/compiler/common_compiler_test.cc
+++ b/compiler/common_compiler_test.cc
@@ -164,7 +164,7 @@
                                               compiler_kind, instruction_set,
                                               instruction_set_features_.get(),
                                               true, new std::set<std::string>, nullptr,
-                                              2, true, true, timer_.get(), -1, ""));
+                                              2, true, true, "", timer_.get(), -1, ""));
   }
   // We typically don't generate an image in unit tests, disable this optimization by default.
   compiler_driver_->SetSupportBootImageFixup(false);
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc
index 3039852..84c0d93 100644
--- a/compiler/dex/quick/dex_file_method_inliner.cc
+++ b/compiler/dex/quick/dex_file_method_inliner.cc
@@ -293,9 +293,9 @@
     { { kClassCache ## c, kNameCache ## n, kProtoCache ## p }, { o, kInlineIntrinsic, { d } } }
 
     INTRINSIC(JavaLangDouble, DoubleToRawLongBits, D_J, kIntrinsicDoubleCvt, 0),
-    INTRINSIC(JavaLangDouble, LongBitsToDouble, J_D, kIntrinsicDoubleCvt, 0),
+    INTRINSIC(JavaLangDouble, LongBitsToDouble, J_D, kIntrinsicDoubleCvt, kIntrinsicFlagToFloatingPoint),
     INTRINSIC(JavaLangFloat, FloatToRawIntBits, F_I, kIntrinsicFloatCvt, 0),
-    INTRINSIC(JavaLangFloat, IntBitsToFloat, I_F, kIntrinsicFloatCvt, 0),
+    INTRINSIC(JavaLangFloat, IntBitsToFloat, I_F, kIntrinsicFloatCvt, kIntrinsicFlagToFloatingPoint),
 
     INTRINSIC(JavaLangInteger, ReverseBytes, I_I, kIntrinsicReverseBytes, k32),
     INTRINSIC(JavaLangLong, ReverseBytes, J_J, kIntrinsicReverseBytes, k64),
diff --git a/compiler/dex/quick/mips/assemble_mips.cc b/compiler/dex/quick/mips/assemble_mips.cc
index c48833b..4265ae1 100644
--- a/compiler/dex/quick/mips/assemble_mips.cc
+++ b/compiler/dex/quick/mips/assemble_mips.cc
@@ -482,7 +482,7 @@
   if (!unconditional) {
     hop_target = RawLIR(dalvik_offset, kPseudoTargetLabel);
     LIR* hop_branch = RawLIR(dalvik_offset, opcode, lir->operands[0],
-                            lir->operands[1], 0, 0, 0, hop_target);
+                             lir->operands[1], 0, 0, 0, hop_target);
     InsertLIRBefore(lir, hop_branch);
   }
   LIR* curr_pc = RawLIR(dalvik_offset, kMipsCurrPC);
diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h
index 8f976df..9c3ce7b 100644
--- a/compiler/dex/quick/mips/codegen_mips.h
+++ b/compiler/dex/quick/mips/codegen_mips.h
@@ -216,7 +216,8 @@
     void ConvertShortToLongBranch(LIR* lir);
     RegLocation GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
                           RegLocation rl_src2, bool is_div, int flags) OVERRIDE;
-    RegLocation GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div) OVERRIDE;
+    RegLocation GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div)
+        OVERRIDE;
 };
 
 }  // namespace art
diff --git a/compiler/dex/quick/mips/int_mips.cc b/compiler/dex/quick/mips/int_mips.cc
index 0778c3b..aabef60 100644
--- a/compiler/dex/quick/mips/int_mips.cc
+++ b/compiler/dex/quick/mips/int_mips.cc
@@ -172,7 +172,7 @@
   if (r_dest.IsFloat() || r_src.IsFloat())
     return OpFpRegCopy(r_dest, r_src);
   LIR* res = RawLIR(current_dalvik_offset_, kMipsMove,
-            r_dest.GetReg(), r_src.GetReg());
+                    r_dest.GetReg(), r_src.GetReg());
   if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) {
     res->flags.is_nop = true;
   }
@@ -194,7 +194,7 @@
       if (src_fp) {
         OpRegCopy(r_dest, r_src);
       } else {
-         /* note the operands are swapped for the mtc1 instr */
+        /* note the operands are swapped for the mtc1 instr */
         NewLIR2(kMipsMtc1, r_src.GetLowReg(), r_dest.GetLowReg());
         NewLIR2(kMipsMtc1, r_src.GetHighReg(), r_dest.GetHighReg());
       }
@@ -240,7 +240,7 @@
 }
 
 RegLocation MipsMir2Lir::GenDivRem(RegLocation rl_dest, RegStorage reg1, RegStorage reg2,
-                                    bool is_div) {
+                                   bool is_div) {
   NewLIR2(kMipsDiv, reg1.GetReg(), reg2.GetReg());
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   if (is_div) {
@@ -252,7 +252,7 @@
 }
 
 RegLocation MipsMir2Lir::GenDivRemLit(RegLocation rl_dest, RegStorage reg1, int lit,
-                                       bool is_div) {
+                                      bool is_div) {
   RegStorage t_reg = AllocTemp();
   NewLIR3(kMipsAddiu, t_reg.GetReg(), rZERO, lit);
   NewLIR2(kMipsDiv, reg1.GetReg(), t_reg.GetReg());
@@ -501,7 +501,7 @@
  * Generate array load
  */
 void MipsMir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
-                          RegLocation rl_index, RegLocation rl_dest, int scale) {
+                              RegLocation rl_index, RegLocation rl_dest, int scale) {
   RegisterClass reg_class = RegClassBySize(size);
   int len_offset = mirror::Array::LengthOffset().Int32Value();
   int data_offset;
@@ -570,7 +570,7 @@
  *
  */
 void MipsMir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
-                          RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) {
+                              RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) {
   RegisterClass reg_class = RegClassBySize(size);
   int len_offset = mirror::Array::LengthOffset().Int32Value();
   int data_offset;
@@ -632,7 +632,7 @@
   } else {
     rl_src = LoadValue(rl_src, reg_class);
     if (needs_range_check) {
-       GenArrayBoundsCheck(rl_index.reg, reg_len);
+      GenArrayBoundsCheck(rl_index.reg, reg_len);
       FreeTemp(reg_len);
     }
     StoreBaseIndexed(reg_ptr, rl_index.reg, rl_src.reg, scale, size);
diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc
index c22ba04..c819903 100644
--- a/compiler/dex/quick/mips/target_mips.cc
+++ b/compiler/dex/quick/mips/target_mips.cc
@@ -89,9 +89,9 @@
 
 // Convert k64BitSolo into k64BitPair
 RegStorage MipsMir2Lir::Solo64ToPair64(RegStorage reg) {
-    DCHECK(reg.IsDouble());
-    int reg_num = (reg.GetRegNum() & ~1) | RegStorage::kFloatingPoint;
-    return RegStorage(RegStorage::k64BitPair, reg_num, reg_num + 1);
+  DCHECK(reg.IsDouble());
+  int reg_num = (reg.GetRegNum() & ~1) | RegStorage::kFloatingPoint;
+  return RegStorage(RegStorage::k64BitPair, reg_num, reg_num + 1);
 }
 
 // Return a target-dependent special register.
@@ -223,78 +223,78 @@
       if (nc == '!') {
         strcpy(tbuf, "!");
       } else {
-         DCHECK_LT(fmt, fmt_end);
-         DCHECK_LT(static_cast<unsigned>(nc-'0'), 4u);
-         operand = lir->operands[nc-'0'];
-         switch (*fmt++) {
-           case 'b':
-             strcpy(tbuf, "0000");
-             for (i = 3; i >= 0; i--) {
-               tbuf[i] += operand & 1;
-               operand >>= 1;
-             }
-             break;
-           case 's':
-             snprintf(tbuf, arraysize(tbuf), "$f%d", RegStorage::RegNum(operand));
-             break;
-           case 'S':
-             DCHECK_EQ(RegStorage::RegNum(operand) & 1, 0);
-             snprintf(tbuf, arraysize(tbuf), "$f%d", RegStorage::RegNum(operand));
-             break;
-           case 'h':
-             snprintf(tbuf, arraysize(tbuf), "%04x", operand);
-             break;
-           case 'M':
-           case 'd':
-             snprintf(tbuf, arraysize(tbuf), "%d", operand);
-             break;
-           case 'D':
-             snprintf(tbuf, arraysize(tbuf), "%d", operand+1);
-             break;
-           case 'E':
-             snprintf(tbuf, arraysize(tbuf), "%d", operand*4);
-             break;
-           case 'F':
-             snprintf(tbuf, arraysize(tbuf), "%d", operand*2);
-             break;
-           case 't':
-             snprintf(tbuf, arraysize(tbuf), "0x%08" PRIxPTR " (L%p)",
-                 reinterpret_cast<uintptr_t>(base_addr) + lir->offset + 4 + (operand << 1),
-                 lir->target);
-             break;
-           case 'T':
-             snprintf(tbuf, arraysize(tbuf), "0x%08x", operand << 2);
-             break;
-           case 'u': {
-             int offset_1 = lir->operands[0];
-             int offset_2 = NEXT_LIR(lir)->operands[0];
-             uintptr_t target =
-                 (((reinterpret_cast<uintptr_t>(base_addr) + lir->offset + 4) & ~3) +
-                 (offset_1 << 21 >> 9) + (offset_2 << 1)) & 0xfffffffc;
-             snprintf(tbuf, arraysize(tbuf), "%p", reinterpret_cast<void*>(target));
-             break;
+        DCHECK_LT(fmt, fmt_end);
+        DCHECK_LT(static_cast<unsigned>(nc-'0'), 4u);
+        operand = lir->operands[nc-'0'];
+        switch (*fmt++) {
+          case 'b':
+            strcpy(tbuf, "0000");
+            for (i = 3; i >= 0; i--) {
+              tbuf[i] += operand & 1;
+              operand >>= 1;
+            }
+            break;
+          case 's':
+            snprintf(tbuf, arraysize(tbuf), "$f%d", RegStorage::RegNum(operand));
+            break;
+          case 'S':
+            DCHECK_EQ(RegStorage::RegNum(operand) & 1, 0);
+            snprintf(tbuf, arraysize(tbuf), "$f%d", RegStorage::RegNum(operand));
+            break;
+          case 'h':
+            snprintf(tbuf, arraysize(tbuf), "%04x", operand);
+            break;
+          case 'M':
+          case 'd':
+            snprintf(tbuf, arraysize(tbuf), "%d", operand);
+            break;
+          case 'D':
+            snprintf(tbuf, arraysize(tbuf), "%d", operand+1);
+            break;
+          case 'E':
+            snprintf(tbuf, arraysize(tbuf), "%d", operand*4);
+            break;
+          case 'F':
+            snprintf(tbuf, arraysize(tbuf), "%d", operand*2);
+            break;
+          case 't':
+            snprintf(tbuf, arraysize(tbuf), "0x%08" PRIxPTR " (L%p)",
+                     reinterpret_cast<uintptr_t>(base_addr) + lir->offset + 4 + (operand << 1),
+                     lir->target);
+            break;
+          case 'T':
+            snprintf(tbuf, arraysize(tbuf), "0x%08x", operand << 2);
+            break;
+          case 'u': {
+            int offset_1 = lir->operands[0];
+            int offset_2 = NEXT_LIR(lir)->operands[0];
+            uintptr_t target =
+                (((reinterpret_cast<uintptr_t>(base_addr) + lir->offset + 4) & ~3) +
+                    (offset_1 << 21 >> 9) + (offset_2 << 1)) & 0xfffffffc;
+            snprintf(tbuf, arraysize(tbuf), "%p", reinterpret_cast<void*>(target));
+            break;
           }
 
-           /* Nothing to print for BLX_2 */
-           case 'v':
-             strcpy(tbuf, "see above");
-             break;
-           case 'r':
-             DCHECK(operand >= 0 && operand < MIPS_REG_COUNT);
-             strcpy(tbuf, mips_reg_name[operand]);
-             break;
-           case 'N':
-             // Placeholder for delay slot handling
-             strcpy(tbuf, ";  nop");
-             break;
-           default:
-             strcpy(tbuf, "DecodeError");
-             break;
-         }
-         buf += tbuf;
+          /* Nothing to print for BLX_2 */
+          case 'v':
+            strcpy(tbuf, "see above");
+            break;
+          case 'r':
+            DCHECK(operand >= 0 && operand < MIPS_REG_COUNT);
+            strcpy(tbuf, mips_reg_name[operand]);
+            break;
+          case 'N':
+            // Placeholder for delay slot handling
+            strcpy(tbuf, ";  nop");
+            break;
+          default:
+            strcpy(tbuf, "DecodeError");
+            break;
+        }
+        buf += tbuf;
       }
     } else {
-       buf += *fmt++;
+      buf += *fmt++;
     }
   }
   return buf;
diff --git a/compiler/dex/quick/mips/utility_mips.cc b/compiler/dex/quick/mips/utility_mips.cc
index adb9270..15fc69d 100644
--- a/compiler/dex/quick/mips/utility_mips.cc
+++ b/compiler/dex/quick/mips/utility_mips.cc
@@ -228,17 +228,17 @@
       }
       break;
     case kOpLsl:
-        DCHECK(value >= 0 && value <= 31);
-        opcode = kMipsSll;
-        break;
+      DCHECK(value >= 0 && value <= 31);
+      opcode = kMipsSll;
+      break;
     case kOpLsr:
-        DCHECK(value >= 0 && value <= 31);
-        opcode = kMipsSrl;
-        break;
+      DCHECK(value >= 0 && value <= 31);
+      opcode = kMipsSrl;
+      break;
     case kOpAsr:
-        DCHECK(value >= 0 && value <= 31);
-        opcode = kMipsSra;
-        break;
+      DCHECK(value >= 0 && value <= 31);
+      opcode = kMipsSra;
+      break;
     case kOpAnd:
       if (IS_UIMM16((value))) {
         opcode = kMipsAndi;
@@ -324,7 +324,7 @@
       }
       return res;
     case kOp2Char:
-       return NewLIR3(kMipsAndi, r_dest_src1.GetReg(), r_src2.GetReg(), 0xFFFF);
+      return NewLIR3(kMipsAndi, r_dest_src1.GetReg(), r_src2.GetReg(), 0xFFFF);
     default:
       LOG(FATAL) << "Bad case in OpRegReg";
       UNREACHABLE();
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 968c77e..56bed39 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -339,7 +339,8 @@
                                const InstructionSetFeatures* instruction_set_features,
                                bool image, std::set<std::string>* image_classes,
                                std::set<std::string>* compiled_classes, size_t thread_count,
-                               bool dump_stats, bool dump_passes, CumulativeLogger* timer,
+                               bool dump_stats, bool dump_passes,
+                               const std::string& dump_cfg_file_name, CumulativeLogger* timer,
                                int swap_fd, const std::string& profile_file)
     : swap_space_(swap_fd == -1 ? nullptr : new SwapSpace(swap_fd, 10 * MB)),
       swap_space_allocator_(new SwapAllocator<void>(swap_space_.get())),
@@ -361,6 +362,7 @@
       stats_(new AOTCompilationStats),
       dump_stats_(dump_stats),
       dump_passes_(dump_passes),
+      dump_cfg_file_name_(dump_cfg_file_name),
       timings_logger_(timer),
       compiler_context_(nullptr),
       support_boot_image_fixup_(instruction_set != kMips && instruction_set != kMips64),
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 7ddc32c..11b4329 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -97,6 +97,7 @@
                           bool image, std::set<std::string>* image_classes,
                           std::set<std::string>* compiled_classes,
                           size_t thread_count, bool dump_stats, bool dump_passes,
+                          const std::string& dump_cfg_file_name,
                           CumulativeLogger* timer, int swap_fd,
                           const std::string& profile_file);
 
@@ -371,6 +372,10 @@
     return dump_passes_;
   }
 
+  const std::string& GetDumpCfgFileName() const {
+    return dump_cfg_file_name_;
+  }
+
   CumulativeLogger* GetTimingsLogger() const {
     return timings_logger_;
   }
@@ -542,6 +547,7 @@
 
   bool dump_stats_;
   const bool dump_passes_;
+  const std::string& dump_cfg_file_name_;
 
   CumulativeLogger* const timings_logger_;
 
diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc
index 281e3fe..f513ea8 100644
--- a/compiler/jni/jni_compiler_test.cc
+++ b/compiler/jni/jni_compiler_test.cc
@@ -973,6 +973,9 @@
 }
 
 void JniCompilerTest::UpcallArgumentTypeChecking_InstanceImpl() {
+  // This will lead to error messages in the log.
+  ScopedLogSeverity sls(LogSeverity::FATAL);
+
   SetUpForTest(false, "instanceMethodThatShouldTakeClass", "(ILjava/lang/Class;)V",
                reinterpret_cast<void*>(&Java_MyClassNatives_instanceMethodThatShouldTakeClass));
 
@@ -985,6 +988,9 @@
 JNI_TEST(UpcallArgumentTypeChecking_Instance)
 
 void JniCompilerTest::UpcallArgumentTypeChecking_StaticImpl() {
+  // This will lead to error messages in the log.
+  ScopedLogSeverity sls(LogSeverity::FATAL);
+
   SetUpForTest(true, "staticMethodThatShouldTakeClass", "(ILjava/lang/Class;)V",
                reinterpret_cast<void*>(&Java_MyClassNatives_staticMethodThatShouldTakeClass));
 
@@ -1475,6 +1481,9 @@
 JNI_TEST(MaxParamNumber)
 
 void JniCompilerTest::WithoutImplementationImpl() {
+  // This will lead to error messages in the log.
+  ScopedLogSeverity sls(LogSeverity::FATAL);
+
   SetUpForTest(false, "withoutImplementation", "()V", nullptr);
 
   env_->CallVoidMethod(jobj_, jmethod_);
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index b3ab370..d3d2055 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -92,7 +92,7 @@
                                             method_inliner_map_.get(),
                                             compiler_kind, insn_set,
                                             insn_features.get(), false, nullptr, nullptr, 2, true,
-                                            true, timer_.get(), -1, ""));
+                                            true, "", timer_.get(), -1, ""));
   jobject class_loader = nullptr;
   if (kCompile) {
     TimingLogger timings2("OatTest::WriteRead", false, false);
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index f6ca6c7..9c2facb 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -604,7 +604,7 @@
   HInvoke* invoke = nullptr;
   if (optimized_invoke_type == kVirtual) {
     invoke = new (arena_) HInvokeVirtual(
-        arena_, number_of_arguments, return_type, dex_pc, table_index);
+        arena_, number_of_arguments, return_type, dex_pc, method_idx, table_index);
   } else if (optimized_invoke_type == kInterface) {
     invoke = new (arena_) HInvokeInterface(
         arena_, number_of_arguments, return_type, dex_pc, method_idx, table_index);
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 0c1ff9b..9e89070 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -54,6 +54,7 @@
                      + GetGraph()->GetTemporariesVRegSlots()
                      + 1 /* filler */,
                    0, /* the baseline compiler does not have live registers at slow path */
+                   0, /* the baseline compiler does not have live registers at slow path */
                    GetGraph()->GetMaximumNumberOfOutVRegs()
                      + 1 /* current method */);
   GenerateFrameEntry();
@@ -136,14 +137,16 @@
 }
 
 void CodeGenerator::ComputeFrameSize(size_t number_of_spill_slots,
-                                     size_t maximum_number_of_live_registers,
+                                     size_t maximum_number_of_live_core_registers,
+                                     size_t maximum_number_of_live_fp_registers,
                                      size_t number_of_out_slots) {
   first_register_slot_in_slow_path_ = (number_of_out_slots + number_of_spill_slots) * kVRegSize;
 
   SetFrameSize(RoundUp(
       number_of_spill_slots * kVRegSize
       + number_of_out_slots * kVRegSize
-      + maximum_number_of_live_registers * GetWordSize()
+      + maximum_number_of_live_core_registers * GetWordSize()
+      + maximum_number_of_live_fp_registers * GetFloatingPointSpillSlotSize()
       + FrameEntrySpillSize(),
       kStackAlignment));
 }
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 8d28f3d..88e50b6 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -109,9 +109,11 @@
   virtual HGraphVisitor* GetInstructionVisitor() = 0;
   virtual Assembler* GetAssembler() = 0;
   virtual size_t GetWordSize() const = 0;
+  virtual size_t GetFloatingPointSpillSlotSize() const = 0;
   virtual uintptr_t GetAddressOf(HBasicBlock* block) const = 0;
   void ComputeFrameSize(size_t number_of_spill_slots,
-                        size_t maximum_number_of_live_registers,
+                        size_t maximum_number_of_live_core_registers,
+                        size_t maximum_number_of_live_fp_registers,
                         size_t number_of_out_slots);
   virtual size_t FrameEntrySpillSize() const = 0;
   int32_t GetStackSlot(HLocal* local) const;
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 1862061..c4ba0fd 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -1190,7 +1190,7 @@
       kLoadWord, temp, temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value());
   // temp = temp[index_in_cache]
   __ LoadFromOffset(
-      kLoadWord, temp, temp, CodeGenerator::GetCacheOffset(invoke->GetIndexInDexCache()));
+      kLoadWord, temp, temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex()));
   // LR = temp[offset_of_quick_compiled_code]
   __ LoadFromOffset(kLoadWord, LR, temp,
                      mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 8b29b15..267d9a2 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -169,6 +169,11 @@
     return kArmWordSize;
   }
 
+  size_t GetFloatingPointSpillSlotSize() const OVERRIDE {
+    // Allocated in S registers, which are word sized.
+    return kArmWordSize;
+  }
+
   size_t FrameEntrySpillSize() const OVERRIDE;
 
   HGraphVisitor* GetLocationBuilder() OVERRIDE {
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 7b19f44..6d2c3de 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1975,7 +1975,7 @@
   // Make sure that ArtMethod* is passed in W0 as per the calling convention
   DCHECK(temp.Is(w0));
   size_t index_in_cache = mirror::Array::DataOffset(kHeapRefSize).SizeValue() +
-    invoke->GetIndexInDexCache() * kHeapRefSize;
+    invoke->GetDexMethodIndex() * kHeapRefSize;
 
   // TODO: Implement all kinds of calls:
   // 1) boot -> boot
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index e4da07b..590bc1d 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -191,6 +191,11 @@
     return kArm64WordSize;
   }
 
+  size_t GetFloatingPointSpillSlotSize() const OVERRIDE {
+    // Allocated in D registers, which are word sized.
+    return kArm64WordSize;
+  }
+
   uintptr_t GetAddressOf(HBasicBlock* block) const OVERRIDE {
     vixl::Label* block_entry_label = GetLabelOf(block);
     DCHECK(block_entry_label->IsBound());
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 04e36cc..1a0df44 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -1135,7 +1135,7 @@
   // temp = temp->dex_cache_resolved_methods_;
   __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value()));
   // temp = temp[index_in_cache]
-  __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetIndexInDexCache())));
+  __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex())));
   // (temp + offset_of_quick_compiled_code)()
   __ call(Address(
       temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value()));
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index acde122..2d8adb2 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -166,6 +166,11 @@
     return kX86WordSize;
   }
 
+  size_t GetFloatingPointSpillSlotSize() const OVERRIDE {
+    // 8 bytes == 2 words for each spill.
+    return 2 * kX86WordSize;
+  }
+
   size_t FrameEntrySpillSize() const OVERRIDE;
 
   HGraphVisitor* GetLocationBuilder() OVERRIDE {
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 5fc24f7..3d7f122 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -18,6 +18,8 @@
 
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "gc/accounting/card_table.h"
+#include "intrinsics.h"
+#include "intrinsics_x86_64.h"
 #include "mirror/array-inl.h"
 #include "mirror/art_method.h"
 #include "mirror/class.h"
@@ -61,20 +63,6 @@
 
 #define __ reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler())->
 
-class SlowPathCodeX86_64 : public SlowPathCode {
- public:
-  SlowPathCodeX86_64() : entry_label_(), exit_label_() {}
-
-  Label* GetEntryLabel() { return &entry_label_; }
-  Label* GetExitLabel() { return &exit_label_; }
-
- private:
-  Label entry_label_;
-  Label exit_label_;
-
-  DISALLOW_COPY_AND_ASSIGN(SlowPathCodeX86_64);
-};
-
 class NullCheckSlowPathX86_64 : public SlowPathCodeX86_64 {
  public:
   explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : instruction_(instruction) {}
@@ -375,6 +363,31 @@
   return kEqual;
 }
 
+void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
+                                                     CpuRegister temp) {
+  // All registers are assumed to be correctly set up.
+
+  // TODO: Implement all kinds of calls:
+  // 1) boot -> boot
+  // 2) app -> boot
+  // 3) app -> app
+  //
+  // Currently we implement the app -> app logic, which looks up in the resolve cache.
+
+  // temp = method;
+  LoadCurrentMethod(temp);
+  // temp = temp->dex_cache_resolved_methods_;
+  __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().SizeValue()));
+  // temp = temp[index_in_cache]
+  __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex())));
+  // (temp + offset_of_quick_compiled_code)()
+  __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+      kX86_64WordSize).SizeValue()));
+
+  DCHECK(!IsLeafMethod());
+  RecordPcInfo(invoke, invoke->GetDexPc());
+}
+
 void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const {
   stream << X86_64ManagedRegister::FromCpuRegister(Register(reg));
 }
@@ -1123,30 +1136,31 @@
 }
 
 void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+  IntrinsicLocationsBuilderX86_64 intrinsic(GetGraph()->GetArena());
+  if (intrinsic.TryDispatch(invoke)) {
+    return;
+  }
+
   HandleInvoke(invoke);
 }
 
+static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
+  if (invoke->GetLocations()->Intrinsified()) {
+    IntrinsicCodeGeneratorX86_64 intrinsic(codegen);
+    intrinsic.Dispatch(invoke);
+    return true;
+  }
+  return false;
+}
+
 void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  CpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<CpuRegister>();
-  // TODO: Implement all kinds of calls:
-  // 1) boot -> boot
-  // 2) app -> boot
-  // 3) app -> app
-  //
-  // Currently we implement the app -> app logic, which looks up in the resolve cache.
+  if (TryGenerateIntrinsicCode(invoke, codegen_)) {
+    return;
+  }
 
-  // temp = method;
-  codegen_->LoadCurrentMethod(temp);
-  // temp = temp->dex_cache_resolved_methods_;
-  __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().SizeValue()));
-  // temp = temp[index_in_cache]
-  __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetIndexInDexCache())));
-  // (temp + offset_of_quick_compiled_code)()
-  __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
-      kX86_64WordSize).SizeValue()));
-
-  DCHECK(!codegen_->IsLeafMethod());
-  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+  codegen_->GenerateStaticOrDirectCall(
+      invoke,
+      invoke->GetLocations()->GetTemp(0).AsRegister<CpuRegister>());
 }
 
 void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) {
@@ -1182,10 +1196,19 @@
 }
 
 void LocationsBuilderX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
+  IntrinsicLocationsBuilderX86_64 intrinsic(GetGraph()->GetArena());
+  if (intrinsic.TryDispatch(invoke)) {
+    return;
+  }
+
   HandleInvoke(invoke);
 }
 
 void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
+  if (TryGenerateIntrinsicCode(invoke, codegen_)) {
+    return;
+  }
+
   CpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<CpuRegister>();
   size_t method_offset = mirror::Class::EmbeddedVTableOffset().SizeValue() +
           invoke->GetVTableIndex() * sizeof(mirror::Class::VTableEntry);
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 87f6b0f..c501568 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -36,6 +36,8 @@
 static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
 static constexpr size_t kParameterFloatRegistersLength = arraysize(kParameterFloatRegisters);
 
+static constexpr bool kCoalescedImplicitNullCheck = false;
+
 class InvokeDexCallingConvention : public CallingConvention<Register, FloatRegister> {
  public:
   InvokeDexCallingConvention() : CallingConvention(
@@ -67,7 +69,20 @@
 };
 
 class CodeGeneratorX86_64;
-class SlowPathCodeX86_64;
+
+class SlowPathCodeX86_64 : public SlowPathCode {
+ public:
+  SlowPathCodeX86_64() : entry_label_(), exit_label_() {}
+
+  Label* GetEntryLabel() { return &entry_label_; }
+  Label* GetExitLabel() { return &exit_label_; }
+
+ private:
+  Label entry_label_;
+  Label exit_label_;
+
+  DISALLOW_COPY_AND_ASSIGN(SlowPathCodeX86_64);
+};
 
 class ParallelMoveResolverX86_64 : public ParallelMoveResolver {
  public:
@@ -169,6 +184,10 @@
     return kX86_64WordSize;
   }
 
+  size_t GetFloatingPointSpillSlotSize() const OVERRIDE {
+    return kX86_64WordSize;
+  }
+
   size_t FrameEntrySpillSize() const OVERRIDE;
 
   HGraphVisitor* GetLocationBuilder() OVERRIDE {
@@ -222,6 +241,8 @@
     return false;
   }
 
+  void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, CpuRegister temp);
+
  private:
   // Labels for each block that will be compiled.
   GrowableArray<Label> block_labels_;
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 493d93f..532167c 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -44,10 +44,10 @@
          instr_it.Advance()) {
       HInvokeStaticOrDirect* current = instr_it.Current()->AsInvokeStaticOrDirect();
       if (current != nullptr) {
-        if (!TryInline(current, current->GetIndexInDexCache(), current->GetInvokeType())) {
+        if (!TryInline(current, current->GetDexMethodIndex(), current->GetInvokeType())) {
           if (kIsDebugBuild) {
             std::string callee_name =
-                PrettyMethod(current->GetIndexInDexCache(), *outer_compilation_unit_.GetDexFile());
+                PrettyMethod(current->GetDexMethodIndex(), *outer_compilation_unit_.GetDexFile());
             bool should_inline = callee_name.find("$inline$") != std::string::npos;
             CHECK(!should_inline) << "Could not inline " << callee_name;
           }
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
new file mode 100644
index 0000000..fe0e7f2
--- /dev/null
+++ b/compiler/optimizing/intrinsics.cc
@@ -0,0 +1,366 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "intrinsics.h"
+
+#include "dex/quick/dex_file_method_inliner.h"
+#include "dex/quick/dex_file_to_method_inliner_map.h"
+#include "driver/compiler_driver.h"
+#include "invoke_type.h"
+#include "nodes.h"
+#include "quick/inline_method_analyser.h"
+
+namespace art {
+
+// Function that returns whether an intrinsic is static/direct or virtual.
+static inline InvokeType GetIntrinsicInvokeType(Intrinsics i) {
+  switch (i) {
+    case Intrinsics::kNone:
+      return kInterface;  // Non-sensical for intrinsic.
+#define OPTIMIZING_INTRINSICS(Name, IsStatic) \
+    case Intrinsics::k ## Name:               \
+      return IsStatic;
+#include "intrinsics_list.h"
+INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+  }
+  return kInterface;
+}
+
+
+
+static Primitive::Type GetType(uint64_t data, bool is_op_size) {
+  if (is_op_size) {
+    switch (static_cast<OpSize>(data)) {
+      case kSignedByte:
+        return Primitive::Type::kPrimByte;
+      case kSignedHalf:
+        return Primitive::Type::kPrimShort;
+      case k32:
+        return Primitive::Type::kPrimInt;
+      case k64:
+        return Primitive::Type::kPrimLong;
+      default:
+        LOG(FATAL) << "Unknown/unsupported op size " << data;
+        UNREACHABLE();
+    }
+  } else {
+    if ((data & kIntrinsicFlagIsLong) != 0) {
+      return Primitive::Type::kPrimLong;
+    }
+    if ((data & kIntrinsicFlagIsObject) != 0) {
+      return Primitive::Type::kPrimNot;
+    }
+    return Primitive::Type::kPrimInt;
+  }
+}
+
+static Intrinsics GetIntrinsic(InlineMethod method) {
+  switch (method.opcode) {
+    // Floating-point conversions.
+    case kIntrinsicDoubleCvt:
+      return ((method.d.data & kIntrinsicFlagToFloatingPoint) == 0) ?
+          Intrinsics::kDoubleDoubleToRawLongBits : Intrinsics::kDoubleLongBitsToDouble;
+    case kIntrinsicFloatCvt:
+      return ((method.d.data & kIntrinsicFlagToFloatingPoint) == 0) ?
+          Intrinsics::kFloatFloatToRawIntBits : Intrinsics::kFloatIntBitsToFloat;
+
+    // Bit manipulations.
+    case kIntrinsicReverseBits:
+      switch (GetType(method.d.data, true)) {
+        case Primitive::Type::kPrimInt:
+          return Intrinsics::kIntegerReverse;
+        case Primitive::Type::kPrimLong:
+          return Intrinsics::kLongReverse;
+        default:
+          LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
+          UNREACHABLE();
+      }
+      break;
+    case kIntrinsicReverseBytes:
+      switch (GetType(method.d.data, true)) {
+        case Primitive::Type::kPrimShort:
+          return Intrinsics::kShortReverseBytes;
+        case Primitive::Type::kPrimInt:
+          return Intrinsics::kIntegerReverseBytes;
+        case Primitive::Type::kPrimLong:
+          return Intrinsics::kLongReverseBytes;
+        default:
+          LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
+          UNREACHABLE();
+      }
+      break;
+
+    // Abs.
+    case kIntrinsicAbsDouble:
+      return Intrinsics::kMathAbsDouble;
+    case kIntrinsicAbsFloat:
+      return Intrinsics::kMathAbsFloat;
+    case kIntrinsicAbsInt:
+      return Intrinsics::kMathAbsInt;
+    case kIntrinsicAbsLong:
+      return Intrinsics::kMathAbsLong;
+
+    // Min/max.
+    case kIntrinsicMinMaxDouble:
+      return ((method.d.data & kIntrinsicFlagMin) == 0) ?
+          Intrinsics::kMathMaxDoubleDouble : Intrinsics::kMathMinDoubleDouble;
+    case kIntrinsicMinMaxFloat:
+      return ((method.d.data & kIntrinsicFlagMin) == 0) ?
+          Intrinsics::kMathMaxFloatFloat : Intrinsics::kMathMinFloatFloat;
+    case kIntrinsicMinMaxInt:
+      return ((method.d.data & kIntrinsicFlagMin) == 0) ?
+          Intrinsics::kMathMaxIntInt : Intrinsics::kMathMinIntInt;
+    case kIntrinsicMinMaxLong:
+      return ((method.d.data & kIntrinsicFlagMin) == 0) ?
+          Intrinsics::kMathMaxLongLong : Intrinsics::kMathMinLongLong;
+
+    // Misc math.
+    case kIntrinsicSqrt:
+      return Intrinsics::kMathSqrt;
+    case kIntrinsicCeil:
+      return Intrinsics::kMathCeil;
+    case kIntrinsicFloor:
+      return Intrinsics::kMathFloor;
+    case kIntrinsicRint:
+      return Intrinsics::kMathRint;
+    case kIntrinsicRoundDouble:
+      return Intrinsics::kMathRoundDouble;
+    case kIntrinsicRoundFloat:
+      return Intrinsics::kMathRoundFloat;
+
+    // System.arraycopy.
+    case kIntrinsicSystemArrayCopyCharArray:
+      return Intrinsics::kSystemArrayCopyChar;
+
+    // Thread.currentThread.
+    case kIntrinsicCurrentThread:
+      return  Intrinsics::kThreadCurrentThread;
+
+    // Memory.peek.
+    case kIntrinsicPeek:
+      switch (GetType(method.d.data, true)) {
+        case Primitive::Type::kPrimByte:
+          return Intrinsics::kMemoryPeekByte;
+        case Primitive::Type::kPrimShort:
+          return Intrinsics::kMemoryPeekShortNative;
+        case Primitive::Type::kPrimInt:
+          return Intrinsics::kMemoryPeekIntNative;
+        case Primitive::Type::kPrimLong:
+          return Intrinsics::kMemoryPeekLongNative;
+        default:
+          LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
+          UNREACHABLE();
+      }
+      break;
+
+    // Memory.poke.
+    case kIntrinsicPoke:
+      switch (GetType(method.d.data, true)) {
+        case Primitive::Type::kPrimByte:
+          return Intrinsics::kMemoryPokeByte;
+        case Primitive::Type::kPrimShort:
+          return Intrinsics::kMemoryPokeShortNative;
+        case Primitive::Type::kPrimInt:
+          return Intrinsics::kMemoryPokeIntNative;
+        case Primitive::Type::kPrimLong:
+          return Intrinsics::kMemoryPokeLongNative;
+        default:
+          LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
+          UNREACHABLE();
+      }
+      break;
+
+    // String.
+    case kIntrinsicCharAt:
+      return Intrinsics::kStringCharAt;
+    case kIntrinsicCompareTo:
+      return Intrinsics::kStringCompareTo;
+    case kIntrinsicIsEmptyOrLength:
+      return ((method.d.data & kIntrinsicFlagIsEmpty) == 0) ?
+          Intrinsics::kStringLength : Intrinsics::kStringIsEmpty;
+    case kIntrinsicIndexOf:
+      return ((method.d.data & kIntrinsicFlagBase0) == 0) ?
+          Intrinsics::kStringIndexOfAfter : Intrinsics::kStringIndexOf;
+
+    case kIntrinsicCas:
+      switch (GetType(method.d.data, false)) {
+        case Primitive::Type::kPrimNot:
+          return Intrinsics::kUnsafeCASObject;
+        case Primitive::Type::kPrimInt:
+          return Intrinsics::kUnsafeCASInt;
+        case Primitive::Type::kPrimLong:
+          return Intrinsics::kUnsafeCASLong;
+        default:
+          LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
+          UNREACHABLE();
+      }
+      break;
+    case kIntrinsicUnsafeGet: {
+      const bool is_volatile = (method.d.data & kIntrinsicFlagIsVolatile);
+      switch (GetType(method.d.data, false)) {
+        case Primitive::Type::kPrimInt:
+          return is_volatile ? Intrinsics::kUnsafeGetVolatile : Intrinsics::kUnsafeGet;
+        case Primitive::Type::kPrimLong:
+          return is_volatile ? Intrinsics::kUnsafeGetLongVolatile : Intrinsics::kUnsafeGetLong;
+        default:
+          LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
+          UNREACHABLE();
+      }
+      break;
+    }
+    case kIntrinsicUnsafePut: {
+      enum Sync { kNoSync, kVolatile, kOrdered };
+      const Sync sync =
+          ((method.d.data & kIntrinsicFlagIsVolatile) != 0) ? kVolatile :
+          ((method.d.data & kIntrinsicFlagIsOrdered) != 0)  ? kOrdered :
+                                                              kNoSync;
+      switch (GetType(method.d.data, false)) {
+        case Primitive::Type::kPrimInt:
+          switch (sync) {
+            case kNoSync:
+              return Intrinsics::kUnsafePut;
+            case kVolatile:
+              return Intrinsics::kUnsafePutVolatile;
+            case kOrdered:
+              return Intrinsics::kUnsafePutOrdered;
+          }
+          break;
+        case Primitive::Type::kPrimLong:
+          switch (sync) {
+            case kNoSync:
+              return Intrinsics::kUnsafePutLong;
+            case kVolatile:
+              return Intrinsics::kUnsafePutLongVolatile;
+            case kOrdered:
+              return Intrinsics::kUnsafePutLongOrdered;
+          }
+          break;
+        case Primitive::Type::kPrimNot:
+          switch (sync) {
+            case kNoSync:
+              return Intrinsics::kUnsafePutObject;
+            case kVolatile:
+              return Intrinsics::kUnsafePutObjectVolatile;
+            case kOrdered:
+              return Intrinsics::kUnsafePutObjectOrdered;
+          }
+          break;
+        default:
+          LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
+          UNREACHABLE();
+      }
+      break;
+    }
+
+    // Virtual cases.
+
+    case kIntrinsicReferenceGetReferent:
+      return Intrinsics::kReferenceGetReferent;
+
+    // Quick inliner cases. Remove after refactoring. They are here so that we can use the
+    // compiler to warn on missing cases.
+
+    case kInlineOpNop:
+    case kInlineOpReturnArg:
+    case kInlineOpNonWideConst:
+    case kInlineOpIGet:
+    case kInlineOpIPut:
+      return Intrinsics::kNone;
+
+    // No default case to make the compiler warn on missing cases.
+  }
+  return Intrinsics::kNone;
+}
+
+static bool CheckInvokeType(Intrinsics intrinsic, HInvoke* invoke) {
+  // The DexFileMethodInliner should have checked whether the methods are agreeing with
+  // what we expect, i.e., static methods are called as such. Add another check here for
+  // our expectations:
+  // Whenever the intrinsic is marked as static-or-direct, report an error if we find an
+  // InvokeVirtual. The other direction is not possible: we have intrinsics for virtual
+  // functions that will perform a check inline. If the precise type is known, however,
+  // the instruction will be sharpened to an InvokeStaticOrDirect.
+  InvokeType intrinsic_type = GetIntrinsicInvokeType(intrinsic);
+  InvokeType invoke_type = invoke->IsInvokeStaticOrDirect() ?
+      invoke->AsInvokeStaticOrDirect()->GetInvokeType() :
+      invoke->IsInvokeVirtual() ? kVirtual : kSuper;
+  switch (intrinsic_type) {
+    case kStatic:
+      return (invoke_type == kStatic);
+    case kDirect:
+      return (invoke_type == kDirect);
+    case kVirtual:
+      // Call might be devirtualized.
+      return (invoke_type == kVirtual || invoke_type == kDirect);
+
+    default:
+      return false;
+  }
+}
+
+// TODO: Refactor DexFileMethodInliner and have something nicer than InlineMethod.
+void IntrinsicsRecognizer::Run() {
+  DexFileMethodInliner* inliner = driver_->GetMethodInlinerMap()->GetMethodInliner(dex_file_);
+  DCHECK(inliner != nullptr);
+
+  for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
+    HBasicBlock* block = it.Current();
+    for (HInstructionIterator inst_it(block->GetInstructions()); !inst_it.Done();
+         inst_it.Advance()) {
+      HInstruction* inst = inst_it.Current();
+      if (inst->IsInvoke()) {
+        HInvoke* invoke = inst->AsInvoke();
+        InlineMethod method;
+        if (inliner->IsIntrinsic(invoke->GetDexMethodIndex(), &method)) {
+          Intrinsics intrinsic = GetIntrinsic(method);
+
+          if (intrinsic != Intrinsics::kNone) {
+            if (!CheckInvokeType(intrinsic, invoke)) {
+              LOG(WARNING) << "Found an intrinsic with unexpected invoke type: "
+                           << intrinsic << " for "
+                           << PrettyMethod(invoke->GetDexMethodIndex(), *dex_file_);
+            } else {
+              invoke->SetIntrinsic(intrinsic);
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+std::ostream& operator<<(std::ostream& os, const Intrinsics& intrinsic) {
+  switch (intrinsic) {
+    case Intrinsics::kNone:
+      os << "No intrinsic.";
+      break;
+#define OPTIMIZING_INTRINSICS(Name, IsStatic) \
+    case Intrinsics::k ## Name: \
+      os << # Name; \
+      break;
+#include "intrinsics_list.h"
+INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef STATIC_INTRINSICS_LIST
+#undef VIRTUAL_INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+  }
+  return os;
+}
+
+}  // namespace art
+
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
new file mode 100644
index 0000000..29cc8ef
--- /dev/null
+++ b/compiler/optimizing/intrinsics.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_H_
+#define ART_COMPILER_OPTIMIZING_INTRINSICS_H_
+
+#include "nodes.h"
+#include "optimization.h"
+
+namespace art {
+
+class CompilerDriver;
+class DexFile;
+
+// Recognize intrinsics from HInvoke nodes.
+class IntrinsicsRecognizer : public HOptimization {
+ public:
+  IntrinsicsRecognizer(HGraph* graph, const DexFile* dex_file, CompilerDriver* driver)
+      : HOptimization(graph, true, "intrinsics_recognition"),
+        dex_file_(dex_file), driver_(driver) {}
+
+  void Run() OVERRIDE;
+
+ private:
+  const DexFile* dex_file_;
+  CompilerDriver* driver_;
+
+  DISALLOW_COPY_AND_ASSIGN(IntrinsicsRecognizer);
+};
+
+class IntrinsicVisitor : public ValueObject {
+ public:
+  virtual ~IntrinsicVisitor() {}
+
+  // Dispatch logic.
+
+  void Dispatch(HInvoke* invoke) {
+    switch (invoke->GetIntrinsic()) {
+      case Intrinsics::kNone:
+        return;
+#define OPTIMIZING_INTRINSICS(Name, IsStatic) \
+      case Intrinsics::k ## Name:             \
+        Visit ## Name(invoke);                \
+        return;
+#include "intrinsics_list.h"
+INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+
+      // Do not put a default case. That way the compiler will complain if we missed a case.
+    }
+  }
+
+  // Define visitor methods.
+
+#define OPTIMIZING_INTRINSICS(Name, IsStatic)                    \
+  virtual void Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
+  }
+#include "intrinsics_list.h"
+INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+
+ protected:
+  IntrinsicVisitor() {}
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(IntrinsicVisitor);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_INTRINSICS_H_
diff --git a/compiler/optimizing/intrinsics_list.h b/compiler/optimizing/intrinsics_list.h
new file mode 100644
index 0000000..29ca20c
--- /dev/null
+++ b/compiler/optimizing/intrinsics_list.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_LIST_H_
+#define ART_COMPILER_OPTIMIZING_INTRINSICS_LIST_H_
+
+// All intrinsics supported by the optimizing compiler. Format is name, then whether it is expected
+// to be a HInvokeStaticOrDirect node (compared to HInvokeVirtual).
+
+#define INTRINSICS_LIST(V) \
+  V(DoubleDoubleToRawLongBits, kStatic) \
+  V(DoubleLongBitsToDouble, kStatic) \
+  V(FloatFloatToRawIntBits, kStatic) \
+  V(FloatIntBitsToFloat, kStatic) \
+  V(IntegerReverse, kStatic) \
+  V(IntegerReverseBytes, kStatic) \
+  V(LongReverse, kStatic) \
+  V(LongReverseBytes, kStatic) \
+  V(ShortReverseBytes, kStatic) \
+  V(MathAbsDouble, kStatic) \
+  V(MathAbsFloat, kStatic) \
+  V(MathAbsLong, kStatic) \
+  V(MathAbsInt, kStatic) \
+  V(MathMinDoubleDouble, kStatic) \
+  V(MathMinFloatFloat, kStatic) \
+  V(MathMinLongLong, kStatic) \
+  V(MathMinIntInt, kStatic) \
+  V(MathMaxDoubleDouble, kStatic) \
+  V(MathMaxFloatFloat, kStatic) \
+  V(MathMaxLongLong, kStatic) \
+  V(MathMaxIntInt, kStatic) \
+  V(MathSqrt, kStatic) \
+  V(MathCeil, kStatic) \
+  V(MathFloor, kStatic) \
+  V(MathRint, kStatic) \
+  V(MathRoundDouble, kStatic) \
+  V(MathRoundFloat, kStatic) \
+  V(SystemArrayCopyChar, kStatic) \
+  V(ThreadCurrentThread, kStatic) \
+  V(MemoryPeekByte, kStatic) \
+  V(MemoryPeekIntNative, kStatic) \
+  V(MemoryPeekLongNative, kStatic) \
+  V(MemoryPeekShortNative, kStatic) \
+  V(MemoryPokeByte, kStatic) \
+  V(MemoryPokeIntNative, kStatic) \
+  V(MemoryPokeLongNative, kStatic) \
+  V(MemoryPokeShortNative, kStatic) \
+  V(StringCharAt, kDirect) \
+  V(StringCompareTo, kDirect) \
+  V(StringIsEmpty, kDirect) \
+  V(StringIndexOf, kDirect) \
+  V(StringIndexOfAfter, kDirect) \
+  V(StringLength, kDirect) \
+  V(UnsafeCASInt, kDirect) \
+  V(UnsafeCASLong, kDirect) \
+  V(UnsafeCASObject, kDirect) \
+  V(UnsafeGet, kDirect) \
+  V(UnsafeGetVolatile, kDirect) \
+  V(UnsafeGetLong, kDirect) \
+  V(UnsafeGetLongVolatile, kDirect) \
+  V(UnsafePut, kDirect) \
+  V(UnsafePutOrdered, kDirect) \
+  V(UnsafePutVolatile, kDirect) \
+  V(UnsafePutObject, kDirect) \
+  V(UnsafePutObjectOrdered, kDirect) \
+  V(UnsafePutObjectVolatile, kDirect) \
+  V(UnsafePutLong, kDirect) \
+  V(UnsafePutLongOrdered, kDirect) \
+  V(UnsafePutLongVolatile, kDirect) \
+  \
+  V(ReferenceGetReferent, kVirtual)
+
+#endif  // ART_COMPILER_OPTIMIZING_INTRINSICS_LIST_H_
+#undef ART_COMPILER_OPTIMIZING_INTRINSICS_LIST_H_   // #define is only for lint.
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
new file mode 100644
index 0000000..c1f4c94
--- /dev/null
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -0,0 +1,984 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "intrinsics_x86_64.h"
+
+#include "code_generator_x86_64.h"
+#include "entrypoints/quick/quick_entrypoints.h"
+#include "intrinsics.h"
+#include "mirror/array-inl.h"
+#include "mirror/art_method.h"
+#include "mirror/string.h"
+#include "thread.h"
+#include "utils/x86_64/assembler_x86_64.h"
+#include "utils/x86_64/constants_x86_64.h"
+
+namespace art {
+
+namespace x86_64 {
+
+static constexpr bool kIntrinsified = true;
+
+X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() {
+  return reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
+}
+
+ArenaAllocator* IntrinsicCodeGeneratorX86_64::GetArena() {
+  return codegen_->GetGraph()->GetArena();
+}
+
+bool IntrinsicLocationsBuilderX86_64::TryDispatch(HInvoke* invoke) {
+  Dispatch(invoke);
+  const LocationSummary* res = invoke->GetLocations();
+  return res != nullptr && res->Intrinsified();
+}
+
+#define __ reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler())->
+
+// TODO: trg as memory.
+static void MoveFromReturnRegister(Location trg,
+                                   Primitive::Type type,
+                                   CodeGeneratorX86_64* codegen) {
+  if (!trg.IsValid()) {
+    DCHECK(type == Primitive::kPrimVoid);
+    return;
+  }
+
+  switch (type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot: {
+      CpuRegister trg_reg = trg.AsRegister<CpuRegister>();
+      if (trg_reg.AsRegister() != RAX) {
+        __ movl(trg_reg, CpuRegister(RAX));
+      }
+      break;
+    }
+    case Primitive::kPrimLong: {
+      CpuRegister trg_reg = trg.AsRegister<CpuRegister>();
+      if (trg_reg.AsRegister() != RAX) {
+        __ movq(trg_reg, CpuRegister(RAX));
+      }
+      break;
+    }
+
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unexpected void type for valid location " << trg;
+      UNREACHABLE();
+
+    case Primitive::kPrimDouble: {
+      XmmRegister trg_reg = trg.AsFpuRegister<XmmRegister>();
+      if (trg_reg.AsFloatRegister() != XMM0) {
+        __ movsd(trg_reg, XmmRegister(XMM0));
+      }
+      break;
+    }
+    case Primitive::kPrimFloat: {
+      XmmRegister trg_reg = trg.AsFpuRegister<XmmRegister>();
+      if (trg_reg.AsFloatRegister() != XMM0) {
+        __ movss(trg_reg, XmmRegister(XMM0));
+      }
+      break;
+    }
+  }
+}
+
+static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorX86_64* codegen) {
+  if (invoke->InputCount() == 0) {
+    return;
+  }
+
+  LocationSummary* locations = invoke->GetLocations();
+  InvokeDexCallingConventionVisitor calling_convention_visitor;
+
+  // We're moving potentially two or more locations to locations that could overlap, so we need
+  // a parallel move resolver.
+  HParallelMove parallel_move(arena);
+
+  for (size_t i = 0; i < invoke->InputCount(); i++) {
+    HInstruction* input = invoke->InputAt(i);
+    Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType());
+    Location actual_loc = locations->InAt(i);
+
+    parallel_move.AddMove(new (arena) MoveOperands(actual_loc, cc_loc, nullptr));
+  }
+
+  codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+}
+
+// Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
+// call. This will copy the arguments into the positions for a regular call.
+//
+// Note: The actual parameters are required to be in the locations given by the invoke's location
+//       summary. If an intrinsic modifies those locations before a slowpath call, they must be
+//       restored!
+class IntrinsicSlowPathX86_64 : public SlowPathCodeX86_64 {
+ public:
+  explicit IntrinsicSlowPathX86_64(HInvoke* invoke) : invoke_(invoke) { }
+
+  void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
+    CodeGeneratorX86_64* codegen = down_cast<CodeGeneratorX86_64*>(codegen_in);
+    __ Bind(GetEntryLabel());
+
+    codegen->SaveLiveRegisters(invoke_->GetLocations());
+
+    MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen);
+
+    if (invoke_->IsInvokeStaticOrDirect()) {
+      codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), CpuRegister(RDI));
+    } else {
+      UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented";
+      UNREACHABLE();
+    }
+
+    // Copy the result back to the expected output.
+    Location out = invoke_->GetLocations()->Out();
+    if (out.IsValid()) {
+      DCHECK(out.IsRegister());  // TODO: Replace this when we support output in memory.
+      DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
+      MoveFromReturnRegister(out, invoke_->GetType(), codegen);
+    }
+
+    codegen->RestoreLiveRegisters(invoke_->GetLocations());
+    __ jmp(GetExitLabel());
+  }
+
+ private:
+  // The instruction where this slow path is happening.
+  HInvoke* const invoke_;
+
+  DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathX86_64);
+};
+
+#undef __
+#define __ assembler->
+
+static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresFpuRegister());
+}
+
+static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
+  Location input = locations->InAt(0);
+  Location output = locations->Out();
+  __ movd(output.AsRegister<CpuRegister>(), input.AsFpuRegister<XmmRegister>(), is64bit);
+}
+
+static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
+  Location input = locations->InAt(0);
+  Location output = locations->Out();
+  __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<CpuRegister>(), is64bit);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
+  CreateFPToIntLocations(arena_, invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
+  CreateIntToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
+  MoveFPToInt(invoke->GetLocations(), true, GetAssembler());
+}
+void IntrinsicCodeGeneratorX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
+  MoveIntToFP(invoke->GetLocations(), true, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
+  CreateFPToIntLocations(arena_, invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
+  CreateIntToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
+  MoveFPToInt(invoke->GetLocations(), false, GetAssembler());
+}
+void IntrinsicCodeGeneratorX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
+  MoveIntToFP(invoke->GetLocations(), false, GetAssembler());
+}
+
+static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::SameAsFirstInput());
+}
+
+static void GenReverseBytes(LocationSummary* locations,
+                            Primitive::Type size,
+                            X86_64Assembler* assembler) {
+  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+
+  switch (size) {
+    case Primitive::kPrimShort:
+      // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
+      __ bswapl(out);
+      __ sarl(out, Immediate(16));
+      break;
+    case Primitive::kPrimInt:
+      __ bswapl(out);
+      break;
+    case Primitive::kPrimLong:
+      __ bswapq(out);
+      break;
+    default:
+      LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
+      UNREACHABLE();
+  }
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
+  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitLongReverseBytes(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitLongReverseBytes(HInvoke* invoke) {
+  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitShortReverseBytes(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) {
+  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
+}
+
+
+// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we
+//       need is 64b.
+
+static void CreateFloatToFloatPlusTemps(ArenaAllocator* arena, HInvoke* invoke) {
+  // TODO: Enable memory operations when the assembler supports them.
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  // TODO: Allow x86 to work with memory. This requires assembler support, see below.
+  // locations->SetInAt(0, Location::Any());               // X86 can work on memory directly.
+  locations->SetOut(Location::SameAsFirstInput());
+  locations->AddTemp(Location::RequiresRegister());     // Immediate constant.
+  locations->AddTemp(Location::RequiresFpuRegister());  // FP version of above.
+}
+
+static void MathAbsFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
+  Location output = locations->Out();
+  CpuRegister cpu_temp = locations->GetTemp(0).AsRegister<CpuRegister>();
+
+  if (output.IsFpuRegister()) {
+    // In-register
+    XmmRegister xmm_temp = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
+
+    if (is64bit) {
+      __ movq(cpu_temp, Immediate(INT64_C(0x7FFFFFFFFFFFFFFF)));
+      __ movd(xmm_temp, cpu_temp);
+      __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp);
+    } else {
+      __ movl(cpu_temp, Immediate(INT64_C(0x7FFFFFFF)));
+      __ movd(xmm_temp, cpu_temp);
+      __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp);
+    }
+  } else {
+    // TODO: update when assember support is available.
+    UNIMPLEMENTED(FATAL) << "Needs assembler support.";
+//  Once assembler support is available, in-memory operations look like this:
+//    if (is64bit) {
+//      DCHECK(output.IsDoubleStackSlot());
+//      // No 64b and with literal.
+//      __ movq(cpu_temp, Immediate(INT64_C(0x7FFFFFFFFFFFFFFF)));
+//      __ andq(Address(CpuRegister(RSP), output.GetStackIndex()), cpu_temp);
+//    } else {
+//      DCHECK(output.IsStackSlot());
+//      // Can use and with a literal directly.
+//      __ andl(Address(CpuRegister(RSP), output.GetStackIndex()), Immediate(INT64_C(0x7FFFFFFF)));
+//    }
+  }
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathAbsDouble(HInvoke* invoke) {
+  CreateFloatToFloatPlusTemps(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) {
+  MathAbsFP(invoke->GetLocations(), true, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) {
+  CreateFloatToFloatPlusTemps(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) {
+  MathAbsFP(invoke->GetLocations(), false, GetAssembler());
+}
+
+static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::SameAsFirstInput());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+static void GenAbsInteger(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
+  Location output = locations->Out();
+  CpuRegister out = output.AsRegister<CpuRegister>();
+  CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();
+
+  if (is64bit) {
+    // Create mask.
+    __ movq(mask, out);
+    __ sarq(mask, Immediate(63));
+    // Add mask.
+    __ addq(out, mask);
+    __ xorq(out, mask);
+  } else {
+    // Create mask.
+    __ movl(mask, out);
+    __ sarl(mask, Immediate(31));
+    // Add mask.
+    __ addl(out, mask);
+    __ xorl(out, mask);
+  }
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathAbsInt(HInvoke* invoke) {
+  CreateIntToIntPlusTemp(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathAbsInt(HInvoke* invoke) {
+  GenAbsInteger(invoke->GetLocations(), false, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathAbsLong(HInvoke* invoke) {
+  CreateIntToIntPlusTemp(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) {
+  GenAbsInteger(invoke->GetLocations(), true, GetAssembler());
+}
+
+static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double,
+                        X86_64Assembler* assembler) {
+  Location op1_loc = locations->InAt(0);
+  Location op2_loc = locations->InAt(1);
+  Location out_loc = locations->Out();
+  XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
+
+  // Shortcut for same input locations.
+  if (op1_loc.Equals(op2_loc)) {
+    DCHECK(out_loc.Equals(op1_loc));
+    return;
+  }
+
+  //  (out := op1)
+  //  out <=? op2
+  //  if Nan jmp Nan_label
+  //  if out is min jmp done
+  //  if op2 is min jmp op2_label
+  //  handle -0/+0
+  //  jmp done
+  // Nan_label:
+  //  out := NaN
+  // op2_label:
+  //  out := op2
+  // done:
+  //
+  // This removes one jmp, but needs to copy one input (op1) to out.
+  //
+  // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath?
+
+  XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
+
+  Label nan, done, op2_label;
+  if (is_double) {
+    __ ucomisd(out, op2);
+  } else {
+    __ ucomiss(out, op2);
+  }
+
+  __ j(Condition::kParityEven, &nan);
+
+  __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
+  __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
+
+  // Handle 0.0/-0.0.
+  if (is_min) {
+    if (is_double) {
+      __ orpd(out, op2);
+    } else {
+      __ orps(out, op2);
+    }
+  } else {
+    if (is_double) {
+      __ andpd(out, op2);
+    } else {
+      __ andps(out, op2);
+    }
+  }
+  __ jmp(&done);
+
+  // NaN handling.
+  __ Bind(&nan);
+  CpuRegister cpu_temp = locations->GetTemp(0).AsRegister<CpuRegister>();
+  // TODO: Literal pool. Trades 64b immediate in CPU reg for direct memory access.
+  if (is_double) {
+    __ movq(cpu_temp, Immediate(INT64_C(0x7FF8000000000000)));
+  } else {
+    __ movl(cpu_temp, Immediate(INT64_C(0x7FC00000)));
+  }
+  __ movd(out, cpu_temp, is_double);
+  __ jmp(&done);
+
+  // out := op2;
+  __ Bind(&op2_label);
+  if (is_double) {
+    __ movsd(out, op2);
+  } else {
+    __ movss(out, op2);
+  }
+
+  // Done.
+  __ Bind(&done);
+}
+
+static void CreateFPFPToFPPlusTempLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->SetInAt(1, Location::RequiresFpuRegister());
+  // The following is sub-optimal, but all we can do for now. It would be fine to also accept
+  // the second input to be the output (we can simply swap inputs).
+  locations->SetOut(Location::SameAsFirstInput());
+  locations->AddTemp(Location::RequiresRegister());     // Immediate constant.
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
+  CreateFPFPToFPPlusTempLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
+  GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
+  CreateFPFPToFPPlusTempLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
+  GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
+  CreateFPFPToFPPlusTempLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
+  GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
+  CreateFPFPToFPPlusTempLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
+  GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler());
+}
+
+static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
+                      X86_64Assembler* assembler) {
+  Location op1_loc = locations->InAt(0);
+  Location op2_loc = locations->InAt(1);
+
+  // Shortcut for same input locations.
+  if (op1_loc.Equals(op2_loc)) {
+    // Can return immediately, as op1_loc == out_loc.
+    // Note: if we ever support separate registers, e.g., output into memory, we need to check for
+    //       a copy here.
+    DCHECK(locations->Out().Equals(op1_loc));
+    return;
+  }
+
+  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+  CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();
+
+  //  (out := op1)
+  //  out <=? op2
+  //  if out is min jmp done
+  //  out := op2
+  // done:
+
+  if (is_long) {
+    __ cmpq(out, op2);
+  } else {
+    __ cmpl(out, op2);
+  }
+
+  __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, is_long);
+}
+
+static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::SameAsFirstInput());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathMinIntInt(HInvoke* invoke) {
+  CreateIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathMinIntInt(HInvoke* invoke) {
+  GenMinMax(invoke->GetLocations(), true, false, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathMinLongLong(HInvoke* invoke) {
+  CreateIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathMinLongLong(HInvoke* invoke) {
+  GenMinMax(invoke->GetLocations(), true, true, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
+  CreateIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
+  GenMinMax(invoke->GetLocations(), false, false, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
+  CreateIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
+  GenMinMax(invoke->GetLocations(), false, true, GetAssembler());
+}
+
+static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->SetOut(Location::RequiresFpuRegister());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMathSqrt(HInvoke* invoke) {
+  CreateFPToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) {
+  LocationSummary* locations = invoke->GetLocations();
+  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
+  XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
+
+  GetAssembler()->sqrtsd(out, in);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitStringCharAt(HInvoke* invoke) {
+  // The inputs plus one temp.
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCallOnSlowPath,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::SameAsFirstInput());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitStringCharAt(HInvoke* invoke) {
+  LocationSummary* locations = invoke->GetLocations();
+
+  // Location of reference to data array
+  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
+  // Location of count
+  const int32_t count_offset = mirror::String::CountOffset().Int32Value();
+  // Starting offset within data array
+  const int32_t offset_offset = mirror::String::OffsetOffset().Int32Value();
+  // Start of char data with array_
+  const int32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Int32Value();
+
+  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
+  CpuRegister idx = locations->InAt(1).AsRegister<CpuRegister>();
+  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+  Location temp_loc = locations->GetTemp(0);
+  CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
+
+  // Note: Nullcheck has been done before in a HNullCheck before the HInvokeVirtual. If/when we
+  //       move to (coalesced) implicit checks, we have to do a null check below.
+  DCHECK(!kCoalescedImplicitNullCheck);
+
+  // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
+  //       the cost.
+  // TODO: For simplicity, the index parameter is requested in a register, so different from Quick
+  //       we will not optimize the code for constants (which would save a register).
+
+  SlowPathCodeX86_64* slow_path = new (GetArena()) IntrinsicSlowPathX86_64(invoke);
+  codegen_->AddSlowPath(slow_path);
+
+  X86_64Assembler* assembler = GetAssembler();
+
+  __ cmpl(idx, Address(obj, count_offset));
+  __ j(kAboveEqual, slow_path->GetEntryLabel());
+
+  // Get the actual element.
+  __ movl(temp, idx);                          // temp := idx.
+  __ addl(temp, Address(obj, offset_offset));  // temp := offset + idx.
+  __ movl(out, Address(obj, value_offset));    // obj := obj.array.
+  // out = out[2*temp].
+  __ movzxw(out, Address(out, temp, ScaleFactor::TIMES_2, data_offset));
+
+  __ Bind(slow_path->GetExitLabel());
+}
+
+static void GenPeek(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
+  CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
+  CpuRegister out = locations->Out().AsRegister<CpuRegister>();  // == address, here for clarity.
+  // x86 allows unaligned access. We do not have to check the input or use specific instructions
+  // to avoid a SIGBUS.
+  switch (size) {
+    case Primitive::kPrimByte:
+      __ movsxb(out, Address(address, 0));
+      break;
+    case Primitive::kPrimShort:
+      __ movsxw(out, Address(address, 0));
+      break;
+    case Primitive::kPrimInt:
+      __ movl(out, Address(address, 0));
+      break;
+    case Primitive::kPrimLong:
+      __ movq(out, Address(address, 0));
+      break;
+    default:
+      LOG(FATAL) << "Type not recognized for peek: " << size;
+      UNREACHABLE();
+  }
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
+  GenPeek(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
+  GenPeek(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
+  GenPeek(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
+  GenPeek(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
+}
+
+static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+}
+
+static void GenPoke(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
+  CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
+  CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>();
+  // x86 allows unaligned access. We do not have to check the input or use specific instructions
+  // to avoid a SIGBUS.
+  switch (size) {
+    case Primitive::kPrimByte:
+      __ movb(Address(address, 0), value);
+      break;
+    case Primitive::kPrimShort:
+      __ movw(Address(address, 0), value);
+      break;
+    case Primitive::kPrimInt:
+      __ movl(Address(address, 0), value);
+      break;
+    case Primitive::kPrimLong:
+      __ movq(Address(address, 0), value);
+      break;
+    default:
+      LOG(FATAL) << "Type not recognized for poke: " << size;
+      UNREACHABLE();
+  }
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
+  CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
+  GenPoke(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
+  CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
+  GenPoke(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
+  CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
+  GenPoke(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
+  CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
+  GenPoke(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
+  CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
+  GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64WordSize>(), true));
+}
+
+static void GenUnsafeGet(LocationSummary* locations, bool is_long,
+                         bool is_volatile ATTRIBUTE_UNUSED, X86_64Assembler* assembler) {
+  CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
+  CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
+  CpuRegister trg = locations->Out().AsRegister<CpuRegister>();
+
+  if (is_long) {
+    __ movq(trg, Address(base, offset, ScaleFactor::TIMES_1, 0));
+  } else {
+    // TODO: Distinguish object. In case we move to an actual compressed heap, retrieving an object
+    //       pointer will entail an unpack operation.
+    __ movl(trg, Address(base, offset, ScaleFactor::TIMES_1, 0));
+  }
+}
+
+static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetInAt(2, Location::RequiresRegister());
+  locations->SetOut(Location::SameAsFirstInput());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) {
+  CreateIntIntIntToIntLocations(arena_, invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
+  CreateIntIntIntToIntLocations(arena_, invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
+  CreateIntIntIntToIntLocations(arena_, invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
+  CreateIntIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) {
+  GenUnsafeGet(invoke->GetLocations(), false, false, GetAssembler());
+}
+void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
+  GenUnsafeGet(invoke->GetLocations(), false, true, GetAssembler());
+}
+void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
+  GenUnsafeGet(invoke->GetLocations(), true, false, GetAssembler());
+}
+void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
+  GenUnsafeGet(invoke->GetLocations(), true, true, GetAssembler());
+}
+
+static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* arena,
+                                                       Primitive::Type type,
+                                                       HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::NoLocation());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetInAt(2, Location::RequiresRegister());
+  locations->SetInAt(3, Location::RequiresRegister());
+  if (type == Primitive::kPrimNot) {
+    // Need temp registers for card-marking.
+    locations->AddTemp(Location::RequiresRegister());
+    locations->AddTemp(Location::RequiresRegister());
+  }
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitUnsafePut(HInvoke* invoke) {
+  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
+  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
+  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObject(HInvoke* invoke) {
+  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
+  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
+  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLong(HInvoke* invoke) {
+  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
+  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
+}
+void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
+  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
+}
+
+// We don't care for ordered: it requires an AnyStore barrier, which is already given by the x86
+// memory model.
+static void GenUnsafePut(LocationSummary* locations, Primitive::Type type, bool is_volatile,
+                         CodeGeneratorX86_64* codegen) {
+  X86_64Assembler* assembler = reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler());
+  CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
+  CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
+  CpuRegister value = locations->InAt(3).AsRegister<CpuRegister>();
+
+  if (type == Primitive::kPrimLong) {
+    __ movq(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
+  } else {
+    __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
+  }
+
+  if (is_volatile) {
+    __ mfence();
+  }
+
+  if (type == Primitive::kPrimNot) {
+    codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
+                        locations->GetTemp(1).AsRegister<CpuRegister>(),
+                        base,
+                        value);
+  }
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitUnsafePut(HInvoke* invoke) {
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
+}
+void IntrinsicCodeGeneratorX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
+}
+void IntrinsicCodeGeneratorX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, codegen_);
+}
+void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObject(HInvoke* invoke) {
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
+}
+void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
+}
+void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, codegen_);
+}
+void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLong(HInvoke* invoke) {
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
+}
+void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
+}
+void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
+  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_);
+}
+
+// Unimplemented intrinsics.
+
+#define UNIMPLEMENTED_INTRINSIC(Name)                                                   \
+void IntrinsicLocationsBuilderX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
+}                                                                                       \
+void IntrinsicCodeGeneratorX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) {    \
+}
+
+UNIMPLEMENTED_INTRINSIC(IntegerReverse)
+UNIMPLEMENTED_INTRINSIC(LongReverse)
+UNIMPLEMENTED_INTRINSIC(MathFloor)
+UNIMPLEMENTED_INTRINSIC(MathCeil)
+UNIMPLEMENTED_INTRINSIC(MathRint)
+UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
+UNIMPLEMENTED_INTRINSIC(MathRoundFloat)
+UNIMPLEMENTED_INTRINSIC(StringIsEmpty)  // Might not want to do these two anyways, inlining should
+UNIMPLEMENTED_INTRINSIC(StringLength)   // be good enough here.
+UNIMPLEMENTED_INTRINSIC(StringCompareTo)
+UNIMPLEMENTED_INTRINSIC(StringIndexOf)
+UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
+UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
+UNIMPLEMENTED_INTRINSIC(UnsafeCASInt)
+UNIMPLEMENTED_INTRINSIC(UnsafeCASLong)
+UNIMPLEMENTED_INTRINSIC(UnsafeCASObject)
+UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
+
+}  // namespace x86_64
+}  // namespace art
diff --git a/compiler/optimizing/intrinsics_x86_64.h b/compiler/optimizing/intrinsics_x86_64.h
new file mode 100644
index 0000000..c1fa99c
--- /dev/null
+++ b/compiler/optimizing/intrinsics_x86_64.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_X86_64_H_
+#define ART_COMPILER_OPTIMIZING_INTRINSICS_X86_64_H_
+
+#include "intrinsics.h"
+
+namespace art {
+
+class ArenaAllocator;
+class HInvokeStaticOrDirect;
+class HInvokeVirtual;
+
+namespace x86_64 {
+
+class CodeGeneratorX86_64;
+class X86_64Assembler;
+
+class IntrinsicLocationsBuilderX86_64 FINAL : public IntrinsicVisitor {
+ public:
+  explicit IntrinsicLocationsBuilderX86_64(ArenaAllocator* arena) : arena_(arena) {}
+
+  // Define visitor methods.
+
+#define OPTIMIZING_INTRINSICS(Name, IsStatic)   \
+  void Visit ## Name(HInvoke* invoke) OVERRIDE;
+#include "intrinsics_list.h"
+INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+
+  // Check whether an invoke is an intrinsic, and if so, create a location summary. Returns whether
+  // a corresponding LocationSummary with the intrinsified_ flag set was generated and attached to
+  // the invoke.
+  bool TryDispatch(HInvoke* invoke);
+
+ private:
+  ArenaAllocator* arena_;
+
+  DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderX86_64);
+};
+
+class IntrinsicCodeGeneratorX86_64 FINAL : public IntrinsicVisitor {
+ public:
+  explicit IntrinsicCodeGeneratorX86_64(CodeGeneratorX86_64* codegen) : codegen_(codegen) {}
+
+  // Define visitor methods.
+
+#define OPTIMIZING_INTRINSICS(Name, IsStatic)   \
+  void Visit ## Name(HInvoke* invoke) OVERRIDE;
+#include "intrinsics_list.h"
+INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+
+ private:
+  X86_64Assembler* GetAssembler();
+
+  ArenaAllocator* GetArena();
+
+  CodeGeneratorX86_64* codegen_;
+
+  DISALLOW_COPY_AND_ASSIGN(IntrinsicCodeGeneratorX86_64);
+};
+
+}  // namespace x86_64
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_INTRINSICS_X86_64_H_
diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc
index ed5e260..9f2f9ec 100644
--- a/compiler/optimizing/locations.cc
+++ b/compiler/optimizing/locations.cc
@@ -20,7 +20,9 @@
 
 namespace art {
 
-LocationSummary::LocationSummary(HInstruction* instruction, CallKind call_kind)
+LocationSummary::LocationSummary(HInstruction* instruction,
+                                 CallKind call_kind,
+                                 bool intrinsified)
     : inputs_(instruction->GetBlock()->GetGraph()->GetArena(), instruction->InputCount()),
       temps_(instruction->GetBlock()->GetGraph()->GetArena(), 0),
       environment_(instruction->GetBlock()->GetGraph()->GetArena(),
@@ -29,7 +31,8 @@
       call_kind_(call_kind),
       stack_mask_(nullptr),
       register_mask_(0),
-      live_registers_() {
+      live_registers_(),
+      intrinsified_(intrinsified) {
   inputs_.SetSize(instruction->InputCount());
   for (size_t i = 0; i < instruction->InputCount(); ++i) {
     inputs_.Put(i, Location());
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index 7df99d4..d41b3ae 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -463,7 +463,9 @@
     kCall
   };
 
-  LocationSummary(HInstruction* instruction, CallKind call_kind = kNoCall);
+  LocationSummary(HInstruction* instruction,
+                  CallKind call_kind = kNoCall,
+                  bool intrinsified = false);
 
   void SetInAt(uint32_t at, Location location) {
     DCHECK(inputs_.Get(at).IsUnallocated() || inputs_.Get(at).IsInvalid());
@@ -574,6 +576,10 @@
     return output_overlaps_;
   }
 
+  bool Intrinsified() const {
+    return intrinsified_;
+  }
+
  private:
   GrowableArray<Location> inputs_;
   GrowableArray<Location> temps_;
@@ -593,6 +599,9 @@
   // Registers that are in use at this position.
   RegisterSet live_registers_;
 
+  // Whether these are locations for an intrinsified call.
+  const bool intrinsified_;
+
   ART_FRIEND_TEST(RegisterAllocatorTest, ExpectedInRegisterHint);
   ART_FRIEND_TEST(RegisterAllocatorTest, SameAsFirstInputHint);
   DISALLOW_COPY_AND_ASSIGN(LocationSummary);
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index b98bc70..4838e1a 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1580,19 +1580,18 @@
   DISALLOW_COPY_AND_ASSIGN(HLongConstant);
 };
 
+enum class Intrinsics {
+#define OPTIMIZING_INTRINSICS(Name, IsStatic) k ## Name,
+#include "intrinsics_list.h"
+  kNone,
+  INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+};
+std::ostream& operator<<(std::ostream& os, const Intrinsics& intrinsic);
+
 class HInvoke : public HInstruction {
  public:
-  HInvoke(ArenaAllocator* arena,
-          uint32_t number_of_arguments,
-          Primitive::Type return_type,
-          uint32_t dex_pc)
-    : HInstruction(SideEffects::All()),
-      inputs_(arena, number_of_arguments),
-      return_type_(return_type),
-      dex_pc_(dex_pc) {
-    inputs_.SetSize(number_of_arguments);
-  }
-
   virtual size_t InputCount() const { return inputs_.Size(); }
   virtual HInstruction* InputAt(size_t i) const { return inputs_.Get(i); }
 
@@ -1612,12 +1611,38 @@
 
   uint32_t GetDexPc() const { return dex_pc_; }
 
+  uint32_t GetDexMethodIndex() const { return dex_method_index_; }
+
+  Intrinsics GetIntrinsic() {
+    return intrinsic_;
+  }
+
+  void SetIntrinsic(Intrinsics intrinsic) {
+    intrinsic_ = intrinsic;
+  }
+
   DECLARE_INSTRUCTION(Invoke);
 
  protected:
+  HInvoke(ArenaAllocator* arena,
+          uint32_t number_of_arguments,
+          Primitive::Type return_type,
+          uint32_t dex_pc,
+          uint32_t dex_method_index)
+    : HInstruction(SideEffects::All()),
+      inputs_(arena, number_of_arguments),
+      return_type_(return_type),
+      dex_pc_(dex_pc),
+      dex_method_index_(dex_method_index),
+      intrinsic_(Intrinsics::kNone) {
+    inputs_.SetSize(number_of_arguments);
+  }
+
   GrowableArray<HInstruction*> inputs_;
   const Primitive::Type return_type_;
   const uint32_t dex_pc_;
+  const uint32_t dex_method_index_;
+  Intrinsics intrinsic_;
 
  private:
   DISALLOW_COPY_AND_ASSIGN(HInvoke);
@@ -1629,19 +1654,16 @@
                         uint32_t number_of_arguments,
                         Primitive::Type return_type,
                         uint32_t dex_pc,
-                        uint32_t index_in_dex_cache,
+                        uint32_t dex_method_index,
                         InvokeType invoke_type)
-      : HInvoke(arena, number_of_arguments, return_type, dex_pc),
-        index_in_dex_cache_(index_in_dex_cache),
+      : HInvoke(arena, number_of_arguments, return_type, dex_pc, dex_method_index),
         invoke_type_(invoke_type) {}
 
-  uint32_t GetIndexInDexCache() const { return index_in_dex_cache_; }
   InvokeType GetInvokeType() const { return invoke_type_; }
 
   DECLARE_INSTRUCTION(InvokeStaticOrDirect);
 
  private:
-  const uint32_t index_in_dex_cache_;
   const InvokeType invoke_type_;
 
   DISALLOW_COPY_AND_ASSIGN(HInvokeStaticOrDirect);
@@ -1653,8 +1675,9 @@
                  uint32_t number_of_arguments,
                  Primitive::Type return_type,
                  uint32_t dex_pc,
+                 uint32_t dex_method_index,
                  uint32_t vtable_index)
-      : HInvoke(arena, number_of_arguments, return_type, dex_pc),
+      : HInvoke(arena, number_of_arguments, return_type, dex_pc, dex_method_index),
         vtable_index_(vtable_index) {}
 
   uint32_t GetVTableIndex() const { return vtable_index_; }
@@ -1675,8 +1698,7 @@
                    uint32_t dex_pc,
                    uint32_t dex_method_index,
                    uint32_t imt_index)
-      : HInvoke(arena, number_of_arguments, return_type, dex_pc),
-        dex_method_index_(dex_method_index),
+      : HInvoke(arena, number_of_arguments, return_type, dex_pc, dex_method_index),
         imt_index_(imt_index) {}
 
   uint32_t GetImtIndex() const { return imt_index_; }
@@ -1685,7 +1707,6 @@
   DECLARE_INSTRUCTION(InvokeInterface);
 
  private:
-  const uint32_t dex_method_index_;
   const uint32_t imt_index_;
 
   DISALLOW_COPY_AND_ASSIGN(HInvokeInterface);
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 692d452..6056373 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -25,6 +25,7 @@
 #include "compiler.h"
 #include "constant_folding.h"
 #include "dead_code_elimination.h"
+#include "dex/quick/dex_file_to_method_inliner_map.h"
 #include "driver/compiler_driver.h"
 #include "driver/dex_compilation_unit.h"
 #include "elf_writer_quick.h"
@@ -32,6 +33,7 @@
 #include "gvn.h"
 #include "inliner.h"
 #include "instruction_simplifier.h"
+#include "intrinsics.h"
 #include "jni/quick/jni_compiler.h"
 #include "mirror/art_method-inl.h"
 #include "nodes.h"
@@ -149,11 +151,12 @@
   // Enable C1visualizer output. Must be done in Init() because the compiler
   // driver is not fully initialized when passed to the compiler's constructor.
   CompilerDriver* driver = GetCompilerDriver();
-  if (driver->GetDumpPasses()) {
+  const std::string cfg_file_name = driver->GetDumpCfgFileName();
+  if (!cfg_file_name.empty()) {
     CHECK_EQ(driver->GetThreadCount(), 1U)
       << "Graph visualizer requires the compiler to run single-threaded. "
       << "Invoke the compiler with '-j1'.";
-    visualizer_output_.reset(new std::ofstream("art.cfg"));
+    visualizer_output_.reset(new std::ofstream(cfg_file_name));
   }
 }
 
@@ -214,9 +217,12 @@
   BoundsCheckElimination bce(graph);
   InstructionSimplifier simplify2(graph);
 
+  IntrinsicsRecognizer intrinsics(graph, dex_compilation_unit.GetDexFile(), driver);
+
   HOptimization* optimizations[] = {
     &redundant_phi,
     &dead_phi,
+    &intrinsics,
     &dce,
     &fold,
     &simplify1,
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 1efc52b..1d155f9 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -56,7 +56,8 @@
         blocked_core_registers_(codegen->GetBlockedCoreRegisters()),
         blocked_fp_registers_(codegen->GetBlockedFloatingPointRegisters()),
         reserved_out_slots_(0),
-        maximum_number_of_live_registers_(0) {
+        maximum_number_of_live_core_registers_(0),
+        maximum_number_of_live_fp_registers_(0) {
   codegen->SetupBlockedRegisters();
   physical_core_register_intervals_.SetSize(codegen->GetNumberOfCoreRegisters());
   physical_fp_register_intervals_.SetSize(codegen->GetNumberOfFloatingPointRegisters());
@@ -185,9 +186,6 @@
   }
   LinearScan();
 
-  size_t saved_maximum_number_of_live_registers = maximum_number_of_live_registers_;
-  maximum_number_of_live_registers_ = 0;
-
   inactive_.Reset();
   active_.Reset();
   handled_.Reset();
@@ -207,7 +205,6 @@
     }
   }
   LinearScan();
-  maximum_number_of_live_registers_ += saved_maximum_number_of_live_registers;
 }
 
 void RegisterAllocator::ProcessInstruction(HInstruction* instruction) {
@@ -602,8 +599,13 @@
     if (current->IsSlowPathSafepoint()) {
       // Synthesized interval to record the maximum number of live registers
       // at safepoints. No need to allocate a register for it.
-      maximum_number_of_live_registers_ =
-          std::max(maximum_number_of_live_registers_, active_.Size());
+      if (processing_core_registers_) {
+        maximum_number_of_live_core_registers_ =
+          std::max(maximum_number_of_live_core_registers_, active_.Size());
+      } else {
+        maximum_number_of_live_fp_registers_ =
+          std::max(maximum_number_of_live_fp_registers_, active_.Size());
+      }
       DCHECK(unhandled_->IsEmpty() || unhandled_->Peek()->GetStart() > current->GetStart());
       continue;
     }
@@ -1215,10 +1217,17 @@
         locations->SetEnvironmentAt(use->GetInputIndex(), source);
       } else {
         Location expected_location = locations->InAt(use->GetInputIndex());
-        if (expected_location.IsUnallocated()) {
-          locations->SetInAt(use->GetInputIndex(), source);
-        } else if (!expected_location.IsConstant()) {
-          AddInputMoveFor(use->GetUser(), source, expected_location);
+        // The expected (actual) location may be invalid in case the input is unused. Currently
+        // this only happens for intrinsics.
+        if (expected_location.IsValid()) {
+          if (expected_location.IsUnallocated()) {
+            locations->SetInAt(use->GetInputIndex(), source);
+          } else if (!expected_location.IsConstant()) {
+            AddInputMoveFor(use->GetUser(), source, expected_location);
+          }
+        } else {
+          DCHECK(use->GetUser()->IsInvoke());
+          DCHECK(use->GetUser()->AsInvoke()->GetIntrinsic() != Intrinsics::kNone);
         }
       }
       use = use->GetNext();
@@ -1255,8 +1264,9 @@
       switch (source.GetKind()) {
         case Location::kRegister: {
           locations->AddLiveRegister(source);
-          DCHECK_LE(locations->GetNumberOfLiveRegisters(), maximum_number_of_live_registers_);
-
+          DCHECK_LE(locations->GetNumberOfLiveRegisters(),
+                    maximum_number_of_live_core_registers_ +
+                    maximum_number_of_live_fp_registers_);
           if (current->GetType() == Primitive::kPrimNot) {
             locations->SetRegisterBit(source.reg());
           }
@@ -1349,7 +1359,8 @@
 
 void RegisterAllocator::Resolve() {
   codegen_->ComputeFrameSize(
-      spill_slots_.Size(), maximum_number_of_live_registers_, reserved_out_slots_);
+      spill_slots_.Size(), maximum_number_of_live_core_registers_,
+      maximum_number_of_live_fp_registers_, reserved_out_slots_);
 
   // Adjust the Out Location of instructions.
   // TODO: Use pointers of Location inside LiveInterval to avoid doing another iteration.
diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h
index c152a8b..70841b8 100644
--- a/compiler/optimizing/register_allocator.h
+++ b/compiler/optimizing/register_allocator.h
@@ -190,8 +190,11 @@
   // Slots reserved for out arguments.
   size_t reserved_out_slots_;
 
-  // The maximum live registers at safepoints.
-  size_t maximum_number_of_live_registers_;
+  // The maximum live core registers at safepoints.
+  size_t maximum_number_of_live_core_registers_;
+
+  // The maximum live FP registers at safepoints.
+  size_t maximum_number_of_live_fp_registers_;
 
   ART_FRIEND_TEST(RegisterAllocatorTest, FreeUntil);
 
diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h
index 2b55120..6f8b301 100644
--- a/compiler/utils/assembler_test.h
+++ b/compiler/utils/assembler_test.h
@@ -29,6 +29,10 @@
 
 namespace art {
 
+// If you want to take a look at the differences between the ART assembler and GCC, set this flag
+// to true. The disassembled files will then remain in the tmp directory.
+static constexpr bool kKeepDisassembledFiles = false;
+
 // Helper for a constexpr string length.
 constexpr size_t ConstexprStrLen(char const* str, size_t count = 0) {
   return ('\0' == str[0]) ? count : ConstexprStrLen(str+1, count+1);
@@ -685,12 +689,12 @@
 
     bool result = CompareFiles(data_name + ".dis", as_name + ".dis");
 
-    // If you want to take a look at the differences between the ART assembler and GCC, comment
-    // out the removal code.
-//    std::remove(data_name.c_str());
-//    std::remove(as_name.c_str());
-//    std::remove((data_name + ".dis").c_str());
-//    std::remove((as_name + ".dis").c_str());
+    if (!kKeepDisassembledFiles) {
+      std::remove(data_name.c_str());
+      std::remove(as_name.c_str());
+      std::remove((data_name + ".dis").c_str());
+      std::remove((as_name + ".dis").c_str());
+    }
 
     return result;
   }
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index d843a72..c7414a1 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -184,6 +184,20 @@
   EmitImmediate(imm);
 }
 
+
+void X86_64Assembler::cmov(Condition c, CpuRegister dst, CpuRegister src) {
+  cmov(c, dst, src, true);
+}
+
+void X86_64Assembler::cmov(Condition c, CpuRegister dst, CpuRegister src, bool is64bit) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitOptionalRex(false, is64bit, dst.NeedsRex(), false, src.NeedsRex());
+  EmitUint8(0x0F);
+  EmitUint8(0x40 + c);
+  EmitRegisterOperand(dst.LowBits(), src.LowBits());
+}
+
+
 void X86_64Assembler::movzxb(CpuRegister dst, CpuRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitOptionalByteRegNormalizingRex32(dst, src);
@@ -369,19 +383,26 @@
 
 
 void X86_64Assembler::movd(XmmRegister dst, CpuRegister src) {
+  movd(dst, src, true);
+}
+
+void X86_64Assembler::movd(CpuRegister dst, XmmRegister src) {
+  movd(dst, src, true);
+}
+
+void X86_64Assembler::movd(XmmRegister dst, CpuRegister src, bool is64bit) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
-  EmitRex64(dst, src);
+  EmitOptionalRex(false, is64bit, dst.NeedsRex(), false, src.NeedsRex());
   EmitUint8(0x0F);
   EmitUint8(0x6E);
   EmitOperand(dst.LowBits(), Operand(src));
 }
 
-
-void X86_64Assembler::movd(CpuRegister dst, XmmRegister src) {
+void X86_64Assembler::movd(CpuRegister dst, XmmRegister src, bool is64bit) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
-  EmitRex64(src, dst);
+  EmitOptionalRex(false, is64bit, src.NeedsRex(), false, dst.NeedsRex());
   EmitUint8(0x0F);
   EmitUint8(0x7E);
   EmitOperand(src.LowBits(), Operand(dst));
@@ -826,6 +847,39 @@
   EmitOperand(dst.LowBits(), src);
 }
 
+void X86_64Assembler::andpd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x54);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::andps(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x54);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::orpd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x56);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::orps(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x56);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
 
 void X86_64Assembler::fldl(const Address& src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
@@ -1757,6 +1811,20 @@
   EmitUint8(0xC0 + dst.LowBits());
 }
 
+void X86_64Assembler::bswapl(CpuRegister dst) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitOptionalRex(false, false, false, false, dst.NeedsRex());
+  EmitUint8(0x0F);
+  EmitUint8(0xC8 + dst.LowBits());
+}
+
+void X86_64Assembler::bswapq(CpuRegister dst) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitOptionalRex(false, true, false, false, dst.NeedsRex());
+  EmitUint8(0x0F);
+  EmitUint8(0xC8 + dst.LowBits());
+}
+
 
 void X86_64Assembler::LoadDoubleConstant(XmmRegister dst, double value) {
   // TODO: Need to have a code constants table.
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index ac8bc9a..5c8d608 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -276,6 +276,9 @@
   void movl(const Address& dst, CpuRegister src);
   void movl(const Address& dst, const Immediate& imm);
 
+  void cmov(Condition c, CpuRegister dst, CpuRegister src);  // This is the 64b version.
+  void cmov(Condition c, CpuRegister dst, CpuRegister src, bool is64bit);
+
   void movzxb(CpuRegister dst, CpuRegister src);
   void movzxb(CpuRegister dst, const Address& src);
   void movsxb(CpuRegister dst, CpuRegister src);
@@ -303,8 +306,10 @@
   void movsxd(CpuRegister dst, CpuRegister src);
   void movsxd(CpuRegister dst, const Address& src);
 
-  void movd(XmmRegister dst, CpuRegister src);
-  void movd(CpuRegister dst, XmmRegister src);
+  void movd(XmmRegister dst, CpuRegister src);  // Note: this is the r64 version, formally movq.
+  void movd(CpuRegister dst, XmmRegister src);  // Note: this is the r64 version, formally movq.
+  void movd(XmmRegister dst, CpuRegister src, bool is64bit);
+  void movd(CpuRegister dst, XmmRegister src, bool is64bit);
 
   void addss(XmmRegister dst, XmmRegister src);
   void addss(XmmRegister dst, const Address& src);
@@ -360,6 +365,11 @@
   void xorps(XmmRegister dst, XmmRegister src);
 
   void andpd(XmmRegister dst, const Address& src);
+  void andpd(XmmRegister dst, XmmRegister src);
+  void andps(XmmRegister dst, XmmRegister src);
+
+  void orpd(XmmRegister dst, XmmRegister src);
+  void orps(XmmRegister dst, XmmRegister src);
 
   void flds(const Address& src);
   void fstps(const Address& dst);
@@ -504,6 +514,9 @@
 
   void setcc(Condition condition, CpuRegister dst);
 
+  void bswapl(CpuRegister dst);
+  void bswapq(CpuRegister dst);
+
   //
   // Macros for High-level operations.
   //
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index b8d724d..6df4144 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -330,7 +330,6 @@
     assembler->shlq(*reg, shifter);
     str << "shlq %cl, %" << assembler_test->GetRegisterName(*reg) << "\n";
   }
-  printf("%s\n", str.str().c_str());
 
   return str.str();
 }
@@ -690,6 +689,22 @@
   DriverStr(RepeatFF(&x86_64::X86_64Assembler::xorpd, "xorpd %{reg2}, %{reg1}"), "xorpd");
 }
 
+TEST_F(AssemblerX86_64Test, Andps) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::andps, "andps %{reg2}, %{reg1}"), "andps");
+}
+
+TEST_F(AssemblerX86_64Test, Andpd) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::andpd, "andpd %{reg2}, %{reg1}"), "andpd");
+}
+
+TEST_F(AssemblerX86_64Test, Orps) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::orps, "orps %{reg2}, %{reg1}"), "orps");
+}
+
+TEST_F(AssemblerX86_64Test, Orpd) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::orpd, "orpd %{reg2}, %{reg1}"), "orpd");
+}
+
 // X87
 
 std::string x87_fn(AssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED,
@@ -758,6 +773,14 @@
 // MISC //
 //////////
 
+TEST_F(AssemblerX86_64Test, Bswapl) {
+  DriverStr(Repeatr(&x86_64::X86_64Assembler::bswapl, "bswap %{reg}"), "bswapl");
+}
+
+TEST_F(AssemblerX86_64Test, Bswapq) {
+  DriverStr(RepeatR(&x86_64::X86_64Assembler::bswapq, "bswap %{reg}"), "bswapq");
+}
+
 std::string setcc_test_fn(AssemblerX86_64Test::Base* assembler_test,
                           x86_64::X86_64Assembler* assembler) {
   // From Condition
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 6b9ebfa..b576ca2 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -668,6 +668,8 @@
         dump_timing_ = true;
       } else if (option == "--dump-passes") {
         dump_passes_ = true;
+      } else if (option.starts_with("--dump-cfg=")) {
+        dump_cfg_file_name_ = option.substr(strlen("--dump-cfg=")).data();
       } else if (option == "--dump-stats") {
         dump_stats_ = true;
       } else if (option == "--include-debug-symbols" || option == "--no-strip-symbols") {
@@ -1237,6 +1239,7 @@
                                      thread_count_,
                                      dump_stats_,
                                      dump_passes_,
+                                     dump_cfg_file_name_,
                                      compiler_phases_timings_.get(),
                                      swap_fd_,
                                      profile_file_));
@@ -1694,6 +1697,7 @@
   bool dump_passes_;
   bool dump_timing_;
   bool dump_slow_timing_;
+  std::string dump_cfg_file_name_;
   std::string swap_file_name_;
   int swap_fd_;
   std::string profile_file_;  // Profile file to use
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index 285007c..986b7ec 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -799,6 +799,9 @@
 }
 
 TEST_F(StubTest, UnlockObject) {
+  // This will lead to monitor error messages in the log.
+  ScopedLogSeverity sls(LogSeverity::FATAL);
+
   TestUnlockObject(this);
 }
 
@@ -992,6 +995,9 @@
   TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING();
 
 #if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
+  // This will lead to OOM  error messages in the log.
+  ScopedLogSeverity sls(LogSeverity::FATAL);
+
   // TODO: Check the "Unresolved" allocation stubs
 
   Thread* self = Thread::Current();
@@ -1116,6 +1122,9 @@
 #if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
   // TODO: Check the "Unresolved" allocation stubs
 
+  // This will lead to OOM  error messages in the log.
+  ScopedLogSeverity sls(LogSeverity::FATAL);
+
   Thread* self = Thread::Current();
   // Create an object
   ScopedObjectAccess soa(self);
diff --git a/runtime/base/logging.cc b/runtime/base/logging.cc
index f3e0918..0764b87 100644
--- a/runtime/base/logging.cc
+++ b/runtime/base/logging.cc
@@ -303,4 +303,13 @@
 #endif
 }
 
+ScopedLogSeverity::ScopedLogSeverity(LogSeverity level) {
+  old_ = gMinimumLogSeverity;
+  gMinimumLogSeverity = level;
+}
+
+ScopedLogSeverity::~ScopedLogSeverity() {
+  gMinimumLogSeverity = old_;
+}
+
 }  // namespace art
diff --git a/runtime/base/logging.h b/runtime/base/logging.h
index ae83e33..cc1a4a1 100644
--- a/runtime/base/logging.h
+++ b/runtime/base/logging.h
@@ -254,6 +254,16 @@
   DISALLOW_COPY_AND_ASSIGN(LogMessage);
 };
 
+// Allows to temporarily change the minimum severity level for logging.
+class ScopedLogSeverity {
+ public:
+  explicit ScopedLogSeverity(LogSeverity level);
+  ~ScopedLogSeverity();
+
+ private:
+  LogSeverity old_;
+};
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_BASE_LOGGING_H_
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index a4eb318..17b2ac9 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -612,7 +612,7 @@
 #if ART_USE_FUTEXES
   bool done = false;
   timespec end_abs_ts;
-  InitTimeSpec(true, CLOCK_REALTIME, ms, ns, &end_abs_ts);
+  InitTimeSpec(true, CLOCK_MONOTONIC, ms, ns, &end_abs_ts);
   do {
     int32_t cur_state = state_.LoadRelaxed();
     if (cur_state == 0) {
@@ -621,7 +621,7 @@
     } else {
       // Failed to acquire, hang up.
       timespec now_abs_ts;
-      InitTimeSpec(true, CLOCK_REALTIME, 0, 0, &now_abs_ts);
+      InitTimeSpec(true, CLOCK_MONOTONIC, 0, 0, &now_abs_ts);
       timespec rel_ts;
       if (ComputeRelativeTimeSpec(&rel_ts, end_abs_ts, now_abs_ts)) {
         return false;  // Timed out.
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 438cebf..377a3c3 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -4486,6 +4486,171 @@
   return true;
 }
 
+static void CountMethodsAndFields(ClassDataItemIterator& dex_data,
+                                  size_t* virtual_methods,
+                                  size_t* direct_methods,
+                                  size_t* static_fields,
+                                  size_t* instance_fields) {
+  *virtual_methods = *direct_methods = *static_fields = *instance_fields = 0;
+
+  while (dex_data.HasNextStaticField()) {
+    dex_data.Next();
+    (*static_fields)++;
+  }
+  while (dex_data.HasNextInstanceField()) {
+    dex_data.Next();
+    (*instance_fields)++;
+  }
+  while (dex_data.HasNextDirectMethod()) {
+    (*direct_methods)++;
+    dex_data.Next();
+  }
+  while (dex_data.HasNextVirtualMethod()) {
+    (*virtual_methods)++;
+    dex_data.Next();
+  }
+  DCHECK(!dex_data.HasNext());
+}
+
+static void DumpClass(std::ostream& os,
+                      const DexFile& dex_file, const DexFile::ClassDef& dex_class_def,
+                      const char* suffix) {
+  ClassDataItemIterator dex_data(dex_file, dex_file.GetClassData(dex_class_def));
+  os << dex_file.GetClassDescriptor(dex_class_def) << suffix << ":\n";
+  os << " Static fields:\n";
+  while (dex_data.HasNextStaticField()) {
+    const DexFile::FieldId& id = dex_file.GetFieldId(dex_data.GetMemberIndex());
+    os << "  " << dex_file.GetFieldTypeDescriptor(id) << " " << dex_file.GetFieldName(id) << "\n";
+    dex_data.Next();
+  }
+  os << " Instance fields:\n";
+  while (dex_data.HasNextInstanceField()) {
+    const DexFile::FieldId& id = dex_file.GetFieldId(dex_data.GetMemberIndex());
+    os << "  " << dex_file.GetFieldTypeDescriptor(id) << " " << dex_file.GetFieldName(id) << "\n";
+    dex_data.Next();
+  }
+  os << " Direct methods:\n";
+  while (dex_data.HasNextDirectMethod()) {
+    const DexFile::MethodId& id = dex_file.GetMethodId(dex_data.GetMemberIndex());
+    os << "  " << dex_file.GetMethodName(id) << dex_file.GetMethodSignature(id).ToString() << "\n";
+    dex_data.Next();
+  }
+  os << " Virtual methods:\n";
+  while (dex_data.HasNextVirtualMethod()) {
+    const DexFile::MethodId& id = dex_file.GetMethodId(dex_data.GetMemberIndex());
+    os << "  " << dex_file.GetMethodName(id) << dex_file.GetMethodSignature(id).ToString() << "\n";
+    dex_data.Next();
+  }
+}
+
+static std::string DumpClasses(const DexFile& dex_file1, const DexFile::ClassDef& dex_class_def1,
+                               const DexFile& dex_file2, const DexFile::ClassDef& dex_class_def2) {
+  std::ostringstream os;
+  DumpClass(os, dex_file1, dex_class_def1, " (Compile time)");
+  DumpClass(os, dex_file2, dex_class_def2, " (Runtime)");
+  return os.str();
+}
+
+
+// Very simple structural check on whether the classes match. Only compares the number of
+// methods and fields.
+static bool SimpleStructuralCheck(const DexFile& dex_file1, const DexFile::ClassDef& dex_class_def1,
+                                  const DexFile& dex_file2, const DexFile::ClassDef& dex_class_def2,
+                                  std::string* error_msg) {
+  ClassDataItemIterator dex_data1(dex_file1, dex_file1.GetClassData(dex_class_def1));
+  ClassDataItemIterator dex_data2(dex_file2, dex_file2.GetClassData(dex_class_def2));
+
+  // Counters for current dex file.
+  size_t dex_virtual_methods1, dex_direct_methods1, dex_static_fields1, dex_instance_fields1;
+  CountMethodsAndFields(dex_data1, &dex_virtual_methods1, &dex_direct_methods1, &dex_static_fields1,
+                        &dex_instance_fields1);
+  // Counters for compile-time dex file.
+  size_t dex_virtual_methods2, dex_direct_methods2, dex_static_fields2, dex_instance_fields2;
+  CountMethodsAndFields(dex_data2, &dex_virtual_methods2, &dex_direct_methods2, &dex_static_fields2,
+                        &dex_instance_fields2);
+
+  if (dex_virtual_methods1 != dex_virtual_methods2) {
+    std::string class_dump = DumpClasses(dex_file1, dex_class_def1, dex_file2, dex_class_def2);
+    *error_msg = StringPrintf("Virtual method count off: %zu vs %zu\n%s", dex_virtual_methods1,
+                              dex_virtual_methods2, class_dump.c_str());
+    return false;
+  }
+  if (dex_direct_methods1 != dex_direct_methods2) {
+    std::string class_dump = DumpClasses(dex_file1, dex_class_def1, dex_file2, dex_class_def2);
+    *error_msg = StringPrintf("Direct method count off: %zu vs %zu\n%s", dex_direct_methods1,
+                              dex_direct_methods2, class_dump.c_str());
+    return false;
+  }
+  if (dex_static_fields1 != dex_static_fields2) {
+    std::string class_dump = DumpClasses(dex_file1, dex_class_def1, dex_file2, dex_class_def2);
+    *error_msg = StringPrintf("Static field count off: %zu vs %zu\n%s", dex_static_fields1,
+                              dex_static_fields2, class_dump.c_str());
+    return false;
+  }
+  if (dex_instance_fields1 != dex_instance_fields2) {
+    std::string class_dump = DumpClasses(dex_file1, dex_class_def1, dex_file2, dex_class_def2);
+    *error_msg = StringPrintf("Instance field count off: %zu vs %zu\n%s", dex_instance_fields1,
+                              dex_instance_fields2, class_dump.c_str());
+    return false;
+  }
+
+  return true;
+}
+
+// Checks whether a the super-class changed from what we had at compile-time. This would
+// invalidate quickening.
+static bool CheckSuperClassChange(Handle<mirror::Class> klass,
+                                  const DexFile& dex_file,
+                                  const DexFile::ClassDef& class_def,
+                                  mirror::Class* super_class)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  // Check for unexpected changes in the superclass.
+  // Quick check 1) is the super_class class-loader the boot class loader? This always has
+  // precedence.
+  if (super_class->GetClassLoader() != nullptr &&
+      // Quick check 2) different dex cache? Breaks can only occur for different dex files,
+      // which is implied by different dex cache.
+      klass->GetDexCache() != super_class->GetDexCache()) {
+    // Now comes the expensive part: things can be broken if (a) the klass' dex file has a
+    // definition for the super-class, and (b) the files are in separate oat files. The oat files
+    // are referenced from the dex file, so do (b) first. Only relevant if we have oat files.
+    const OatFile* class_oat_file = dex_file.GetOatFile();
+    if (class_oat_file != nullptr) {
+      const OatFile* loaded_super_oat_file = super_class->GetDexFile().GetOatFile();
+      if (loaded_super_oat_file != nullptr && class_oat_file != loaded_super_oat_file) {
+        // Now check (a).
+        const DexFile::ClassDef* super_class_def = dex_file.FindClassDef(class_def.superclass_idx_);
+        if (super_class_def != nullptr) {
+          // Uh-oh, we found something. Do our check.
+          std::string error_msg;
+          if (!SimpleStructuralCheck(dex_file, *super_class_def,
+                                     super_class->GetDexFile(), *super_class->GetClassDef(),
+                                     &error_msg)) {
+            // Print a warning to the log. This exception might be caught, e.g., as common in test
+            // drivers. When the class is later tried to be used, we re-throw a new instance, as we
+            // only save the type of the exception.
+            LOG(WARNING) << "Incompatible structural change detected: " <<
+                StringPrintf(
+                    "Structural change of %s is hazardous (%s at compile time, %s at runtime): %s",
+                    PrettyType(super_class_def->class_idx_, dex_file).c_str(),
+                    class_oat_file->GetLocation().c_str(),
+                    loaded_super_oat_file->GetLocation().c_str(),
+                    error_msg.c_str());
+            ThrowIncompatibleClassChangeError(klass.Get(),
+                "Structural change of %s is hazardous (%s at compile time, %s at runtime): %s",
+                PrettyType(super_class_def->class_idx_, dex_file).c_str(),
+                class_oat_file->GetLocation().c_str(),
+                loaded_super_oat_file->GetLocation().c_str(),
+                error_msg.c_str());
+            return false;
+          }
+        }
+      }
+    }
+  }
+  return true;
+}
+
 bool ClassLinker::LoadSuperAndInterfaces(Handle<mirror::Class> klass, const DexFile& dex_file) {
   CHECK_EQ(mirror::Class::kStatusIdx, klass->GetStatus());
   const DexFile::ClassDef& class_def = dex_file.GetClassDef(klass->GetDexClassDefIndex());
@@ -4505,6 +4670,11 @@
     }
     CHECK(super_class->IsResolved());
     klass->SetSuperClass(super_class);
+
+    if (!CheckSuperClassChange(klass, dex_file, class_def, super_class)) {
+      DCHECK(Thread::Current()->IsExceptionPending());
+      return false;
+    }
   }
   const DexFile::TypeList* interfaces = dex_file.GetInterfacesList(class_def);
   if (interfaces != nullptr) {
diff --git a/runtime/common_runtime_test.cc b/runtime/common_runtime_test.cc
index e017699..d48ac9d 100644
--- a/runtime/common_runtime_test.cc
+++ b/runtime/common_runtime_test.cc
@@ -20,6 +20,7 @@
 #include <dlfcn.h>
 #include <fcntl.h>
 #include <ScopedLocalRef.h>
+#include <stdlib.h>
 
 #include "../../external/icu/icu4c/source/common/unicode/uvernum.h"
 #include "base/macros.h"
@@ -43,6 +44,11 @@
 #include "well_known_classes.h"
 
 int main(int argc, char **argv) {
+  // Gtests can be very noisy. For example, an executable with multiple tests will trigger native
+  // bridge warnings. The following line reduces the minimum log severity to ERROR and suppresses
+  // everything else. In case you want to see all messages, comment out the line.
+  setenv("ANDROID_LOG_TAGS", "*:e", 1);
+
   art::InitLogging(argv);
   LOG(::art::INFO) << "Running main() from common_runtime_test.cc...";
   testing::InitGoogleTest(&argc, argv);
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index 3f6175f..dc85f6c 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -250,6 +250,7 @@
                     location,
                     location_checksum,
                     mem_map,
+                    nullptr,
                     error_msg);
 }
 
@@ -337,9 +338,12 @@
                                                    size_t size,
                                                    const std::string& location,
                                                    uint32_t location_checksum,
-                                                   MemMap* mem_map, std::string* error_msg) {
+                                                   MemMap* mem_map,
+                                                   const OatFile* oat_file,
+                                                   std::string* error_msg) {
   CHECK_ALIGNED(base, 4);  // various dex file structures must be word aligned
-  std::unique_ptr<DexFile> dex_file(new DexFile(base, size, location, location_checksum, mem_map));
+  std::unique_ptr<DexFile> dex_file(
+      new DexFile(base, size, location, location_checksum, mem_map, oat_file));
   if (!dex_file->Init(error_msg)) {
     dex_file.reset();
   }
@@ -349,7 +353,8 @@
 DexFile::DexFile(const uint8_t* base, size_t size,
                  const std::string& location,
                  uint32_t location_checksum,
-                 MemMap* mem_map)
+                 MemMap* mem_map,
+                 const OatFile* oat_file)
     : begin_(base),
       size_(size),
       location_(location),
@@ -363,7 +368,8 @@
       proto_ids_(reinterpret_cast<const ProtoId*>(base + header_->proto_ids_off_)),
       class_defs_(reinterpret_cast<const ClassDef*>(base + header_->class_defs_off_)),
       find_class_def_misses_(0),
-      class_def_index_(nullptr) {
+      class_def_index_(nullptr),
+      oat_file_(oat_file) {
   CHECK(begin_ != NULL) << GetLocation();
   CHECK_GT(size_, 0U) << GetLocation();
 }
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index 019c8e6..9b8f254 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -44,6 +44,7 @@
 }  // namespace mirror
 class ClassLinker;
 class MemMap;
+class OatFile;
 class Signature;
 template<class T> class Handle;
 class StringPiece;
@@ -391,8 +392,9 @@
   static std::unique_ptr<const DexFile> Open(const uint8_t* base, size_t size,
                                              const std::string& location,
                                              uint32_t location_checksum,
+                                             const OatFile* oat_file,
                                              std::string* error_msg) {
-    return OpenMemory(base, size, location, location_checksum, NULL, error_msg);
+    return OpenMemory(base, size, location, location_checksum, NULL, oat_file, error_msg);
   }
 
   // Open all classesXXX.dex files from a zip archive.
@@ -891,6 +893,10 @@
   //     the dex_location where it's file name part has been made canonical.
   static std::string GetDexCanonicalLocation(const char* dex_location);
 
+  const OatFile* GetOatFile() const {
+    return oat_file_;
+  }
+
  private:
   // Opens a .dex file
   static std::unique_ptr<const DexFile> OpenFile(int fd, const char* location,
@@ -927,12 +933,14 @@
                                                    const std::string& location,
                                                    uint32_t location_checksum,
                                                    MemMap* mem_map,
+                                                   const OatFile* oat_file,
                                                    std::string* error_msg);
 
   DexFile(const uint8_t* base, size_t size,
           const std::string& location,
           uint32_t location_checksum,
-          MemMap* mem_map);
+          MemMap* mem_map,
+          const OatFile* oat_file);
 
   // Top-level initializer that calls other Init methods.
   bool Init(std::string* error_msg);
@@ -1015,6 +1023,10 @@
   };
   typedef HashMap<const char*, const ClassDef*, UTF16EmptyFn, UTF16HashCmp, UTF16HashCmp> Index;
   mutable Atomic<Index*> class_def_index_;
+
+  // The oat file this dex file was loaded from. May be null in case the dex file is not coming
+  // from an oat file, e.g., directly from an apk.
+  const OatFile* oat_file_;
 };
 std::ostream& operator<<(std::ostream& os, const DexFile& dex_file);
 
diff --git a/runtime/gc/space/space_test.h b/runtime/gc/space/space_test.h
index 9f39b80..09d10dd 100644
--- a/runtime/gc/space/space_test.h
+++ b/runtime/gc/space/space_test.h
@@ -127,6 +127,9 @@
 }
 
 void SpaceTest::InitTestBody(CreateSpaceFn create_space) {
+  // This will lead to error messages in the log.
+  ScopedLogSeverity sls(LogSeverity::FATAL);
+
   {
     // Init < max == growth
     std::unique_ptr<Space> space(create_space("test", 16 * MB, 32 * MB, 32 * MB, nullptr));
diff --git a/runtime/indirect_reference_table_test.cc b/runtime/indirect_reference_table_test.cc
index 99ee597..1156cf5 100644
--- a/runtime/indirect_reference_table_test.cc
+++ b/runtime/indirect_reference_table_test.cc
@@ -43,6 +43,9 @@
 }
 
 TEST_F(IndirectReferenceTableTest, BasicTest) {
+  // This will lead to error messages in the log.
+  ScopedLogSeverity sls(LogSeverity::FATAL);
+
   ScopedObjectAccess soa(Thread::Current());
   static const size_t kTableInitial = 10;
   static const size_t kTableMax = 20;
diff --git a/runtime/jni_internal_test.cc b/runtime/jni_internal_test.cc
index 8e32968..906aa4c 100644
--- a/runtime/jni_internal_test.cc
+++ b/runtime/jni_internal_test.cc
@@ -634,6 +634,9 @@
 }
 
 TEST_F(JniInternalTest, FindClass) {
+  // This tests leads to warnings in the log.
+  ScopedLogSeverity sls(LogSeverity::ERROR);
+
   FindClassTest(false);
   FindClassTest(true);
 }
@@ -890,40 +893,44 @@
   // Sanity check that no exceptions are pending.
   ASSERT_FALSE(env_->ExceptionCheck());
 
-  // Check that registering method without name causes a NoSuchMethodError.
+  // The following can print errors to the log we'd like to ignore.
   {
-    JNINativeMethod methods[] = { { nullptr, "()V", native_function } };
-    EXPECT_EQ(env_->RegisterNatives(jlobject, methods, 1), JNI_ERR);
-  }
-  ExpectException(jlnsme);
+    ScopedLogSeverity sls(LogSeverity::FATAL);
+    // Check that registering method without name causes a NoSuchMethodError.
+    {
+      JNINativeMethod methods[] = { { nullptr, "()V", native_function } };
+      EXPECT_EQ(env_->RegisterNatives(jlobject, methods, 1), JNI_ERR);
+    }
+    ExpectException(jlnsme);
 
-  // Check that registering method without signature causes a NoSuchMethodError.
-  {
-    JNINativeMethod methods[] = { { "notify", nullptr, native_function } };
-    EXPECT_EQ(env_->RegisterNatives(jlobject, methods, 1), JNI_ERR);
-  }
-  ExpectException(jlnsme);
+    // Check that registering method without signature causes a NoSuchMethodError.
+    {
+      JNINativeMethod methods[] = { { "notify", nullptr, native_function } };
+      EXPECT_EQ(env_->RegisterNatives(jlobject, methods, 1), JNI_ERR);
+    }
+    ExpectException(jlnsme);
 
-  // Check that registering method without function causes a NoSuchMethodError.
-  {
-    JNINativeMethod methods[] = { { "notify", "()V", nullptr } };
-    EXPECT_EQ(env_->RegisterNatives(jlobject, methods, 1), JNI_ERR);
-  }
-  ExpectException(jlnsme);
+    // Check that registering method without function causes a NoSuchMethodError.
+    {
+      JNINativeMethod methods[] = { { "notify", "()V", nullptr } };
+      EXPECT_EQ(env_->RegisterNatives(jlobject, methods, 1), JNI_ERR);
+    }
+    ExpectException(jlnsme);
 
-  // Check that registering to a non-existent java.lang.Object.foo() causes a NoSuchMethodError.
-  {
-    JNINativeMethod methods[] = { { "foo", "()V", native_function } };
-    EXPECT_EQ(env_->RegisterNatives(jlobject, methods, 1), JNI_ERR);
-  }
-  ExpectException(jlnsme);
+    // Check that registering to a non-existent java.lang.Object.foo() causes a NoSuchMethodError.
+    {
+      JNINativeMethod methods[] = { { "foo", "()V", native_function } };
+      EXPECT_EQ(env_->RegisterNatives(jlobject, methods, 1), JNI_ERR);
+    }
+    ExpectException(jlnsme);
 
-  // Check that registering non-native methods causes a NoSuchMethodError.
-  {
-    JNINativeMethod methods[] = { { "equals", "(Ljava/lang/Object;)Z", native_function } };
-    EXPECT_EQ(env_->RegisterNatives(jlobject, methods, 1), JNI_ERR);
+    // Check that registering non-native methods causes a NoSuchMethodError.
+    {
+      JNINativeMethod methods[] = { { "equals", "(Ljava/lang/Object;)Z", native_function } };
+      EXPECT_EQ(env_->RegisterNatives(jlobject, methods, 1), JNI_ERR);
+    }
+    ExpectException(jlnsme);
   }
-  ExpectException(jlnsme);
 
   // Check that registering native methods is successful.
   {
@@ -1707,6 +1714,9 @@
 }
 
 TEST_F(JniInternalTest, DeleteLocalRef) {
+  // This tests leads to warnings and errors in the log.
+  ScopedLogSeverity sls(LogSeverity::FATAL);
+
   jstring s = env_->NewStringUTF("");
   ASSERT_NE(s, nullptr);
   env_->DeleteLocalRef(s);
@@ -1743,6 +1753,9 @@
   ASSERT_EQ(JNI_OK, env_->PushLocalFrame(0));
   env_->PopLocalFrame(nullptr);
 
+  // The following two tests will print errors to the log.
+  ScopedLogSeverity sls(LogSeverity::FATAL);
+
   // Negative capacities are not allowed.
   ASSERT_EQ(JNI_ERR, env_->PushLocalFrame(-1));
 
@@ -1751,6 +1764,9 @@
 }
 
 TEST_F(JniInternalTest, PushLocalFrame_PopLocalFrame) {
+  // This tests leads to errors in the log.
+  ScopedLogSeverity sls(LogSeverity::FATAL);
+
   jobject original = env_->NewStringUTF("");
   ASSERT_NE(original, nullptr);
 
@@ -1815,6 +1831,9 @@
 }
 
 TEST_F(JniInternalTest, DeleteGlobalRef) {
+  // This tests leads to warnings and errors in the log.
+  ScopedLogSeverity sls(LogSeverity::FATAL);
+
   jstring s = env_->NewStringUTF("");
   ASSERT_NE(s, nullptr);
 
@@ -1865,6 +1884,9 @@
 }
 
 TEST_F(JniInternalTest, DeleteWeakGlobalRef) {
+  // This tests leads to warnings and errors in the log.
+  ScopedLogSeverity sls(LogSeverity::FATAL);
+
   jstring s = env_->NewStringUTF("");
   ASSERT_NE(s, nullptr);
 
@@ -1989,6 +2011,9 @@
 }
 
 TEST_F(JniInternalTest, MonitorEnterExit) {
+  // This will print some error messages. Suppress.
+  ScopedLogSeverity sls(LogSeverity::FATAL);
+
   // Create an object to torture.
   jclass object_class = env_->FindClass("java/lang/Object");
   ASSERT_NE(object_class, nullptr);
diff --git a/runtime/monitor_test.cc b/runtime/monitor_test.cc
index adc7848..6d1e721 100644
--- a/runtime/monitor_test.cc
+++ b/runtime/monitor_test.cc
@@ -356,6 +356,8 @@
 TEST_F(MonitorTest, CheckExceptionsWait1) {
   // Make the CreateTask wait 10ms, the UseTask wait 10ms.
   // => The use task will get the lock first and get to self == owner check.
+  // This will lead to OOM and monitor error messages in the log.
+  ScopedLogSeverity sls(LogSeverity::FATAL);
   CommonWaitSetup(this, class_linker_, 10, 50, false, false, 2, 50, true,
                   "Monitor test thread pool 1");
 }
@@ -364,6 +366,8 @@
 TEST_F(MonitorTest, CheckExceptionsWait2) {
   // Make the CreateTask wait 0ms, the UseTask wait 10ms.
   // => The create task will get the lock first and get to ms >= 0
+  // This will lead to OOM and monitor error messages in the log.
+  ScopedLogSeverity sls(LogSeverity::FATAL);
   CommonWaitSetup(this, class_linker_, 0, -1, true, false, 10, 50, true,
                   "Monitor test thread pool 2");
 }
@@ -373,6 +377,8 @@
   // Make the CreateTask wait 0ms, then Wait for a long time. Make the InterruptTask wait 10ms,
   // after which it will interrupt the create task and then wait another 10ms.
   // => The create task will get to the interrupted-exception throw.
+  // This will lead to OOM and monitor error messages in the log.
+  ScopedLogSeverity sls(LogSeverity::FATAL);
   CommonWaitSetup(this, class_linker_, 0, 500, true, true, 10, 50, true,
                   "Monitor test thread pool 3");
 }
diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc
index 358519b..9061bb3 100644
--- a/runtime/oat_file.cc
+++ b/runtime/oat_file.cc
@@ -456,7 +456,7 @@
 
 std::unique_ptr<const DexFile> OatFile::OatDexFile::OpenDexFile(std::string* error_msg) const {
   return DexFile::Open(dex_file_pointer_, FileSize(), dex_file_location_,
-                       dex_file_location_checksum_, error_msg);
+                       dex_file_location_checksum_, GetOatFile(), error_msg);
 }
 
 uint32_t OatFile::OatDexFile::GetOatClassOffset(uint16_t class_def_index) const {
diff --git a/runtime/quick/inline_method_analyser.h b/runtime/quick/inline_method_analyser.h
index 72b696b..3463025 100644
--- a/runtime/quick/inline_method_analyser.h
+++ b/runtime/quick/inline_method_analyser.h
@@ -103,6 +103,9 @@
   kIntrinsicFlagIsObject   = 4,
   // kIntrinsicUnsafePut
   kIntrinsicFlagIsOrdered  = 8,
+
+  // kIntrinsicDoubleCvt, kIntrinsicFloatCvt.
+  kIntrinsicFlagToFloatingPoint = kIntrinsicFlagMin,
 };
 
 struct InlineIGetIPutData {
diff --git a/runtime/runtime_linux.cc b/runtime/runtime_linux.cc
index a5a0204..35d944f 100644
--- a/runtime/runtime_linux.cc
+++ b/runtime/runtime_linux.cc
@@ -25,14 +25,17 @@
 
 #include "base/dumpable.h"
 #include "base/logging.h"
+#include "base/macros.h"
 #include "base/mutex.h"
 #include "base/stringprintf.h"
 #include "thread-inl.h"
+#include "thread_list.h"
 #include "utils.h"
 
 namespace art {
 
 static constexpr bool kDumpHeapObjectOnSigsevg = false;
+static constexpr bool kUseSigRTTimeout = true;
 
 struct Backtrace {
  public:
@@ -283,10 +286,29 @@
   mcontext_t& context;
 };
 
+// Return the signal number we recognize as timeout. -1 means not active/supported.
+static int GetTimeoutSignal() {
+#if defined(__APPLE__)
+  // Mac does not support realtime signals.
+  UNUSED(kUseSigRTTimeout);
+  return -1;
+#else
+  return kUseSigRTTimeout ? (SIGRTMIN + 2) : -1;
+#endif
+}
+
+static bool IsTimeoutSignal(int signal_number) {
+  return signal_number == GetTimeoutSignal();
+}
+
 void HandleUnexpectedSignal(int signal_number, siginfo_t* info, void* raw_context) {
   static bool handlingUnexpectedSignal = false;
   if (handlingUnexpectedSignal) {
     LogMessage::LogLine(__FILE__, __LINE__, INTERNAL_FATAL, "HandleUnexpectedSignal reentered\n");
+    if (IsTimeoutSignal(signal_number)) {
+      // Ignore a recursive timeout.
+      return;
+    }
     _exit(1);
   }
   handlingUnexpectedSignal = true;
@@ -320,6 +342,10 @@
                       << "Backtrace:\n" << Dumpable<Backtrace>(thread_backtrace);
   Runtime* runtime = Runtime::Current();
   if (runtime != nullptr) {
+    if (IsTimeoutSignal(signal_number)) {
+      // Special timeout signal. Try to dump all threads.
+      runtime->GetThreadList()->DumpForSigQuit(LOG(INTERNAL_FATAL));
+    }
     gc::Heap* heap = runtime->GetHeap();
     LOG(INTERNAL_FATAL) << "Fault message: " << runtime->GetFaultMessage();
     if (kDumpHeapObjectOnSigsevg && heap != nullptr && info != nullptr) {
@@ -374,6 +400,10 @@
   rc += sigaction(SIGSTKFLT, &action, NULL);
 #endif
   rc += sigaction(SIGTRAP, &action, NULL);
+  // Special dump-all timeout.
+  if (GetTimeoutSignal() != -1) {
+    rc += sigaction(GetTimeoutSignal(), &action, NULL);
+  }
   CHECK_EQ(rc, 0);
 }
 
diff --git a/runtime/utils.cc b/runtime/utils.cc
index d7d4ec2..908cfbd2 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -1208,7 +1208,7 @@
   if (kUseAddr2line) {
     // Try to run it to see whether we have it. Push an argument so that it doesn't assume a.out
     // and print to stderr.
-    use_addr2line = RunCommand("addr2line -h", nullptr, nullptr);
+    use_addr2line = (gAborting > 0) && RunCommand("addr2line -h", nullptr, nullptr);
   } else {
     use_addr2line = false;
   }
diff --git a/runtime/utils_test.cc b/runtime/utils_test.cc
index a98bc90..a3dd13c 100644
--- a/runtime/utils_test.cc
+++ b/runtime/utils_test.cc
@@ -392,6 +392,9 @@
 }
 
 TEST_F(UtilsTest, ExecError) {
+  // This will lead to error messages in the log.
+  ScopedLogSeverity sls(LogSeverity::FATAL);
+
   std::vector<std::string> command;
   command.push_back("bogus");
   std::string error_msg;
diff --git a/test/082-inline-execute/src/Main.java b/test/082-inline-execute/src/Main.java
index 56972ff..862fe06 100644
--- a/test/082-inline-execute/src/Main.java
+++ b/test/082-inline-execute/src/Main.java
@@ -119,6 +119,9 @@
     }
   }
 
+  // Break up the charAt tests. The optimizing compiler doesn't optimize methods with try-catch yet,
+  // so we need to separate out the tests that are expected to throw exception
+
   public static void test_String_charAt() {
     String testStr = "Now is the time";
 
@@ -127,6 +130,12 @@
     Assert.assertEquals(' ', testStr.charAt(10));
     Assert.assertEquals('e', testStr.charAt(testStr.length()-1));
 
+    test_String_charAtExc();
+    test_String_charAtExc2();
+  }
+
+  private static void test_String_charAtExc() {
+    String testStr = "Now is the time";
     try {
       testStr.charAt(-1);
       Assert.fail();
@@ -146,6 +155,19 @@
     }
   }
 
+  private static void test_String_charAtExc2() {
+    try {
+      test_String_charAtExc3();
+      Assert.fail();
+    } catch (StringIndexOutOfBoundsException expected) {
+    }
+  }
+
+  private static void test_String_charAtExc3() {
+    String testStr = "Now is the time";
+    Assert.assertEquals('N', testStr.charAt(-1));
+  }
+
   static int start;
   private static int[] negIndex = { -100000 };
   public static void test_String_indexOf() {
diff --git a/test/114-ParallelGC/src/Main.java b/test/114-ParallelGC/src/Main.java
index 8e2519d..963fdac 100644
--- a/test/114-ParallelGC/src/Main.java
+++ b/test/114-ParallelGC/src/Main.java
@@ -24,7 +24,10 @@
 
 public class Main implements Runnable {
 
-    public final static long TIMEOUT_VALUE = 5;  // Timeout in minutes.
+    // Timeout in minutes. Make it larger than the run-test timeout to get a native thread dump by
+    // ART on timeout when running on the host.
+    public final static long TIMEOUT_VALUE = 7;
+
     public final static long MAX_SIZE = 1000;  // Maximum size of array-list to allocate.
 
     public static void main(String[] args) throws Exception {
diff --git a/test/131-structural-change/build b/test/131-structural-change/build
new file mode 100755
index 0000000..7ddc81d
--- /dev/null
+++ b/test/131-structural-change/build
@@ -0,0 +1,31 @@
+#!/bin/bash
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Stop if something fails.
+set -e
+
+mkdir classes
+${JAVAC} -d classes `find src -name '*.java'`
+
+mkdir classes-ex
+${JAVAC} -d classes-ex `find src-ex -name '*.java'`
+
+if [ ${NEED_DEX} = "true" ]; then
+  ${DX} -JXmx256m --debug --dex --dump-to=classes.lst --output=classes.dex --dump-width=1000 classes
+  zip $TEST_NAME.jar classes.dex
+  ${DX} -JXmx256m --debug --dex --dump-to=classes-ex.lst --output=classes.dex --dump-width=1000 classes-ex
+  zip ${TEST_NAME}-ex.jar classes.dex
+fi
diff --git a/test/131-structural-change/expected.txt b/test/131-structural-change/expected.txt
new file mode 100644
index 0000000..cc7713d
--- /dev/null
+++ b/test/131-structural-change/expected.txt
@@ -0,0 +1,2 @@
+Should really reach here.
+Done.
diff --git a/test/131-structural-change/info.txt b/test/131-structural-change/info.txt
new file mode 100644
index 0000000..6d5817b
--- /dev/null
+++ b/test/131-structural-change/info.txt
@@ -0,0 +1 @@
+Check whether a structural change in a (non-native) multi-dex scenario is detected.
diff --git a/test/131-structural-change/run b/test/131-structural-change/run
new file mode 100755
index 0000000..63fdb8c
--- /dev/null
+++ b/test/131-structural-change/run
@@ -0,0 +1,18 @@
+#!/bin/bash
+#
+# Copyright (C) 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Use secondary switch to add secondary dex file to class path.
+exec ${RUN} "${@}" --secondary
diff --git a/test/131-structural-change/src-ex/A.java b/test/131-structural-change/src-ex/A.java
new file mode 100644
index 0000000..800347b
--- /dev/null
+++ b/test/131-structural-change/src-ex/A.java
@@ -0,0 +1,20 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class A {
+    public void bar() {
+    }
+}
diff --git a/test/131-structural-change/src-ex/B.java b/test/131-structural-change/src-ex/B.java
new file mode 100644
index 0000000..61369db
--- /dev/null
+++ b/test/131-structural-change/src-ex/B.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class B extends A {
+    public void test() {
+        System.out.println("Should not reach this!");
+    }
+}
diff --git a/test/131-structural-change/src/A.java b/test/131-structural-change/src/A.java
new file mode 100644
index 0000000..b07de58
--- /dev/null
+++ b/test/131-structural-change/src/A.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class A {
+    public void foo() {
+    }
+
+    public void bar() {
+    }
+
+    public void baz() {
+    }
+}
diff --git a/test/131-structural-change/src/Main.java b/test/131-structural-change/src/Main.java
new file mode 100644
index 0000000..8dfa280
--- /dev/null
+++ b/test/131-structural-change/src/Main.java
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.Method;
+
+/**
+ * Structural hazard test.
+ */
+public class Main {
+    public static void main(String[] args) {
+        new Main().run();
+    }
+
+    private void run() {
+        try {
+            Class<?> bClass = getClass().getClassLoader().loadClass("A");
+            System.out.println("Should really reach here.");
+        } catch (Exception e) {
+            e.printStackTrace(System.out);
+        }
+
+        boolean haveOatFile = hasOat();
+        boolean gotError = false;
+        try {
+            Class<?> bClass = getClass().getClassLoader().loadClass("B");
+        } catch (IncompatibleClassChangeError icce) {
+            gotError = true;
+        } catch (Exception e) {
+            e.printStackTrace(System.out);
+        }
+        if (haveOatFile ^ gotError) {
+            System.out.println("Did not get expected error.");
+        }
+        System.out.println("Done.");
+    }
+
+    static {
+        System.loadLibrary("arttest");
+    }
+
+    private native static boolean hasOat();
+}
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index dc4ec66..04c590e 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -282,6 +282,7 @@
   117-nopatchoat \
   118-noimage-dex2oat \
   119-noimage-patchoat \
+  131-structural-change \
 
 ifneq (,$(filter ndebug,$(RUN_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),ndebug,$(PREBUILD_TYPES), \
diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar
index 5c0f83f..2710df8 100755
--- a/test/etc/run-test-jar
+++ b/test/etc/run-test-jar
@@ -37,7 +37,8 @@
 RELOCATE="y"
 SECONDARY_DEX=""
 TIME_OUT="y"
-TIME_OUT_VALUE=5m
+# Value in minutes.
+TIME_OUT_VALUE=5
 USE_GDB="n"
 USE_JVM="n"
 VERIFY="y"
@@ -377,7 +378,13 @@
 
     if [ "$TIME_OUT" = "y" ]; then
       # Add timeout command if time out is desired.
-      cmdline="timeout $TIME_OUT_VALUE $cmdline"
+      #
+      # Note: We use nested timeouts. The inner timeout sends SIGRTMIN+2 (usually 36) to ART, which
+      #       will induce a full thread dump before abort. However, dumping threads might deadlock,
+      #       so the outer timeout sends the regular SIGTERM after an additional minute to ensure
+      #       termination (without dumping all threads).
+      TIME_PLUS_ONE=$(($TIME_OUT_VALUE + 1))
+      cmdline="timeout ${TIME_PLUS_ONE}m timeout -s SIGRTMIN+2 ${TIME_OUT_VALUE}m $cmdline"
     fi
 
     if [ "$DEV_MODE" = "y" ]; then
diff --git a/tools/checker.py b/tools/checker.py
index 406a311..bca0707 100755
--- a/tools/checker.py
+++ b/tools/checker.py
@@ -80,7 +80,9 @@
 from subprocess import check_call
 
 class Logger(object):
-  SilentMode = False
+
+  class Level(object):
+    NoOutput, Error, Info = range(3)
 
   class Color(object):
     Default, Blue, Gray, Purple, Red = range(5)
@@ -100,13 +102,15 @@
       else:
         return '\033[0m'
 
+  Verbosity = Level.Info
+
   @staticmethod
-  def log(text, color=Color.Default, newLine=True, out=sys.stdout):
-    if not Logger.SilentMode:
+  def log(text, level=Level.Info, color=Color.Default, newLine=True, out=sys.stdout):
+    if level <= Logger.Verbosity:
       text = Logger.Color.terminalCode(color, out) + text + \
              Logger.Color.terminalCode(Logger.Color.Default, out)
       if newLine:
-        print(text, file=out)
+        print(text, flush=True, file=out)
       else:
         print(text, end="", flush=True, file=out)
 
@@ -120,9 +124,9 @@
     if location:
       location += " "
 
-    Logger.log(location, color=Logger.Color.Gray, newLine=False, out=sys.stderr)
-    Logger.log("error: ", color=Logger.Color.Red, newLine=False, out=sys.stderr)
-    Logger.log(msg, out=sys.stderr)
+    Logger.log(location, Logger.Level.Error, color=Logger.Color.Gray, newLine=False, out=sys.stderr)
+    Logger.log("error: ", Logger.Level.Error, color=Logger.Color.Red, newLine=False, out=sys.stderr)
+    Logger.log(msg, Logger.Level.Error, out=sys.stderr)
     sys.exit(1)
 
   @staticmethod
@@ -692,6 +696,8 @@
                       help="print a list of all groups found in the test output")
   parser.add_argument("--dump-group", dest="dump_group", metavar="GROUP",
                       help="print the contents of an output group")
+  parser.add_argument("-q", "--quiet", action="store_true",
+                      help="print only errors")
   return parser.parse_args()
 
 
@@ -713,7 +719,7 @@
   classFolder = tempFolder + "/classes"
   dexFile = tempFolder + "/test.dex"
   oatFile = tempFolder + "/test.oat"
-  outputFile = tempFolder + "/art.cfg"
+  outputFile = tempFolder + "/test.cfg"
   os.makedirs(classFolder)
 
   # Build a DEX from the source file. We pass "--no-optimize" to dx to avoid
@@ -723,7 +729,7 @@
 
   # Run dex2oat and export the HGraph. The output is stored into ${PWD}/art.cfg.
   with cd(tempFolder):
-    check_call(["dex2oat", "-j1", "--dump-passes", "--compiler-backend=Optimizing",
+    check_call(["dex2oat", "-j1", "--dump-cfg=" + outputFile, "--compiler-backend=Optimizing",
                 "--android-root=" + os.environ["ANDROID_HOST_OUT"],
                 "--boot-image=" + os.environ["ANDROID_HOST_OUT"] + "/framework/core-optimizing.art",
                 "--runtime-arg", "-Xnorelocate", "--dex-file=" + dexFile, "--oat-file=" + oatFile])
@@ -762,8 +768,10 @@
 
 if __name__ == "__main__":
   args = ParseArguments()
-  tempFolder = tempfile.mkdtemp()
+  if args.quiet:
+    Logger.Verbosity = Logger.Level.Error
 
+  tempFolder = tempfile.mkdtemp()
   try:
     outputFile = CompileTest(args.test_file, tempFolder)
     if args.list_groups:
diff --git a/tools/checker_test.py b/tools/checker_test.py
index 3c659c2..1466b93 100755
--- a/tools/checker_test.py
+++ b/tools/checker_test.py
@@ -457,5 +457,5 @@
                                                          ("def", CheckVariant.DAG) ])) ])
 
 if __name__ == '__main__':
-  checker.Logger.SilentMode = True
+  checker.Logger.Verbosity = checker.Logger.Level.NoOutput
   unittest.main()