6 files changed, 44 insertions, 26 deletions
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 91426f347b..4d8154e6a0 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -327,11 +327,13 @@ bool CodeGenerator::GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) con
 
 CodeGenerator* CodeGenerator::Create(ArenaAllocator* allocator,
                                      HGraph* graph,
-                                     InstructionSet instruction_set) {
+                                     InstructionSet instruction_set,
+                                     const InstructionSetFeatures& isa_features) {
   switch (instruction_set) {
     case kArm:
     case kThumb2: {
-      return new (allocator) arm::CodeGeneratorARM(graph);
+      return new (allocator) arm::CodeGeneratorARM(graph,
+          isa_features.AsArmInstructionSetFeatures());
     }
     case kArm64: {
       return new (allocator) arm64::CodeGeneratorARM64(graph);
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 2e7eca2ead..4205ebebf9 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -18,6 +18,7 @@
 #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
 
 #include "arch/instruction_set.h"
+#include "arch/instruction_set_features.h"
 #include "base/bit_field.h"
 #include "globals.h"
 #include "locations.h"
@@ -84,7 +85,8 @@ class CodeGenerator : public ArenaObject<kArenaAllocMisc> {
   void CompileOptimized(CodeAllocator* allocator);
   static CodeGenerator* Create(ArenaAllocator* allocator,
                                HGraph* graph,
-                               InstructionSet instruction_set);
+                               InstructionSet instruction_set,
+                               const InstructionSetFeatures& isa_features);
 
   HGraph* GetGraph() const { return graph_; }
 
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 8c107f302e..3b3fb64763 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -16,6 +16,7 @@
 
 #include "code_generator_arm.h"
 
+#include "arch/arm/instruction_set_features_arm.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "gc/accounting/card_table.h"
 #include "mirror/array-inl.h"
@@ -372,13 +373,15 @@ size_t CodeGeneratorARM::RestoreCoreRegister(size_t stack_index, uint32_t reg_id
   return kArmWordSize;
 }
 
-CodeGeneratorARM::CodeGeneratorARM(HGraph* graph)
+CodeGeneratorARM::CodeGeneratorARM(HGraph* graph,
+                                   const ArmInstructionSetFeatures* isa_features)
     : CodeGenerator(graph, kNumberOfCoreRegisters, kNumberOfSRegisters, kNumberOfRegisterPairs),
       block_labels_(graph->GetArena(), 0),
       location_builder_(graph, this),
       instruction_visitor_(graph, this),
       move_resolver_(graph->GetArena(), this),
-      assembler_(true) {}
+      assembler_(true),
+      isa_features_(isa_features) {}
 
 size_t CodeGeneratorARM::FrameEntrySpillSize() const {
   return kNumberOfPushedRegistersAtEntry * kArmWordSize;
@@ -2615,16 +2618,18 @@ void LocationsBuilderARM::HandleFieldSet(HInstruction* instruction, const FieldI
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
 
-  bool is_volatile = field_info.IsVolatile();
+
   Primitive::Type field_type = field_info.GetFieldType();
   bool is_wide = field_type == Primitive::kPrimLong || field_type == Primitive::kPrimDouble;
-
+  bool generate_volatile = field_info.IsVolatile()
+      && is_wide
+      && !codegen_->GetInstructionSetFeatures()->HasAtomicLdrdAndStrd();
   // Temporary registers for the write barrier.
   // TODO: consider renaming StoreNeedsWriteBarrier to StoreNeedsGCMark.
   if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
     locations->AddTemp(Location::RequiresRegister());
     locations->AddTemp(Location::RequiresRegister());
-  } else if (is_volatile && is_wide) {
+  } else if (generate_volatile) {
     // Arm encoding have some additional constraints for ldrexd/strexd:
     // - registers need to be consecutive
     // - the first register should be even but not R14.
@@ -2651,6 +2656,7 @@ void InstructionCodeGeneratorARM::HandleFieldSet(HInstruction* instruction,
   Location value = locations->InAt(1);
 
   bool is_volatile = field_info.IsVolatile();
+  bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures()->HasAtomicLdrdAndStrd();
   Primitive::Type field_type = field_info.GetFieldType();
   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
 
@@ -2684,10 +2690,7 @@ void InstructionCodeGeneratorARM::HandleFieldSet(HInstruction* instruction,
     }
 
     case Primitive::kPrimLong: {
-      if (is_volatile) {
-        // TODO: We could use ldrd and strd that are atomic with Large Physical Address Extension
-        // support. This info is stored in the compiler driver (HasAtomicLdrdAndStrd) and we should
-        // pass it around to be able to optimize.
+      if (is_volatile && !atomic_ldrd_strd) {
         GenerateWideAtomicStore(base, offset,
                                 value.AsRegisterPairLow<Register>(),
                                 value.AsRegisterPairHigh<Register>(),
@@ -2706,7 +2709,7 @@ void InstructionCodeGeneratorARM::HandleFieldSet(HInstruction* instruction,
 
     case Primitive::kPrimDouble: {
       DRegister value_reg = FromLowSToD(value.AsFpuRegisterPairLow<SRegister>());
-      if (is_volatile) {
+      if (is_volatile && !atomic_ldrd_strd) {
         Register value_reg_lo = locations->GetTemp(0).AsRegister<Register>();
         Register value_reg_hi = locations->GetTemp(1).AsRegister<Register>();
 
@@ -2740,7 +2743,10 @@ void LocationsBuilderARM::HandleFieldGet(HInstruction* instruction, const FieldI
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
 
-  if (field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimDouble)) {
+  bool generate_volatile = field_info.IsVolatile()
+      && (field_info.GetFieldType() == Primitive::kPrimDouble)
+      && !codegen_->GetInstructionSetFeatures()->HasAtomicLdrdAndStrd();
+  if (generate_volatile) {
     // Arm encoding have some additional constraints for ldrexd/strexd:
     // - registers need to be consecutive
     // - the first register should be even but not R14.
@@ -2760,6 +2766,7 @@ void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction,
   Register base = locations->InAt(0).AsRegister<Register>();
   Location out = locations->Out();
   bool is_volatile = field_info.IsVolatile();
+  bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures()->HasAtomicLdrdAndStrd();
   Primitive::Type field_type = field_info.GetFieldType();
   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
 
@@ -2791,7 +2798,7 @@ void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction,
     }
 
     case Primitive::kPrimLong: {
-      if (is_volatile) {
+      if (is_volatile && !atomic_ldrd_strd) {
         GenerateWideAtomicLoad(base, offset,
                                out.AsRegisterPairLow<Register>(),
                                out.AsRegisterPairHigh<Register>());
@@ -2808,7 +2815,7 @@ void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction,
 
     case Primitive::kPrimDouble: {
       DRegister out_reg = FromLowSToD(out.AsFpuRegisterPairLow<SRegister>());
-      if (is_volatile) {
+      if (is_volatile && !atomic_ldrd_strd) {
         Register lo = locations->GetTemp(0).AsRegister<Register>();
         Register hi = locations->GetTemp(1).AsRegister<Register>();
         GenerateWideAtomicLoad(base, offset, lo, hi);
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index b86670d939..40f4edc4eb 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -159,7 +159,7 @@ class InstructionCodeGeneratorARM : public HGraphVisitor {
 
 class CodeGeneratorARM : public CodeGenerator {
  public:
-  explicit CodeGeneratorARM(HGraph* graph);
+  CodeGeneratorARM(HGraph* graph, const ArmInstructionSetFeatures* isa_features);
   virtual ~CodeGeneratorARM() {}
 
   void GenerateFrameEntry() OVERRIDE;
@@ -233,6 +233,10 @@ class CodeGeneratorARM : public CodeGenerator {
     block_labels_.SetSize(GetGraph()->GetBlocks().Size());
   }
 
+  const ArmInstructionSetFeatures* GetInstructionSetFeatures() const {
+    return isa_features_;
+  }
+
  private:
   // Labels for each block that will be compiled.
   GrowableArray<Label> block_labels_;
@@ -240,6 +244,7 @@ class CodeGeneratorARM : public CodeGenerator {
   InstructionCodeGeneratorARM instruction_visitor_;
   ParallelMoveResolverARM move_resolver_;
   Thumb2Assembler assembler_;
+  const ArmInstructionSetFeatures* isa_features_;
 
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM);
 };
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index 8b75cc7c65..4c06e23bbc 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -17,6 +17,7 @@
 #include <functional>
 
 #include "arch/instruction_set.h"
+#include "arch/arm/instruction_set_features_arm.h"
 #include "base/macros.h"
 #include "builder.h"
 #include "code_generator_arm.h"
@@ -87,7 +88,7 @@ static void RunCodeBaseline(HGraph* graph, bool has_result, Expected expected) {
     Run(allocator, codegenX86, has_result, expected);
   }
 
-  arm::CodeGeneratorARM codegenARM(graph);
+  arm::CodeGeneratorARM codegenARM(graph, ArmInstructionSetFeatures::FromCppDefines());
   codegenARM.CompileBaseline(&allocator, true);
   if (kRuntimeISA == kArm || kRuntimeISA == kThumb2) {
     Run(allocator, codegenARM, has_result, expected);
@@ -130,7 +131,7 @@ static void RunCodeOptimized(HGraph* graph,
                              bool has_result,
                              Expected expected) {
   if (kRuntimeISA == kArm || kRuntimeISA == kThumb2) {
-    arm::CodeGeneratorARM codegenARM(graph);
+    arm::CodeGeneratorARM codegenARM(graph, ArmInstructionSetFeatures::FromCppDefines());
     RunCodeOptimized(&codegenARM, graph, hook_before_codegen, has_result, expected);
   } else if (kRuntimeISA == kArm64) {
     arm64::CodeGeneratorARM64 codegenARM64(graph);
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 87f2b90775..1a27724d33 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -289,7 +289,9 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
     return nullptr;
   }
 
-  CodeGenerator* codegen = CodeGenerator::Create(&arena, graph, instruction_set);
+  CompilerDriver* compiler_driver = GetCompilerDriver();
+  CodeGenerator* codegen = CodeGenerator::Create(&arena, graph, instruction_set,
+      *compiler_driver->GetInstructionSetFeatures());
   if (codegen == nullptr) {
     CHECK(!shouldCompile) << "Could not find code generator for optimizing compiler";
     compilation_stats_.RecordStat(MethodCompilationStat::kNotCompiledNoCodegen);
@@ -315,7 +317,7 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
       return nullptr;
     }
     RunOptimizations(
-        graph, GetCompilerDriver(), &compilation_stats_, dex_compilation_unit, visualizer);
+        graph, compiler_driver, &compilation_stats_, dex_compilation_unit, visualizer);
 
     PrepareForRegisterAllocation(graph).Run();
     SsaLivenessAnalysis liveness(*graph, codegen);
@@ -333,7 +335,7 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
 
     compilation_stats_.RecordStat(MethodCompilationStat::kCompiledOptimized);
     return CompiledMethod::SwapAllocCompiledMethodStackMap(
-        GetCompilerDriver(),
+        compiler_driver,
         instruction_set,
         ArrayRef<const uint8_t>(allocator.GetMemory()),
         codegen->GetFrameSize(),
@@ -358,16 +360,15 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
 
     std::vector<uint8_t> mapping_table;
     DefaultSrcMap src_mapping_table;
-    codegen->BuildMappingTable(&mapping_table,
-            GetCompilerDriver()->GetCompilerOptions().GetIncludeDebugSymbols() ?
-                 &src_mapping_table : nullptr);
+    bool include_debug_symbol = compiler_driver->GetCompilerOptions().GetIncludeDebugSymbols();
+    codegen->BuildMappingTable(&mapping_table, include_debug_symbol ? &src_mapping_table : nullptr);
     std::vector<uint8_t> vmap_table;
     codegen->BuildVMapTable(&vmap_table);
     std::vector<uint8_t> gc_map;
     codegen->BuildNativeGCMap(&gc_map, dex_compilation_unit);
 
     compilation_stats_.RecordStat(MethodCompilationStat::kCompiledBaseline);
-    return CompiledMethod::SwapAllocCompiledMethod(GetCompilerDriver(),
+    return CompiledMethod::SwapAllocCompiledMethod(compiler_driver,
                                                    instruction_set,
                                                    ArrayRef<const uint8_t>(allocator.GetMemory()),
                                                    codegen->GetFrameSize(),