Look at instruction set features when generating volatiles code
Change-Id: Ia882405719fdd60b63e4102af7e085f7cbe0bb2a
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 91426f3..4d8154e 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -327,11 +327,13 @@
CodeGenerator* CodeGenerator::Create(ArenaAllocator* allocator,
HGraph* graph,
- InstructionSet instruction_set) {
+ InstructionSet instruction_set,
+ const InstructionSetFeatures& isa_features) {
switch (instruction_set) {
case kArm:
case kThumb2: {
- return new (allocator) arm::CodeGeneratorARM(graph);
+ return new (allocator) arm::CodeGeneratorARM(graph,
+ isa_features.AsArmInstructionSetFeatures());
}
case kArm64: {
return new (allocator) arm64::CodeGeneratorARM64(graph);
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 2e7eca2..4205ebe 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -18,6 +18,7 @@
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
#include "arch/instruction_set.h"
+#include "arch/instruction_set_features.h"
#include "base/bit_field.h"
#include "globals.h"
#include "locations.h"
@@ -84,7 +85,8 @@
void CompileOptimized(CodeAllocator* allocator);
static CodeGenerator* Create(ArenaAllocator* allocator,
HGraph* graph,
- InstructionSet instruction_set);
+ InstructionSet instruction_set,
+ const InstructionSetFeatures& isa_features);
HGraph* GetGraph() const { return graph_; }
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 063dc7c..2f49107 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -16,6 +16,7 @@
#include "code_generator_arm.h"
+#include "arch/arm/instruction_set_features_arm.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "gc/accounting/card_table.h"
#include "mirror/array-inl.h"
@@ -372,13 +373,15 @@
return kArmWordSize;
}
-CodeGeneratorARM::CodeGeneratorARM(HGraph* graph)
+CodeGeneratorARM::CodeGeneratorARM(HGraph* graph,
+ const ArmInstructionSetFeatures* isa_features)
: CodeGenerator(graph, kNumberOfCoreRegisters, kNumberOfSRegisters, kNumberOfRegisterPairs),
block_labels_(graph->GetArena(), 0),
location_builder_(graph, this),
instruction_visitor_(graph, this),
move_resolver_(graph->GetArena(), this),
- assembler_(true) {}
+ assembler_(true),
+ isa_features_(isa_features) {}
size_t CodeGeneratorARM::FrameEntrySpillSize() const {
return kNumberOfPushedRegistersAtEntry * kArmWordSize;
@@ -2615,16 +2618,18 @@
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
- bool is_volatile = field_info.IsVolatile();
+
Primitive::Type field_type = field_info.GetFieldType();
bool is_wide = field_type == Primitive::kPrimLong || field_type == Primitive::kPrimDouble;
-
+ bool generate_volatile = field_info.IsVolatile()
+ && is_wide
+ && !codegen_->GetInstructionSetFeatures()->HasAtomicLdrdAndStrd();
// Temporary registers for the write barrier.
// TODO: consider renaming StoreNeedsWriteBarrier to StoreNeedsGCMark.
if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
locations->AddTemp(Location::RequiresRegister());
locations->AddTemp(Location::RequiresRegister());
- } else if (is_volatile && is_wide) {
+ } else if (generate_volatile) {
// Arm encoding have some additional constraints for ldrexd/strexd:
// - registers need to be consecutive
// - the first register should be even but not R14.
@@ -2651,6 +2656,7 @@
Location value = locations->InAt(1);
bool is_volatile = field_info.IsVolatile();
+ bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures()->HasAtomicLdrdAndStrd();
Primitive::Type field_type = field_info.GetFieldType();
uint32_t offset = field_info.GetFieldOffset().Uint32Value();
@@ -2684,10 +2690,7 @@
}
case Primitive::kPrimLong: {
- if (is_volatile) {
- // TODO: We could use ldrd and strd that are atomic with Large Physical Address Extension
- // support. This info is stored in the compiler driver (HasAtomicLdrdAndStrd) and we should
- // pass it around to be able to optimize.
+ if (is_volatile && !atomic_ldrd_strd) {
GenerateWideAtomicStore(base, offset,
value.AsRegisterPairLow<Register>(),
value.AsRegisterPairHigh<Register>(),
@@ -2706,7 +2709,7 @@
case Primitive::kPrimDouble: {
DRegister value_reg = FromLowSToD(value.AsFpuRegisterPairLow<SRegister>());
- if (is_volatile) {
+ if (is_volatile && !atomic_ldrd_strd) {
Register value_reg_lo = locations->GetTemp(0).AsRegister<Register>();
Register value_reg_hi = locations->GetTemp(1).AsRegister<Register>();
@@ -2740,7 +2743,10 @@
locations->SetInAt(0, Location::RequiresRegister());
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
- if (field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimDouble)) {
+ bool generate_volatile = field_info.IsVolatile()
+ && (field_info.GetFieldType() == Primitive::kPrimDouble)
+ && !codegen_->GetInstructionSetFeatures()->HasAtomicLdrdAndStrd();
+ if (generate_volatile) {
// Arm encoding have some additional constraints for ldrexd/strexd:
// - registers need to be consecutive
// - the first register should be even but not R14.
@@ -2760,6 +2766,7 @@
Register base = locations->InAt(0).AsRegister<Register>();
Location out = locations->Out();
bool is_volatile = field_info.IsVolatile();
+ bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures()->HasAtomicLdrdAndStrd();
Primitive::Type field_type = field_info.GetFieldType();
uint32_t offset = field_info.GetFieldOffset().Uint32Value();
@@ -2791,7 +2798,7 @@
}
case Primitive::kPrimLong: {
- if (is_volatile) {
+ if (is_volatile && !atomic_ldrd_strd) {
GenerateWideAtomicLoad(base, offset,
out.AsRegisterPairLow<Register>(),
out.AsRegisterPairHigh<Register>());
@@ -2808,7 +2815,7 @@
case Primitive::kPrimDouble: {
DRegister out_reg = FromLowSToD(out.AsFpuRegisterPairLow<SRegister>());
- if (is_volatile) {
+ if (is_volatile && !atomic_ldrd_strd) {
Register lo = locations->GetTemp(0).AsRegister<Register>();
Register hi = locations->GetTemp(1).AsRegister<Register>();
GenerateWideAtomicLoad(base, offset, lo, hi);
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index b86670d..40f4edc 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -159,7 +159,7 @@
class CodeGeneratorARM : public CodeGenerator {
public:
- explicit CodeGeneratorARM(HGraph* graph);
+ CodeGeneratorARM(HGraph* graph, const ArmInstructionSetFeatures* isa_features);
virtual ~CodeGeneratorARM() {}
void GenerateFrameEntry() OVERRIDE;
@@ -233,6 +233,10 @@
block_labels_.SetSize(GetGraph()->GetBlocks().Size());
}
+ const ArmInstructionSetFeatures* GetInstructionSetFeatures() const {
+ return isa_features_;
+ }
+
private:
// Labels for each block that will be compiled.
GrowableArray<Label> block_labels_;
@@ -240,6 +244,7 @@
InstructionCodeGeneratorARM instruction_visitor_;
ParallelMoveResolverARM move_resolver_;
Thumb2Assembler assembler_;
+ const ArmInstructionSetFeatures* isa_features_;
DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM);
};
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index 8b75cc7..4c06e23 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -17,6 +17,7 @@
#include <functional>
#include "arch/instruction_set.h"
+#include "arch/arm/instruction_set_features_arm.h"
#include "base/macros.h"
#include "builder.h"
#include "code_generator_arm.h"
@@ -87,7 +88,7 @@
Run(allocator, codegenX86, has_result, expected);
}
- arm::CodeGeneratorARM codegenARM(graph);
+ arm::CodeGeneratorARM codegenARM(graph, ArmInstructionSetFeatures::FromCppDefines());
codegenARM.CompileBaseline(&allocator, true);
if (kRuntimeISA == kArm || kRuntimeISA == kThumb2) {
Run(allocator, codegenARM, has_result, expected);
@@ -130,7 +131,7 @@
bool has_result,
Expected expected) {
if (kRuntimeISA == kArm || kRuntimeISA == kThumb2) {
- arm::CodeGeneratorARM codegenARM(graph);
+ arm::CodeGeneratorARM codegenARM(graph, ArmInstructionSetFeatures::FromCppDefines());
RunCodeOptimized(&codegenARM, graph, hook_before_codegen, has_result, expected);
} else if (kRuntimeISA == kArm64) {
arm64::CodeGeneratorARM64 codegenARM64(graph);
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 87f2b90..1a27724 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -289,7 +289,9 @@
return nullptr;
}
- CodeGenerator* codegen = CodeGenerator::Create(&arena, graph, instruction_set);
+ CompilerDriver* compiler_driver = GetCompilerDriver();
+ CodeGenerator* codegen = CodeGenerator::Create(&arena, graph, instruction_set,
+ *compiler_driver->GetInstructionSetFeatures());
if (codegen == nullptr) {
CHECK(!shouldCompile) << "Could not find code generator for optimizing compiler";
compilation_stats_.RecordStat(MethodCompilationStat::kNotCompiledNoCodegen);
@@ -315,7 +317,7 @@
return nullptr;
}
RunOptimizations(
- graph, GetCompilerDriver(), &compilation_stats_, dex_compilation_unit, visualizer);
+ graph, compiler_driver, &compilation_stats_, dex_compilation_unit, visualizer);
PrepareForRegisterAllocation(graph).Run();
SsaLivenessAnalysis liveness(*graph, codegen);
@@ -333,7 +335,7 @@
compilation_stats_.RecordStat(MethodCompilationStat::kCompiledOptimized);
return CompiledMethod::SwapAllocCompiledMethodStackMap(
- GetCompilerDriver(),
+ compiler_driver,
instruction_set,
ArrayRef<const uint8_t>(allocator.GetMemory()),
codegen->GetFrameSize(),
@@ -358,16 +360,15 @@
std::vector<uint8_t> mapping_table;
DefaultSrcMap src_mapping_table;
- codegen->BuildMappingTable(&mapping_table,
- GetCompilerDriver()->GetCompilerOptions().GetIncludeDebugSymbols() ?
- &src_mapping_table : nullptr);
+ bool include_debug_symbol = compiler_driver->GetCompilerOptions().GetIncludeDebugSymbols();
+ codegen->BuildMappingTable(&mapping_table, include_debug_symbol ? &src_mapping_table : nullptr);
std::vector<uint8_t> vmap_table;
codegen->BuildVMapTable(&vmap_table);
std::vector<uint8_t> gc_map;
codegen->BuildNativeGCMap(&gc_map, dex_compilation_unit);
compilation_stats_.RecordStat(MethodCompilationStat::kCompiledBaseline);
- return CompiledMethod::SwapAllocCompiledMethod(GetCompilerDriver(),
+ return CompiledMethod::SwapAllocCompiledMethod(compiler_driver,
instruction_set,
ArrayRef<const uint8_t>(allocator.GetMemory()),
codegen->GetFrameSize(),