diff options
Diffstat (limited to 'compiler/optimizing')
-rw-r--r-- | compiler/optimizing/code_generator.cc | 6 | ||||
-rw-r--r-- | compiler/optimizing/code_generator.h | 4 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_arm.cc | 33 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_arm.h | 7 | ||||
-rw-r--r-- | compiler/optimizing/codegen_test.cc | 5 | ||||
-rw-r--r-- | compiler/optimizing/optimizing_compiler.cc | 15 |
6 files changed, 44 insertions, 26 deletions
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 91426f347b..4d8154e6a0 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -327,11 +327,13 @@ bool CodeGenerator::GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) con CodeGenerator* CodeGenerator::Create(ArenaAllocator* allocator, HGraph* graph, - InstructionSet instruction_set) { + InstructionSet instruction_set, + const InstructionSetFeatures& isa_features) { switch (instruction_set) { case kArm: case kThumb2: { - return new (allocator) arm::CodeGeneratorARM(graph); + return new (allocator) arm::CodeGeneratorARM(graph, + isa_features.AsArmInstructionSetFeatures()); } case kArm64: { return new (allocator) arm64::CodeGeneratorARM64(graph); diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 2e7eca2ead..4205ebebf9 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -18,6 +18,7 @@ #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_ #include "arch/instruction_set.h" +#include "arch/instruction_set_features.h" #include "base/bit_field.h" #include "globals.h" #include "locations.h" @@ -84,7 +85,8 @@ class CodeGenerator : public ArenaObject<kArenaAllocMisc> { void CompileOptimized(CodeAllocator* allocator); static CodeGenerator* Create(ArenaAllocator* allocator, HGraph* graph, - InstructionSet instruction_set); + InstructionSet instruction_set, + const InstructionSetFeatures& isa_features); HGraph* GetGraph() const { return graph_; } diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 8c107f302e..3b3fb64763 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -16,6 +16,7 @@ #include "code_generator_arm.h" +#include "arch/arm/instruction_set_features_arm.h" #include "entrypoints/quick/quick_entrypoints.h" #include "gc/accounting/card_table.h" #include "mirror/array-inl.h" @@ -372,13 +373,15 @@ size_t CodeGeneratorARM::RestoreCoreRegister(size_t stack_index, uint32_t reg_id return kArmWordSize; } -CodeGeneratorARM::CodeGeneratorARM(HGraph* graph) +CodeGeneratorARM::CodeGeneratorARM(HGraph* graph, + const ArmInstructionSetFeatures* isa_features) : CodeGenerator(graph, kNumberOfCoreRegisters, kNumberOfSRegisters, kNumberOfRegisterPairs), block_labels_(graph->GetArena(), 0), location_builder_(graph, this), instruction_visitor_(graph, this), move_resolver_(graph->GetArena(), this), - assembler_(true) {} + assembler_(true), + isa_features_(isa_features) {} size_t CodeGeneratorARM::FrameEntrySpillSize() const { return kNumberOfPushedRegistersAtEntry * kArmWordSize; @@ -2615,16 +2618,18 @@ void LocationsBuilderARM::HandleFieldSet(HInstruction* instruction, const FieldI locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); - bool is_volatile = field_info.IsVolatile(); + Primitive::Type field_type = field_info.GetFieldType(); bool is_wide = field_type == Primitive::kPrimLong || field_type == Primitive::kPrimDouble; - + bool generate_volatile = field_info.IsVolatile() + && is_wide + && !codegen_->GetInstructionSetFeatures()->HasAtomicLdrdAndStrd(); // Temporary registers for the write barrier. // TODO: consider renaming StoreNeedsWriteBarrier to StoreNeedsGCMark. if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); - } else if (is_volatile && is_wide) { + } else if (generate_volatile) { // Arm encoding have some additional constraints for ldrexd/strexd: // - registers need to be consecutive // - the first register should be even but not R14. @@ -2651,6 +2656,7 @@ void InstructionCodeGeneratorARM::HandleFieldSet(HInstruction* instruction, Location value = locations->InAt(1); bool is_volatile = field_info.IsVolatile(); + bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures()->HasAtomicLdrdAndStrd(); Primitive::Type field_type = field_info.GetFieldType(); uint32_t offset = field_info.GetFieldOffset().Uint32Value(); @@ -2684,10 +2690,7 @@ void InstructionCodeGeneratorARM::HandleFieldSet(HInstruction* instruction, } case Primitive::kPrimLong: { - if (is_volatile) { - // TODO: We could use ldrd and strd that are atomic with Large Physical Address Extension - // support. This info is stored in the compiler driver (HasAtomicLdrdAndStrd) and we should - // pass it around to be able to optimize. + if (is_volatile && !atomic_ldrd_strd) { GenerateWideAtomicStore(base, offset, value.AsRegisterPairLow<Register>(), value.AsRegisterPairHigh<Register>(), @@ -2706,7 +2709,7 @@ void InstructionCodeGeneratorARM::HandleFieldSet(HInstruction* instruction, case Primitive::kPrimDouble: { DRegister value_reg = FromLowSToD(value.AsFpuRegisterPairLow<SRegister>()); - if (is_volatile) { + if (is_volatile && !atomic_ldrd_strd) { Register value_reg_lo = locations->GetTemp(0).AsRegister<Register>(); Register value_reg_hi = locations->GetTemp(1).AsRegister<Register>(); @@ -2740,7 +2743,10 @@ void LocationsBuilderARM::HandleFieldGet(HInstruction* instruction, const FieldI locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - if (field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimDouble)) { + bool generate_volatile = field_info.IsVolatile() + && (field_info.GetFieldType() == Primitive::kPrimDouble) + && !codegen_->GetInstructionSetFeatures()->HasAtomicLdrdAndStrd(); + if (generate_volatile) { // Arm encoding have some additional constraints for ldrexd/strexd: // - registers need to be consecutive // - the first register should be even but not R14. @@ -2760,6 +2766,7 @@ void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction, Register base = locations->InAt(0).AsRegister<Register>(); Location out = locations->Out(); bool is_volatile = field_info.IsVolatile(); + bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures()->HasAtomicLdrdAndStrd(); Primitive::Type field_type = field_info.GetFieldType(); uint32_t offset = field_info.GetFieldOffset().Uint32Value(); @@ -2791,7 +2798,7 @@ void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction, } case Primitive::kPrimLong: { - if (is_volatile) { + if (is_volatile && !atomic_ldrd_strd) { GenerateWideAtomicLoad(base, offset, out.AsRegisterPairLow<Register>(), out.AsRegisterPairHigh<Register>()); @@ -2808,7 +2815,7 @@ void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction, case Primitive::kPrimDouble: { DRegister out_reg = FromLowSToD(out.AsFpuRegisterPairLow<SRegister>()); - if (is_volatile) { + if (is_volatile && !atomic_ldrd_strd) { Register lo = locations->GetTemp(0).AsRegister<Register>(); Register hi = locations->GetTemp(1).AsRegister<Register>(); GenerateWideAtomicLoad(base, offset, lo, hi); diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index b86670d939..40f4edc4eb 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -159,7 +159,7 @@ class InstructionCodeGeneratorARM : public HGraphVisitor { class CodeGeneratorARM : public CodeGenerator { public: - explicit CodeGeneratorARM(HGraph* graph); + CodeGeneratorARM(HGraph* graph, const ArmInstructionSetFeatures* isa_features); virtual ~CodeGeneratorARM() {} void GenerateFrameEntry() OVERRIDE; @@ -233,6 +233,10 @@ class CodeGeneratorARM : public CodeGenerator { block_labels_.SetSize(GetGraph()->GetBlocks().Size()); } + const ArmInstructionSetFeatures* GetInstructionSetFeatures() const { + return isa_features_; + } + private: // Labels for each block that will be compiled. GrowableArray<Label> block_labels_; @@ -240,6 +244,7 @@ class CodeGeneratorARM : public CodeGenerator { InstructionCodeGeneratorARM instruction_visitor_; ParallelMoveResolverARM move_resolver_; Thumb2Assembler assembler_; + const ArmInstructionSetFeatures* isa_features_; DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM); }; diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc index 8b75cc7c65..4c06e23bbc 100644 --- a/compiler/optimizing/codegen_test.cc +++ b/compiler/optimizing/codegen_test.cc @@ -17,6 +17,7 @@ #include <functional> #include "arch/instruction_set.h" +#include "arch/arm/instruction_set_features_arm.h" #include "base/macros.h" #include "builder.h" #include "code_generator_arm.h" @@ -87,7 +88,7 @@ static void RunCodeBaseline(HGraph* graph, bool has_result, Expected expected) { Run(allocator, codegenX86, has_result, expected); } - arm::CodeGeneratorARM codegenARM(graph); + arm::CodeGeneratorARM codegenARM(graph, ArmInstructionSetFeatures::FromCppDefines()); codegenARM.CompileBaseline(&allocator, true); if (kRuntimeISA == kArm || kRuntimeISA == kThumb2) { Run(allocator, codegenARM, has_result, expected); @@ -130,7 +131,7 @@ static void RunCodeOptimized(HGraph* graph, bool has_result, Expected expected) { if (kRuntimeISA == kArm || kRuntimeISA == kThumb2) { - arm::CodeGeneratorARM codegenARM(graph); + arm::CodeGeneratorARM codegenARM(graph, ArmInstructionSetFeatures::FromCppDefines()); RunCodeOptimized(&codegenARM, graph, hook_before_codegen, has_result, expected); } else if (kRuntimeISA == kArm64) { arm64::CodeGeneratorARM64 codegenARM64(graph); diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 87f2b90775..1a27724d33 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -289,7 +289,9 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, return nullptr; } - CodeGenerator* codegen = CodeGenerator::Create(&arena, graph, instruction_set); + CompilerDriver* compiler_driver = GetCompilerDriver(); + CodeGenerator* codegen = CodeGenerator::Create(&arena, graph, instruction_set, + *compiler_driver->GetInstructionSetFeatures()); if (codegen == nullptr) { CHECK(!shouldCompile) << "Could not find code generator for optimizing compiler"; compilation_stats_.RecordStat(MethodCompilationStat::kNotCompiledNoCodegen); @@ -315,7 +317,7 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, return nullptr; } RunOptimizations( - graph, GetCompilerDriver(), &compilation_stats_, dex_compilation_unit, visualizer); + graph, compiler_driver, &compilation_stats_, dex_compilation_unit, visualizer); PrepareForRegisterAllocation(graph).Run(); SsaLivenessAnalysis liveness(*graph, codegen); @@ -333,7 +335,7 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, compilation_stats_.RecordStat(MethodCompilationStat::kCompiledOptimized); return CompiledMethod::SwapAllocCompiledMethodStackMap( - GetCompilerDriver(), + compiler_driver, instruction_set, ArrayRef<const uint8_t>(allocator.GetMemory()), codegen->GetFrameSize(), @@ -358,16 +360,15 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, std::vector<uint8_t> mapping_table; DefaultSrcMap src_mapping_table; - codegen->BuildMappingTable(&mapping_table, - GetCompilerDriver()->GetCompilerOptions().GetIncludeDebugSymbols() ? - &src_mapping_table : nullptr); + bool include_debug_symbol = compiler_driver->GetCompilerOptions().GetIncludeDebugSymbols(); + codegen->BuildMappingTable(&mapping_table, include_debug_symbol ? &src_mapping_table : nullptr); std::vector<uint8_t> vmap_table; codegen->BuildVMapTable(&vmap_table); std::vector<uint8_t> gc_map; codegen->BuildNativeGCMap(&gc_map, dex_compilation_unit); compilation_stats_.RecordStat(MethodCompilationStat::kCompiledBaseline); - return CompiledMethod::SwapAllocCompiledMethod(GetCompilerDriver(), + return CompiledMethod::SwapAllocCompiledMethod(compiler_driver, instruction_set, ArrayRef<const uint8_t>(allocator.GetMemory()), codegen->GetFrameSize(), |