| author    | 2015-01-05 18:30:49 +0000 |
|-----------|---------------------------|
| committer | 2015-01-05 18:30:50 +0000 |
| commit    | b994c437ae8ec6b9d85ceb1fb11e4c97eace32ef (patch) |
| tree      | 8bce70d3d44dcc9384d72e4edc1505e4d1a6ea07 /compiler |
| parent    | 8558375377946aabbbda6ab584e13f754590bd89 (diff) |
| parent    | 02d81cc8d162a31f0664249535456775e397b608 (diff) |
Merge "Opt Compiler: ARM64: Add support for rem-float, rem-double and volatile."
Diffstat (limited to 'compiler')

| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | compiler/optimizing/builder.cc | 16 |
| -rw-r--r-- | compiler/optimizing/code_generator_arm64.cc | 259 |
| -rw-r--r-- | compiler/optimizing/code_generator_arm64.h | 4 |
| -rw-r--r-- | compiler/optimizing/optimizing_compiler_stats.h | 2 |

4 files changed, 233 insertions, 48 deletions
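
Most of the code_generator_arm64.cc changes below add LoadAcquire/StoreRelease helpers and route volatile field gets and sets through them when kUseAcquireRelease is set, falling back to plain loads and stores wrapped in explicit DMB barriers otherwise. As a rough sketch of the two orderings, here is the same choice expressed with standard C++11 atomics rather than ART's code generator; the VolatileCell type and its Get/Set methods are illustrative names only, not part of the patch:

```cpp
// Illustrative sketch only: C++11 atomics standing in for the two ARM64
// codegen strategies in this patch (ldar/stlr vs. explicit dmb barriers).
#include <atomic>
#include <cstdio>

static constexpr bool kUseAcquireRelease = true;  // mirrors the new flag in the patch

struct VolatileCell {  // hypothetical stand-in for a Java volatile int field
  std::atomic<int> value{0};

  int Get() const {
    if (kUseAcquireRelease) {
      // Acquire load: on ARMv8 this is typically a single ldar.
      return value.load(std::memory_order_acquire);
    }
    // Fallback path: plain load followed by a full barrier (ldr; dmb),
    // analogous to Load() + GenerateMemoryBarrier(kAnyAny) in the patch.
    int v = value.load(std::memory_order_relaxed);
    std::atomic_thread_fence(std::memory_order_seq_cst);
    return v;
  }

  void Set(int v) {
    if (kUseAcquireRelease) {
      // Release store: on ARMv8 this is typically a single stlr.
      value.store(v, std::memory_order_release);
    } else {
      // Fallback path: barrier, store, barrier, analogous to
      // GenerateMemoryBarrier(kAnyStore) + Store() + GenerateMemoryBarrier(kAnyAny).
      std::atomic_thread_fence(std::memory_order_seq_cst);
      value.store(v, std::memory_order_relaxed);
      std::atomic_thread_fence(std::memory_order_seq_cst);
    }
  }
};

int main() {
  VolatileCell cell;
  cell.Set(42);
  std::printf("%d\n", cell.Get());
  return 0;
}
```

The acquire/release path needs no separate barrier instructions, which is why the TODO in the patch defaults kUseAcquireRelease to true and keeps the DMB sequence only as the alternative.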
```diff
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index dde0dfe394..f6ca6c740e 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -671,13 +671,6 @@ bool HGraphBuilder::BuildInstanceFieldAccess(const Instruction& instruction,
     return false;
   }
 
-#if defined(__aarch64__)
-  if (resolved_field->IsVolatile()) {
-    MaybeRecordStat(MethodCompilationStat::kNotCompiledVolatile);
-    return false;
-  }
-#endif
-
   Primitive::Type field_type = resolved_field->GetTypeAsPrimitiveType();
 
   HInstruction* object = LoadLocal(obj_reg, Primitive::kPrimNot);
@@ -706,8 +699,6 @@ bool HGraphBuilder::BuildInstanceFieldAccess(const Instruction& instruction,
   return true;
 }
 
-
-
 bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction,
                                            uint32_t dex_pc,
                                            bool is_put) {
@@ -728,13 +719,6 @@ bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction,
     return false;
   }
 
-#if defined(__aarch64__)
-  if (resolved_field->IsVolatile()) {
-    MaybeRecordStat(MethodCompilationStat::kNotCompiledVolatile);
-    return false;
-  }
-#endif
-
   Handle<mirror::Class> referrer_class(hs.NewHandle(compiler_driver_->ResolveCompilingMethodsClass(
       soa, dex_cache, class_loader, outer_compilation_unit_)));
 
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index c7517d3abc..e10b47cde8 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -38,6 +38,9 @@ namespace art {
 
 namespace arm64 {
 
+// TODO: Tune the use of Load-Acquire, Store-Release vs Data Memory Barriers.
+// For now we prefer the use of load-acquire, store-release over explicit memory barriers.
+static constexpr bool kUseAcquireRelease = true;
 static constexpr bool kExplicitStackOverflowCheck = false;
 static constexpr size_t kHeapRefSize = sizeof(mirror::HeapReference<mirror::Object>);
 static constexpr int kCurrentMethodStackOffset = 0;
@@ -233,8 +236,9 @@ Location ARM64ReturnLocation(Primitive::Type return_type) {
 
 static const Register kRuntimeParameterCoreRegisters[] = { x0, x1, x2, x3, x4, x5, x6, x7 };
 static constexpr size_t kRuntimeParameterCoreRegistersLength = arraysize(kRuntimeParameterCoreRegisters);
-static const FPRegister kRuntimeParameterFpuRegisters[] = { };
-static constexpr size_t kRuntimeParameterFpuRegistersLength = 0;
+static const FPRegister kRuntimeParameterFpuRegisters[] = { d0, d1, d2, d3, d4, d5, d6, d7 };
+static constexpr size_t kRuntimeParameterFpuRegistersLength =
+    arraysize(kRuntimeParameterCoreRegisters);
 
 class InvokeRuntimeCallingConvention : public CallingConvention<Register, FPRegister> {
  public:
@@ -949,8 +953,8 @@ void CodeGeneratorARM64::SwapLocations(Location loc1, Location loc2) {
 }
 
 void CodeGeneratorARM64::Load(Primitive::Type type,
-                              vixl::CPURegister dst,
-                              const vixl::MemOperand& src) {
+                              CPURegister dst,
+                              const MemOperand& src) {
   switch (type) {
     case Primitive::kPrimBoolean:
       __ Ldrb(Register(dst), src);
@@ -969,7 +973,7 @@ void CodeGeneratorARM64::Load(Primitive::Type type,
     case Primitive::kPrimLong:
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble:
-      DCHECK(dst.Is64Bits() == Is64BitType(type));
+      DCHECK_EQ(dst.Is64Bits(), Is64BitType(type));
       __ Ldr(dst, src);
       break;
     case Primitive::kPrimVoid:
@@ -977,26 +981,118 @@ void CodeGeneratorARM64::Load(Primitive::Type type,
   }
 }
 
+void CodeGeneratorARM64::LoadAcquire(Primitive::Type type,
+                                     CPURegister dst,
+                                     const MemOperand& src) {
+  UseScratchRegisterScope temps(GetVIXLAssembler());
+  Register temp_base = temps.AcquireX();
+
+  DCHECK(!src.IsRegisterOffset());
+  DCHECK(!src.IsPreIndex());
+  DCHECK(!src.IsPostIndex());
+
+  // TODO(vixl): Let the MacroAssembler handle MemOperand.
+  __ Add(temp_base, src.base(), src.offset());
+  MemOperand base = MemOperand(temp_base);
+  switch (type) {
+    case Primitive::kPrimBoolean:
+      __ Ldarb(Register(dst), base);
+      break;
+    case Primitive::kPrimByte:
+      __ Ldarb(Register(dst), base);
+      __ Sbfx(Register(dst), Register(dst), 0, Primitive::ComponentSize(type) * kBitsPerByte);
+      break;
+    case Primitive::kPrimChar:
+      __ Ldarh(Register(dst), base);
+      break;
+    case Primitive::kPrimShort:
+      __ Ldarh(Register(dst), base);
+      __ Sbfx(Register(dst), Register(dst), 0, Primitive::ComponentSize(type) * kBitsPerByte);
+      break;
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot:
+    case Primitive::kPrimLong:
+      DCHECK_EQ(dst.Is64Bits(), Is64BitType(type));
+      __ Ldar(Register(dst), base);
+      break;
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      DCHECK(dst.IsFPRegister());
+      DCHECK_EQ(dst.Is64Bits(), Is64BitType(type));
+
+      Register temp = dst.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
+      __ Ldar(temp, base);
+      __ Fmov(FPRegister(dst), temp);
+      break;
+    }
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unreachable type " << type;
+  }
+}
+
 void CodeGeneratorARM64::Store(Primitive::Type type,
-                               vixl::CPURegister rt,
-                               const vixl::MemOperand& dst) {
+                               CPURegister src,
+                               const MemOperand& dst) {
   switch (type) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte:
-      __ Strb(Register(rt), dst);
+      __ Strb(Register(src), dst);
       break;
     case Primitive::kPrimChar:
     case Primitive::kPrimShort:
-      __ Strh(Register(rt), dst);
+      __ Strh(Register(src), dst);
       break;
     case Primitive::kPrimInt:
     case Primitive::kPrimNot:
    case Primitive::kPrimLong:
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble:
-      DCHECK(rt.Is64Bits() == Is64BitType(type));
-      __ Str(rt, dst);
+      DCHECK_EQ(src.Is64Bits(), Is64BitType(type));
+      __ Str(src, dst);
+      break;
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unreachable type " << type;
+  }
+}
+
+void CodeGeneratorARM64::StoreRelease(Primitive::Type type,
+                                      CPURegister src,
+                                      const MemOperand& dst) {
+  UseScratchRegisterScope temps(GetVIXLAssembler());
+  Register temp_base = temps.AcquireX();
+
+  DCHECK(!dst.IsRegisterOffset());
+  DCHECK(!dst.IsPreIndex());
+  DCHECK(!dst.IsPostIndex());
+
+  // TODO(vixl): Let the MacroAssembler handle this.
+  __ Add(temp_base, dst.base(), dst.offset());
+  MemOperand base = MemOperand(temp_base);
+  switch (type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+      __ Stlrb(Register(src), base);
       break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      __ Stlrh(Register(src), base);
+      break;
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot:
+    case Primitive::kPrimLong:
+      DCHECK_EQ(src.Is64Bits(), Is64BitType(type));
+      __ Stlr(Register(src), base);
+      break;
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      DCHECK(src.IsFPRegister());
+      DCHECK_EQ(src.Is64Bits(), Is64BitType(type));
+
+      Register temp = src.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
+      __ Fmov(temp, FPRegister(src));
+      __ Stlr(temp, base);
+      break;
+    }
     case Primitive::kPrimVoid:
       LOG(FATAL) << "Unreachable type " << type;
   }
@@ -1026,14 +1122,47 @@ void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCod
                                                                      vixl::Register class_reg) {
   UseScratchRegisterScope temps(GetVIXLAssembler());
   Register temp = temps.AcquireW();
-  __ Ldr(temp, HeapOperand(class_reg, mirror::Class::StatusOffset()));
-  __ Cmp(temp, mirror::Class::kStatusInitialized);
-  __ B(lt, slow_path->GetEntryLabel());
+  size_t status_offset = mirror::Class::StatusOffset().SizeValue();
+
   // Even if the initialized flag is set, we need to ensure consistent memory ordering.
-  __ Dmb(InnerShareable, BarrierReads);
+  if (kUseAcquireRelease) {
+    // TODO(vixl): Let the MacroAssembler handle MemOperand.
+    __ Add(temp, class_reg, status_offset);
+    __ Ldar(temp, HeapOperand(temp));
+    __ Cmp(temp, mirror::Class::kStatusInitialized);
+    __ B(lt, slow_path->GetEntryLabel());
+  } else {
+    __ Ldr(temp, HeapOperand(class_reg, status_offset));
+    __ Cmp(temp, mirror::Class::kStatusInitialized);
+    __ B(lt, slow_path->GetEntryLabel());
+    __ Dmb(InnerShareable, BarrierReads);
+  }
   __ Bind(slow_path->GetExitLabel());
 }
 
+void InstructionCodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) {
+  BarrierType type = BarrierAll;
+
+  switch (kind) {
+    case MemBarrierKind::kAnyAny:
+    case MemBarrierKind::kAnyStore: {
+      type = BarrierAll;
+      break;
+    }
+    case MemBarrierKind::kLoadAny: {
+      type = BarrierReads;
+      break;
+    }
+    case MemBarrierKind::kStoreStore: {
+      type = BarrierWrites;
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected memory barrier " << kind;
+  }
+  __ Dmb(InnerShareable, type);
+}
+
 void InstructionCodeGeneratorARM64::GenerateSuspendCheck(HSuspendCheck* instruction,
                                                          HBasicBlock* successor) {
   SuspendCheckSlowPathARM64* slow_path =
@@ -1660,28 +1789,54 @@ void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) {
 }
 
 void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
 }
 
 void InstructionCodeGeneratorARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
   MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), instruction->GetFieldOffset());
-  codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field);
+
+  if (instruction->IsVolatile()) {
+    if (kUseAcquireRelease) {
+      codegen_->LoadAcquire(instruction->GetType(), OutputCPURegister(instruction), field);
+    } else {
+      codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field);
+      // For IRIW sequential consistency kLoadAny is not sufficient.
+      GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+    }
+  } else {
+    codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field);
+  }
 }
 
 void LocationsBuilderARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
-  Primitive::Type field_type = instruction->GetFieldType();
-  CPURegister value = InputCPURegisterAt(instruction, 1);
   Register obj = InputRegisterAt(instruction, 0);
-  codegen_->Store(field_type, value, HeapOperand(obj, instruction->GetFieldOffset()));
-  if (field_type == Primitive::kPrimNot) {
+  CPURegister value = InputCPURegisterAt(instruction, 1);
+  Offset offset = instruction->GetFieldOffset();
+  Primitive::Type field_type = instruction->GetFieldType();
+
+  if (instruction->IsVolatile()) {
+    if (kUseAcquireRelease) {
+      codegen_->StoreRelease(field_type, value, HeapOperand(obj, offset));
+    } else {
+      GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
+      codegen_->Store(field_type, value, HeapOperand(obj, offset));
+      GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+    }
+  } else {
+    codegen_->Store(field_type, value, HeapOperand(obj, offset));
+  }
+
+  if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
     codegen_->MarkGCCard(obj, Register(value));
   }
 }
@@ -2175,9 +2330,12 @@ void InstructionCodeGeneratorARM64::VisitPhi(HPhi* instruction) {
 }
 
 void LocationsBuilderARM64::VisitRem(HRem* rem) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(rem, LocationSummary::kNoCall);
-  switch (rem->GetResultType()) {
+  Primitive::Type type = rem->GetResultType();
+  LocationSummary::CallKind call_kind = IsFPType(type) ? LocationSummary::kCall
+                                                       : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
+
+  switch (type) {
     case Primitive::kPrimInt:
     case Primitive::kPrimLong:
       locations->SetInAt(0, Location::RequiresRegister());
@@ -2185,13 +2343,24 @@ void LocationsBuilderARM64::VisitRem(HRem* rem) {
       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
 
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      InvokeRuntimeCallingConvention calling_convention;
+      locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
+      locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1)));
+      locations->SetOut(calling_convention.GetReturnLocation(type));
+
+      break;
+    }
+
     default:
-      LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
+      LOG(FATAL) << "Unexpected rem type " << type;
   }
 }
 
 void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) {
   Primitive::Type type = rem->GetResultType();
+
   switch (type) {
     case Primitive::kPrimInt:
     case Primitive::kPrimLong: {
@@ -2206,6 +2375,14 @@ void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) {
       break;
     }
 
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      int32_t entry_offset = (type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pFmodf)
+                                                             : QUICK_ENTRY_POINT(pFmod);
+      codegen_->InvokeRuntime(entry_offset, rem, rem->GetDexPc());
+      break;
+    }
+
     default:
       LOG(FATAL) << "Unexpected rem type " << type;
   }
@@ -2294,7 +2471,18 @@ void LocationsBuilderARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
 
 void InstructionCodeGeneratorARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
   MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), instruction->GetFieldOffset());
-  codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field);
+
+  if (instruction->IsVolatile()) {
+    if (kUseAcquireRelease) {
+      codegen_->LoadAcquire(instruction->GetType(), OutputCPURegister(instruction), field);
+    } else {
+      codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field);
+      // For IRIW sequential consistency kLoadAny is not sufficient.
+      GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+    }
+  } else {
+    codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field);
+  }
 }
 
 void LocationsBuilderARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
@@ -2305,13 +2493,24 @@ void LocationsBuilderARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
 }
 
 void InstructionCodeGeneratorARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
-  CPURegister value = InputCPURegisterAt(instruction, 1);
   Register cls = InputRegisterAt(instruction, 0);
+  CPURegister value = InputCPURegisterAt(instruction, 1);
   Offset offset = instruction->GetFieldOffset();
   Primitive::Type field_type = instruction->GetFieldType();
-  codegen_->Store(field_type, value, HeapOperand(cls, offset));
-  if (field_type == Primitive::kPrimNot) {
+
+  if (instruction->IsVolatile()) {
+    if (kUseAcquireRelease) {
+      codegen_->StoreRelease(field_type, value, HeapOperand(cls, offset));
+    } else {
+      GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
+      codegen_->Store(field_type, value, HeapOperand(cls, offset));
+      GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+    }
+  } else {
+    codegen_->Store(field_type, value, HeapOperand(cls, offset));
+  }
+
+  if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
     codegen_->MarkGCCard(cls, Register(value));
   }
 }
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 1d5bfb734e..19488a4ba2 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -18,6 +18,7 @@
 #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM64_H_
 
 #include "code_generator.h"
+#include "dex/compiler_enums.h"
 #include "nodes.h"
 #include "parallel_move_resolver.h"
 #include "utils/arm64/assembler_arm64.h"
@@ -108,6 +109,7 @@ class InstructionCodeGeneratorARM64 : public HGraphVisitor {
 
  private:
   void GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path, vixl::Register class_reg);
+  void GenerateMemoryBarrier(MemBarrierKind kind);
   void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor);
   void HandleBinaryOp(HBinaryOperation* instr);
   void HandleShift(HBinaryOperation* instr);
@@ -257,6 +259,8 @@ class CodeGeneratorARM64 : public CodeGenerator {
   void Load(Primitive::Type type, vixl::CPURegister dst, const vixl::MemOperand& src);
   void Store(Primitive::Type type, vixl::CPURegister rt, const vixl::MemOperand& dst);
   void LoadCurrentMethod(vixl::Register current_method);
+  void LoadAcquire(Primitive::Type type, vixl::CPURegister dst, const vixl::MemOperand& src);
+  void StoreRelease(Primitive::Type type, vixl::CPURegister rt, const vixl::MemOperand& dst);
 
   // Generate code to invoke a runtime entry point.
   void InvokeRuntime(int32_t offset, HInstruction* instruction, uint32_t dex_pc);
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index 7993b19850..cc2723df99 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -38,7 +38,6 @@ enum MethodCompilationStat {
   kNotCompiledUnresolvedMethod,
   kNotCompiledUnresolvedField,
   kNotCompiledNonSequentialRegPair,
-  kNotCompiledVolatile,
   kNotOptimizedTryCatch,
   kNotOptimizedDisabled,
   kNotCompiledCantAccesType,
@@ -92,7 +91,6 @@ class OptimizingCompilerStats {
       case kNotCompiledUnresolvedMethod : return "kNotCompiledUnresolvedMethod";
       case kNotCompiledUnresolvedField : return "kNotCompiledUnresolvedField";
       case kNotCompiledNonSequentialRegPair : return "kNotCompiledNonSequentialRegPair";
-      case kNotCompiledVolatile : return "kNotCompiledVolatile";
       case kNotOptimizedDisabled : return "kNotOptimizedDisabled";
       case kNotOptimizedTryCatch : return "kNotOptimizedTryCatch";
       case kNotCompiledCantAccesType : return "kNotCompiledCantAccesType";
```
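
The rem-float and rem-double cases added to VisitRem above do not emit an inline instruction sequence: the locations builder reserves the FP argument registers of the runtime calling convention, and the code generator calls out through the pFmodf/pFmod quick entry points. Assuming those entry points follow the usual C library fmodf/fmod behaviour, a minimal standalone sketch of the remainder semantics involved:

```cpp
// Minimal sketch of the remainder values the pFmodf/pFmod runtime calls
// would produce, assuming they behave like the C library fmodf/fmod.
#include <cmath>
#include <cstdio>

int main() {
  float f = std::fmod(5.5f, 2.0f);   // rem-float:  1.5
  double d = std::fmod(-7.25, 3.0);  // rem-double: -1.25 (result keeps the dividend's sign)
  std::printf("rem-float = %f, rem-double = %f\n", f, d);
  return 0;
}
```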