diff options
Diffstat (limited to 'compiler/optimizing')
| -rw-r--r-- | compiler/optimizing/code_generator_arm.cc | 57 | ||||
| -rw-r--r-- | compiler/optimizing/code_generator_arm.h | 6 | ||||
| -rw-r--r-- | compiler/optimizing/optimizing_compiler.cc | 5 |
3 files changed, 51 insertions, 17 deletions
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 1cc2dcc9b8..1862061bcf 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -49,6 +49,9 @@ static constexpr SRegister kRuntimeParameterFpuRegisters[] = { S0, S1, S2, S3 }; static constexpr size_t kRuntimeParameterFpuRegistersLength = arraysize(kRuntimeParameterFpuRegisters); +static constexpr DRegister DTMP = D7; +static constexpr SRegister STMP = S14; + class InvokeRuntimeCallingConvention : public CallingConvention<Register, SRegister> { public: InvokeRuntimeCallingConvention() @@ -472,6 +475,11 @@ void CodeGeneratorARM::SetupBlockedRegisters() const { blocked_core_registers_[R10] = true; blocked_core_registers_[R11] = true; + // Don't allocate our temporary double register. + blocked_fpu_registers_[STMP] = true; + blocked_fpu_registers_[STMP + 1] = true; + DCHECK_EQ(FromLowSToD(STMP), DTMP); + blocked_fpu_registers_[S16] = true; blocked_fpu_registers_[S17] = true; blocked_fpu_registers_[S18] = true; @@ -590,9 +598,17 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type gp_index_ += 2; stack_index_ += 2; if (index + 1 < calling_convention.GetNumberOfRegisters()) { - ArmManagedRegister pair = ArmManagedRegister::FromRegisterPair( - calling_convention.GetRegisterPairAt(index)); - return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh()); + if (calling_convention.GetRegisterAt(index) == R1) { + // Skip R1, and use R2_R3 instead. + gp_index_++; + index++; + } + } + if (index + 1 < calling_convention.GetNumberOfRegisters()) { + DCHECK_EQ(calling_convention.GetRegisterAt(index) + 1, + calling_convention.GetRegisterAt(index + 1)); + return Location::RegisterPairLocation(calling_convention.GetRegisterAt(index), + calling_convention.GetRegisterAt(index + 1)); } else { return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index)); } @@ -617,6 +633,9 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type if (double_index_ + 1 < calling_convention.GetNumberOfFpuRegisters()) { uint32_t index = double_index_; double_index_ += 2; + DCHECK_EQ(calling_convention.GetFpuRegisterAt(index) + 1, + calling_convention.GetFpuRegisterAt(index + 1)); + DCHECK_EQ(calling_convention.GetFpuRegisterAt(index) & 1, 0); return Location::FpuRegisterPairLocation( calling_convention.GetFpuRegisterAt(index), calling_convention.GetFpuRegisterAt(index + 1)); @@ -3364,9 +3383,9 @@ void ParallelMoveResolverARM::EmitSwap(size_t index) { } else if (source.IsStackSlot() && destination.IsStackSlot()) { Exchange(source.GetStackIndex(), destination.GetStackIndex()); } else if (source.IsFpuRegister() && destination.IsFpuRegister()) { - __ vmovrs(IP, source.AsFpuRegister<SRegister>()); + __ vmovs(STMP, source.AsFpuRegister<SRegister>()); __ vmovs(source.AsFpuRegister<SRegister>(), destination.AsFpuRegister<SRegister>()); - __ vmovsr(destination.AsFpuRegister<SRegister>(), IP); + __ vmovs(destination.AsFpuRegister<SRegister>(), STMP); } else if (source.IsFpuRegister() || destination.IsFpuRegister()) { SRegister reg = source.IsFpuRegister() ? source.AsFpuRegister<SRegister>() : destination.AsFpuRegister<SRegister>(); @@ -3374,11 +3393,33 @@ void ParallelMoveResolverARM::EmitSwap(size_t index) { ? destination.GetStackIndex() : source.GetStackIndex(); - __ vmovrs(IP, reg); + __ vmovs(STMP, reg); __ LoadSFromOffset(reg, SP, mem); - __ StoreToOffset(kStoreWord, IP, SP, mem); + __ StoreSToOffset(STMP, SP, mem); + } else if (source.IsFpuRegisterPair() && destination.IsFpuRegisterPair()) { + __ vmovd(DTMP, FromLowSToD(source.AsFpuRegisterPairLow<SRegister>())); + __ vmovd(FromLowSToD(source.AsFpuRegisterPairLow<SRegister>()), + FromLowSToD(destination.AsFpuRegisterPairLow<SRegister>())); + __ vmovd(FromLowSToD(destination.AsFpuRegisterPairLow<SRegister>()), DTMP); + } else if (source.IsFpuRegisterPair() || destination.IsFpuRegisterPair()) { + DRegister reg = source.IsFpuRegisterPair() + ? FromLowSToD(source.AsFpuRegisterPairLow<SRegister>()) + : FromLowSToD(destination.AsFpuRegisterPairLow<SRegister>()); + int mem = source.IsFpuRegisterPair() + ? destination.GetStackIndex() + : source.GetStackIndex(); + + __ vmovd(DTMP, reg); + __ LoadDFromOffset(reg, SP, mem); + __ StoreDToOffset(DTMP, SP, mem); + } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) { + // TODO: We could use DTMP and ask for a pair scratch register (float or core). + // This would save four instructions if two scratch registers are available, and + // two instructions if not. + Exchange(source.GetStackIndex(), destination.GetStackIndex()); + Exchange(source.GetHighStackIndex(kArmWordSize), destination.GetHighStackIndex(kArmWordSize)); } else { - LOG(FATAL) << "Unimplemented"; + LOG(FATAL) << "Unimplemented" << source << " <-> " << destination; } } diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index c1b4eda3a4..8b29b159ab 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -33,7 +33,6 @@ class SlowPathCodeARM; static constexpr size_t kArmWordSize = kArmPointerSize; static constexpr Register kParameterCoreRegisters[] = { R1, R2, R3 }; -static constexpr RegisterPair kParameterCorePairRegisters[] = { R1_R2, R2_R3 }; static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters); static constexpr SRegister kParameterFpuRegisters[] = { S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15 }; @@ -47,11 +46,6 @@ class InvokeDexCallingConvention : public CallingConvention<Register, SRegister> kParameterFpuRegisters, kParameterFpuRegistersLength) {} - RegisterPair GetRegisterPairAt(size_t argument_index) { - DCHECK_LT(argument_index + 1, GetNumberOfRegisters()); - return kParameterCorePairRegisters[argument_index]; - } - private: DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention); }; diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index eaecbb04ae..692d452f54 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -149,12 +149,11 @@ void OptimizingCompiler::Init() { // Enable C1visualizer output. Must be done in Init() because the compiler // driver is not fully initialized when passed to the compiler's constructor. CompilerDriver* driver = GetCompilerDriver(); - const std::string cfg_file_name = driver->GetDumpCfgFileName(); - if (!cfg_file_name.empty()) { + if (driver->GetDumpPasses()) { CHECK_EQ(driver->GetThreadCount(), 1U) << "Graph visualizer requires the compiler to run single-threaded. " << "Invoke the compiler with '-j1'."; - visualizer_output_.reset(new std::ofstream(cfg_file_name)); + visualizer_output_.reset(new std::ofstream("art.cfg")); } } |