diff options
-rw-r--r-- | compiler/optimizing/code_generator_riscv64.cc | 118 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_riscv64.h | 10 | ||||
-rw-r--r-- | compiler/optimizing/optimizing_compiler.cc | 10 |
3 files changed, 100 insertions, 38 deletions
diff --git a/compiler/optimizing/code_generator_riscv64.cc b/compiler/optimizing/code_generator_riscv64.cc index e8496f5536..b4f0caf607 100644 --- a/compiler/optimizing/code_generator_riscv64.cc +++ b/compiler/optimizing/code_generator_riscv64.cc @@ -46,6 +46,13 @@ namespace riscv64 { // We switch to the table-based method starting with 6 entries. static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 6; +// FCLASS returns a 10-bit classification mask with the two highest bits marking NaNs +// (signaling and quiet). To detect a NaN, we can compare (either BGE or BGEU, the sign +// bit is always clear) the result with the `kFClassNaNMinValue`. +static_assert(kSignalingNaN == 0x100); +static_assert(kQuietNaN == 0x200); +static constexpr int32_t kFClassNaNMinValue = 0x100; + static constexpr XRegister kCoreCalleeSaves[] = { // S1(TR) is excluded as the ART thread register. S0, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, RA @@ -465,6 +472,11 @@ inline void InstructionCodeGeneratorRISCV64::FDiv( FpBinOp<FRegister, &Riscv64Assembler::FDivS, &Riscv64Assembler::FDivD>(rd, rs1, rs2, type); } +inline void InstructionCodeGeneratorRISCV64::FMul( + FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) { + FpBinOp<FRegister, &Riscv64Assembler::FMulS, &Riscv64Assembler::FMulD>(rd, rs1, rs2, type); +} + inline void InstructionCodeGeneratorRISCV64::FMin( FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) { FpBinOp<FRegister, &Riscv64Assembler::FMinS, &Riscv64Assembler::FMinD>(rd, rs1, rs2, type); @@ -490,6 +502,40 @@ inline void InstructionCodeGeneratorRISCV64::FLe( FpBinOp<XRegister, &Riscv64Assembler::FLeS, &Riscv64Assembler::FLeD>(rd, rs1, rs2, type); } +template <typename Reg, + void (Riscv64Assembler::*opS)(Reg, FRegister), + void (Riscv64Assembler::*opD)(Reg, FRegister)> +inline void InstructionCodeGeneratorRISCV64::FpUnOp( + Reg rd, FRegister rs1, DataType::Type type) { + Riscv64Assembler* assembler = down_cast<CodeGeneratorRISCV64*>(codegen_)->GetAssembler(); + if (type == DataType::Type::kFloat32) { + (assembler->*opS)(rd, rs1); + } else { + DCHECK_EQ(type, DataType::Type::kFloat64); + (assembler->*opD)(rd, rs1); + } +} + +inline void InstructionCodeGeneratorRISCV64::FAbs( + FRegister rd, FRegister rs1, DataType::Type type) { + FpUnOp<FRegister, &Riscv64Assembler::FAbsS, &Riscv64Assembler::FAbsD>(rd, rs1, type); +} + +inline void InstructionCodeGeneratorRISCV64::FNeg( + FRegister rd, FRegister rs1, DataType::Type type) { + FpUnOp<FRegister, &Riscv64Assembler::FNegS, &Riscv64Assembler::FNegD>(rd, rs1, type); +} + +inline void InstructionCodeGeneratorRISCV64::FMv( + FRegister rd, FRegister rs1, DataType::Type type) { + FpUnOp<FRegister, &Riscv64Assembler::FMvS, &Riscv64Assembler::FMvD>(rd, rs1, type); +} + +inline void InstructionCodeGeneratorRISCV64::FClass( + XRegister rd, FRegister rs1, DataType::Type type) { + FpUnOp<XRegister, &Riscv64Assembler::FClassS, &Riscv64Assembler::FClassD>(rd, rs1, type); +} + Riscv64Assembler* ParallelMoveResolverRISCV64::GetAssembler() const { return codegen_->GetAssembler(); } @@ -1299,7 +1345,11 @@ void LocationsBuilderRISCV64::HandleBinaryOp(HBinaryOperation* instruction) { case DataType::Type::kFloat64: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + if (instruction->IsMin() || instruction->IsMax()) { + locations->SetOut(Location::RequiresFpuRegister(), Location::kOutputOverlap); + } else { + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + } break; default: @@ -1380,11 +1430,32 @@ void InstructionCodeGeneratorRISCV64::HandleBinaryOp(HBinaryOperation* instructi FAdd(rd, rs1, rs2, type); } else if (instruction->IsSub()) { FSub(rd, rs1, rs2, type); - } else if (instruction->IsMin()) { - FMin(rd, rs1, rs2, type); } else { - DCHECK(instruction->IsMax()); - FMax(rd, rs1, rs2, type); + DCHECK(instruction->IsMin() || instruction->IsMax()); + // If one of the operands is NaN and the other is not, riscv64 instructions FMIN/FMAX + // return the other operand while we want to return the NaN operand. + DCHECK_NE(rd, rs1); // Requested `Location::kOutputOverlap`. + DCHECK_NE(rd, rs2); // Requested `Location::kOutputOverlap`. + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + XRegister tmp2 = srs.AllocateXRegister(); + Riscv64Label done; + // Return `rs1` if it's NaN. + FClass(tmp, rs1, type); + __ Li(tmp2, kFClassNaNMinValue); + FMv(rd, rs1, type); + __ Bgeu(tmp, tmp2, &done); + // Return `rs2` if it's NaN. + FClass(tmp, rs2, type); + FMv(rd, rs2, type); + __ Bgeu(tmp, tmp2, &done); + // Calculate Min/Max for non-NaN arguments. + if (instruction->IsMin()) { + FMin(rd, rs1, rs2, type); + } else { + FMax(rd, rs1, rs2, type); + } + __ Bind(&done); } break; } @@ -1657,18 +1728,12 @@ void InstructionCodeGeneratorRISCV64::VisitAbs(HAbs* abs) { __ Sub(out, out, tmp); break; } - case DataType::Type::kFloat32: { - FRegister in = locations->InAt(0).AsFpuRegister<FRegister>(); - FRegister out = locations->Out().AsFpuRegister<FRegister>(); - __ FAbsS(out, in); - break; - } - case DataType::Type::kFloat64: { - FRegister in = locations->InAt(0).AsFpuRegister<FRegister>(); - FRegister out = locations->Out().AsFpuRegister<FRegister>(); - __ FAbsD(out, in); + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + FAbs(locations->Out().AsFpuRegister<FRegister>(), + locations->InAt(0).AsFpuRegister<FRegister>(), + abs->GetResultType()); break; - } default: LOG(FATAL) << "Unexpected abs type " << abs->GetResultType(); } @@ -2586,15 +2651,11 @@ void InstructionCodeGeneratorRISCV64::VisitMul(HMul* instruction) { break; case DataType::Type::kFloat32: - __ FMulS(locations->Out().AsFpuRegister<FRegister>(), - locations->InAt(0).AsFpuRegister<FRegister>(), - locations->InAt(1).AsFpuRegister<FRegister>()); - break; - case DataType::Type::kFloat64: - __ FMulD(locations->Out().AsFpuRegister<FRegister>(), - locations->InAt(0).AsFpuRegister<FRegister>(), - locations->InAt(1).AsFpuRegister<FRegister>()); + FMul(locations->Out().AsFpuRegister<FRegister>(), + locations->InAt(0).AsFpuRegister<FRegister>(), + locations->InAt(1).AsFpuRegister<FRegister>(), + instruction->GetResultType()); break; default: @@ -2635,13 +2696,10 @@ void InstructionCodeGeneratorRISCV64::VisitNeg(HNeg* instruction) { break; case DataType::Type::kFloat32: - __ FNegS(locations->Out().AsFpuRegister<FRegister>(), - locations->InAt(0).AsFpuRegister<FRegister>()); - break; - case DataType::Type::kFloat64: - __ FNegD(locations->Out().AsFpuRegister<FRegister>(), - locations->InAt(0).AsFpuRegister<FRegister>()); + FNeg(locations->Out().AsFpuRegister<FRegister>(), + locations->InAt(0).AsFpuRegister<FRegister>(), + instruction->GetResultType()); break; default: diff --git a/compiler/optimizing/code_generator_riscv64.h b/compiler/optimizing/code_generator_riscv64.h index d408e754b5..8bda4a0ce4 100644 --- a/compiler/optimizing/code_generator_riscv64.h +++ b/compiler/optimizing/code_generator_riscv64.h @@ -474,12 +474,22 @@ class InstructionCodeGeneratorRISCV64 : public InstructionCodeGenerator { void FAdd(FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type); void FSub(FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type); void FDiv(FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type); + void FMul(FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type); void FMin(FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type); void FMax(FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type); void FEq(XRegister rd, FRegister rs1, FRegister rs2, DataType::Type type); void FLt(XRegister rd, FRegister rs1, FRegister rs2, DataType::Type type); void FLe(XRegister rd, FRegister rs1, FRegister rs2, DataType::Type type); + template <typename Reg, + void (Riscv64Assembler::*opS)(Reg, FRegister), + void (Riscv64Assembler::*opD)(Reg, FRegister)> + void FpUnOp(Reg rd, FRegister rs1, DataType::Type type); + void FAbs(FRegister rd, FRegister rs1, DataType::Type type); + void FNeg(FRegister rd, FRegister rs1, DataType::Type type); + void FMv(FRegister rd, FRegister rs1, DataType::Type type); + void FClass(XRegister rd, FRegister rs1, DataType::Type type); + Riscv64Assembler* const assembler_; CodeGeneratorRISCV64* const codegen_; diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 911a429d0b..0829bc2912 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -777,6 +777,8 @@ static bool CanAssembleGraphForRiscv64(HGraph* graph) { case HInstruction::kMul: case HInstruction::kNeg: case HInstruction::kNot: + case HInstruction::kMin: + case HInstruction::kMax: case HInstruction::kInvokeVirtual: case HInstruction::kInvokeInterface: case HInstruction::kCurrentMethod: @@ -792,14 +794,6 @@ static bool CanAssembleGraphForRiscv64(HGraph* graph) { return false; } break; - case HInstruction::kMin: - case HInstruction::kMax: - if (DataType::IsFloatingPointType(it.Current()->GetType())) { - // FIXME(riscv64): If one of the operands is NaN and the other is not, riscv64 - // FMIN/FMAX yield the non-NaN operand but we want the result to be the NaN operand. - return false; - } - break; default: // Unimplemented instruction. return false; |