MIPS64: Fuse long and FP compare & condition in Optimizing.

Bug: 25559148

Change-Id: I2d14ac75460a76848c71c08cffff6d7a18f5f580
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index 107d5bb..cfd8421 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -616,6 +616,14 @@
   EmitI21(0x3E, rs, imm21);
 }
 
+void Mips64Assembler::Bc1eqz(FpuRegister ft, uint16_t imm16) {  // Raw-offset form of bc1eqz.
+  EmitFI(0x11, 0x9, ft, imm16);  // bc1eqz ft, imm16; only the 0x9 field differs from Bc1nez.
+}
+
+void Mips64Assembler::Bc1nez(FpuRegister ft, uint16_t imm16) {  // Raw-offset form of bc1nez.
+  EmitFI(0x11, 0xD, ft, imm16);  // bc1nez ft, imm16; only the 0xD field differs from Bc1eqz.
+}
+
 void Mips64Assembler::EmitBcondc(BranchCondition cond,
                                  GpuRegister rs,
                                  GpuRegister rt,
@@ -669,6 +677,14 @@
     case kCondGEU:
       Bgeuc(rs, rt, imm16_21);
       break;
+    case kCondF:
+      CHECK_EQ(rt, ZERO);
+      Bc1eqz(static_cast<FpuRegister>(rs), imm16_21);
+      break;
+    case kCondT:
+      CHECK_EQ(rt, ZERO);
+      Bc1nez(static_cast<FpuRegister>(rs), imm16_21);
+      break;
     case kUncond:
       LOG(FATAL) << "Unexpected branch condition " << cond;
       UNREACHABLE();
@@ -827,6 +843,86 @@
   EmitFR(0x11, 0x11, ft, fs, fd, 0x1e);
 }
 
+void Mips64Assembler::CmpUnS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x01);  // cmp.un.s fd, fs, ft (0x14: .s variant, 0x01: "un")
+}
+
+void Mips64Assembler::CmpEqS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x02);  // cmp.eq.s fd, fs, ft (0x14: .s variant, 0x02: "eq")
+}
+
+void Mips64Assembler::CmpUeqS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x03);  // cmp.ueq.s fd, fs, ft (0x14: .s variant, 0x03: "ueq")
+}
+
+void Mips64Assembler::CmpLtS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x04);  // cmp.lt.s fd, fs, ft (0x14: .s variant, 0x04: "lt")
+}
+
+void Mips64Assembler::CmpUltS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x05);  // cmp.ult.s fd, fs, ft (0x14: .s variant, 0x05: "ult")
+}
+
+void Mips64Assembler::CmpLeS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x06);  // cmp.le.s fd, fs, ft (0x14: .s variant, 0x06: "le")
+}
+
+void Mips64Assembler::CmpUleS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x07);  // cmp.ule.s fd, fs, ft (0x14: .s variant, 0x07: "ule")
+}
+
+void Mips64Assembler::CmpOrS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x11);  // cmp.or.s fd, fs, ft (0x14: .s variant, 0x11: "or")
+}
+
+void Mips64Assembler::CmpUneS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x12);  // cmp.une.s fd, fs, ft (0x14: .s variant, 0x12: "une")
+}
+
+void Mips64Assembler::CmpNeS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x14, ft, fs, fd, 0x13);  // cmp.ne.s fd, fs, ft (0x14: .s variant, 0x13: "ne")
+}
+
+void Mips64Assembler::CmpUnD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x01);  // cmp.un.d fd, fs, ft (0x15: .d variant, 0x01: "un")
+}
+
+void Mips64Assembler::CmpEqD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x02);  // cmp.eq.d fd, fs, ft (0x15: .d variant, 0x02: "eq")
+}
+
+void Mips64Assembler::CmpUeqD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x03);  // cmp.ueq.d fd, fs, ft (0x15: .d variant, 0x03: "ueq")
+}
+
+void Mips64Assembler::CmpLtD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x04);  // cmp.lt.d fd, fs, ft (0x15: .d variant, 0x04: "lt")
+}
+
+void Mips64Assembler::CmpUltD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x05);  // cmp.ult.d fd, fs, ft (0x15: .d variant, 0x05: "ult")
+}
+
+void Mips64Assembler::CmpLeD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x06);  // cmp.le.d fd, fs, ft (0x15: .d variant, 0x06: "le")
+}
+
+void Mips64Assembler::CmpUleD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x07);  // cmp.ule.d fd, fs, ft (0x15: .d variant, 0x07: "ule")
+}
+
+void Mips64Assembler::CmpOrD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x11);  // cmp.or.d fd, fs, ft (0x15: .d variant, 0x11: "or")
+}
+
+void Mips64Assembler::CmpUneD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x12);  // cmp.une.d fd, fs, ft (0x15: .d variant, 0x12: "une")
+}
+
+void Mips64Assembler::CmpNeD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+  EmitFR(0x11, 0x15, ft, fs, fd, 0x13);  // cmp.ne.d fd, fs, ft (0x15: .d variant, 0x13: "ne")
+}
+
 void Mips64Assembler::Cvtsw(FpuRegister fd, FpuRegister fs) {
   EmitFR(0x11, 0x14, static_cast<FpuRegister>(0), fs, fd, 0x20);
 }
@@ -1134,6 +1230,10 @@
       CHECK_NE(lhs_reg, ZERO);
       CHECK_EQ(rhs_reg, ZERO);
       break;
+    case kCondF:
+    case kCondT:
+      CHECK_EQ(rhs_reg, ZERO);
+      break;
     case kUncond:
       UNREACHABLE();
   }
@@ -1188,6 +1288,10 @@
       return kCondGEU;
     case kCondGEU:
       return kCondLTU;
+    case kCondF:
+      return kCondT;
+    case kCondT:
+      return kCondF;
     case kUncond:
       LOG(FATAL) << "Unexpected branch condition " << cond;
   }
@@ -1567,7 +1671,7 @@
     case Branch::kCondBranch:
       CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
       EmitBcondc(condition, lhs, rhs, offset);
-      Nop();  // TODO: improve by filling the forbidden slot.
+      Nop();  // TODO: improve by filling the forbidden/delay slot.
       break;
     case Branch::kCall:
       CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
@@ -1657,6 +1761,14 @@
   Bcond(label, kCondNEZ, rs);
 }
 
+void Mips64Assembler::Bc1eqz(FpuRegister ft, Mips64Label* label) {  // bc1eqz ft, label
+  Bcond(label, kCondF, static_cast<GpuRegister>(ft), ZERO);  // kCondF requires rhs == ZERO.
+}
+
+void Mips64Assembler::Bc1nez(FpuRegister ft, Mips64Label* label) {  // bc1nez ft, label
+  Bcond(label, kCondT, static_cast<GpuRegister>(ft), ZERO);  // kCondT requires rhs == ZERO.
+}
+
 void Mips64Assembler::LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base,
                                      int32_t offset) {
   if (!IsInt<16>(offset)) {
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
index 57fc19a..883f013 100644
--- a/compiler/utils/mips64/assembler_mips64.h
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -227,6 +227,8 @@
   void Bnec(GpuRegister rs, GpuRegister rt, uint16_t imm16);
   void Beqzc(GpuRegister rs, uint32_t imm21);
   void Bnezc(GpuRegister rs, uint32_t imm21);
+  void Bc1eqz(FpuRegister ft, uint16_t imm16);  // bc1eqz with an explicit 16-bit immediate.
+  void Bc1nez(FpuRegister ft, uint16_t imm16);  // bc1nez with an explicit 16-bit immediate.
 
   void AddS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
   void SubS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
@@ -266,6 +268,26 @@
   void MinD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
   void MaxS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
   void MaxD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+  void CmpUnS(FpuRegister fd, FpuRegister fs, FpuRegister ft);  // cmp.un.s fd, fs, ft
+  void CmpEqS(FpuRegister fd, FpuRegister fs, FpuRegister ft);  // cmp.eq.s fd, fs, ft
+  void CmpUeqS(FpuRegister fd, FpuRegister fs, FpuRegister ft);  // cmp.ueq.s fd, fs, ft
+  void CmpLtS(FpuRegister fd, FpuRegister fs, FpuRegister ft);  // cmp.lt.s fd, fs, ft
+  void CmpUltS(FpuRegister fd, FpuRegister fs, FpuRegister ft);  // cmp.ult.s fd, fs, ft
+  void CmpLeS(FpuRegister fd, FpuRegister fs, FpuRegister ft);  // cmp.le.s fd, fs, ft
+  void CmpUleS(FpuRegister fd, FpuRegister fs, FpuRegister ft);  // cmp.ule.s fd, fs, ft
+  void CmpOrS(FpuRegister fd, FpuRegister fs, FpuRegister ft);  // cmp.or.s fd, fs, ft
+  void CmpUneS(FpuRegister fd, FpuRegister fs, FpuRegister ft);  // cmp.une.s fd, fs, ft
+  void CmpNeS(FpuRegister fd, FpuRegister fs, FpuRegister ft);  // cmp.ne.s fd, fs, ft
+  void CmpUnD(FpuRegister fd, FpuRegister fs, FpuRegister ft);  // cmp.un.d fd, fs, ft
+  void CmpEqD(FpuRegister fd, FpuRegister fs, FpuRegister ft);  // cmp.eq.d fd, fs, ft
+  void CmpUeqD(FpuRegister fd, FpuRegister fs, FpuRegister ft);  // cmp.ueq.d fd, fs, ft
+  void CmpLtD(FpuRegister fd, FpuRegister fs, FpuRegister ft);  // cmp.lt.d fd, fs, ft
+  void CmpUltD(FpuRegister fd, FpuRegister fs, FpuRegister ft);  // cmp.ult.d fd, fs, ft
+  void CmpLeD(FpuRegister fd, FpuRegister fs, FpuRegister ft);  // cmp.le.d fd, fs, ft
+  void CmpUleD(FpuRegister fd, FpuRegister fs, FpuRegister ft);  // cmp.ule.d fd, fs, ft
+  void CmpOrD(FpuRegister fd, FpuRegister fs, FpuRegister ft);  // cmp.or.d fd, fs, ft
+  void CmpUneD(FpuRegister fd, FpuRegister fs, FpuRegister ft);  // cmp.une.d fd, fs, ft
+  void CmpNeD(FpuRegister fd, FpuRegister fs, FpuRegister ft);  // cmp.ne.d fd, fs, ft
 
   void Cvtsw(FpuRegister fd, FpuRegister fs);
   void Cvtdw(FpuRegister fd, FpuRegister fs);
@@ -317,6 +339,8 @@
   void Bnec(GpuRegister rs, GpuRegister rt, Mips64Label* label);
   void Beqzc(GpuRegister rs, Mips64Label* label);
   void Bnezc(GpuRegister rs, Mips64Label* label);
+  void Bc1eqz(FpuRegister ft, Mips64Label* label);  // bc1eqz ft, label (uses kCondF).
+  void Bc1nez(FpuRegister ft, Mips64Label* label);  // bc1nez ft, label (uses kCondT).
 
   void EmitLoad(ManagedRegister m_dst, GpuRegister src_register, int32_t src_offset, size_t size);
   void LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base, int32_t offset);
@@ -474,6 +498,8 @@
     kCondNEZ,
     kCondLTU,
     kCondGEU,
+    kCondF,    // Floating-point predicate false.
+    kCondT,    // Floating-point predicate true.
     kUncond,
   };
   friend std::ostream& operator<<(std::ostream& os, const BranchCondition& rhs);
diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc
index 29a5a88..bac4375 100644
--- a/compiler/utils/mips64/assembler_mips64_test.cc
+++ b/compiler/utils/mips64/assembler_mips64_test.cc
@@ -403,6 +403,106 @@
   DriverStr(RepeatFFF(&mips64::Mips64Assembler::MaxD, "max.d ${reg1}, ${reg2}, ${reg3}"), "max.d");
 }
 
+TEST_F(AssemblerMIPS64Test, CmpUnS) {  // CmpUnS(fd, fs, ft) should assemble as cmp.un.s.
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUnS, "cmp.un.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.un.s");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpEqS) {  // CmpEqS(fd, fs, ft) should assemble as cmp.eq.s.
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpEqS, "cmp.eq.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.eq.s");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpUeqS) {  // CmpUeqS(fd, fs, ft) should assemble as cmp.ueq.s.
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUeqS, "cmp.ueq.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.ueq.s");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpLtS) {  // CmpLtS(fd, fs, ft) should assemble as cmp.lt.s.
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpLtS, "cmp.lt.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.lt.s");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpUltS) {  // CmpUltS(fd, fs, ft) should assemble as cmp.ult.s.
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUltS, "cmp.ult.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.ult.s");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpLeS) {  // CmpLeS(fd, fs, ft) should assemble as cmp.le.s.
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpLeS, "cmp.le.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.le.s");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpUleS) {  // CmpUleS(fd, fs, ft) should assemble as cmp.ule.s.
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUleS, "cmp.ule.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.ule.s");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpOrS) {  // CmpOrS(fd, fs, ft) should assemble as cmp.or.s.
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpOrS, "cmp.or.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.or.s");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpUneS) {  // CmpUneS(fd, fs, ft) should assemble as cmp.une.s.
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUneS, "cmp.une.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.une.s");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpNeS) {  // CmpNeS(fd, fs, ft) should assemble as cmp.ne.s.
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpNeS, "cmp.ne.s ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.ne.s");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpUnD) {  // CmpUnD(fd, fs, ft) should assemble as cmp.un.d.
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUnD, "cmp.un.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.un.d");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpEqD) {  // CmpEqD(fd, fs, ft) should assemble as cmp.eq.d.
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpEqD, "cmp.eq.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.eq.d");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpUeqD) {  // CmpUeqD(fd, fs, ft) should assemble as cmp.ueq.d.
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUeqD, "cmp.ueq.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.ueq.d");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpLtD) {  // CmpLtD(fd, fs, ft) should assemble as cmp.lt.d.
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpLtD, "cmp.lt.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.lt.d");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpUltD) {  // CmpUltD(fd, fs, ft) should assemble as cmp.ult.d.
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUltD, "cmp.ult.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.ult.d");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpLeD) {  // CmpLeD(fd, fs, ft) should assemble as cmp.le.d.
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpLeD, "cmp.le.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.le.d");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpUleD) {  // CmpUleD(fd, fs, ft) should assemble as cmp.ule.d.
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUleD, "cmp.ule.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.ule.d");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpOrD) {  // CmpOrD(fd, fs, ft) should assemble as cmp.or.d.
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpOrD, "cmp.or.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.or.d");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpUneD) {  // CmpUneD(fd, fs, ft) should assemble as cmp.une.d.
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUneD, "cmp.une.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.une.d");
+}
+
+TEST_F(AssemblerMIPS64Test, CmpNeD) {  // CmpNeD(fd, fs, ft) should assemble as cmp.ne.d.
+  DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpNeD, "cmp.ne.d ${reg1}, ${reg2}, ${reg3}"),
+            "cmp.ne.d");
+}
+
 TEST_F(AssemblerMIPS64Test, CvtDL) {
   DriverStr(RepeatFF(&mips64::Mips64Assembler::Cvtdl, "cvt.d.l ${reg1}, ${reg2}"), "cvt.d.l");
 }
@@ -591,6 +691,58 @@
   BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bgeuc, "Bgeuc");
 }
 
+TEST_F(AssemblerMIPS64Test, Bc1eqz) {  // Covers a forward and a backward bc1eqz to one label.
+    mips64::Mips64Label label;
+    __ Bc1eqz(mips64::F0, &label);  // Forward branch: the label is bound further down.
+    constexpr size_t kAdduCount1 = 63;  // Number of filler addu's before the label.
+    for (size_t i = 0; i != kAdduCount1; ++i) {
+      __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+    }
+    __ Bind(&label);
+    constexpr size_t kAdduCount2 = 64;  // Number of filler addu's after the label.
+    for (size_t i = 0; i != kAdduCount2; ++i) {
+      __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+    }
+    __ Bc1eqz(mips64::F31, &label);  // Backward branch to the already bound label.
+
+    std::string expected =
+        ".set noreorder\n"
+        "bc1eqz $f0, 1f\n"
+        "nop\n" +  // A nop is expected right after each branch.
+        RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+        "1:\n" +
+        RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+        "bc1eqz $f31, 1b\n"
+        "nop\n";
+    DriverStr(expected, "Bc1eqz");
+}
+
+TEST_F(AssemblerMIPS64Test, Bc1nez) {  // Covers a forward and a backward bc1nez to one label.
+    mips64::Mips64Label label;
+    __ Bc1nez(mips64::F0, &label);  // Forward branch: the label is bound further down.
+    constexpr size_t kAdduCount1 = 63;  // Number of filler addu's before the label.
+    for (size_t i = 0; i != kAdduCount1; ++i) {
+      __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+    }
+    __ Bind(&label);
+    constexpr size_t kAdduCount2 = 64;  // Number of filler addu's after the label.
+    for (size_t i = 0; i != kAdduCount2; ++i) {
+      __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+    }
+    __ Bc1nez(mips64::F31, &label);  // Backward branch to the already bound label.
+
+    std::string expected =
+        ".set noreorder\n"
+        "bc1nez $f0, 1f\n"
+        "nop\n" +  // A nop is expected right after each branch.
+        RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+        "1:\n" +
+        RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+        "bc1nez $f31, 1b\n"
+        "nop\n";
+    DriverStr(expected, "Bc1nez");
+}
+
 TEST_F(AssemblerMIPS64Test, LongBeqc) {
   mips64::Mips64Label label;
   __ Beqc(mips64::A0, mips64::A1, &label);