[optimizing compiler] Add REM_FLOAT and REM_DOUBLE

- for arm, x86, x86_64 backends
- reinstated fmod quick entry points for x86. This is a partial revert
of bd3682eada753de52975ae2b4a712bd87dc139a6 which added inline assembly
for floting point rem on x86. Note that Quick still uses the inline
version.
- fix rem tests for longs

Change-Id: I73be19a9f2f2bcf3f718d9ca636e67bdd72b5440
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index ca72f3f..0a3f830 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -1435,6 +1435,16 @@
       break;
     }
 
+    case Instruction::REM_FLOAT: {
+      Binop_23x<HRem>(instruction, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::REM_DOUBLE: {
+      Binop_23x<HRem>(instruction, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
     case Instruction::AND_INT: {
       Binop_23x<HAnd>(instruction, Primitive::kPrimInt);
       break;
@@ -1574,6 +1584,16 @@
       break;
     }
 
+    case Instruction::REM_FLOAT_2ADDR: {
+      Binop_12x<HRem>(instruction, Primitive::kPrimFloat, dex_pc);
+      break;
+    }
+
+    case Instruction::REM_DOUBLE_2ADDR: {
+      Binop_12x<HRem>(instruction, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
     case Instruction::SHL_INT_2ADDR: {
       Binop_12x_shift<HShl>(instruction, Primitive::kPrimInt);
       break;