New instruction simplifications. Extra dce pass. Allow more per block repeats.

Rationale:
We were missing some obvious simplifications, which left performance
at the table for e.g. CaffeineLogic compiled with dx (4200us->2700us).
The constant for allowing a repeat on a BB seemed very low, at the
very least it should depend on the BB size.

Test: test-art-host

Change-Id: Ic234566e117593e12c936d556222e4cd4f928105
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index e4d280f..e06fdee 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -111,9 +111,11 @@
   OptimizingCompilerStats* stats_;
   bool simplification_occurred_ = false;
   int simplifications_at_current_position_ = 0;
-  // We ensure we do not loop infinitely. The value is a finger in the air guess
-  // that should allow enough simplification.
-  static constexpr int kMaxSamePositionSimplifications = 10;
+  // We ensure we do not loop infinitely. The value should not be too high, since that
+  // would allow looping around the same basic block too many times. The value should
+  // not be too low either, however, since we want to allow revisiting a basic block
+  // with many statements and simplifications at least once.
+  static constexpr int kMaxSamePositionSimplifications = 50;
 };
 
 void InstructionSimplifier::Run() {
@@ -605,11 +607,23 @@
   return nullptr;
 }
 
+static bool CmpHasBoolType(HInstruction* input, HInstruction* cmp) {
+  if (input->GetType() == Primitive::kPrimBoolean) {
+    return true;  // input has direct boolean type
+  } else if (cmp->GetUses().HasExactlyOneElement()) {
+    // Comparison also has boolean type if both its input and the instruction
+    // itself feed into the same phi node.
+    HInstruction* user = cmp->GetUses().front().GetUser();
+    return user->IsPhi() && user->HasInput(input) && user->HasInput(cmp);
+  }
+  return false;
+}
+
 void InstructionSimplifierVisitor::VisitEqual(HEqual* equal) {
   HInstruction* input_const = equal->GetConstantRight();
   if (input_const != nullptr) {
     HInstruction* input_value = equal->GetLeastConstantLeft();
-    if (input_value->GetType() == Primitive::kPrimBoolean && input_const->IsIntConstant()) {
+    if (CmpHasBoolType(input_value, equal) && input_const->IsIntConstant()) {
       HBasicBlock* block = equal->GetBlock();
       // We are comparing the boolean to a constant which is of type int and can
       // be any constant.
@@ -619,6 +633,7 @@
         block->RemoveInstruction(equal);
         RecordSimplification();
       } else if (input_const->AsIntConstant()->IsFalse()) {
+        // Replace (bool_value == false) with !bool_value
         equal->ReplaceWith(GetGraph()->InsertOppositeCondition(input_value, equal));
         block->RemoveInstruction(equal);
         RecordSimplification();
@@ -640,11 +655,12 @@
   HInstruction* input_const = not_equal->GetConstantRight();
   if (input_const != nullptr) {
     HInstruction* input_value = not_equal->GetLeastConstantLeft();
-    if (input_value->GetType() == Primitive::kPrimBoolean && input_const->IsIntConstant()) {
+    if (CmpHasBoolType(input_value, not_equal) && input_const->IsIntConstant()) {
       HBasicBlock* block = not_equal->GetBlock();
       // We are comparing the boolean to a constant which is of type int and can
       // be any constant.
       if (input_const->AsIntConstant()->IsTrue()) {
+        // Replace (bool_value != true) with !bool_value
         not_equal->ReplaceWith(GetGraph()->InsertOppositeCondition(input_value, not_equal));
         block->RemoveInstruction(not_equal);
         RecordSimplification();
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 6a45149..ce2edde 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1855,6 +1855,15 @@
   size_t InputCount() const { return GetInputRecords().size(); }
   HInstruction* InputAt(size_t i) const { return InputRecordAt(i).GetInstruction(); }
 
+  bool HasInput(HInstruction* input) const {
+    for (const HInstruction* i : GetInputs()) {
+      if (i == input) {
+        return true;
+      }
+    }
+    return false;
+  }
+
   void SetRawInputAt(size_t index, HInstruction* input) {
     SetRawInputRecordAt(index, HUserRecord<HInstruction*>(input));
   }
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 19fd6f9..a484760 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -755,6 +755,8 @@
   HDeadCodeElimination* dce1 = new (arena) HDeadCodeElimination(
       graph, stats, "dead_code_elimination$initial");
   HDeadCodeElimination* dce2 = new (arena) HDeadCodeElimination(
+      graph, stats, "dead_code_elimination$after_inlining");
+  HDeadCodeElimination* dce3 = new (arena) HDeadCodeElimination(
       graph, stats, "dead_code_elimination$final");
   HConstantFolding* fold1 = new (arena) HConstantFolding(graph);
   InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(graph, stats);
@@ -795,6 +797,7 @@
     select_generator,
     fold2,  // TODO: if we don't inline we can also skip fold2.
     simplify2,
+    dce2,
     side_effects,
     gvn,
     licm,
@@ -804,7 +807,7 @@
     fold3,  // evaluates code generated by dynamic bce
     simplify3,
     lse,
-    dce2,
+    dce3,
     // The codegen has a few assumptions that only the instruction simplifier
     // can satisfy. For example, the code generator does not expect to see a
     // HTypeConversion from a type to the same type.