Optimizing: ARM64 negated bitwise operations simplification

Use negated instructions on ARM64 to replace [bitwise operation + not]
patterns, that is:
a & ~b (BIC)
a | ~b (ORN)
a ^ ~b (EON)

The simplification only happens if the Not is only used by the bitwise
operation. It does not happen if both inputs are Not's (this should be
handled by a generic simplification applying De Morgan's laws).

Change-Id: I0e112b23fd8b8e10f09bfeff5994508a8ff96e9c
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index beb75f0..25487d2 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1862,6 +1862,36 @@
   HandleBinaryOp(instruction);
 }
 
+void LocationsBuilderARM64::VisitArm64BitwiseNegatedRight(HArm64BitwiseNegatedRight* instr) {
+  DCHECK(Primitive::IsIntegralType(instr->GetType())) << instr->GetType();
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
+  locations->SetInAt(0, Location::RequiresRegister());
+  // There is no immediate variant of negated bitwise instructions in AArch64.
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorARM64::VisitArm64BitwiseNegatedRight(
+    HArm64BitwiseNegatedRight* instr) {
+  Register dst = OutputRegister(instr);
+  Register lhs = InputRegisterAt(instr, 0);
+  Register rhs = InputRegisterAt(instr, 1);
+
+  switch (instr->GetOpKind()) {
+    case HInstruction::kAnd:
+      __ Bic(dst, lhs, rhs);
+      break;
+    case HInstruction::kOr:
+      __ Orn(dst, lhs, rhs);
+      break;
+    case HInstruction::kXor:
+      __ Eon(dst, lhs, rhs);
+      break;
+    default:
+      LOG(FATAL) << "Unreachable";
+  }
+}
+
 void LocationsBuilderARM64::VisitArm64DataProcWithShifterOp(
     HArm64DataProcWithShifterOp* instruction) {
   DCHECK(instruction->GetType() == Primitive::kPrimInt ||
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index a3d6bcf..b9638f2 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -443,6 +443,10 @@
 #endif
 
 #ifdef ART_ENABLE_CODEGEN_arm64
+  void VisitArm64BitwiseNegatedRight(HArm64BitwiseNegatedRight* instruction) OVERRIDE {
+    StartAttributeStream("kind") << instruction->GetOpKind();
+  }
+
   void VisitArm64DataProcWithShifterOp(HArm64DataProcWithShifterOp* instruction) OVERRIDE {
     StartAttributeStream("kind") << instruction->GetInstrKind() << "+" << instruction->GetOpKind();
     if (HArm64DataProcWithShifterOp::IsShiftOp(instruction->GetOpKind())) {
diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc
index 83126a5..c2bbdcc 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.cc
+++ b/compiler/optimizing/instruction_simplifier_arm64.cc
@@ -180,6 +180,53 @@
   return true;
 }
 
+bool InstructionSimplifierArm64Visitor::TryMergeNegatedInput(HBinaryOperation* op) {
+  DCHECK(op->IsAnd() || op->IsOr() || op->IsXor()) << op->DebugName();
+  HInstruction* left = op->GetLeft();
+  HInstruction* right = op->GetRight();
+
+  // Only consider the case where there is exactly one Not, with 2 Not's De
+  // Morgan's laws should be applied instead.
+  if (left->IsNot() ^ right->IsNot()) {
+    HInstruction* hnot = (left->IsNot() ? left : right);
+    HInstruction* hother = (left->IsNot() ? right : left);
+
+    // Only do the simplification if the Not has only one use and can thus be
+    // safely removed. Even though ARM64 negated bitwise operations do not have
+    // an immediate variant (only register), we still do the simplification when
+    // `hother` is a constant, because it removes an instruction if the constant
+    // cannot be encoded as an immediate:
+    //   mov r0, #large_constant
+    //   neg r2, r1
+    //   and r0, r0, r2
+    // becomes:
+    //   mov r0, #large_constant
+    //   bic r0, r0, r1
+    if (hnot->HasOnlyOneNonEnvironmentUse()) {
+      // Replace code looking like
+      //    NOT tmp, mask
+      //    AND dst, src, tmp   (respectively ORR, EOR)
+      // with
+      //    BIC dst, src, mask  (respectively ORN, EON)
+      HInstruction* src = hnot->AsNot()->GetInput();
+
+      HArm64BitwiseNegatedRight* neg_op = new (GetGraph()->GetArena())
+          HArm64BitwiseNegatedRight(op->GetType(), op->GetKind(), hother, src, op->GetDexPc());
+
+      op->GetBlock()->ReplaceAndRemoveInstructionWith(op, neg_op);
+      hnot->GetBlock()->RemoveInstruction(hnot);
+      RecordSimplification();
+      return true;
+    }
+  }
+
+  return false;
+}
+
+void InstructionSimplifierArm64Visitor::VisitAnd(HAnd* instruction) {
+  TryMergeNegatedInput(instruction);
+}
+
 void InstructionSimplifierArm64Visitor::VisitArrayGet(HArrayGet* instruction) {
   TryExtractArrayAccessAddress(instruction,
                                instruction->GetArray(),
@@ -200,6 +247,10 @@
   }
 }
 
+void InstructionSimplifierArm64Visitor::VisitOr(HOr* instruction) {
+  TryMergeNegatedInput(instruction);
+}
+
 void InstructionSimplifierArm64Visitor::VisitShl(HShl* instruction) {
   if (instruction->InputAt(1)->IsConstant()) {
     TryMergeIntoUsersShifterOperand(instruction);
@@ -232,5 +283,9 @@
   }
 }
 
+void InstructionSimplifierArm64Visitor::VisitXor(HXor* instruction) {
+  TryMergeNegatedInput(instruction);
+}
+
 }  // namespace arm64
 }  // namespace art
diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h
index 37a34c0..cf84587 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.h
+++ b/compiler/optimizing/instruction_simplifier_arm64.h
@@ -51,14 +51,21 @@
     return TryMergeIntoShifterOperand(use, bitfield_op, true);
   }
 
+  // For bitwise operations (And/Or/Xor) with a negated input, try to use
+  // a negated bitwise instruction.
+  bool TryMergeNegatedInput(HBinaryOperation* op);
+
   // HInstruction visitors, sorted alphabetically.
+  void VisitAnd(HAnd* instruction) OVERRIDE;
   void VisitArrayGet(HArrayGet* instruction) OVERRIDE;
   void VisitArraySet(HArraySet* instruction) OVERRIDE;
   void VisitMul(HMul* instruction) OVERRIDE;
+  void VisitOr(HOr* instruction) OVERRIDE;
   void VisitShl(HShl* instruction) OVERRIDE;
   void VisitShr(HShr* instruction) OVERRIDE;
   void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE;
   void VisitUShr(HUShr* instruction) OVERRIDE;
+  void VisitXor(HXor* instruction) OVERRIDE;
 
   OptimizingCompilerStats* stats_;
 };
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 4185b2f..c4764cc 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1268,6 +1268,7 @@
 #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M)
 #else
 #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M)                          \
+  M(Arm64BitwiseNegatedRight, Instruction)                              \
   M(Arm64DataProcWithShifterOp, Instruction)                            \
   M(Arm64IntermediateAddress, Instruction)
 #endif
diff --git a/compiler/optimizing/nodes_arm64.h b/compiler/optimizing/nodes_arm64.h
index 173852a..75a71e7 100644
--- a/compiler/optimizing/nodes_arm64.h
+++ b/compiler/optimizing/nodes_arm64.h
@@ -118,6 +118,66 @@
   DISALLOW_COPY_AND_ASSIGN(HArm64IntermediateAddress);
 };
 
+class HArm64BitwiseNegatedRight : public HBinaryOperation {
+ public:
+  HArm64BitwiseNegatedRight(Primitive::Type result_type,
+                            InstructionKind op,
+                            HInstruction* left,
+                            HInstruction* right,
+                            uint32_t dex_pc = kNoDexPc)
+    : HBinaryOperation(result_type, left, right, SideEffects::None(), dex_pc),
+      op_kind_(op) {
+    DCHECK(op == HInstruction::kAnd || op == HInstruction::kOr || op == HInstruction::kXor) << op;
+  }
+
+  template <typename T, typename U>
+  auto Compute(T x, U y) const -> decltype(x & ~y) {
+    static_assert(std::is_same<decltype(x & ~y), decltype(x | ~y)>::value &&
+                  std::is_same<decltype(x & ~y), decltype(x ^ ~y)>::value,
+                  "Inconsistent negated bitwise types");
+    switch (op_kind_) {
+      case HInstruction::kAnd:
+        return x & ~y;
+      case HInstruction::kOr:
+        return x | ~y;
+      case HInstruction::kXor:
+        return x ^ ~y;
+      default:
+        LOG(FATAL) << "Unreachable";
+        UNREACHABLE();
+    }
+  }
+
+  HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetIntConstant(
+        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetLongConstant(
+        Compute(x->GetValue(), y->GetValue()), GetDexPc());
+  }
+  HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
+                      HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for float values";
+    UNREACHABLE();
+  }
+  HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
+                      HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+    LOG(FATAL) << DebugName() << " is not defined for double values";
+    UNREACHABLE();
+  }
+
+  InstructionKind GetOpKind() const { return op_kind_; }
+
+  DECLARE_INSTRUCTION(Arm64BitwiseNegatedRight);
+
+ private:
+  // Specifies the bitwise operation, which will be then negated.
+  const InstructionKind op_kind_;
+
+  DISALLOW_COPY_AND_ASSIGN(HArm64BitwiseNegatedRight);
+};
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_NODES_ARM64_H_