Improve code sinking near "always throwing" method calls

Rationale:
With a simple dex bytecode analysis, the inliner marks methods
that always throw, which helps subsequent code sinking. This
reduces the overhead of calls that enforce non-nullable
parameters, as found in e.g. the Kotlin runtime library (1%-2%
improvement on a tree microbenchmark, about 5% on Denis'
benchmark).
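
To illustrate (a hypothetical C++ analogue, not code from this
change): a method whose only exit is a throw never returns to its
caller, so once the inliner marks the call site, code that only
feeds the failure path can be sunk into the uncommon branch:

  #include <cstddef>
  #include <stdexcept>
  #include <string>

  // Always throws: its only exit is an exception, never a return.
  [[noreturn]] void ThrowParameterIsNull(const char* name) {
    throw std::invalid_argument(name);
  }

  std::size_t Length(const std::string* s) {
    if (s == nullptr) {
      ThrowParameterIsNull("s");  // never returns; this path ends at exit
    }
    return s->size();
  }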

Test: test-art-host test-art-target

Change-Id: I45348f049721476828eb5443738021720d2857c0
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 28f4816..13bbffa 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -3491,7 +3491,11 @@
 }
 
 void InstructionCodeGeneratorARM64::HandleGoto(HInstruction* got, HBasicBlock* successor) {
-  DCHECK(!successor->IsExitBlock());
+  if (successor->IsExitBlock()) {
+    DCHECK(got->GetPrevious()->AlwaysThrows());
+    return;  // no code needed
+  }
+
   HBasicBlock* block = got->GetBlock();
   HInstruction* previous = got->GetPrevious();
   HLoopInformation* info = block->GetLoopInformation();
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index f1ad4e1..577fe00 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -2776,7 +2776,11 @@
 }
 
 void InstructionCodeGeneratorARMVIXL::HandleGoto(HInstruction* got, HBasicBlock* successor) {
-  DCHECK(!successor->IsExitBlock());
+  if (successor->IsExitBlock()) {
+    DCHECK(got->GetPrevious()->AlwaysThrows());
+    return;  // no code needed
+  }
+
   HBasicBlock* block = got->GetBlock();
   HInstruction* previous = got->GetPrevious();
   HLoopInformation* info = block->GetLoopInformation();
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index c8bd5d4..5c8e46e 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -4034,7 +4034,11 @@
 }
 
 void InstructionCodeGeneratorMIPS::HandleGoto(HInstruction* got, HBasicBlock* successor) {
-  DCHECK(!successor->IsExitBlock());
+  if (successor->IsExitBlock()) {
+    DCHECK(got->GetPrevious()->AlwaysThrows());
+    return;  // no code needed
+  }
+
   HBasicBlock* block = got->GetBlock();
   HInstruction* previous = got->GetPrevious();
   HLoopInformation* info = block->GetLoopInformation();
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index bbdc3be..bcfe051 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -3562,7 +3562,11 @@
 }
 
 void InstructionCodeGeneratorMIPS64::HandleGoto(HInstruction* got, HBasicBlock* successor) {
-  DCHECK(!successor->IsExitBlock());
+  if (successor->IsExitBlock()) {
+    DCHECK(got->GetPrevious()->AlwaysThrows());
+    return;  // no code needed
+  }
+
   HBasicBlock* block = got->GetBlock();
   HInstruction* previous = got->GetPrevious();
   HLoopInformation* info = block->GetLoopInformation();
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 537e97a..cbe9e0a 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -1347,7 +1347,10 @@
 }
 
 void InstructionCodeGeneratorX86::HandleGoto(HInstruction* got, HBasicBlock* successor) {
-  DCHECK(!successor->IsExitBlock());
+  if (successor->IsExitBlock()) {
+    DCHECK(got->GetPrevious()->AlwaysThrows());
+    return;  // no code needed
+  }
 
   HBasicBlock* block = got->GetBlock();
   HInstruction* previous = got->GetPrevious();
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 4a64285..510eec4 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -1449,7 +1449,10 @@
 }
 
 void InstructionCodeGeneratorX86_64::HandleGoto(HInstruction* got, HBasicBlock* successor) {
-  DCHECK(!successor->IsExitBlock());
+  if (successor->IsExitBlock()) {
+    DCHECK(got->GetPrevious()->AlwaysThrows());
+    return;  // no code needed
+  }
 
   HBasicBlock* block = got->GetBlock();
   HInstruction* previous = got->GetPrevious();
diff --git a/compiler/optimizing/code_sinking.cc b/compiler/optimizing/code_sinking.cc
index d8ebac9..f4760d6 100644
--- a/compiler/optimizing/code_sinking.cc
+++ b/compiler/optimizing/code_sinking.cc
@@ -34,7 +34,9 @@
   // TODO(ngeoffray): we do not profile branches yet, so use throw instructions
   // as an indicator of an uncommon branch.
   for (HBasicBlock* exit_predecessor : exit->GetPredecessors()) {
-    if (exit_predecessor->GetLastInstruction()->IsThrow()) {
+    HInstruction* last = exit_predecessor->GetLastInstruction();
+    // Any predecessor of the exit block that does not return must throw an exception.
+    if (!last->IsReturn() && !last->IsReturnVoid()) {
       SinkCodeToUncommonBranch(exit_predecessor);
     }
   }
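
For intuition, a source-level sketch (hypothetical names, not part of
this change) of what sinking into such a predecessor buys: work that
feeds only the throwing path moves off the common path.

  #include <string>

  std::string BuildExpensiveMessage();                   // hypothetical
  [[noreturn]] void ThrowError(const std::string& msg);  // hypothetical

  // Before sinking: the message is built even when x is non-null.
  void CheckBefore(const void* x) {
    std::string msg = BuildExpensiveMessage();
    if (x == nullptr) ThrowError(msg);
  }

  // After sinking: the expensive work runs only on the uncommon path.
  void CheckAfter(const void* x) {
    if (x == nullptr) ThrowError(BuildExpensiveMessage());
  }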
diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc
index 3cc7b0e..cca1055 100644
--- a/compiler/optimizing/dead_code_elimination.cc
+++ b/compiler/optimizing/dead_code_elimination.cc
@@ -148,6 +148,77 @@
 
 // Simplify the pattern:
 //
+//           B1
+//          /  \
+//          |   foo()  // always throws
+//          \   goto B2
+//           \ /
+//            B2
+//
+// Into:
+//
+//           B1
+//          /  \
+//          |  foo()
+//          |  goto Exit
+//          |   |
+//         B2  Exit
+//
+// Rationale:
+// Removal of the never-taken edge to B2 may expose
+// other optimization opportunities, such as code sinking.
+bool HDeadCodeElimination::SimplifyAlwaysThrows() {
+  // Make sure exceptions go to exit.
+  if (graph_->HasTryCatch()) {
+    return false;
+  }
+  HBasicBlock* exit = graph_->GetExitBlock();
+  if (exit == nullptr) {
+    return false;
+  }
+
+  bool rerun_dominance_and_loop_analysis = false;
+
+  // Iteration order does not matter; just pick one.
+  for (HBasicBlock* block : graph_->GetReversePostOrder()) {
+    HInstruction* first = block->GetFirstInstruction();
+    HInstruction* last = block->GetLastInstruction();
+    // Ensure the block contains exactly one throwing instruction followed by the goto.
+    if (first->AlwaysThrows() &&
+        first->GetNext() == last &&
+        last->IsGoto() &&
+        block->GetPhis().IsEmpty() &&
+        block->GetPredecessors().size() == 1u) {
+      DCHECK_EQ(block->GetSuccessors().size(), 1u);
+      HBasicBlock* pred = block->GetSinglePredecessor();
+      HBasicBlock* succ = block->GetSingleSuccessor();
+      // Ensure no computations are merged through the throwing block.
+      // This does not prevent the optimization per se, but would
+      // require an elaborate cleanup of the SSA graph.
+      if (succ != exit &&
+          !block->Dominates(pred) &&
+          pred->Dominates(succ) &&
+          succ->GetPredecessors().size() > 1u &&
+          succ->GetPhis().IsEmpty()) {
+        block->ReplaceSuccessor(succ, exit);
+        rerun_dominance_and_loop_analysis = true;
+        MaybeRecordStat(stats_, MethodCompilationStat::kSimplifyThrowingInvoke);
+      }
+    }
+  }
+
+  // We need to re-analyze the graph in order to run DCE afterwards.
+  if (rerun_dominance_and_loop_analysis) {
+    graph_->ClearLoopInformation();
+    graph_->ClearDominanceInformation();
+    graph_->BuildDominatorTree();
+    return true;
+  }
+  return false;
+}
+
+// Simplify the pattern:
+//
 //        B1    B2    ...
 //       goto  goto  goto
 //         \    |    /
@@ -381,6 +452,7 @@
     // Simplify graph to generate more dead block patterns.
     ConnectSuccessiveBlocks();
     bool did_any_simplification = false;
+    did_any_simplification |= SimplifyAlwaysThrows();
     did_any_simplification |= SimplifyIfs();
     did_any_simplification |= RemoveDeadBlocks();
     if (did_any_simplification) {
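
The core rewrite, restated on a toy CFG (hypothetical stand-in types;
the real pass above additionally guards against phis, dominance
subtleties, and the successor already being the exit):

  #include <algorithm>
  #include <vector>

  struct Block {
    std::vector<Block*> preds, succs;
  };

  // Move the never-taken edge block->succ over to block->exit.
  // Assumes block has exactly one successor and that the edge exists.
  void RedirectToExit(Block* block, Block* exit) {
    Block* succ = block->succs[0];
    succ->preds.erase(std::find(succ->preds.begin(), succ->preds.end(), block));
    block->succs[0] = exit;
    exit->preds.push_back(block);
  }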
diff --git a/compiler/optimizing/dead_code_elimination.h b/compiler/optimizing/dead_code_elimination.h
index 84fd890..92a7f56 100644
--- a/compiler/optimizing/dead_code_elimination.h
+++ b/compiler/optimizing/dead_code_elimination.h
@@ -40,6 +40,7 @@
   void MaybeRecordSimplifyIf();
   bool RemoveDeadBlocks();
   void RemoveDeadInstructions();
+  bool SimplifyAlwaysThrows();
   bool SimplifyIfs();
   void ConnectSuccessiveBlocks();
 
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index b1ac027..c88baa8 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -31,7 +31,15 @@
 using android::base::StringPrintf;
 
 static bool IsAllowedToJumpToExitBlock(HInstruction* instruction) {
-  return instruction->IsThrow() || instruction->IsReturn() || instruction->IsReturnVoid();
+  // Anything that returns is allowed to jump into the exit block.
+  if (instruction->IsReturn() || instruction->IsReturnVoid()) {
+    return true;
+  }
+  // Anything that always throws is allowed to jump into the exit block.
+  if (instruction->IsGoto() && instruction->GetPrevious() != nullptr) {
+    instruction = instruction->GetPrevious();
+  }
+  return instruction->AlwaysThrows();
 }
 
 static bool IsExitTryBoundaryIntoExitBlock(HBasicBlock* block) {
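
Restated compactly (hypothetical enum, not ART types): an edge into
the exit block is now legal if the block returns, or if its last
"real" instruction, looking through a trailing goto, always throws.

  enum class Kind { kReturn, kReturnVoid, kThrow, kAlwaysThrowingInvoke, kGoto, kOther };

  bool MayJumpToExit(Kind last, Kind previous) {
    if (last == Kind::kReturn || last == Kind::kReturnVoid) return true;
    if (last == Kind::kGoto) last = previous;  // look through the trailing goto
    return last == Kind::kThrow || last == Kind::kAlwaysThrowingInvoke;
  }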
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 81a7558..41e4bbe 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -392,6 +392,34 @@
   return single_impl;
 }
 
+static bool AlwaysThrows(ArtMethod* method) {
+  CodeItemDataAccessor accessor(method);
+  // Skip native methods, methods with try blocks, and methods that are too large.
+  if (!accessor.HasCodeItem() ||
+      accessor.TriesSize() != 0 ||
+      accessor.InsnsSizeInCodeUnits() > kMaximumNumberOfTotalInstructions) {
+    return false;
+  }
+  // Scan for exits.
+  bool throw_seen = false;
+  for (const DexInstructionPcPair& pair : accessor) {
+    switch (pair.Inst().Opcode()) {
+      case Instruction::RETURN:
+      case Instruction::RETURN_VOID:
+      case Instruction::RETURN_WIDE:
+      case Instruction::RETURN_OBJECT:
+      case Instruction::RETURN_VOID_NO_BARRIER:
+        return false;  // found regular control flow back
+      case Instruction::THROW:
+        throw_seen = true;
+        break;
+      default:
+        break;
+    }
+  }
+  return throw_seen;
+}
+
 bool HInliner::TryInline(HInvoke* invoke_instruction) {
   if (invoke_instruction->IsInvokeUnresolved() ||
       invoke_instruction->IsInvokePolymorphic()) {
@@ -445,6 +473,11 @@
       } else {
         MaybeRecordStat(stats_, MethodCompilationStat::kInlinedInvokeVirtualOrInterface);
       }
+    } else if (!result && invoke_instruction->IsInvokeStaticOrDirect()) {
+      // Analyze the always-throws property for a static/direct call with a single target.
+      if (AlwaysThrows(actual_method)) {
+        invoke_instruction->SetAlwaysThrows(true);
+      }
     }
     return result;
   }
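
The scan is a single pass over the bytecode: any RETURN-family opcode
disproves the property, and at least one THROW must be seen. Note that
methods with try blocks are skipped above, since a caught throw may
still flow to a return. A self-contained sketch (hypothetical
simplified opcode type, not the ART API):

  #include <vector>

  enum class Op { kReturn, kReturnVoid, kReturnWide, kReturnObject, kThrow, kOther };

  bool AlwaysThrowsSketch(const std::vector<Op>& insns) {
    bool throw_seen = false;
    for (Op op : insns) {
      switch (op) {
        case Op::kReturn:
        case Op::kReturnVoid:
        case Op::kReturnWide:
        case Op::kReturnObject:
          return false;  // regular control flow back to the caller
        case Op::kThrow:
          throw_seen = true;
          break;
        default:
          break;
      }
    }
    return throw_seen;
  }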
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index d4382c6..2047954 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -2018,6 +2018,10 @@
   // TODO: We should rename to CanVisiblyThrow, as some instructions (like HNewInstance),
   // could throw OOME, but it is still OK to remove them if they are unused.
   virtual bool CanThrow() const { return false; }
+
+  // Does the instruction unconditionally throw an exception?
+  virtual bool AlwaysThrows() const { return false; }
+
   bool CanThrowIntoCatchBlock() const { return CanThrow() && block_->IsTryBlock(); }
 
   bool HasSideEffects() const { return side_effects_.HasSideEffects(); }
@@ -4169,6 +4173,10 @@
 
   bool CanThrow() const OVERRIDE { return GetPackedFlag<kFlagCanThrow>(); }
 
+  void SetAlwaysThrows(bool always_throws) { SetPackedFlag<kFlagAlwaysThrows>(always_throws); }
+
+  bool AlwaysThrows() const OVERRIDE { return GetPackedFlag<kFlagAlwaysThrows>(); }
+
   bool CanBeMoved() const OVERRIDE { return IsIntrinsic() && !DoesAnyWrite(); }
 
   bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
@@ -4199,7 +4207,8 @@
   static constexpr size_t kFieldReturnTypeSize =
       MinimumBitsToStore(static_cast<size_t>(DataType::Type::kLast));
   static constexpr size_t kFlagCanThrow = kFieldReturnType + kFieldReturnTypeSize;
-  static constexpr size_t kNumberOfInvokePackedBits = kFlagCanThrow + 1;
+  static constexpr size_t kFlagAlwaysThrows = kFlagCanThrow + 1;
+  static constexpr size_t kNumberOfInvokePackedBits = kFlagAlwaysThrows + 1;
   static_assert(kNumberOfInvokePackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
   using InvokeTypeField = BitField<InvokeType, kFieldInvokeType, kFieldInvokeTypeSize>;
   using ReturnTypeField = BitField<DataType::Type, kFieldReturnType, kFieldReturnTypeSize>;
@@ -6575,6 +6584,8 @@
 
   bool CanThrow() const OVERRIDE { return true; }
 
+  bool AlwaysThrows() const OVERRIDE { return true; }
+
   DECLARE_INSTRUCTION(Throw);
 
  protected:
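
The new flag costs a single bit in the invoke's packed-fields word. A
minimal self-contained sketch (hypothetical, simplified) of that
packing scheme:

  #include <cstddef>
  #include <cstdint>

  class PackedFlags {
   public:
    // Consecutive bit positions, mirroring kFlagCanThrow / kFlagAlwaysThrows.
    static constexpr std::size_t kFlagCanThrow = 0;
    static constexpr std::size_t kFlagAlwaysThrows = kFlagCanThrow + 1;

    void SetFlag(std::size_t flag, bool value) {
      if (value) {
        bits_ |= (1u << flag);
      } else {
        bits_ &= ~(1u << flag);
      }
    }
    bool GetFlag(std::size_t flag) const { return (bits_ >> flag) & 1u; }

   private:
    std::uint32_t bits_ = 0u;
  };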
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index 32a94ab..0023265 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -75,6 +75,7 @@
   kImplicitNullCheckGenerated,
   kExplicitNullCheckGenerated,
   kSimplifyIf,
+  kSimplifyThrowingInvoke,
   kInstructionSunk,
   kNotInlinedUnresolvedEntrypoint,
   kNotInlinedDexCache,