Add an environment to the beginning of catch blocks

We can use the HNop to generate an environment for us and start
using the existing environment infrastructure (e.g. EmitEnvironment)
for recording the catch block's environment.

This is done to provide support for try catch inlining since it
requires several environments rather than just the one.

When doing so, we now have a HNop at the beginning of each catch
block. We have to modify code sinking to allow it to sink code
past this HNop because if we don't we will sink code right before
this instruction which will break the invariant of that HNop being
the first instruction of a catch block.

Test: art/test/testrunner/testrunner.py --host --64 --optimizing -b
Change-Id: I16e7b8cad499fd2d6f7415112b46206db8daf544
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index b514f9b..252e756 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -413,6 +413,12 @@
     for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
       HInstruction* current = it.Current();
       if (current->HasEnvironment()) {
+        // Catch StackMaps are dealt with later on in `RecordCatchBlockInfo`.
+        if (block->IsCatchBlock() && block->GetFirstInstruction() == current) {
+          DCHECK(current->IsNop());
+          continue;
+        }
+
         // Create stackmap for HNop or any instruction which calls native code.
         // Note that we need correct mapping for the native PC of the call instruction,
         // so the runtime's stackmap is not sufficient since it is at PC after the call.
@@ -1334,53 +1340,29 @@
       continue;
     }
 
-    uint32_t dex_pc = block->GetDexPc();
-    uint32_t num_vregs = graph_->GetNumberOfVRegs();
-    uint32_t native_pc = GetAddressOf(block);
+    // Get the outer dex_pc
+    uint32_t outer_dex_pc = block->GetDexPc();
+    DCHECK(block->GetFirstInstruction()->IsNop());
+    DCHECK(block->GetFirstInstruction()->AsNop()->NeedsEnvironment());
+    HEnvironment* const environment = block->GetFirstInstruction()->GetEnvironment();
+    DCHECK(environment != nullptr);
+    HEnvironment* outer_environment = environment;
+    while (outer_environment->GetParent() != nullptr) {
+      outer_environment = outer_environment->GetParent();
+    }
+    outer_dex_pc = outer_environment->GetDexPc();
 
-    stack_map_stream->BeginStackMapEntry(dex_pc,
+    uint32_t native_pc = GetAddressOf(block);
+    stack_map_stream->BeginStackMapEntry(outer_dex_pc,
                                          native_pc,
                                          /* register_mask= */ 0,
                                          /* sp_mask= */ nullptr,
                                          StackMap::Kind::Catch);
 
-    HInstruction* current_phi = block->GetFirstPhi();
-    for (size_t vreg = 0; vreg < num_vregs; ++vreg) {
-      while (current_phi != nullptr && current_phi->AsPhi()->GetRegNumber() < vreg) {
-        HInstruction* next_phi = current_phi->GetNext();
-        DCHECK(next_phi == nullptr ||
-               current_phi->AsPhi()->GetRegNumber() <= next_phi->AsPhi()->GetRegNumber())
-            << "Phis need to be sorted by vreg number to keep this a linear-time loop.";
-        current_phi = next_phi;
-      }
-
-      if (current_phi == nullptr || current_phi->AsPhi()->GetRegNumber() != vreg) {
-        stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kNone, 0);
-      } else {
-        Location location = current_phi->GetLocations()->Out();
-        switch (location.GetKind()) {
-          case Location::kStackSlot: {
-            stack_map_stream->AddDexRegisterEntry(
-                DexRegisterLocation::Kind::kInStack, location.GetStackIndex());
-            break;
-          }
-          case Location::kDoubleStackSlot: {
-            stack_map_stream->AddDexRegisterEntry(
-                DexRegisterLocation::Kind::kInStack, location.GetStackIndex());
-            stack_map_stream->AddDexRegisterEntry(
-                DexRegisterLocation::Kind::kInStack, location.GetHighStackIndex(kVRegSize));
-            ++vreg;
-            DCHECK_LT(vreg, num_vregs);
-            break;
-          }
-          default: {
-            // All catch phis must be allocated to a stack slot.
-            LOG(FATAL) << "Unexpected kind " << location.GetKind();
-            UNREACHABLE();
-          }
-        }
-      }
-    }
+    EmitEnvironment(environment,
+                    /* slow_path= */ nullptr,
+                    /* needs_vreg_info= */ true,
+                    /* is_for_catch_handler= */ true);
 
     stack_map_stream->EndStackMapEntry();
   }
@@ -1391,7 +1373,9 @@
   code_generation_data_->AddSlowPath(slow_path);
 }
 
-void CodeGenerator::EmitVRegInfo(HEnvironment* environment, SlowPathCode* slow_path) {
+void CodeGenerator::EmitVRegInfo(HEnvironment* environment,
+                                 SlowPathCode* slow_path,
+                                 bool is_for_catch_handler) {
   StackMapStream* stack_map_stream = GetStackMapStream();
   // Walk over the environment, and record the location of dex registers.
   for (size_t i = 0, environment_size = environment->Size(); i < environment_size; ++i) {
@@ -1446,6 +1430,7 @@
       }
 
       case Location::kRegister : {
+        DCHECK(!is_for_catch_handler);
         int id = location.reg();
         if (slow_path != nullptr && slow_path->IsCoreRegisterSaved(id)) {
           uint32_t offset = slow_path->GetStackOffsetOfCoreRegister(id);
@@ -1467,6 +1452,7 @@
       }
 
       case Location::kFpuRegister : {
+        DCHECK(!is_for_catch_handler);
         int id = location.reg();
         if (slow_path != nullptr && slow_path->IsFpuRegisterSaved(id)) {
           uint32_t offset = slow_path->GetStackOffsetOfFpuRegister(id);
@@ -1488,6 +1474,7 @@
       }
 
       case Location::kFpuRegisterPair : {
+        DCHECK(!is_for_catch_handler);
         int low = location.low();
         int high = location.high();
         if (slow_path != nullptr && slow_path->IsFpuRegisterSaved(low)) {
@@ -1509,6 +1496,7 @@
       }
 
       case Location::kRegisterPair : {
+        DCHECK(!is_for_catch_handler);
         int low = location.low();
         int high = location.high();
         if (slow_path != nullptr && slow_path->IsCoreRegisterSaved(low)) {
@@ -1539,9 +1527,54 @@
   }
 }
 
+void CodeGenerator::EmitVRegInfoOnlyCatchPhis(HEnvironment* environment) {
+  StackMapStream* stack_map_stream = GetStackMapStream();
+  DCHECK(environment->GetHolder()->GetBlock()->IsCatchBlock());
+  DCHECK_EQ(environment->GetHolder()->GetBlock()->GetFirstInstruction(), environment->GetHolder());
+  HInstruction* current_phi = environment->GetHolder()->GetBlock()->GetFirstPhi();
+  for (size_t vreg = 0; vreg < environment->Size(); ++vreg) {
+    while (current_phi != nullptr && current_phi->AsPhi()->GetRegNumber() < vreg) {
+      HInstruction* next_phi = current_phi->GetNext();
+      DCHECK(next_phi == nullptr ||
+             current_phi->AsPhi()->GetRegNumber() <= next_phi->AsPhi()->GetRegNumber())
+          << "Phis need to be sorted by vreg number to keep this a linear-time loop.";
+      current_phi = next_phi;
+    }
+
+    if (current_phi == nullptr || current_phi->AsPhi()->GetRegNumber() != vreg) {
+      stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kNone, 0);
+    } else {
+      Location location = current_phi->GetLocations()->Out();
+      switch (location.GetKind()) {
+        case Location::kStackSlot: {
+          stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack,
+                                                location.GetStackIndex());
+          break;
+        }
+        case Location::kDoubleStackSlot: {
+          stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack,
+                                                location.GetStackIndex());
+          stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack,
+                                                location.GetHighStackIndex(kVRegSize));
+          ++vreg;
+          DCHECK_LT(vreg, environment->Size());
+          break;
+        }
+        default: {
+          LOG(FATAL) << "All catch phis must be allocated to a stack slot. Unexpected kind "
+                     << location.GetKind();
+          UNREACHABLE();
+        }
+      }
+    }
+  }
+}
+
 void CodeGenerator::EmitEnvironment(HEnvironment* environment,
                                     SlowPathCode* slow_path,
-                                    bool needs_vreg_info) {
+                                    bool needs_vreg_info,
+                                    bool is_for_catch_handler,
+                                    bool innermost_environment) {
   if (environment == nullptr) return;
 
   StackMapStream* stack_map_stream = GetStackMapStream();
@@ -1549,7 +1582,11 @@
 
   if (emit_inline_info) {
     // We emit the parent environment first.
-    EmitEnvironment(environment->GetParent(), slow_path, needs_vreg_info);
+    EmitEnvironment(environment->GetParent(),
+                    slow_path,
+                    needs_vreg_info,
+                    is_for_catch_handler,
+                    /* innermost_environment= */ false);
     stack_map_stream->BeginInlineInfoEntry(environment->GetMethod(),
                                            environment->GetDexPc(),
                                            needs_vreg_info ? environment->Size() : 0,
@@ -1557,9 +1594,13 @@
                                            this);
   }
 
+  // If a dex register map is not required we just won't emit it.
   if (needs_vreg_info) {
-    // If a dex register map is not required we just won't emit it.
-    EmitVRegInfo(environment, slow_path);
+    if (innermost_environment && is_for_catch_handler) {
+      EmitVRegInfoOnlyCatchPhis(environment);
+    } else {
+      EmitVRegInfo(environment, slow_path, is_for_catch_handler);
+    }
   }
 
   if (emit_inline_info) {
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index de247a9..6b85aaa 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -846,8 +846,11 @@
   void BlockIfInRegister(Location location, bool is_out = false) const;
   void EmitEnvironment(HEnvironment* environment,
                        SlowPathCode* slow_path,
-                       bool needs_vreg_info = true);
-  void EmitVRegInfo(HEnvironment* environment, SlowPathCode* slow_path);
+                       bool needs_vreg_info = true,
+                       bool is_for_catch_handler = false,
+                       bool innermost_environment = true);
+  void EmitVRegInfo(HEnvironment* environment, SlowPathCode* slow_path, bool is_for_catch_handler);
+  void EmitVRegInfoOnlyCatchPhis(HEnvironment* environment);
 
   static void PrepareCriticalNativeArgumentMoves(
       HInvokeStaticOrDirect* invoke,
diff --git a/compiler/optimizing/code_sinking.cc b/compiler/optimizing/code_sinking.cc
index 766bb01..930675b 100644
--- a/compiler/optimizing/code_sinking.cc
+++ b/compiler/optimizing/code_sinking.cc
@@ -271,10 +271,21 @@
     }
   }
   for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) {
-    HInstruction* user = use.GetUser()->GetHolder();
+    HEnvironment* env = use.GetUser();
+    HInstruction* user = env->GetHolder();
     if (user->GetBlock() == target_block &&
         (insert_pos == nullptr || user->StrictlyDominates(insert_pos))) {
-      insert_pos = user;
+      if (target_block->IsCatchBlock() && target_block->GetFirstInstruction() == user) {
+        // We can sink the instructions past the environment setting Nop. If we do that, we have to
+        // remove said instruction from the environment. Since we know that we will be sinking the
+        // instruction to this block and there are no more instructions to consider, we can safely
+        // remove it from the environment now.
+        DCHECK(target_block->GetFirstInstruction()->IsNop());
+        env->RemoveAsUserOfInput(use.GetIndex());
+        env->SetRawEnvAt(use.GetIndex(), /*instruction=*/ nullptr);
+      } else {
+        insert_pos = user;
+      }
     }
   }
   if (insert_pos == nullptr) {
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index eda6363..1071151 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -159,6 +159,24 @@
     }
   }
 
+  // Make sure the first instruction of a catch block is always a Nop that emits an environment.
+  if (block->IsCatchBlock()) {
+    if (!block->GetFirstInstruction()->IsNop()) {
+      AddError(StringPrintf("Block %d doesn't have a Nop as its first instruction.",
+                            current_block_->GetBlockId()));
+    } else {
+      HNop* nop = block->GetFirstInstruction()->AsNop();
+      if (!nop->NeedsEnvironment()) {
+        AddError(
+            StringPrintf("%s:%d is a Nop and the first instruction of block %d, but it doesn't "
+                         "need an environment.",
+                         nop->DebugName(),
+                         nop->GetId(),
+                         current_block_->GetBlockId()));
+      }
+    }
+  }
+
   // Visit this block's list of phis.
   for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
     HInstruction* current = it.Current();
@@ -342,14 +360,15 @@
 }
 
 void GraphChecker::VisitLoadException(HLoadException* load) {
-  // Ensure that LoadException is the first instruction in a catch block.
+  // Ensure that LoadException is the second instruction in a catch block. The first one should be a
+  // Nop (checked separately).
   if (!load->GetBlock()->IsCatchBlock()) {
     AddError(StringPrintf("%s:%d is in a non-catch block %d.",
                           load->DebugName(),
                           load->GetId(),
                           load->GetBlock()->GetBlockId()));
-  } else if (load->GetBlock()->GetFirstInstruction() != load) {
-    AddError(StringPrintf("%s:%d is not the first instruction in catch block %d.",
+  } else if (load->GetBlock()->GetFirstInstruction()->GetNext() != load) {
+    AddError(StringPrintf("%s:%d is not the second instruction in catch block %d.",
                           load->DebugName(),
                           load->GetId(),
                           load->GetBlock()->GetBlockId()));
@@ -513,17 +532,10 @@
                           instruction->GetId(),
                           current_block_->GetBlockId()));
   } else if (instruction->CanThrowIntoCatchBlock()) {
-    // Find the top-level environment. This corresponds to the environment of
-    // the catch block since we do not inline methods with try/catch.
-    HEnvironment* environment = instruction->GetEnvironment();
-    while (environment->GetParent() != nullptr) {
-      environment = environment->GetParent();
-    }
-
-    // Find all catch blocks and test that `instruction` has an environment
-    // value for each one.
+    // Find all catch blocks and test that `instruction` has an environment value for each one.
     const HTryBoundary& entry = instruction->GetBlock()->GetTryCatchInformation()->GetTryEntry();
     for (HBasicBlock* catch_block : entry.GetExceptionHandlers()) {
+      const HEnvironment* environment = catch_block->GetFirstInstruction()->GetEnvironment();
       for (HInstructionIterator phi_it(catch_block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
         HPhi* catch_phi = phi_it.Current()->AsPhi();
         if (environment->GetInstructionAt(catch_phi->GetRegNumber()) == nullptr) {
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index 605427b..ba58c8d 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -343,6 +343,10 @@
     // Suspend checks were inserted into loop headers during building of dominator tree.
     DCHECK(block->GetFirstInstruction()->IsSuspendCheck());
     return block->GetFirstInstruction() != block->GetLastInstruction();
+  } else if (block->IsCatchBlock()) {
+    // Nops were inserted into the beginning of catch blocks.
+    DCHECK(block->GetFirstInstruction()->IsNop());
+    return block->GetFirstInstruction() != block->GetLastInstruction();
   } else {
     return !block->GetInstructions().IsEmpty();
   }
@@ -387,6 +391,11 @@
       // This is slightly odd because the loop header might not be empty (TryBoundary).
       // But we're still creating the environment with locals from the top of the block.
       InsertInstructionAtTop(suspend_check);
+    } else if (current_block_->IsCatchBlock()) {
+      // We add an environment emitting instruction at the beginning of each catch block, in order
+      // to support try catch inlining.
+      // This is slightly odd because the catch block might not be empty (TryBoundary).
+      InsertInstructionAtTop(new (allocator_) HNop(block_dex_pc, /* needs_environment= */ true));
     }
 
     if (block_dex_pc == kNoDexPc || current_block_ != block_builder_->GetBlockAt(block_dex_pc)) {
diff --git a/test/565-checker-condition-liveness/src/Main.java b/test/565-checker-condition-liveness/src/Main.java
index 17a8613..350d9ad 100644
--- a/test/565-checker-condition-liveness/src/Main.java
+++ b/test/565-checker-condition-liveness/src/Main.java
@@ -33,8 +33,8 @@
   
   /// CHECK-START-{ARM,ARM64}: void Main.testThrowIntoCatchBlock(int, java.lang.Object, int[]) liveness (after)
   /// CHECK-DAG:  <<IntArg:i\d+>>   ParameterValue        env_uses:[23,25]
-  /// CHECK-DAG:  <<RefArg:l\d+>>   ParameterValue        env_uses:[11,23,25]
-  /// CHECK-DAG:  <<Array:l\d+>>    ParameterValue        env_uses:[11,23,25]
+  /// CHECK-DAG:  <<RefArg:l\d+>>   ParameterValue        env_uses:[11,23,25,33]
+  /// CHECK-DAG:  <<Array:l\d+>>    ParameterValue        env_uses:[11,23,25,33]
   /// CHECK-DAG:  <<Const1:i\d+>>   IntConstant 1         env_uses:[23,25]
   /// CHECK-DAG:                    SuspendCheck          env:[[_,<<IntArg>>,<<RefArg>>,<<Array>>]]           liveness:10
   /// CHECK-DAG:                    NullCheck             env:[[<<Const1>>,<<IntArg>>,<<RefArg>>,<<Array>>]]  liveness:20
@@ -43,9 +43,9 @@
   /// CHECK-DAG:                    TryBoundary
   
   /// CHECK-START-{ARM,ARM64}-DEBUGGABLE: void Main.testThrowIntoCatchBlock(int, java.lang.Object, int[]) liveness (after)
-  /// CHECK-DAG:  <<IntArg:i\d+>>   ParameterValue        env_uses:[11,23,25]
-  /// CHECK-DAG:  <<RefArg:l\d+>>   ParameterValue        env_uses:[11,23,25]
-  /// CHECK-DAG:  <<Array:l\d+>>    ParameterValue        env_uses:[11,23,25]
+  /// CHECK-DAG:  <<IntArg:i\d+>>   ParameterValue        env_uses:[11,23,25,33]
+  /// CHECK-DAG:  <<RefArg:l\d+>>   ParameterValue        env_uses:[11,23,25,33]
+  /// CHECK-DAG:  <<Array:l\d+>>    ParameterValue        env_uses:[11,23,25,33]
   /// CHECK-DAG:  <<Const1:i\d+>>   IntConstant 1         env_uses:[23,25]
   /// CHECK-DAG:                    SuspendCheck          env:[[_,<<IntArg>>,<<RefArg>>,<<Array>>]]           liveness:10
   /// CHECK-DAG:                    NullCheck             env:[[<<Const1>>,<<IntArg>>,<<RefArg>>,<<Array>>]]  liveness:20
@@ -56,8 +56,8 @@
   // X86 and X86_64 generate at use site the ArrayLength, meaning only the BoundsCheck will have environment uses.
   /// CHECK-START-{X86,X86_64}: void Main.testThrowIntoCatchBlock(int, java.lang.Object, int[]) liveness (after)
   /// CHECK-DAG:  <<IntArg:i\d+>>   ParameterValue        env_uses:[25,25]
-  /// CHECK-DAG:  <<RefArg:l\d+>>   ParameterValue        env_uses:[11,25,25]
-  /// CHECK-DAG:  <<Array:l\d+>>    ParameterValue        env_uses:[11,25,25]
+  /// CHECK-DAG:  <<RefArg:l\d+>>   ParameterValue        env_uses:[11,25,25,33]
+  /// CHECK-DAG:  <<Array:l\d+>>    ParameterValue        env_uses:[11,25,25,33]
   /// CHECK-DAG:  <<Const1:i\d+>>   IntConstant 1         env_uses:[25,25]
   /// CHECK-DAG:                    SuspendCheck          env:[[_,<<IntArg>>,<<RefArg>>,<<Array>>]]           liveness:10
   /// CHECK-DAG:                    NullCheck             env:[[<<Const1>>,<<IntArg>>,<<RefArg>>,<<Array>>]]  liveness:20
@@ -66,9 +66,9 @@
   /// CHECK-DAG:                    TryBoundary
 
   /// CHECK-START-{X86,X86_64}-DEBUGGABLE: void Main.testThrowIntoCatchBlock(int, java.lang.Object, int[]) liveness (after)
-  /// CHECK-DAG:  <<IntArg:i\d+>>   ParameterValue        env_uses:[11,25,25]
-  /// CHECK-DAG:  <<RefArg:l\d+>>   ParameterValue        env_uses:[11,25,25]
-  /// CHECK-DAG:  <<Array:l\d+>>    ParameterValue        env_uses:[11,25,25]
+  /// CHECK-DAG:  <<IntArg:i\d+>>   ParameterValue        env_uses:[11,25,25,33]
+  /// CHECK-DAG:  <<RefArg:l\d+>>   ParameterValue        env_uses:[11,25,25,33]
+  /// CHECK-DAG:  <<Array:l\d+>>    ParameterValue        env_uses:[11,25,25,33]
   /// CHECK-DAG:  <<Const1:i\d+>>   IntConstant 1         env_uses:[25,25]
   /// CHECK-DAG:                    SuspendCheck          env:[[_,<<IntArg>>,<<RefArg>>,<<Array>>]]           liveness:10
   /// CHECK-DAG:                    NullCheck             env:[[<<Const1>>,<<IntArg>>,<<RefArg>>,<<Array>>]]  liveness:20