ART: Introduce stackmap with no DexRegInfo.

Some safepoints don't need to have DexRegisterMap info;
this will decrease the stackmap size.

.oat file size reduction:
 - boot.oat:           -233 kb (-5.4%)
 - boot-framework.oat: -704 kb (-4.9%)

Test: 461-get-reference-vreg, 466-get-live-vreg.
Test: 543-env-long-ref, 616-cha*.
Test: test-art-target, +gc-stress.

Change-Id: Idbad355770e30a30dcf14127642e03ee666878b8
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 3b5699b..6d3a5c6 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -1088,6 +1088,30 @@
   return stack_map;
 }
 
+// Returns whether stackmap dex register info is needed for the instruction.
+//
+// The following cases mandate having a dex register map:
+//  * Deoptimization
+//    when we need to obtain the values to restore actual vregisters for the interpreter.
+//  * Debuggability
+//    when we want to observe the values / asynchronously deoptimize.
+//  * Monitor operations
+//    to allow dumping locked dex registers in a stack trace for non-debuggable code.
+//  * On-stack-replacement (OSR)
+//    when entering OSR-compiled code from the interpreter we need to initialize the compiled
+//    code values with the values from the vregisters.
+//  * Method local catch blocks
+//    a catch block must see the environment of the instruction from the same method that can
+//    throw to this block.
+static bool NeedsVregInfo(HInstruction* instruction, bool osr) {
+  HGraph* graph = instruction->GetBlock()->GetGraph();
+  return instruction->IsDeoptimize() ||
+         graph->IsDebuggable() ||
+         graph->HasMonitorOperations() ||
+         osr ||
+         instruction->CanThrowIntoCatchBlock();
+}
+
 void CodeGenerator::RecordPcInfo(HInstruction* instruction,
                                  uint32_t dex_pc,
                                  SlowPathCode* slow_path,
@@ -1166,12 +1190,15 @@
   StackMap::Kind kind = native_debug_info
       ? StackMap::Kind::Debug
       : (osr ? StackMap::Kind::OSR : StackMap::Kind::Default);
+  bool needs_vreg_info = NeedsVregInfo(instruction, osr);
   stack_map_stream->BeginStackMapEntry(outer_dex_pc,
                                        native_pc,
                                        register_mask,
                                        locations->GetStackMask(),
-                                       kind);
-  EmitEnvironment(environment, slow_path);
+                                       kind,
+                                       needs_vreg_info);
+
+  EmitEnvironment(environment, slow_path, needs_vreg_info);
   stack_map_stream->EndStackMapEntry();
 
   if (osr) {
@@ -1284,19 +1311,8 @@
   code_generation_data_->AddSlowPath(slow_path);
 }
 
-void CodeGenerator::EmitEnvironment(HEnvironment* environment, SlowPathCode* slow_path) {
-  if (environment == nullptr) return;
-
+void CodeGenerator::EmitVRegInfo(HEnvironment* environment, SlowPathCode* slow_path) {
   StackMapStream* stack_map_stream = GetStackMapStream();
-  if (environment->GetParent() != nullptr) {
-    // We emit the parent environment first.
-    EmitEnvironment(environment->GetParent(), slow_path);
-    stack_map_stream->BeginInlineInfoEntry(environment->GetMethod(),
-                                           environment->GetDexPc(),
-                                           environment->Size(),
-                                           &graph_->GetDexFile());
-  }
-
   // Walk over the environment, and record the location of dex registers.
   for (size_t i = 0, environment_size = environment->Size(); i < environment_size; ++i) {
     HInstruction* current = environment->GetInstructionAt(i);
@@ -1441,8 +1457,31 @@
         LOG(FATAL) << "Unexpected kind " << location.GetKind();
     }
   }
+}
 
-  if (environment->GetParent() != nullptr) {
+void CodeGenerator::EmitEnvironment(HEnvironment* environment,
+                                    SlowPathCode* slow_path,
+                                    bool needs_vreg_info) {
+  if (environment == nullptr) return;
+
+  StackMapStream* stack_map_stream = GetStackMapStream();
+  bool emit_inline_info = environment->GetParent() != nullptr;
+
+  if (emit_inline_info) {
+    // We emit the parent environment first.
+    EmitEnvironment(environment->GetParent(), slow_path, needs_vreg_info);
+    stack_map_stream->BeginInlineInfoEntry(environment->GetMethod(),
+                                           environment->GetDexPc(),
+                                           needs_vreg_info ? environment->Size() : 0,
+                                           &graph_->GetDexFile());
+  }
+
+  if (needs_vreg_info) {
+    // If a dex register map is not required we just won't emit it.
+    EmitVRegInfo(environment, slow_path);
+  }
+
+  if (emit_inline_info) {
     stack_map_stream->EndInlineInfoEntry();
   }
 }
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 357c4bb..d932c6a 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -761,7 +761,10 @@
   size_t GetStackOffsetOfSavedRegister(size_t index);
   void GenerateSlowPaths();
   void BlockIfInRegister(Location location, bool is_out = false) const;
-  void EmitEnvironment(HEnvironment* environment, SlowPathCode* slow_path);
+  void EmitEnvironment(HEnvironment* environment,
+                       SlowPathCode* slow_path,
+                       bool needs_vreg_info = true);
+  void EmitVRegInfo(HEnvironment* environment, SlowPathCode* slow_path);
 
   OptimizingCompilerStats* stats_;
 
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index f8f813e..e7f0872 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -3096,6 +3096,7 @@
           LoadLocal(instruction.VRegA_11x(), DataType::Type::kReference),
           HMonitorOperation::OperationKind::kEnter,
           dex_pc));
+      graph_->SetHasMonitorOperations(true);
       break;
     }
 
@@ -3104,6 +3105,7 @@
           LoadLocal(instruction.VRegA_11x(), DataType::Type::kReference),
           HMonitorOperation::OperationKind::kExit,
           dex_pc));
+      graph_->SetHasMonitorOperations(true);
       break;
     }
 
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 5111036..3e6e211 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -335,6 +335,7 @@
         temporaries_vreg_slots_(0),
         has_bounds_checks_(false),
         has_try_catch_(false),
+        has_monitor_operations_(false),
         has_simd_(false),
         has_loops_(false),
         has_irreducible_loops_(false),
@@ -606,6 +607,9 @@
   bool HasTryCatch() const { return has_try_catch_; }
   void SetHasTryCatch(bool value) { has_try_catch_ = value; }
 
+  bool HasMonitorOperations() const { return has_monitor_operations_; }
+  void SetHasMonitorOperations(bool value) { has_monitor_operations_ = value; }
+
   bool HasSIMD() const { return has_simd_; }
   void SetHasSIMD(bool value) { has_simd_ = value; }
 
@@ -702,6 +706,10 @@
   // false positives.
   bool has_try_catch_;
 
+  // Flag whether there are any HMonitorOperation in the graph. If yes this will mandate
+  // DexRegisterMap to be present to allow deadlock analysis for non-debuggable code.
+  bool has_monitor_operations_;
+
   // Flag whether SIMD instructions appear in the graph. If true, the
   // code generators may have to be more careful spilling the wider
   // contents of SIMD registers.
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index afb6071c..3f6010d 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -81,7 +81,8 @@
                                         uint32_t native_pc_offset,
                                         uint32_t register_mask,
                                         BitVector* stack_mask,
-                                        StackMap::Kind kind) {
+                                        StackMap::Kind kind,
+                                        bool needs_vreg_info) {
   DCHECK(in_method_) << "Call BeginMethod first";
   DCHECK(!in_stack_map_) << "Mismatched Begin/End calls";
   in_stack_map_ = true;
@@ -114,7 +115,7 @@
   lazy_stack_masks_.push_back(stack_mask);
   current_inline_infos_.clear();
   current_dex_registers_.clear();
-  expected_num_dex_registers_ = num_dex_registers_;
+  expected_num_dex_registers_ = needs_vreg_info ? num_dex_registers_ : 0u;
 
   if (kVerifyStackMaps) {
     size_t stack_map_index = stack_maps_.size();
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index 33c624a..f45e3d7 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -68,7 +68,8 @@
                           uint32_t native_pc_offset,
                           uint32_t register_mask = 0,
                           BitVector* sp_mask = nullptr,
-                          StackMap::Kind kind = StackMap::Kind::Default);
+                          StackMap::Kind kind = StackMap::Kind::Default,
+                          bool needs_vreg_info = true);
   void EndStackMapEntry();
 
   void AddDexRegisterEntry(DexRegisterLocation::Kind kind, int32_t value) {
diff --git a/runtime/check_reference_map_visitor.h b/runtime/check_reference_map_visitor.h
index 8f9f45c..ebf9ea0 100644
--- a/runtime/check_reference_map_visitor.h
+++ b/runtime/check_reference_map_visitor.h
@@ -68,6 +68,12 @@
     StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset);
     CodeItemDataAccessor accessor(m->DexInstructionData());
     uint16_t number_of_dex_registers = accessor.RegistersSize();
+
+    if (!Runtime::Current()->IsAsyncDeoptimizeable(GetCurrentQuickFramePc())) {
+      // We can only guarantee dex register info presence for debuggable methods.
+      return;
+    }
+
     DexRegisterMap dex_register_map = code_info.GetDexRegisterMapOf(stack_map);
     DCHECK_EQ(dex_register_map.size(), number_of_dex_registers);
     uint32_t register_mask = code_info.GetRegisterMaskOf(stack_map);
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index d497a52..153761c 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -481,6 +481,7 @@
     // We found a stack map, now fill the frame with dex register values from the interpreter's
     // shadow frame.
     DexRegisterMap vreg_map = code_info.GetDexRegisterMapOf(stack_map);
+    DCHECK_EQ(vreg_map.size(), number_of_vregs);
 
     frame_size = osr_method->GetFrameSizeInBytes();
 
@@ -500,7 +501,6 @@
       // If we don't have a dex register map, then there are no live dex registers at
       // this dex pc.
     } else {
-      DCHECK_EQ(vreg_map.size(), number_of_vregs);
       for (uint16_t vreg = 0; vreg < number_of_vregs; ++vreg) {
         DexRegisterLocation::Kind location = vreg_map[vreg].GetKind();
         if (location == DexRegisterLocation::Kind::kNone) {
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index 676bceb..9d114ed 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -1481,6 +1481,9 @@
     bool success = false;
     for (uint32_t dex_reg : dex_lock_info.dex_registers) {
       uint32_t value;
+
+      // For optimized code we expect the DexRegisterMap to be present - monitor information
+      // must not be optimized out.
       success = stack_visitor->GetVReg(m, dex_reg, kReferenceVReg, &value);
       if (success) {
         ObjPtr<mirror::Object> o = reinterpret_cast<mirror::Object*>(value);
diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc
index 3bc718b..1777b3d 100644
--- a/runtime/quick_exception_handler.cc
+++ b/runtime/quick_exception_handler.cc
@@ -301,10 +301,11 @@
   StackMap catch_stack_map = code_info.GetCatchStackMapForDexPc(GetHandlerDexPc());
   DCHECK(catch_stack_map.IsValid());
   DexRegisterMap catch_vreg_map = code_info.GetDexRegisterMapOf(catch_stack_map);
+  DCHECK_EQ(catch_vreg_map.size(), number_of_vregs);
+
   if (!catch_vreg_map.HasAnyLiveDexRegisters()) {
     return;
   }
-  DCHECK_EQ(catch_vreg_map.size(), number_of_vregs);
 
   // Find stack map of the throwing instruction.
   StackMap throw_stack_map =
@@ -478,10 +479,11 @@
     DexRegisterMap vreg_map = IsInInlinedFrame()
         ? code_info.GetInlineDexRegisterMapOf(stack_map, GetCurrentInlinedFrame())
         : code_info.GetDexRegisterMapOf(stack_map);
+
+    DCHECK_EQ(vreg_map.size(), number_of_vregs);
     if (vreg_map.empty()) {
       return;
     }
-    DCHECK_EQ(vreg_map.size(), number_of_vregs);
 
     for (uint16_t vreg = 0; vreg < number_of_vregs; ++vreg) {
       if (updated_vregs != nullptr && updated_vregs[vreg]) {
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 23861ac..27d1038 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -3873,6 +3873,7 @@
             code_info(_code_info),
             dex_register_map(code_info.GetDexRegisterMapOf(map)),
             visitor(_visitor) {
+        DCHECK_EQ(dex_register_map.size(), number_of_dex_registers);
       }
 
       // TODO: If necessary, we should consider caching a reverse map instead of the linear
diff --git a/test/461-get-reference-vreg/get_reference_vreg_jni.cc b/test/461-get-reference-vreg/get_reference_vreg_jni.cc
index 817a647..0636682 100644
--- a/test/461-get-reference-vreg/get_reference_vreg_jni.cc
+++ b/test/461-get-reference-vreg/get_reference_vreg_jni.cc
@@ -17,6 +17,7 @@
 #include "arch/context.h"
 #include "art_method-inl.h"
 #include "jni.h"
+#include "oat_quick_method_header.h"
 #include "scoped_thread_state_change-inl.h"
 #include "stack.h"
 #include "thread.h"
@@ -25,6 +26,29 @@
 
 namespace {
 
+bool IsFrameCompiledAndNonDebuggable(const art::StackVisitor* sv) {
+  return sv->GetCurrentShadowFrame() == nullptr &&
+         sv->GetCurrentOatQuickMethodHeader()->IsOptimized() &&
+         !Runtime::Current()->IsJavaDebuggable();
+}
+
+void CheckOptimizedOutRegLiveness(const art::StackVisitor* sv,
+                                  ArtMethod* m,
+                                  uint32_t dex_reg,
+                                  VRegKind vreg_kind,
+                                  bool check_val = false,
+                                  uint32_t expected = 0) REQUIRES_SHARED(Locks::mutator_lock_) {
+  uint32_t value = 0;
+  if (IsFrameCompiledAndNonDebuggable(sv)) {
+    CHECK_EQ(sv->GetVReg(m, dex_reg, vreg_kind, &value), false);
+  } else {
+    CHECK(sv->GetVReg(m, dex_reg, vreg_kind, &value));
+    if (check_val) {
+      CHECK_EQ(value, expected);
+    }
+  }
+}
+
 jint FindMethodIndex(jobject this_value_jobj) {
   ScopedObjectAccess soa(Thread::Current());
   std::unique_ptr<Context> context(Context::Create());
@@ -38,21 +62,22 @@
         if (m_name.compare("$noinline$testThisWithInstanceCall") == 0) {
           found_method_index = 1;
           uint32_t value = 0;
-          CHECK(stack_visitor->GetVReg(m, 1, kReferenceVReg, &value));
-          CHECK_EQ(reinterpret_cast<mirror::Object*>(value), this_value);
-          CHECK_EQ(stack_visitor->GetThisObject(), this_value);
+          if (IsFrameCompiledAndNonDebuggable(stack_visitor)) {
+            CheckOptimizedOutRegLiveness(stack_visitor, m, 1, kReferenceVReg);
+          } else {
+            CHECK(stack_visitor->GetVReg(m, 1, kReferenceVReg, &value));
+            CHECK_EQ(reinterpret_cast<mirror::Object*>(value), this_value);
+            CHECK_EQ(stack_visitor->GetThisObject(), this_value);
+          }
         } else if (m_name.compare("$noinline$testThisWithStaticCall") == 0) {
           found_method_index = 2;
-          uint32_t value = 0;
-          CHECK(stack_visitor->GetVReg(m, 1, kReferenceVReg, &value));
+          CheckOptimizedOutRegLiveness(stack_visitor, m, 1, kReferenceVReg);
         } else if (m_name.compare("$noinline$testParameter") == 0) {
           found_method_index = 3;
-          uint32_t value = 0;
-          CHECK(stack_visitor->GetVReg(m, 1, kReferenceVReg, &value));
+          CheckOptimizedOutRegLiveness(stack_visitor, m, 1, kReferenceVReg);
         } else if (m_name.compare("$noinline$testObjectInScope") == 0) {
           found_method_index = 4;
-          uint32_t value = 0;
-          CHECK(stack_visitor->GetVReg(m, 0, kReferenceVReg, &value));
+          CheckOptimizedOutRegLiveness(stack_visitor, m, 0, kReferenceVReg);
         }
 
         return true;
diff --git a/test/466-get-live-vreg/get_live_vreg_jni.cc b/test/466-get-live-vreg/get_live_vreg_jni.cc
index 905d8e6..b1fd6b5 100644
--- a/test/466-get-live-vreg/get_live_vreg_jni.cc
+++ b/test/466-get-live-vreg/get_live_vreg_jni.cc
@@ -40,9 +40,7 @@
       found_method_ = true;
       CHECK_EQ(CodeItemDataAccessor(m->DexInstructionData()).RegistersSize(), 3u);
       CheckOptimizedOutRegLiveness(m, 1, kIntVReg, true, 42);
-
-      uint32_t value;
-      CHECK(GetVReg(m, 2, kReferenceVReg, &value));
+      CheckOptimizedOutRegLiveness(m, 2, kReferenceVReg);
     } else if (m_name.compare("$noinline$testIntervalHole") == 0) {
       found_method_ = true;
       uint32_t number_of_dex_registers =
diff --git a/test/543-env-long-ref/env_long_ref.cc b/test/543-env-long-ref/env_long_ref.cc
index 1885f8d..1c30d46 100644
--- a/test/543-env-long-ref/env_long_ref.cc
+++ b/test/543-env-long-ref/env_long_ref.cc
@@ -34,6 +34,11 @@
 
         if (m_name == "testCase") {
           found = true;
+          // For optimized non-debuggable code do not expect dex register info to be present.
+          if (stack_visitor->GetCurrentShadowFrame() == nullptr &&
+              !Runtime::Current()->IsAsyncDeoptimizeable(stack_visitor->GetCurrentQuickFramePc())) {
+            return true;
+          }
           uint32_t stack_value = 0;
           CHECK(stack_visitor->GetVReg(m, 1, kReferenceVReg, &stack_value));
           CHECK_EQ(reinterpret_cast<mirror::Object*>(stack_value),
diff --git a/test/common/stack_inspect.cc b/test/common/stack_inspect.cc
index cb011a8..e8160b4 100644
--- a/test/common/stack_inspect.cc
+++ b/test/common/stack_inspect.cc
@@ -187,8 +187,11 @@
   jobject result = nullptr;
   StackVisitor::WalkStack(
       [&](const art::StackVisitor* stack_visitor) REQUIRES_SHARED(Locks::mutator_lock_) {
-        // Discard stubs and Main.getThisOfCaller.
-        if (stack_visitor->GetMethod() == nullptr || stack_visitor->GetMethod()->IsNative()) {
+        // Discard stubs and Main.getThisOfCaller and methods without vreg info.
+        if (stack_visitor->GetMethod() == nullptr ||
+            stack_visitor->GetMethod()->IsNative() ||
+            (stack_visitor->GetCurrentShadowFrame() == nullptr &&
+             !Runtime::Current()->IsAsyncDeoptimizeable(stack_visitor->GetCurrentQuickFramePc()))) {
           return true;
         }
         result = soa.AddLocalReference<jobject>(stack_visitor->GetThisObject());