Optimize stack map decoding.

We usually read several consecutive varints.
Add a helper method optimized for that use case
(ideally reading 8 varints from a single load).

This improves app startup by 0.4% (maps, speed).
PMD on golem seems to get around 5% faster.
CodeInfo::Decode on its own is 25% faster.

Bug: 133257467
Test: ./art/test.py -b --host --64
Change-Id: Iaf7e8469ed6397b1d1d4102e409b5731f7229557
diff --git a/runtime/stack_map.h b/runtime/stack_map.h
index 87133cf..a2f0019 100644
--- a/runtime/stack_map.h
+++ b/runtime/stack_map.h
@@ -440,10 +440,9 @@
 
   ALWAYS_INLINE static QuickMethodFrameInfo DecodeFrameInfo(const uint8_t* data) {
     BitMemoryReader reader(data);
-    return QuickMethodFrameInfo(
-        reader.ReadVarint() * kStackAlignment,  // Decode packed_frame_size_ and unpack.
-        reader.ReadVarint(),  // core_spill_mask_.
-        reader.ReadVarint());  // fp_spill_mask_.
+    uint32_t args[3];  // packed_frame_size, core_spill_mask, fp_spill_mask.
+    reader.ReadVarints(args);
+    return QuickMethodFrameInfo(args[0] * kStackAlignment, args[1], args[2]);
   }
 
  private:
@@ -499,6 +498,8 @@
   BitTable<DexRegisterMapInfo> dex_register_maps_;
   BitTable<DexRegisterInfo> dex_register_catalog_;
   uint32_t size_in_bits_ = 0;
+
+  friend class StackMapStream;
 };
 
 #undef ELEMENT_BYTE_OFFSET_AFTER