Delta-encoding of mapping tables. Both PC offsets and dalvik offsets are delta-encoded. Since PC offsets are increasing, the deltas are then compressed as unsigned LEB128. Dalvik offsets are not monotonic, so their deltas are compressed as signed LEB128. This reduces the size of the mapping tables by about 30% on average, 25% from the PC offset and 5% from the dalvik offset delta encoding. Bug: 9437697 Change-Id: I600ab9c22dec178088d4947a811cca3bc8bd4cf4

commit: 1e6cb63d77090ddc6aa19c755d7066f66e9ff87e [log] [tgz]
author: Vladimir Marko <vmarko@google.com> Thu Nov 28 16:27:29 2013 +0000
committer: Vladimir Marko <vmarko@google.com> Fri Nov 29 10:14:39 2013 +0000
tree: 7cffe82e563757521da204c5dfbd3fff41dde7ed
parent: 00ce185371be9930dfd75e5acc2258a1139b558e [diff]
diff --git a/runtime/exception_test.cc b/runtime/exception_test.cc
index e9a6e4f..8f542d8 100644
--- a/runtime/exception_test.cc
+++ b/runtime/exception_test.cc

@@ -54,17 +54,17 @@
       fake_code_.push_back(0x70 | i);
     }
 
-    fake_mapping_data_.PushBack(4);  // first element is count
-    fake_mapping_data_.PushBack(4);  // total (non-length) elements
-    fake_mapping_data_.PushBack(2);  // count of pc to dex elements
+    fake_mapping_data_.PushBackUnsigned(4);  // first element is count
+    fake_mapping_data_.PushBackUnsigned(4);  // total (non-length) elements
+    fake_mapping_data_.PushBackUnsigned(2);  // count of pc to dex elements
                                       // ---  pc to dex table
-    fake_mapping_data_.PushBack(3);  // offset 3
-    fake_mapping_data_.PushBack(3);  // maps to dex offset 3
+    fake_mapping_data_.PushBackUnsigned(3 - 0);  // offset 3
+    fake_mapping_data_.PushBackSigned(3 - 0);    // maps to dex offset 3
                                       // ---  dex to pc table
-    fake_mapping_data_.PushBack(3);  // offset 3
-    fake_mapping_data_.PushBack(3);  // maps to dex offset 3
+    fake_mapping_data_.PushBackUnsigned(3 - 0);  // offset 3
+    fake_mapping_data_.PushBackSigned(3 - 0);    // maps to dex offset 3
 
-    fake_vmap_table_data_.PushBack(0);
+    fake_vmap_table_data_.PushBackUnsigned(0);
 
     fake_gc_map_.push_back(0);  // 0 bytes to encode references and native pc offsets.
     fake_gc_map_.push_back(0);
@@ -91,8 +91,8 @@
   const DexFile* dex_;
 
   std::vector<uint8_t> fake_code_;
-  UnsignedLeb128EncodingVector fake_mapping_data_;
-  UnsignedLeb128EncodingVector fake_vmap_table_data_;
+  Leb128EncodingVector fake_mapping_data_;
+  Leb128EncodingVector fake_vmap_table_data_;
   std::vector<uint8_t> fake_gc_map_;
 
   mirror::ArtMethod* method_f_;

diff --git a/runtime/leb128.h b/runtime/leb128.h
index 6041f8c..7a7d38d 100644
--- a/runtime/leb128.h
+++ b/runtime/leb128.h

@@ -18,6 +18,7 @@
 #define ART_RUNTIME_LEB128_H_
 
 #include "globals.h"
+#include "utils.h"
 
 namespace art {
 
@@ -95,12 +96,20 @@
 
 // Returns the number of bytes needed to encode the value in unsigned LEB128.
 static inline uint32_t UnsignedLeb128Size(uint32_t data) {
-  uint32_t count = 0;
-  do {
-    data >>= 7;
-    count++;
-  } while (data != 0);
-  return count;
+  // bits_to_encode = (data != 0) ? 32 - CLZ(x) : 1  // 32 - CLZ(data | 1)
+  // bytes = ceil(bits_to_encode / 7.0);             // (6 + bits_to_encode) / 7
+  uint32_t x = 6 + 32 - CLZ(data | 1);
+  // Division by 7 is done by (x * 37) >> 8 where 37 = ceil(256 / 7).
+  // This works for 0 <= x < 256 / (7 * 37 - 256), i.e. 0 <= x <= 85.
+  return (x * 37) >> 8;
+}
+
+// Returns the number of bytes needed to encode the value in unsigned LEB128.
+static inline uint32_t SignedLeb128Size(int32_t data) {
+  // Like UnsignedLeb128Size(), but we need one bit beyond the highest bit that differs from sign.
+  data = data ^ (data >> 31);
+  uint32_t x = 1 /* we need to encode the sign bit */ + 6 + 32 - CLZ(data | 1);
+  return (x * 37) >> 8;
 }
 
 }  // namespace art

diff --git a/runtime/mapping_table.h b/runtime/mapping_table.h
index c468c1e..a82bc1c 100644
--- a/runtime/mapping_table.h
+++ b/runtime/mapping_table.h

@@ -72,7 +72,8 @@
         if (end_ > 0) {
           encoded_table_ptr_ = table_->FirstDexToPcPtr();
           native_pc_offset_ = DecodeUnsignedLeb128(&encoded_table_ptr_);
-          dex_pc_ = DecodeUnsignedLeb128(&encoded_table_ptr_);
+          // First delta is always positive.
+          dex_pc_ = static_cast<uint32_t>(DecodeSignedLeb128(&encoded_table_ptr_));
         }
       } else {  // An iterator wanted from the end.
         DCHECK_EQ(table_->DexToPcSize(), element);
@@ -87,8 +88,9 @@
     void operator++() {
       ++element_;
       if (element_ != end_) {  // Avoid reading beyond the end of the table.
-        native_pc_offset_ = DecodeUnsignedLeb128(&encoded_table_ptr_);
-        dex_pc_ = DecodeUnsignedLeb128(&encoded_table_ptr_);
+        native_pc_offset_ += DecodeUnsignedLeb128(&encoded_table_ptr_);
+        // For negative delta, unsigned overflow after static_cast does exactly what we need.
+        dex_pc_ += static_cast<uint32_t>(DecodeSignedLeb128(&encoded_table_ptr_));
       }
     }
     bool operator==(const DexToPcIterator& rhs) const {
@@ -147,7 +149,8 @@
         if (end_ > 0) {
           encoded_table_ptr_ = table_->FirstPcToDexPtr();
           native_pc_offset_ = DecodeUnsignedLeb128(&encoded_table_ptr_);
-          dex_pc_ = DecodeUnsignedLeb128(&encoded_table_ptr_);
+          // First delta is always positive.
+          dex_pc_ = static_cast<uint32_t>(DecodeSignedLeb128(&encoded_table_ptr_));
         }
       } else {  // An iterator wanted from the end.
         DCHECK_EQ(table_->PcToDexSize(), element);
@@ -162,8 +165,9 @@
     void operator++() {
       ++element_;
       if (element_ != end_) {  // Avoid reading beyond the end of the table.
-        native_pc_offset_ = DecodeUnsignedLeb128(&encoded_table_ptr_);
-        dex_pc_ = DecodeUnsignedLeb128(&encoded_table_ptr_);
+        native_pc_offset_ += DecodeUnsignedLeb128(&encoded_table_ptr_);
+        // For negative delta, unsigned overflow after static_cast does exactly what we need.
+        dex_pc_ += static_cast<uint32_t>(DecodeSignedLeb128(&encoded_table_ptr_));
       }
     }
     bool operator==(const PcToDexIterator& rhs) const {

diff --git a/runtime/oat.cc b/runtime/oat.cc
index 50069b2..52e74ab 100644
--- a/runtime/oat.cc
+++ b/runtime/oat.cc

@@ -22,7 +22,7 @@
 namespace art {
 
 const uint8_t OatHeader::kOatMagic[] = { 'o', 'a', 't', '\n' };
-const uint8_t OatHeader::kOatVersion[] = { '0', '1', '1', '\0' };
+const uint8_t OatHeader::kOatVersion[] = { '0', '1', '2', '\0' };
 
 OatHeader::OatHeader() {
   memset(this, 0, sizeof(*this));
commit	1e6cb63d77090ddc6aa19c755d7066f66e9ff87e	[log] [tgz]
author	Vladimir Marko <vmarko@google.com>	Thu Nov 28 16:27:29 2013 +0000
committer	Vladimir Marko <vmarko@google.com>	Fri Nov 29 10:14:39 2013 +0000
tree	7cffe82e563757521da204c5dfbd3fff41dde7ed
parent	00ce185371be9930dfd75e5acc2258a1139b558e [diff]