Use 28 bits for type check bit string.
Also reverse the order of fields in Class::status_. This
avoids an increase in generated code size:
- ClassStatus in the high bits allows the class initialization
check to use "status_high_byte < (kInitialized << 4)", which
is unaffected by the low 4 bits of the loaded byte, instead
of needing to extract the status bits first (see the sketch
after this list),
- the type check bit string in the bottom bits, instead of
somewhere in the middle, allows the comparison on ARM to be
done with the same code size as the old layout in most cases
(except when the compared value is 9-16 bits and not a
modified immediate: 2 bytes less for 9-12 bits and sometimes
2 bytes more for 13-16 bits; the latter could be worked
around using LDRH if the second character's boundary is at
16 bits).
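
To illustrate the first point, here is a minimal stand-alone C++
sketch of the check that the code generators now emit (the
byte-offset arithmetic and the shifted constant mirror the
generated code in the diff below; IsInitialized, kClassStatusLsb
and the numeric value used for kInitialized are illustrative
placeholders, not actual ART identifiers or values):

  #include <cstddef>
  #include <cstdint>

  // Assumed layout of the 32-bit status_ word after this change:
  //   bits [0, 28)  SubtypeCheckBits (27-bit BitString + 1 overflow bit)
  //   bits [28, 32) ClassStatus
  constexpr size_t kBitsPerByte = 8u;
  constexpr size_t kClassStatusLsb = 28u;  // SubtypeCheckBits::BitStructSizeOf()
  constexpr uint32_t kInitialized = 14u;   // placeholder for enum_cast<>(ClassStatus::kInitialized)

  bool IsInitialized(const uint8_t* class_data, size_t status_offset) {
    // On a little-endian target the status nibble lives in byte 3 of
    // status_, so a single byte load is enough.
    size_t status_byte_offset = status_offset + kClassStatusLsb / kBitsPerByte;
    uint8_t status_byte = class_data[status_byte_offset];
    // The low 4 bits of that byte belong to SubtypeCheckBits, but the
    // unsigned compare against the shifted constant ignores them: if
    // status >= kInitialized, the byte is >= (kInitialized << 4) whatever
    // the low nibble is; if status < kInitialized, it stays strictly below.
    uint8_t shifted_initialized_value =
        static_cast<uint8_t>(kInitialized << (kClassStatusLsb % kBitsPerByte));
    return status_byte >= shifted_initialized_value;
  }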
Add one of the extra bits to the 2nd character to push its
boundary to 16 bits, so that an implementation using 16-bit
loads can be tested in a subsequent CL, and arbitrarily add
the other three bits to the 3rd character. This CL is only
about making those bits available and enabling that testing;
determining how to use the additional bits for the best
impact (whether to add a 4th character or distribute them
differently among the three characters) is left for later.
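
For reference, a small compile-time sketch of the resulting bit
budget (the character widths and the GetLsbForPosition()
arithmetic mirror the runtime/base/bit_string.h changes in the
diff below; this is only a worked check of the numbers, not ART
code):

  #include <cstddef>

  // New character widths, least-significant character first.
  constexpr size_t kBitSizeAtPosition[] = {12u, 4u, 11u};

  // With the reversed storage order, the LSB of the character at `pos` is
  // simply the sum of the widths of the characters below it.
  constexpr size_t GetLsbForPosition(size_t pos) {
    size_t lsb = 0u;
    for (size_t i = 0u; i < pos; ++i) {
      lsb += kBitSizeAtPosition[i];
    }
    return lsb;
  }

  static_assert(GetLsbForPosition(1u) == 12u, "2nd character starts at bit 12");
  static_assert(GetLsbForPosition(2u) == 16u,
                "3rd character starts at the 16-bit boundary (LDRH-friendly)");
  static_assert(GetLsbForPosition(3u) == 27u,
                "27 bitstring bits + 1 overflow bit = 28-bit SubtypeCheckBits");
  // The 4-bit ClassStatus in the high bits then fills the 32-bit status_ word.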
Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Test: Pixel 2 XL boots.
Test: testrunner.py --target --optimizing
Bug: 64692057
Change-Id: I38c59837e3df3accb813fb1e04dc42e9afcd2d73
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 13886b3..28f4816 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -2097,13 +2097,17 @@
Register class_reg) {
UseScratchRegisterScope temps(GetVIXLAssembler());
Register temp = temps.AcquireW();
- size_t status_offset = mirror::Class::StatusOffset().SizeValue();
+ constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
+ const size_t status_byte_offset =
+ mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
+ constexpr uint32_t shifted_initialized_value =
+ enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte);
// Even if the initialized flag is set, we need to ensure consistent memory ordering.
// TODO(vixl): Let the MacroAssembler handle MemOperand.
- __ Add(temp, class_reg, status_offset);
+ __ Add(temp, class_reg, status_byte_offset);
__ Ldarb(temp, HeapOperand(temp));
- __ Cmp(temp, enum_cast<>(ClassStatus::kInitialized));
+ __ Cmp(temp, shifted_initialized_value);
__ B(lo, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
}
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 7c6a5fd..f1ad4e1 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -7172,11 +7172,14 @@
LoadClassSlowPathARMVIXL* slow_path, vixl32::Register class_reg) {
UseScratchRegisterScope temps(GetVIXLAssembler());
vixl32::Register temp = temps.Acquire();
- GetAssembler()->LoadFromOffset(kLoadUnsignedByte,
- temp,
- class_reg,
- mirror::Class::StatusOffset().Int32Value());
- __ Cmp(temp, enum_cast<>(ClassStatus::kInitialized));
+ constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
+ const size_t status_byte_offset =
+ mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
+ constexpr uint32_t shifted_initialized_value =
+ enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte);
+
+ GetAssembler()->LoadFromOffset(kLoadUnsignedByte, temp, class_reg, status_byte_offset);
+ __ Cmp(temp, shifted_initialized_value);
__ B(lo, slow_path->GetEntryLabel());
// Even if the initialized flag is set, we may be in a situation where caches are not synced
// properly. Therefore, we do a memory fence.
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index ebe252a..c8bd5d4 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -1915,8 +1915,14 @@
void InstructionCodeGeneratorMIPS::GenerateClassInitializationCheck(SlowPathCodeMIPS* slow_path,
Register class_reg) {
- __ LoadFromOffset(kLoadUnsignedByte, TMP, class_reg, mirror::Class::StatusOffset().Int32Value());
- __ LoadConst32(AT, enum_cast<>(ClassStatus::kInitialized));
+ constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
+ const size_t status_byte_offset =
+ mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
+ constexpr uint32_t shifted_initialized_value =
+ enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte);
+
+ __ LoadFromOffset(kLoadUnsignedByte, TMP, class_reg, status_byte_offset);
+ __ LoadConst32(AT, shifted_initialized_value);
__ Bltu(TMP, AT, slow_path->GetEntryLabel());
// Even if the initialized flag is set, we need to ensure consistent memory ordering.
__ Sync(0);
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 3ea7b82..bbdc3be 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -1761,8 +1761,14 @@
void InstructionCodeGeneratorMIPS64::GenerateClassInitializationCheck(SlowPathCodeMIPS64* slow_path,
GpuRegister class_reg) {
- __ LoadFromOffset(kLoadUnsignedByte, TMP, class_reg, mirror::Class::StatusOffset().Int32Value());
- __ LoadConst32(AT, enum_cast<>(ClassStatus::kInitialized));
+ constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
+ const size_t status_byte_offset =
+ mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
+ constexpr uint32_t shifted_initialized_value =
+ enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte);
+
+ __ LoadFromOffset(kLoadUnsignedByte, TMP, class_reg, status_byte_offset);
+ __ LoadConst32(AT, shifted_initialized_value);
__ Bltuc(TMP, AT, slow_path->GetEntryLabel());
// Even if the initialized flag is set, we need to ensure consistent memory ordering.
__ Sync(0);
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 6853238..537e97a 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -6219,8 +6219,13 @@
void InstructionCodeGeneratorX86::GenerateClassInitializationCheck(
SlowPathCode* slow_path, Register class_reg) {
- __ cmpb(Address(class_reg, mirror::Class::StatusOffset().Int32Value()),
- Immediate(enum_cast<>(ClassStatus::kInitialized)));
+ constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
+ const size_t status_byte_offset =
+ mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
+ constexpr uint32_t shifted_initialized_value =
+ enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte);
+
+ __ cmpb(Address(class_reg, status_byte_offset), Immediate(shifted_initialized_value));
__ j(kBelow, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
// No need for memory fence, thanks to the X86 memory model.
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 1f8d822..4a64285 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -5425,8 +5425,13 @@
void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck(
SlowPathCode* slow_path, CpuRegister class_reg) {
- __ cmpb(Address(class_reg, mirror::Class::StatusOffset().Int32Value()),
- Immediate(enum_cast<>(ClassStatus::kInitialized)));
+ constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
+ const size_t status_byte_offset =
+ mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
+ constexpr uint32_t shifted_initialized_value =
+ enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte);
+
+ __ cmpb(Address(class_reg, status_byte_offset), Immediate(shifted_initialized_value));
__ j(kBelow, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
// No need for memory fence, thanks to the x86-64 memory model.
diff --git a/runtime/base/bit_string.h b/runtime/base/bit_string.h
index bfbe8ea..7d9fb70 100644
--- a/runtime/base/bit_string.h
+++ b/runtime/base/bit_string.h
@@ -114,13 +114,13 @@
/**
* BitString
*
- * lsb (least significant bit) msb
- * +------------+------------+------------+-----+------------+
- * | | | | | |
- * | Char0 | Char1 | Char2 | ... | CharN |
- * | | | | | |
- * +------------+------------+------------+-----+------------+
- * <- len[0] -> <- len[1] -> <- len[2] -> ... <- len[N] ->
+ * MSB (most significant bit) LSB
+ * +------------+-----+------------+------------+------------+
+ * | | | | | |
+ * | CharN | ... | Char2 | Char1 | Char0 |
+ * | | | | | |
+ * +------------+-----+------------+------------+------------+
+ * <- len[N] -> ... <- len[2] -> <- len[1] -> <- len[0] ->
*
* Stores up to "N+1" characters in a subset of a machine word. Each character has a different
* bitlength, as defined by len[pos]. This BitString can be nested inside of a BitStruct
@@ -145,7 +145,7 @@
// As this is meant to be used only with "SubtypeCheckInfo",
// the bitlengths and the maximum string length is tuned by maximizing the coverage of "Assigned"
// bitstrings for instance-of and check-cast targets during Optimizing compilation.
- static constexpr size_t kBitSizeAtPosition[] = {12, 3, 8}; // len[] from header docs.
+ static constexpr size_t kBitSizeAtPosition[] = {12, 4, 11}; // len[] from header docs.
static constexpr size_t kCapacity = arraysize(kBitSizeAtPosition); // MaxBitstringLen above.
// How many bits are needed to represent BitString[0..position)?
@@ -165,8 +165,7 @@
// (e.g. to use with BitField{Insert,Extract,Clear}.)
static constexpr size_t GetLsbForPosition(size_t position) {
DCHECK_GE(kCapacity, position);
- constexpr size_t kMaximumBitLength = GetBitLengthTotalAtPosition(kCapacity);
- return kMaximumBitLength - GetBitLengthTotalAtPosition(position + 1u);
+ return GetBitLengthTotalAtPosition(position);
}
// How many bits are needed for a BitStringChar at the position?
@@ -183,9 +182,7 @@
BitStringChar operator[](size_t idx) const {
DCHECK_LT(idx, kCapacity);
- StorageType data =
- BitFieldExtract(storage_,
- GetLsbForPosition(idx), kBitSizeAtPosition[idx]);
+ StorageType data = BitFieldExtract(storage_, GetLsbForPosition(idx), kBitSizeAtPosition[idx]);
return BitStringChar(data, kBitSizeAtPosition[idx]);
}
@@ -259,17 +256,10 @@
DCHECK_GE(kCapacity, end);
BitString copy = *this;
- size_t bit_size = 0;
- for (size_t idx = end; idx < kCapacity; ++idx) {
- bit_size += kBitSizeAtPosition[idx];
- }
- // TODO: precompute above table.
-
- if (bit_size > 0) {
- StorageType data =
- BitFieldClear(copy.storage_,
- GetLsbForPosition(kCapacity),
- bit_size);
+ if (end < kCapacity) {
+ size_t lsb = GetLsbForPosition(end);
+ size_t bit_size = GetLsbForPosition(kCapacity) - lsb;
+ StorageType data = BitFieldClear(copy.storage_, lsb, bit_size);
copy.storage_ = data;
}
diff --git a/runtime/base/bit_string_test.cc b/runtime/base/bit_string_test.cc
index 96aa154..23274e3 100644
--- a/runtime/base/bit_string_test.cc
+++ b/runtime/base/bit_string_test.cc
@@ -65,7 +65,7 @@
return uint_value;
}
-// Make max bitstring, e.g. BitString[4095,7,255] for {12,3,8}
+// Make max bitstring, e.g. BitString[4095,15,2047] for {12,4,11}
template <size_t kCount = BitString::kCapacity>
BitString MakeBitStringMax() {
BitString bs{};
@@ -87,15 +87,14 @@
#define EXPECT_BITSTRING_STR(expected_str, actual_value) \
EXPECT_STREQ((expected_str), Stringify((actual_value)).c_str())
+// TODO: Consider removing this test; it largely replicates the logic in GetLsbForPosition().
TEST(InstanceOfBitString, GetLsbForPosition) {
ASSERT_LE(3u, BitString::kCapacity);
// Test will fail if kCapacity is not at least 3. Update it.
- EXPECT_EQ(0u, BitString::GetLsbForPosition(BitString::kCapacity - 1u));
- EXPECT_EQ(BitString::kBitSizeAtPosition[BitString::kCapacity - 1u],
- BitString::GetLsbForPosition(BitString::kCapacity - 2u));
- EXPECT_EQ(BitString::kBitSizeAtPosition[BitString::kCapacity - 1u] +
- BitString::kBitSizeAtPosition[BitString::kCapacity - 2u],
- BitString::GetLsbForPosition(BitString::kCapacity - 3u));
+ EXPECT_EQ(0u, BitString::GetLsbForPosition(0u));
+ EXPECT_EQ(BitString::kBitSizeAtPosition[0u], BitString::GetLsbForPosition(1u));
+ EXPECT_EQ(BitString::kBitSizeAtPosition[0u] + BitString::kBitSizeAtPosition[1u],
+ BitString::GetLsbForPosition(2u));
}
TEST(InstanceOfBitString, ToString) {
@@ -126,8 +125,8 @@
// Each maximal value should be tested here for each position.
uint32_t max_bitstring_ints[] = {
MaxInt<uint32_t>(12),
- MaxInt<uint32_t>(3),
- MaxInt<uint32_t>(8),
+ MaxInt<uint32_t>(4),
+ MaxInt<uint32_t>(11),
};
// Update tests if changing the tuning values above.
@@ -151,14 +150,13 @@
}
TEST(InstanceOfBitString, MemoryRepresentation) {
- // Verify that the lower positions are stored in more significant bits.
+ // Verify that the lower positions are stored in less significant bits.
BitString bs = MakeBitString({MaxForPos<0>(), MaxForPos<1>()});
BitString::StorageType as_int = static_cast<BitString::StorageType>(bs);
- // Below tests assumes the capacity is 3. Update if it this changes.
- ASSERT_EQ(3u, BitString::kCapacity);
- EXPECT_EQ(MaxForPos<0>() << (BitString::kBitSizeAtPosition[2] + BitString::kBitSizeAtPosition[1]) |
- (MaxForPos<1>() << BitString::kBitSizeAtPosition[2]),
+ // The tests below assume the capacity is at least 3.
+ ASSERT_LE(3u, BitString::kCapacity);
+ EXPECT_EQ((MaxForPos<0>() << 0) | (MaxForPos<1>() << BitString::kBitSizeAtPosition[0]),
as_int);
}
diff --git a/runtime/base/bit_utils.h b/runtime/base/bit_utils.h
index 34cddbf..d2a99f1 100644
--- a/runtime/base/bit_utils.h
+++ b/runtime/base/bit_utils.h
@@ -46,10 +46,14 @@
constexpr int CLZ(T x) {
static_assert(std::is_integral<T>::value, "T must be integral");
static_assert(std::is_unsigned<T>::value, "T must be unsigned");
- static_assert(sizeof(T) <= sizeof(long long), // NOLINT [runtime/int] [4]
- "T too large, must be smaller than long long");
+ static_assert(std::numeric_limits<T>::radix == 2, "Unexpected radix!");
+ static_assert(sizeof(T) == sizeof(uint64_t) || sizeof(T) <= sizeof(uint32_t),
+ "Unsupported sizeof(T)");
DCHECK_NE(x, 0u);
- return (sizeof(T) == sizeof(uint32_t)) ? __builtin_clz(x) : __builtin_clzll(x);
+ constexpr bool is_64_bit = (sizeof(T) == sizeof(uint64_t));
+ constexpr size_t adjustment =
+ is_64_bit ? 0u : std::numeric_limits<uint32_t>::digits - std::numeric_limits<T>::digits;
+ return is_64_bit ? __builtin_clzll(x) : __builtin_clz(x) - adjustment;
}
// Similar to CLZ except that on zero input it returns bitwidth and supports signed integers.
@@ -65,10 +69,10 @@
static_assert(std::is_integral<T>::value, "T must be integral");
// It is not unreasonable to ask for trailing zeros in a negative number. As such, do not check
// that T is an unsigned type.
- static_assert(sizeof(T) <= sizeof(long long), // NOLINT [runtime/int] [4]
- "T too large, must be smaller than long long");
+ static_assert(sizeof(T) == sizeof(uint64_t) || sizeof(T) <= sizeof(uint32_t),
+ "Unsupported sizeof(T)");
DCHECK_NE(x, static_cast<T>(0));
- return (sizeof(T) == sizeof(uint32_t)) ? __builtin_ctz(x) : __builtin_ctzll(x);
+ return (sizeof(T) == sizeof(uint64_t)) ? __builtin_ctzll(x) : __builtin_ctz(x);
}
// Similar to CTZ except that on zero input it returns bitwidth and supports signed integers.
diff --git a/runtime/image.cc b/runtime/image.cc
index b9d955c..dd0c148 100644
--- a/runtime/image.cc
+++ b/runtime/image.cc
@@ -26,7 +26,7 @@
namespace art {
const uint8_t ImageHeader::kImageMagic[] = { 'a', 'r', 't', '\n' };
-const uint8_t ImageHeader::kImageVersion[] = { '0', '5', '2', '\0' }; // 4-bit ClassStatus.
+const uint8_t ImageHeader::kImageVersion[] = { '0', '5', '3', '\0' }; // ClassStatus in high bits.
ImageHeader::ImageHeader(uint32_t image_begin,
uint32_t image_size,
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 55c5889..84b0326 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -81,9 +81,9 @@
template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
ClassStatus GetStatus() REQUIRES_SHARED(Locks::mutator_lock_) {
// Avoid including "subtype_check_bits_and_status.h" to get the field.
- // The ClassStatus is always in the least-significant bits of status_.
+ // The ClassStatus is always in the 4 most-significant bits of status_.
return enum_cast<ClassStatus>(
- static_cast<uint32_t>(GetField32Volatile<kVerifyFlags>(StatusOffset())) & 0xff);
+ static_cast<uint32_t>(GetField32Volatile<kVerifyFlags>(StatusOffset())) >> (32 - 4));
}
// This is static because 'this' may be moved by GC.
diff --git a/runtime/oat.h b/runtime/oat.h
index 6d4f18b..36099b9 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,8 +32,8 @@
class PACKED(4) OatHeader {
public:
static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
- // Last oat version changed reason: 4-bit ClassStatus.
- static constexpr uint8_t kOatVersion[] = { '1', '3', '6', '\0' };
+ // Last oat version changed reason: ClassStatus in high bits.
+ static constexpr uint8_t kOatVersion[] = { '1', '3', '7', '\0' };
static constexpr const char* kImageLocationKey = "image-location";
static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/subtype_check_bits.h b/runtime/subtype_check_bits.h
index 4305ff8..462f203 100644
--- a/runtime/subtype_check_bits.h
+++ b/runtime/subtype_check_bits.h
@@ -26,22 +26,22 @@
/**
* The SubtypeCheckBits memory layout (in bits):
*
- * Variable
- * |
- * <---- up to 23 bits ----> v +---> 1 bit
- * |
- * +-------------------------+--------+-----------+---++
- * | Bitstring | |
- * +-------------------------+--------+-----------+ |
- * | Path To Root | Next | (unused) | OF |
- * +---+---------------------+--------+ | |
- * | | | | | ... | | (0....0) | |
- * +---+---------------------+--------+-----------+----+
- * MSB LSB
+ * 1 bit Variable
+ * | |
+ * v v <---- up to 23 bits ---->
+ *
+ * +----+-----------+--------+-------------------------+
+ * | | Bitstring |
+ * + +-----------+--------+-------------------------+
+ * | OF | (unused) | Next | Path To Root |
+ * + | |--------+----+----------+----+----+
+ * | | (0....0) | | | ... | | |
+ * +----+-----------+--------+----+----------+----+----+
+ * MSB (most significant bit) LSB
*
* The bitstring takes up to 23 bits; anything exceeding that is truncated:
* - Path To Root is a list of chars, encoded as a BitString:
- * starting at the root (in MSB), each character is a sibling index unique to the parent,
+ * starting at the root (in LSB), each character is a sibling index unique to the parent,
* Paths longer than BitString::kCapacity are truncated to fit within the BitString.
* - Next is a single BitStringChar (immediatelly following Path To Root)
* When new children are assigned paths, they get allocated the parent's Next value.
@@ -57,8 +57,8 @@
* See subtype_check.h and subtype_check_info.h for more details.
*/
BITSTRUCT_DEFINE_START(SubtypeCheckBits, /*size*/ BitString::BitStructSizeOf() + 1u)
- BitStructUint</*lsb*/0, /*width*/1> overflow_;
- BitStructField<BitString, /*lsb*/1> bitstring_;
+ BitStructField<BitString, /*lsb*/ 0> bitstring_;
+ BitStructUint</*lsb*/ BitString::BitStructSizeOf(), /*width*/ 1> overflow_;
BITSTRUCT_DEFINE_END(SubtypeCheckBits);
} // namespace art
diff --git a/runtime/subtype_check_bits_and_status.h b/runtime/subtype_check_bits_and_status.h
index 11cb9b9..321a723 100644
--- a/runtime/subtype_check_bits_and_status.h
+++ b/runtime/subtype_check_bits_and_status.h
@@ -19,6 +19,7 @@
#include "base/bit_struct.h"
#include "base/bit_utils.h"
+#include "base/casts.h"
#include "class_status.h"
#include "subtype_check_bits.h"
@@ -36,13 +37,13 @@
}
/**
- * MSB LSB
- * +---------------------------------------------------+---------------+
- * | | |
- * | SubtypeCheckBits | ClassStatus |
- * | | |
- * +---------------------------------------------------+---------------+
- * <----- 24 bits -----> <-- 8 bits -->
+ * MSB (most significant bit) LSB
+ * +---------------+---------------------------------------------------+
+ * | | |
+ * | ClassStatus | SubtypeCheckBits |
+ * | | |
+ * +---------------+---------------------------------------------------+
+ * <-- 4 bits --> <----- 28 bits ----->
*
* Invariants:
*
@@ -53,20 +54,25 @@
* This enables a highly efficient path comparison between any two labels:
*
* src <: target :=
- * src >> (32 - len(path-to-root(target))) == target >> (32 - len(path-to-root(target))
+ * (src & mask) == (target & mask) where mask := (1u << len(path-to-root(target))) - 1u
*
- * In the above example, the RHS operands are a function of the depth. Since the target
- * is known at compile time, it becomes:
- *
- * src >> #imm_target_shift == #imm
+ * In the above example, the `len()` (and thus `mask`) is a function of the depth.
+ * Since the target is known at compile time, it becomes
+ * (src & #imm_mask) == #imm
+ * or
+ * ((src - #imm) << #imm_shift_to_remove_high_bits) == 0
+ * or a similar expression chosen for the best performance or code size.
*
* (This requires that path-to-root in `target` is not truncated, i.e. it is in the Assigned state).
*/
-static constexpr size_t kClassStatusBitSize = 8u; // NonNumericBitSizeOf<ClassStatus>()
+static constexpr size_t kClassStatusBitSize = MinimumBitsToStore(enum_cast<>(ClassStatus::kLast));
+static_assert(kClassStatusBitSize == 4u, "ClassStatus should need 4 bits.");
BITSTRUCT_DEFINE_START(SubtypeCheckBitsAndStatus, BitSizeOf<BitString::StorageType>())
- BitStructField<ClassStatus, /*lsb*/0, /*width*/kClassStatusBitSize> status_;
- BitStructField<SubtypeCheckBits, /*lsb*/kClassStatusBitSize> subtype_check_info_;
- BitStructInt</*lsb*/0, /*width*/BitSizeOf<BitString::StorageType>()> int32_alias_;
+ BitStructField<SubtypeCheckBits, /*lsb*/ 0> subtype_check_info_;
+ BitStructField<ClassStatus,
+ /*lsb*/ SubtypeCheckBits::BitStructSizeOf(),
+ /*width*/ kClassStatusBitSize> status_;
+ BitStructInt</*lsb*/ 0, /*width*/ BitSizeOf<BitString::StorageType>()> int32_alias_;
BITSTRUCT_DEFINE_END(SubtypeCheckBitsAndStatus);
// Use the spare alignment from "ClassStatus" to store all the new SubtypeCheckInfo data.
diff --git a/runtime/subtype_check_info.h b/runtime/subtype_check_info.h
index 61d590b..08db770 100644
--- a/runtime/subtype_check_info.h
+++ b/runtime/subtype_check_info.h
@@ -296,8 +296,7 @@
BitString::StorageType GetEncodedPathToRoot() const {
BitString::StorageType data = static_cast<BitString::StorageType>(GetPathToRoot());
// Bit strings are logically in the least-significant memory.
- // Shift it so the bits are all most-significant.
- return data << (BitSizeOf(data) - BitStructSizeOf<BitString>());
+ return data;
}
// Retrieve the path to root bitstring mask as a plain uintN_t that is amenable to
@@ -305,17 +304,7 @@
BitString::StorageType GetEncodedPathToRootMask() const {
size_t num_bitchars = GetSafeDepth();
size_t bitlength = BitString::GetBitLengthTotalAtPosition(num_bitchars);
-
- BitString::StorageType mask_all =
- std::numeric_limits<BitString::StorageType>::max();
- BitString::StorageType mask_lsb =
- MaskLeastSignificant<BitString::StorageType>(
- BitSizeOf<BitString::StorageType>() - bitlength);
-
- BitString::StorageType result = mask_all & ~mask_lsb;
-
- // TODO: refactor above code into MaskMostSignificant?
- return result;
+ return MaskLeastSignificant<BitString::StorageType>(bitlength);
}
// Get the "Next" bitchar, assuming that there is one to get.
diff --git a/runtime/subtype_check_info_test.cc b/runtime/subtype_check_info_test.cc
index 338d75a..91fcc07 100644
--- a/runtime/subtype_check_info_test.cc
+++ b/runtime/subtype_check_info_test.cc
@@ -65,7 +65,7 @@
return uint_value;
}
-// Make max bistring, e.g. BitString[4095,7,255] for {12,3,8}
+// Make max bitstring, e.g. BitString[4095,15,2047] for {12,4,11}
template <size_t kCount = BitString::kCapacity>
BitString MakeBitStringMax() {
BitString bs{};
@@ -258,60 +258,62 @@
TEST_F(SubtypeCheckInfoTest, EncodedPathToRoot) {
using StorageType = BitString::StorageType;
- SubtypeCheckInfo io =
+ SubtypeCheckInfo sci =
MakeSubtypeCheckInfo(/*path_to_root*/MakeBitStringMax(),
/*next*/BitStringChar{},
/*overflow*/false,
/*depth*/BitString::kCapacity);
- // 0b11111...000 where MSB == 1, and leading 1s = the maximum bitstring representation.
- EXPECT_EQ(MaxInt<StorageType>(LenForPos()) << (BitSizeOf<StorageType>() - LenForPos()),
- io.GetEncodedPathToRoot());
-
- EXPECT_EQ(MaxInt<StorageType>(LenForPos()) << (BitSizeOf<StorageType>() - LenForPos()),
- io.GetEncodedPathToRootMask());
-
- // 0b11111...000 where MSB == 1, and leading 1s = the maximum bitstring representation.
+ // 0b000...111 where LSB == 1, and trailing 1s = the maximum bitstring representation.
+ EXPECT_EQ(MaxInt<StorageType>(LenForPos()), sci.GetEncodedPathToRoot());
// The rest of this test is written assuming kCapacity == 3 for convenience.
// Please update the test if this changes.
ASSERT_EQ(3u, BitString::kCapacity);
ASSERT_EQ(12u, BitString::kBitSizeAtPosition[0]);
- ASSERT_EQ(3u, BitString::kBitSizeAtPosition[1]);
- ASSERT_EQ(8u, BitString::kBitSizeAtPosition[2]);
+ ASSERT_EQ(4u, BitString::kBitSizeAtPosition[1]);
+ ASSERT_EQ(11u, BitString::kBitSizeAtPosition[2]);
- SubtypeCheckInfo io2 =
+ SubtypeCheckInfo sci2 =
MakeSubtypeCheckInfoUnchecked(MakeBitStringMax<2u>(),
/*overflow*/false,
/*depth*/BitString::kCapacity);
-#define MAKE_ENCODED_PATH(pos0, pos1, pos2) (((pos0) << 3u << 8u << 9u) | ((pos1) << 8u << 9u) | ((pos2) << 9u))
+#define MAKE_ENCODED_PATH(pos0, pos1, pos2) \
+ (((pos0) << 0) | \
+ ((pos1) << BitString::kBitSizeAtPosition[0]) | \
+ ((pos2) << (BitString::kBitSizeAtPosition[0] + BitString::kBitSizeAtPosition[1])))
- EXPECT_EQ(MAKE_ENCODED_PATH(MaxInt<BitString::StorageType>(12), 0b111, 0b0), io2.GetEncodedPathToRoot());
- EXPECT_EQ(MAKE_ENCODED_PATH(MaxInt<BitString::StorageType>(12), 0b111, 0b11111111), io2.GetEncodedPathToRootMask());
+ EXPECT_EQ(MAKE_ENCODED_PATH(MaxInt<BitString::StorageType>(12), 0b1111, 0b0),
+ sci2.GetEncodedPathToRoot());
+ EXPECT_EQ(MAKE_ENCODED_PATH(MaxInt<BitString::StorageType>(12), 0b1111, 0b11111111111),
+ sci2.GetEncodedPathToRootMask());
- SubtypeCheckInfo io3 =
+ SubtypeCheckInfo sci3 =
MakeSubtypeCheckInfoUnchecked(MakeBitStringMax<2u>(),
/*overflow*/false,
/*depth*/BitString::kCapacity - 1u);
- EXPECT_EQ(MAKE_ENCODED_PATH(MaxInt<BitString::StorageType>(12), 0b111, 0b0), io3.GetEncodedPathToRoot());
- EXPECT_EQ(MAKE_ENCODED_PATH(MaxInt<BitString::StorageType>(12), 0b111, 0b0), io3.GetEncodedPathToRootMask());
+ EXPECT_EQ(MAKE_ENCODED_PATH(MaxInt<BitString::StorageType>(12), 0b1111, 0b0),
+ sci3.GetEncodedPathToRoot());
+ EXPECT_EQ(MAKE_ENCODED_PATH(MaxInt<BitString::StorageType>(12), 0b1111, 0b0),
+ sci3.GetEncodedPathToRootMask());
- SubtypeCheckInfo io4 =
+ SubtypeCheckInfo sci4 =
MakeSubtypeCheckInfoUnchecked(MakeBitString({0b1010101u}),
/*overflow*/false,
/*depth*/BitString::kCapacity - 2u);
- EXPECT_EQ(MAKE_ENCODED_PATH(0b1010101u, 0b000, 0b0), io4.GetEncodedPathToRoot());
- EXPECT_EQ(MAKE_ENCODED_PATH(MaxInt<BitString::StorageType>(12), 0b000, 0b0), io4.GetEncodedPathToRootMask());
+ EXPECT_EQ(MAKE_ENCODED_PATH(0b1010101u, 0b0000, 0b0), sci4.GetEncodedPathToRoot());
+ EXPECT_EQ(MAKE_ENCODED_PATH(MaxInt<BitString::StorageType>(12), 0b0000, 0b0),
+ sci4.GetEncodedPathToRootMask());
}
TEST_F(SubtypeCheckInfoTest, NewForRoot) {
- SubtypeCheckInfo io = SubtypeCheckInfo::CreateRoot();
- EXPECT_EQ(SubtypeCheckInfo::kAssigned, io.GetState()); // Root is always assigned.
- EXPECT_EQ(0u, GetPathToRoot(io).Length()); // Root's path length is 0.
- EXPECT_TRUE(HasNext(io)); // Root always has a "Next".
- EXPECT_EQ(MakeBitStringChar(1u), io.GetNext()); // Next>=1 to disambiguate from Uninitialized.
+ SubtypeCheckInfo sci = SubtypeCheckInfo::CreateRoot();
+ EXPECT_EQ(SubtypeCheckInfo::kAssigned, sci.GetState()); // Root is always assigned.
+ EXPECT_EQ(0u, GetPathToRoot(sci).Length()); // Root's path length is 0.
+ EXPECT_TRUE(HasNext(sci)); // Root always has a "Next".
+ EXPECT_EQ(MakeBitStringChar(1u), sci.GetNext()); // Next>=1 to disambiguate from Uninitialized.
}
TEST_F(SubtypeCheckInfoTest, CopyCleared) {