author     2018-01-04 18:42:57 +0000
committer  2018-01-10 14:30:26 +0000
commit     dc682aa9d0eae1a851af059434adb6f6cf8f06f8 (patch)
tree       f93f00493ee5887b05b42a6a5dd99eb6794daad4
parent     d6b7e8c63f8eca25460f56f66dcae15eaa897ff0 (diff)
Use 28 bits for type check bit string.
And reverse the order of fields in Class::status_. This
avoids an increase in generated code size:
- ClassStatus in the high bits allows the class initialization
  check to be done as "status_high_byte < (kInitialized << 4)",
  which is unaffected by the low 4 bits of the LHS, instead of
  needing to extract the status bits first (sketched below),
- the type check bit string in the bottom bits, instead of
  somewhere in the middle, allows the comparison on ARM to be
  done with the same code size as the old layout in most cases
  (except when the compared value is 9-16 bits wide and not a
  modified immediate: 2 bytes less for 9-12 bits and sometimes
  2 bytes more for 13-16 bits; the latter could be worked
  around using LDRH if the second character's boundary is at
  16 bits).
Add one of the extra bits to the 2nd character to push its
boundary to 16 bits, so that an implementation using 16-bit
loads can be tested in a subsequent CL; arbitrarily add the
other three bits to the 3rd character. This CL is only about
making those bits available and allowing such testing; how to
use the additional bits for the best impact (whether to add a
4th character or to distribute them differently among the
three characters) shall be determined later.
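
For reference, a minimal standalone sketch of the new status_ layout
and the single-byte check it enables. The widths below (12 + 4 + 11
bitstring + 1 overflow + 4 status = 32) are the ones set by this CL;
the helper names and the status values used in the asserts are
placeholders, not ART's:

#include <cstdint>

// 32-bit Class::status_ word after this CL (little-endian targets):
//   [31:28] ClassStatus (4 bits)
//   [27]    overflow flag
//   [26:0]  type check bit string (12 + 4 + 11)
constexpr uint32_t kStatusLsb = 28u;

constexpr uint32_t MakeStatusWord(uint32_t class_status, uint32_t subtype_bits) {
  return (class_status << kStatusLsb) | subtype_bits;
}

// The initialization check only loads the most significant byte. The low
// 4 bits of that byte (overflow flag + top bitstring bits) cannot change
// the outcome, because the RHS has zeros there.
constexpr bool IsInitialized(uint32_t status_word, uint32_t k_initialized) {
  uint32_t status_high_byte = status_word >> 24;  // one LDRB / cmpb in codegen
  return status_high_byte >= (k_initialized << 4);
}

static_assert(IsInitialized(MakeStatusWord(11u, 0x7ffffffu), /*k_initialized*/ 11u), "");
static_assert(!IsInitialized(MakeStatusWord(10u, 0x7ffffffu), /*k_initialized*/ 11u), "");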
Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Test: Pixel 2 XL boots.
Test: testrunner.py --target --optimizing
Bug: 64692057
Change-Id: I38c59837e3df3accb813fb1e04dc42e9afcd2d73
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc     |  10
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.cc  |  13
-rw-r--r--  compiler/optimizing/code_generator_mips.cc      |  10
-rw-r--r--  compiler/optimizing/code_generator_mips64.cc    |  10
-rw-r--r--  compiler/optimizing/code_generator_x86.cc       |   9
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc    |   9
-rw-r--r--  runtime/base/bit_string.h                       |  38
-rw-r--r--  runtime/base/bit_string_test.cc                 |  26
-rw-r--r--  runtime/base/bit_utils.h                        |  16
-rw-r--r--  runtime/image.cc                                |   2
-rw-r--r--  runtime/mirror/class.h                          |   4
-rw-r--r--  runtime/oat.h                                   |   4
-rw-r--r--  runtime/subtype_check_bits.h                    |  30
-rw-r--r--  runtime/subtype_check_bits_and_status.h         |  38
-rw-r--r--  runtime/subtype_check_info.h                    |  15
-rw-r--r--  runtime/subtype_check_info_test.cc              |  56
16 files changed, 154 insertions(+), 136 deletions(-)
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 13886b32b3..28f481670c 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -2097,13 +2097,17 @@ void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCod
                                                                      Register class_reg) {
   UseScratchRegisterScope temps(GetVIXLAssembler());
   Register temp = temps.AcquireW();
-  size_t status_offset = mirror::Class::StatusOffset().SizeValue();
+  constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
+  const size_t status_byte_offset =
+      mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
+  constexpr uint32_t shifted_initialized_value =
+      enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte);
   // Even if the initialized flag is set, we need to ensure consistent memory ordering.
   // TODO(vixl): Let the MacroAssembler handle MemOperand.
-  __ Add(temp, class_reg, status_offset);
+  __ Add(temp, class_reg, status_byte_offset);
   __ Ldarb(temp, HeapOperand(temp));
-  __ Cmp(temp, enum_cast<>(ClassStatus::kInitialized));
+  __ Cmp(temp, shifted_initialized_value);
   __ B(lo, slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 7c6a5fde40..f1ad4e187e 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -7172,11 +7172,14 @@ void InstructionCodeGeneratorARMVIXL::GenerateClassInitializationCheck(
     LoadClassSlowPathARMVIXL* slow_path, vixl32::Register class_reg) {
   UseScratchRegisterScope temps(GetVIXLAssembler());
   vixl32::Register temp = temps.Acquire();
-  GetAssembler()->LoadFromOffset(kLoadUnsignedByte,
-                                 temp,
-                                 class_reg,
-                                 mirror::Class::StatusOffset().Int32Value());
-  __ Cmp(temp, enum_cast<>(ClassStatus::kInitialized));
+  constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
+  const size_t status_byte_offset =
+      mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
+  constexpr uint32_t shifted_initialized_value =
+      enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte);
+
+  GetAssembler()->LoadFromOffset(kLoadUnsignedByte, temp, class_reg, status_byte_offset);
+  __ Cmp(temp, shifted_initialized_value);
   __ B(lo, slow_path->GetEntryLabel());
   // Even if the initialized flag is set, we may be in a situation where caches are not synced
   // properly. Therefore, we do a memory fence.
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index ebe252a9c8..c8bd5d4fc8 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -1915,8 +1915,14 @@ void CodeGeneratorMIPS::GenerateInvokeRuntime(int32_t entry_point_offset, bool d
 void InstructionCodeGeneratorMIPS::GenerateClassInitializationCheck(SlowPathCodeMIPS* slow_path,
                                                                     Register class_reg) {
-  __ LoadFromOffset(kLoadUnsignedByte, TMP, class_reg, mirror::Class::StatusOffset().Int32Value());
-  __ LoadConst32(AT, enum_cast<>(ClassStatus::kInitialized));
+  constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
+  const size_t status_byte_offset =
+      mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
+  constexpr uint32_t shifted_initialized_value =
+      enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte);
+
+  __ LoadFromOffset(kLoadUnsignedByte, TMP, class_reg, status_byte_offset);
+  __ LoadConst32(AT, shifted_initialized_value);
   __ Bltu(TMP, AT, slow_path->GetEntryLabel());
   // Even if the initialized flag is set, we need to ensure consistent memory ordering.
   __ Sync(0);
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 3ea7b827bb..bbdc3be5c1 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -1761,8 +1761,14 @@ void CodeGeneratorMIPS64::GenerateInvokeRuntime(int32_t entry_point_offset) {
 void InstructionCodeGeneratorMIPS64::GenerateClassInitializationCheck(SlowPathCodeMIPS64* slow_path,
                                                                       GpuRegister class_reg) {
-  __ LoadFromOffset(kLoadUnsignedByte, TMP, class_reg, mirror::Class::StatusOffset().Int32Value());
-  __ LoadConst32(AT, enum_cast<>(ClassStatus::kInitialized));
+  constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
+  const size_t status_byte_offset =
+      mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
+  constexpr uint32_t shifted_initialized_value =
+      enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte);
+
+  __ LoadFromOffset(kLoadUnsignedByte, TMP, class_reg, status_byte_offset);
+  __ LoadConst32(AT, shifted_initialized_value);
   __ Bltuc(TMP, AT, slow_path->GetEntryLabel());
   // Even if the initialized flag is set, we need to ensure consistent memory ordering.
   __ Sync(0);
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 68532386e1..537e97aacf 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -6219,8 +6219,13 @@ void InstructionCodeGeneratorX86::VisitClinitCheck(HClinitCheck* check) {
 void InstructionCodeGeneratorX86::GenerateClassInitializationCheck(
     SlowPathCode* slow_path, Register class_reg) {
-  __ cmpb(Address(class_reg, mirror::Class::StatusOffset().Int32Value()),
-          Immediate(enum_cast<>(ClassStatus::kInitialized)));
+  constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
+  const size_t status_byte_offset =
+      mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
+  constexpr uint32_t shifted_initialized_value =
+      enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte);
+
+  __ cmpb(Address(class_reg, status_byte_offset), Immediate(shifted_initialized_value));
   __ j(kBelow, slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
   // No need for memory fence, thanks to the X86 memory model.
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 1f8d822507..4a6428592e 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -5425,8 +5425,13 @@ void ParallelMoveResolverX86_64::RestoreScratch(int reg) {
 void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck(
     SlowPathCode* slow_path, CpuRegister class_reg) {
-  __ cmpb(Address(class_reg, mirror::Class::StatusOffset().Int32Value()),
-          Immediate(enum_cast<>(ClassStatus::kInitialized)));
+  constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
+  const size_t status_byte_offset =
+      mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
+  constexpr uint32_t shifted_initialized_value =
+      enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte);
+
+  __ cmpb(Address(class_reg, status_byte_offset), Immediate(shifted_initialized_value));
   __ j(kBelow, slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
   // No need for memory fence, thanks to the x86-64 memory model.
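
All six backends above derive the same two constants. A compact
standalone sketch of that arithmetic, with the bit sizes from this CL
inlined (and assuming the little-endian targets ART generates code for
here):

#include <cstddef>

constexpr size_t kBitsPerByte = 8u;
// SubtypeCheckBits::BitStructSizeOf(): 12 + 4 + 11 bitstring + 1 overflow.
constexpr size_t status_lsb_position = 28u;

constexpr size_t status_byte_offset = status_lsb_position / kBitsPerByte;
constexpr size_t status_shift = status_lsb_position % kBitsPerByte;

// On little-endian, byte 3 of the 32-bit field is its most significant
// byte, so a single unsigned-byte load reads the status nibble.
static_assert(status_byte_offset == 3u, "load StatusOffset() + 3");
static_assert(status_shift == 4u, "compare against kInitialized << 4");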
diff --git a/runtime/base/bit_string.h b/runtime/base/bit_string.h
index bfbe8eaf71..7d9fb70de7 100644
--- a/runtime/base/bit_string.h
+++ b/runtime/base/bit_string.h
@@ -114,13 +114,13 @@ inline std::ostream& operator<<(std::ostream& os, const BitStringChar& bc) {
 /**
  * BitString
  *
- * lsb (least significant bit)                                          msb
- * +------------+------------+------------+-----+------------+
- * |            |            |            |     |            |
- * |   Char0    |   Char1    |   Char2    | ... |   CharN    |
- * |            |            |            |     |            |
- * +------------+------------+------------+-----+------------+
- * <- len[0] ->  <- len[1] ->  <- len[2] ->  ...  <- len[N] ->
+ * MSB (most significant bit)                                          LSB
+ * +------------+-----+------------+------------+------------+
+ * |            |     |            |            |            |
+ * |   CharN    | ... |   Char2    |   Char1    |   Char0    |
+ * |            |     |            |            |            |
+ * +------------+-----+------------+------------+------------+
+ * <- len[N] ->  ...  <- len[2] ->  <- len[1] ->  <- len[0] ->
  *
  * Stores up to "N+1" characters in a subset of a machine word. Each character has a different
  * bitlength, as defined by len[pos]. This BitString can be nested inside of a BitStruct
@@ -145,7 +145,7 @@ struct BitString {
   // As this is meant to be used only with "SubtypeCheckInfo",
   // the bitlengths and the maximum string length is tuned by maximizing the coverage of "Assigned"
   // bitstrings for instance-of and check-cast targets during Optimizing compilation.
-  static constexpr size_t kBitSizeAtPosition[] = {12, 3, 8};  // len[] from header docs.
+  static constexpr size_t kBitSizeAtPosition[] = {12, 4, 11};  // len[] from header docs.
   static constexpr size_t kCapacity = arraysize(kBitSizeAtPosition);  // MaxBitstringLen above.
 
   // How many bits are needed to represent BitString[0..position)?
@@ -165,8 +165,7 @@ struct BitString {
   // (e.g. to use with BitField{Insert,Extract,Clear}.)
   static constexpr size_t GetLsbForPosition(size_t position) {
     DCHECK_GE(kCapacity, position);
-    constexpr size_t kMaximumBitLength = GetBitLengthTotalAtPosition(kCapacity);
-    return kMaximumBitLength - GetBitLengthTotalAtPosition(position + 1u);
+    return GetBitLengthTotalAtPosition(position);
   }
 
   // How many bits are needed for a BitStringChar at the position?
@@ -183,9 +182,7 @@ struct BitString {
   BitStringChar operator[](size_t idx) const {
     DCHECK_LT(idx, kCapacity);
-    StorageType data =
-        BitFieldExtract(storage_,
-                        GetLsbForPosition(idx), kBitSizeAtPosition[idx]);
+    StorageType data = BitFieldExtract(storage_, GetLsbForPosition(idx), kBitSizeAtPosition[idx]);
     return BitStringChar(data, kBitSizeAtPosition[idx]);
   }
 
@@ -259,17 +256,10 @@ struct BitString {
     DCHECK_GE(kCapacity, end);
     BitString copy = *this;
 
-    size_t bit_size = 0;
-    for (size_t idx = end; idx < kCapacity; ++idx) {
-      bit_size += kBitSizeAtPosition[idx];
-    }
-    // TODO: precompute above table.
-
-    if (bit_size > 0) {
-      StorageType data =
-          BitFieldClear(copy.storage_,
-                        GetLsbForPosition(kCapacity),
-                        bit_size);
+    if (end < kCapacity) {
+      size_t lsb = GetLsbForPosition(end);
+      size_t bit_size = GetLsbForPosition(kCapacity) - lsb;
+      StorageType data = BitFieldClear(copy.storage_, lsb, bit_size);
       copy.storage_ = data;
     }
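
Under the new layout, GetLsbForPosition(i) is simply the prefix sum of
the lengths below position i, and each character sits at bits
[GetLsbForPosition(i), GetLsbForPosition(i) + len[i]). A standalone
approximation using plain shifts and masks instead of ART's BitField
helpers ({12, 4, 11} are the lengths set in this CL):

#include <cstddef>
#include <cstdint>

constexpr size_t kBitSizeAtPosition[] = {12, 4, 11};

constexpr size_t GetLsbForPosition(size_t position) {
  size_t lsb = 0;
  for (size_t i = 0; i < position; ++i) {
    lsb += kBitSizeAtPosition[i];  // characters are packed LSB-first
  }
  return lsb;
}

constexpr uint32_t ExtractChar(uint32_t storage, size_t idx) {
  uint32_t mask = (uint32_t{1} << kBitSizeAtPosition[idx]) - 1u;
  return (storage >> GetLsbForPosition(idx)) & mask;
}

static_assert(GetLsbForPosition(1) == 12u, "Char1 follows Char0");
static_assert(GetLsbForPosition(2) == 16u, "Char2 now starts at a 16-bit boundary");
static_assert(ExtractChar(0x1ffffu, 1) == 0xfu, "Char1 is the 4 bits above Char0");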
diff --git a/runtime/base/bit_string_test.cc b/runtime/base/bit_string_test.cc
index 96aa154ef3..23274e3f2f 100644
--- a/runtime/base/bit_string_test.cc
+++ b/runtime/base/bit_string_test.cc
@@ -65,7 +65,7 @@ size_t AsUint(const T& value) {
   return uint_value;
 }
 
-// Make max bitstring, e.g. BitString[4095,7,255] for {12,3,8}
+// Make max bitstring, e.g. BitString[4095,15,2047] for {12,4,11}
 template <size_t kCount = BitString::kCapacity>
 BitString MakeBitStringMax() {
   BitString bs{};
@@ -87,15 +87,14 @@ BitString SetBitStringCharAt(BitString bit_string, size_t i, size_t val) {
 #define EXPECT_BITSTRING_STR(expected_str, actual_value) \
   EXPECT_STREQ((expected_str), Stringify((actual_value)).c_str())
 
+// TODO: Consider removing this test, it's kind of replicating the logic in GetLsbForPosition().
 TEST(InstanceOfBitString, GetLsbForPosition) {
   ASSERT_LE(3u, BitString::kCapacity);  // Test will fail if kCapacity is not at least 3. Update it.
-  EXPECT_EQ(0u, BitString::GetLsbForPosition(BitString::kCapacity - 1u));
-  EXPECT_EQ(BitString::kBitSizeAtPosition[BitString::kCapacity - 1u],
-            BitString::GetLsbForPosition(BitString::kCapacity - 2u));
-  EXPECT_EQ(BitString::kBitSizeAtPosition[BitString::kCapacity - 1u] +
-                BitString::kBitSizeAtPosition[BitString::kCapacity - 2u],
-            BitString::GetLsbForPosition(BitString::kCapacity - 3u));
+  EXPECT_EQ(0u, BitString::GetLsbForPosition(0u));
+  EXPECT_EQ(BitString::kBitSizeAtPosition[0u], BitString::GetLsbForPosition(1u));
+  EXPECT_EQ(BitString::kBitSizeAtPosition[0u] + BitString::kBitSizeAtPosition[1u],
+            BitString::GetLsbForPosition(2u));
 }
 
 TEST(InstanceOfBitString, ToString) {
@@ -126,8 +125,8 @@ TEST(InstanceOfBitString, ReadWrite) {
   // Each maximal value should be tested here for each position.
   uint32_t max_bitstring_ints[] = {
       MaxInt<uint32_t>(12),
-      MaxInt<uint32_t>(3),
-      MaxInt<uint32_t>(8),
+      MaxInt<uint32_t>(4),
+      MaxInt<uint32_t>(11),
   };
 
   // Update tests if changing the tuning values above.
@@ -151,14 +150,13 @@ constexpr auto MaxForPos() {
 }
 
 TEST(InstanceOfBitString, MemoryRepresentation) {
-  // Verify that the lower positions are stored in more significant bits.
+  // Verify that the lower positions are stored in less significant bits.
   BitString bs = MakeBitString({MaxForPos<0>(), MaxForPos<1>()});
   BitString::StorageType as_int = static_cast<BitString::StorageType>(bs);
 
-  // Below tests assumes the capacity is 3. Update if it this changes.
-  ASSERT_EQ(3u, BitString::kCapacity);
-  EXPECT_EQ(MaxForPos<0>() << (BitString::kBitSizeAtPosition[2] + BitString::kBitSizeAtPosition[1]) |
-                (MaxForPos<1>() << BitString::kBitSizeAtPosition[2]),
-            as_int);
+  // Below tests assumes the capacity is at least 3.
+  ASSERT_LE(3u, BitString::kCapacity);
+  EXPECT_EQ((MaxForPos<0>() << 0) | (MaxForPos<1>() << BitString::kBitSizeAtPosition[0]),
+            as_int);
 }
diff --git a/runtime/base/bit_utils.h b/runtime/base/bit_utils.h
index 34cddbff6a..d2a99f1a39 100644
--- a/runtime/base/bit_utils.h
+++ b/runtime/base/bit_utils.h
@@ -46,10 +46,14 @@ template<typename T>
 constexpr int CLZ(T x) {
   static_assert(std::is_integral<T>::value, "T must be integral");
   static_assert(std::is_unsigned<T>::value, "T must be unsigned");
-  static_assert(sizeof(T) <= sizeof(long long),  // NOLINT [runtime/int] [4]
-                "T too large, must be smaller than long long");
+  static_assert(std::numeric_limits<T>::radix == 2, "Unexpected radix!");
+  static_assert(sizeof(T) == sizeof(uint64_t) || sizeof(T) <= sizeof(uint32_t),
+                "Unsupported sizeof(T)");
   DCHECK_NE(x, 0u);
-  return (sizeof(T) == sizeof(uint32_t)) ? __builtin_clz(x) : __builtin_clzll(x);
+  constexpr bool is_64_bit = (sizeof(T) == sizeof(uint64_t));
+  constexpr size_t adjustment =
+      is_64_bit ? 0u : std::numeric_limits<uint32_t>::digits - std::numeric_limits<T>::digits;
+  return is_64_bit ? __builtin_clzll(x) : __builtin_clz(x) - adjustment;
 }
 
 // Similar to CLZ except that on zero input it returns bitwidth and supports signed integers.
@@ -65,10 +69,10 @@ constexpr int CTZ(T x) {
   static_assert(std::is_integral<T>::value, "T must be integral");
   // It is not unreasonable to ask for trailing zeros in a negative number. As such, do not check
   // that T is an unsigned type.
-  static_assert(sizeof(T) <= sizeof(long long),  // NOLINT [runtime/int] [4]
-                "T too large, must be smaller than long long");
+  static_assert(sizeof(T) == sizeof(uint64_t) || sizeof(T) <= sizeof(uint32_t),
+                "Unsupported sizeof(T)");
   DCHECK_NE(x, static_cast<T>(0));
-  return (sizeof(T) == sizeof(uint32_t)) ? __builtin_ctz(x) : __builtin_ctzll(x);
+  return (sizeof(T) == sizeof(uint64_t)) ? __builtin_ctzll(x) : __builtin_ctz(x);
 }
 
 // Similar to CTZ except that on zero input it returns bitwidth and supports signed integers.
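
The CLZ rewrite matters because __builtin_clz promotes arguments
narrower than unsigned int, over-counting the leading zeros of narrow
types; the digits-based adjustment cancels that. A self-contained
sketch mirroring the patched logic:

#include <cstddef>
#include <cstdint>
#include <limits>
#include <type_traits>

template <typename T>
constexpr int Clz(T x) {
  static_assert(std::is_unsigned<T>::value, "T must be unsigned");
  constexpr bool is_64_bit = (sizeof(T) == sizeof(uint64_t));
  // For uint8_t, __builtin_clz sees a 32-bit value: subtract 32 - 8 = 24.
  constexpr size_t adjustment =
      is_64_bit ? 0u : std::numeric_limits<uint32_t>::digits - std::numeric_limits<T>::digits;
  return is_64_bit ? __builtin_clzll(x) : __builtin_clz(x) - static_cast<int>(adjustment);
}

static_assert(Clz<uint8_t>(0x80) == 0, "top bit of an 8-bit value is set");
static_assert(Clz<uint32_t>(1u) == 31, "no adjustment for full-width types");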
diff --git a/runtime/image.cc b/runtime/image.cc
index b9d955c08c..dd0c1487c0 100644
--- a/runtime/image.cc
+++ b/runtime/image.cc
@@ -26,7 +26,7 @@ namespace art {
 
 const uint8_t ImageHeader::kImageMagic[] = { 'a', 'r', 't', '\n' };
-const uint8_t ImageHeader::kImageVersion[] = { '0', '5', '2', '\0' };  // 4-bit ClassStatus.
+const uint8_t ImageHeader::kImageVersion[] = { '0', '5', '3', '\0' };  // ClassStatus in high bits.
 
 ImageHeader::ImageHeader(uint32_t image_begin,
                          uint32_t image_size,
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 55c588930e..84b032620f 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -81,9 +81,9 @@ class MANAGED Class FINAL : public Object {
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   ClassStatus GetStatus() REQUIRES_SHARED(Locks::mutator_lock_) {
     // Avoid including "subtype_check_bits_and_status.h" to get the field.
-    // The ClassStatus is always in the least-significant bits of status_.
+    // The ClassStatus is always in the 4 most-significant bits of status_.
     return enum_cast<ClassStatus>(
-        static_cast<uint32_t>(GetField32Volatile<kVerifyFlags>(StatusOffset())) & 0xff);
+        static_cast<uint32_t>(GetField32Volatile<kVerifyFlags>(StatusOffset())) >> (32 - 4));
   }
 
   // This is static because 'this' may be moved by GC.
diff --git a/runtime/oat.h b/runtime/oat.h
index 6d4f18bdb1..36099b93dc 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,8 +32,8 @@ class InstructionSetFeatures;
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  // Last oat version changed reason: 4-bit ClassStatus.
-  static constexpr uint8_t kOatVersion[] = { '1', '3', '6', '\0' };
+  // Last oat version changed reason: ClassStatus in high bits.
+  static constexpr uint8_t kOatVersion[] = { '1', '3', '7', '\0' };
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/subtype_check_bits.h b/runtime/subtype_check_bits.h
index 4305ff849a..462f203978 100644
--- a/runtime/subtype_check_bits.h
+++ b/runtime/subtype_check_bits.h
@@ -26,22 +26,22 @@ namespace art {
 /**
  * The SubtypeCheckBits memory layout (in bits):
 *
- *                                     Variable
- *                                         |
- *  <---- up to 23 bits ---->              v      +---> 1 bit
- *                                                |
- *  +-------------------------+--------+-----------+---++
- *  |         Bitstring                |           |    |
- *  +-------------------------+--------+-----------+    |
- *  |       Path To Root      |  Next  | (unused)  | OF |
- *  +---+---------------------+--------+           |    |
- *  |   |   |   ...   |   |   |        | (0....0)  |    |
- *  +---+---------------------+--------+-----------+----+
- *  MSB                                                LSB
+ *  1 bit             Variable
+ *    |                   |
+ *    v                   v       <---- up to 23 bits ---->
+ *
+ *  +----+-----------+--------+-------------------------+
+ *  |    |          Bitstring                           |
+ *  +    +-----------+--------+-------------------------+
+ *  | OF | (unused)  |  Next  |       Path To Root      |
+ *  +    |           |--------+----+----------+----+----+
+ *  |    | (0....0)  |        |    |   ...    |    |    |
+ *  +----+-----------+--------+----+----------+----+----+
+ *  MSB (most significant bit)                         LSB
 *
 * The bitstring takes up to 23 bits; anything exceeding that is truncated:
 * - Path To Root is a list of chars, encoded as a BitString:
- *     starting at the root (in MSB), each character is a sibling index unique to the parent,
+ *     starting at the root (in LSB), each character is a sibling index unique to the parent,
 *     Paths longer than BitString::kCapacity are truncated to fit within the BitString.
 * - Next is a single BitStringChar (immediatelly following Path To Root)
 *     When new children are assigned paths, they get allocated the parent's Next value.
@@ -57,8 +57,8 @@ namespace art {
  * See subtype_check.h and subtype_check_info.h for more details.
  */
 BITSTRUCT_DEFINE_START(SubtypeCheckBits, /*size*/ BitString::BitStructSizeOf() + 1u)
-  BitStructUint</*lsb*/0, /*width*/1> overflow_;
-  BitStructField<BitString, /*lsb*/1> bitstring_;
+  BitStructField<BitString, /*lsb*/ 0> bitstring_;
+  BitStructUint</*lsb*/ BitString::BitStructSizeOf(), /*width*/ 1> overflow_;
 BITSTRUCT_DEFINE_END(SubtypeCheckBits);
 
 }  // namespace art
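
The reordered bit-struct puts the bitstring at bit 0 and the overflow
flag directly above it at bit 27. A plain bit-field approximation (the
layout of C++ bit-fields is implementation-defined, but GCC and Clang
on ART's little-endian targets allocate the first member in the least
significant bits):

#include <cstdint>

struct SubtypeCheckBitsApprox {
  uint32_t bitstring_ : 27;  // Path To Root + Next, packed LSB-first (12 + 4 + 11).
  uint32_t overflow_  : 1;   // OF: set once the path no longer fits and was truncated.
};

static_assert(sizeof(SubtypeCheckBitsApprox) == sizeof(uint32_t),
              "27 + 1 bits fit in a single 32-bit word");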
diff --git a/runtime/subtype_check_bits_and_status.h b/runtime/subtype_check_bits_and_status.h
index 11cb9b9d21..321a723985 100644
--- a/runtime/subtype_check_bits_and_status.h
+++ b/runtime/subtype_check_bits_and_status.h
@@ -19,6 +19,7 @@
 
 #include "base/bit_struct.h"
 #include "base/bit_utils.h"
+#include "base/casts.h"
 #include "class_status.h"
 #include "subtype_check_bits.h"
 
@@ -36,13 +37,13 @@ static constexpr size_t NonNumericBitSizeOf() {
 }
 
 /**
- *  MSB                                                                 LSB
- *  +---------------------------------------------------+---------------+
- *  |                                                   |               |
- *  |                 SubtypeCheckBits                  |  ClassStatus  |
- *  |                                                   |               |
- *  +---------------------------------------------------+---------------+
- *            <----- 24 bits ----->                      <-- 8 bits -->
+ *  MSB (most significant bit)                                          LSB
+ *  +---------------+---------------------------------------------------+
+ *  |               |                                                   |
+ *  |  ClassStatus  |                 SubtypeCheckBits                  |
+ *  |               |                                                   |
+ *  +---------------+---------------------------------------------------+
+ *   <-- 4 bits -->            <----- 28 bits ----->
 *
 * Invariants:
 *
@@ -53,20 +54,25 @@ static constexpr size_t NonNumericBitSizeOf() {
  * This enables a highly efficient path comparison between any two labels:
  *
  *    src <: target :=
- *        src >> (32 - len(path-to-root(target))) == target >> (32 - len(path-to-root(target))
+ *        (src & mask) == (target & mask) where mask := (1u << len(path-to-root(target)) - 1u
  *
- * In the above example, the RHS operands are a function of the depth. Since the target
- * is known at compile time, it becomes:
- *
- *    src >> #imm_target_shift == #imm
+ * In the above example, the `len()` (and thus `mask`) is a function of the depth.
+ * Since the target is known at compile time, it becomes
+ *    (src & #imm_mask) == #imm
+ * or
+ *    ((src - #imm) << #imm_shift_to_remove_high_bits) == 0
+ * or a similar expression chosen for the best performance or code size.
 *
 * (This requires that path-to-root in `target` is not truncated, i.e. it is in the Assigned state).
 */
-static constexpr size_t kClassStatusBitSize = 8u;  // NonNumericBitSizeOf<ClassStatus>()
+static constexpr size_t kClassStatusBitSize = MinimumBitsToStore(enum_cast<>(ClassStatus::kLast));
+static_assert(kClassStatusBitSize == 4u, "ClassStatus should need 4 bits.");
 BITSTRUCT_DEFINE_START(SubtypeCheckBitsAndStatus, BitSizeOf<BitString::StorageType>())
-  BitStructField<ClassStatus, /*lsb*/0, /*width*/kClassStatusBitSize> status_;
-  BitStructField<SubtypeCheckBits, /*lsb*/kClassStatusBitSize> subtype_check_info_;
-  BitStructInt</*lsb*/0, /*width*/BitSizeOf<BitString::StorageType>()> int32_alias_;
+  BitStructField<SubtypeCheckBits, /*lsb*/ 0> subtype_check_info_;
+  BitStructField<ClassStatus,
+                 /*lsb*/ SubtypeCheckBits::BitStructSizeOf(),
+                 /*width*/ kClassStatusBitSize> status_;
+  BitStructInt</*lsb*/ 0, /*width*/ BitSizeOf<BitString::StorageType>()> int32_alias_;
 BITSTRUCT_DEFINE_END(SubtypeCheckBitsAndStatus);
 
 // Use the spare alignment from "ClassStatus" to store all the new SubtypeCheckInfo data.
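
With both labels in the low bits, the subtype fast path from the
comment above is a masked equality. A hedged sketch (standalone; the
real SubtypeCheck state machine and depth handling are elided, and the
bit length must stay below 32):

#include <cstdint>

// src <: target iff target's assigned path-to-root is a prefix of src's.
constexpr bool IsSubtypeFastPath(uint32_t src_bits,
                                 uint32_t target_bits,
                                 uint32_t target_path_bitlength) {
  // mask := (1u << len(path-to-root(target))) - 1u
  uint32_t mask = (uint32_t{1} << target_path_bitlength) - 1u;
  return (src_bits & mask) == (target_bits & mask);
}

// A target at depth 1 compares only the first character's 12 bits.
static_assert(IsSubtypeFastPath(0x5123u, 0x0123u, 12u), "same 12-bit prefix");
static_assert(!IsSubtypeFastPath(0x5124u, 0x0123u, 12u), "different prefix");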
diff --git a/runtime/subtype_check_info.h b/runtime/subtype_check_info.h
index 61d590bd59..08db77030e 100644
--- a/runtime/subtype_check_info.h
+++ b/runtime/subtype_check_info.h
@@ -296,8 +296,7 @@ struct SubtypeCheckInfo {
   BitString::StorageType GetEncodedPathToRoot() const {
     BitString::StorageType data = static_cast<BitString::StorageType>(GetPathToRoot());
     // Bit strings are logically in the least-significant memory.
-    // Shift it so the bits are all most-significant.
-    return data << (BitSizeOf(data) - BitStructSizeOf<BitString>());
+    return data;
   }
 
   // Retrieve the path to root bitstring mask as a plain uintN_t that is amenable to
@@ -305,17 +304,7 @@ struct SubtypeCheckInfo {
   BitString::StorageType GetEncodedPathToRootMask() const {
     size_t num_bitchars = GetSafeDepth();
     size_t bitlength = BitString::GetBitLengthTotalAtPosition(num_bitchars);
-
-    BitString::StorageType mask_all =
-        std::numeric_limits<BitString::StorageType>::max();
-    BitString::StorageType mask_lsb =
-        MaskLeastSignificant<BitString::StorageType>(
-            BitSizeOf<BitString::StorageType>() - bitlength);
-
-    BitString::StorageType result = mask_all & ~mask_lsb;
-
-    // TODO: refactor above code into MaskMostSignificant?
-    return result;
+    return MaskLeastSignificant<BitString::StorageType>(bitlength);
   }
 
   // Get the "Next" bitchar, assuming that there is one to get.
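
GetEncodedPathToRootMask now reduces to a single least-significant-bits
mask whose width is the bit length of the assigned prefix. With the
{12, 4, 11} tuning the widths per depth are 0, 12, 16 and 27 bits, so
the masks come out as follows (quick sanity check, not ART code):

#include <cstdint>

constexpr uint32_t MaskForBitlength(uint32_t bits) {
  return (uint32_t{1} << bits) - 1u;  // bits < 32 here
}

static_assert(MaskForBitlength(12) == 0x00000fffu, "depth 1: Char0 only");
static_assert(MaskForBitlength(16) == 0x0000ffffu, "depth 2: 12 + 4 bits");
static_assert(MaskForBitlength(27) == 0x07ffffffu, "depth 3: the whole bitstring");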
diff --git a/runtime/subtype_check_info_test.cc b/runtime/subtype_check_info_test.cc
index 338d75a285..91fcc07d65 100644
--- a/runtime/subtype_check_info_test.cc
+++ b/runtime/subtype_check_info_test.cc
@@ -65,7 +65,7 @@ size_t AsUint(const T& value) {
   return uint_value;
 }
 
-// Make max bistring, e.g. BitString[4095,7,255] for {12,3,8}
+// Make max bistring, e.g. BitString[4095,15,2047] for {12,4,11}
 template <size_t kCount = BitString::kCapacity>
 BitString MakeBitStringMax() {
   BitString bs{};
@@ -258,60 +258,62 @@ size_t LenForPos() { return BitString::GetBitLengthTotalAtPosition(kPos); }
 
 TEST_F(SubtypeCheckInfoTest, EncodedPathToRoot) {
   using StorageType = BitString::StorageType;
 
-  SubtypeCheckInfo io =
+  SubtypeCheckInfo sci =
       MakeSubtypeCheckInfo(/*path_to_root*/MakeBitStringMax(),
                            /*next*/BitStringChar{},
                            /*overflow*/false,
                            /*depth*/BitString::kCapacity);
-  // 0b11111...000 where MSB == 1, and leading 1s = the maximum bitstring representation.
-  EXPECT_EQ(MaxInt<StorageType>(LenForPos()) << (BitSizeOf<StorageType>() - LenForPos()),
-            io.GetEncodedPathToRoot());
-
-  EXPECT_EQ(MaxInt<StorageType>(LenForPos()) << (BitSizeOf<StorageType>() - LenForPos()),
-            io.GetEncodedPathToRootMask());
-
-  // 0b11111...000 where MSB == 1, and leading 1s = the maximum bitstring representation.
+  // 0b000...111 where LSB == 1, and trailing 1s = the maximum bitstring representation.
+  EXPECT_EQ(MaxInt<StorageType>(LenForPos()), sci.GetEncodedPathToRoot());
 
   // The rest of this test is written assuming kCapacity == 3 for convenience.
   // Please update the test if this changes.
   ASSERT_EQ(3u, BitString::kCapacity);
   ASSERT_EQ(12u, BitString::kBitSizeAtPosition[0]);
-  ASSERT_EQ(3u, BitString::kBitSizeAtPosition[1]);
-  ASSERT_EQ(8u, BitString::kBitSizeAtPosition[2]);
+  ASSERT_EQ(4u, BitString::kBitSizeAtPosition[1]);
+  ASSERT_EQ(11u, BitString::kBitSizeAtPosition[2]);
 
-  SubtypeCheckInfo io2 =
+  SubtypeCheckInfo sci2 =
       MakeSubtypeCheckInfoUnchecked(MakeBitStringMax<2u>(),
                                     /*overflow*/false,
                                     /*depth*/BitString::kCapacity);
 
-#define MAKE_ENCODED_PATH(pos0, pos1, pos2) (((pos0) << 3u << 8u << 9u) | ((pos1) << 8u << 9u) | ((pos2) << 9u))
+#define MAKE_ENCODED_PATH(pos0, pos1, pos2) \
+    (((pos0) << 0) | \
+     ((pos1) << BitString::kBitSizeAtPosition[0]) | \
+     ((pos2) << (BitString::kBitSizeAtPosition[0] + BitString::kBitSizeAtPosition[1])))
 
-  EXPECT_EQ(MAKE_ENCODED_PATH(MaxInt<BitString::StorageType>(12), 0b111, 0b0), io2.GetEncodedPathToRoot());
-  EXPECT_EQ(MAKE_ENCODED_PATH(MaxInt<BitString::StorageType>(12), 0b111, 0b11111111), io2.GetEncodedPathToRootMask());
+  EXPECT_EQ(MAKE_ENCODED_PATH(MaxInt<BitString::StorageType>(12), 0b1111, 0b0),
+            sci2.GetEncodedPathToRoot());
+  EXPECT_EQ(MAKE_ENCODED_PATH(MaxInt<BitString::StorageType>(12), 0b1111, 0b11111111111),
+            sci2.GetEncodedPathToRootMask());
 
-  SubtypeCheckInfo io3 =
+  SubtypeCheckInfo sci3 =
      MakeSubtypeCheckInfoUnchecked(MakeBitStringMax<2u>(),
                                    /*overflow*/false,
                                    /*depth*/BitString::kCapacity - 1u);
 
-  EXPECT_EQ(MAKE_ENCODED_PATH(MaxInt<BitString::StorageType>(12), 0b111, 0b0), io3.GetEncodedPathToRoot());
-  EXPECT_EQ(MAKE_ENCODED_PATH(MaxInt<BitString::StorageType>(12), 0b111, 0b0), io3.GetEncodedPathToRootMask());
+  EXPECT_EQ(MAKE_ENCODED_PATH(MaxInt<BitString::StorageType>(12), 0b1111, 0b0),
+            sci3.GetEncodedPathToRoot());
+  EXPECT_EQ(MAKE_ENCODED_PATH(MaxInt<BitString::StorageType>(12), 0b1111, 0b0),
+            sci3.GetEncodedPathToRootMask());
 
-  SubtypeCheckInfo io4 =
+  SubtypeCheckInfo sci4 =
      MakeSubtypeCheckInfoUnchecked(MakeBitString({0b1010101u}),
                                    /*overflow*/false,
                                    /*depth*/BitString::kCapacity - 2u);
 
-  EXPECT_EQ(MAKE_ENCODED_PATH(0b1010101u, 0b000, 0b0), io4.GetEncodedPathToRoot());
-  EXPECT_EQ(MAKE_ENCODED_PATH(MaxInt<BitString::StorageType>(12), 0b000, 0b0), io4.GetEncodedPathToRootMask());
+  EXPECT_EQ(MAKE_ENCODED_PATH(0b1010101u, 0b0000, 0b0), sci4.GetEncodedPathToRoot());
+  EXPECT_EQ(MAKE_ENCODED_PATH(MaxInt<BitString::StorageType>(12), 0b0000, 0b0),
+            sci4.GetEncodedPathToRootMask());
 }
 
 TEST_F(SubtypeCheckInfoTest, NewForRoot) {
-  SubtypeCheckInfo io = SubtypeCheckInfo::CreateRoot();
-  EXPECT_EQ(SubtypeCheckInfo::kAssigned, io.GetState());  // Root is always assigned.
-  EXPECT_EQ(0u, GetPathToRoot(io).Length());  // Root's path length is 0.
-  EXPECT_TRUE(HasNext(io));  // Root always has a "Next".
-  EXPECT_EQ(MakeBitStringChar(1u), io.GetNext());  // Next>=1 to disambiguate from Uninitialized.
+  SubtypeCheckInfo sci = SubtypeCheckInfo::CreateRoot();
+  EXPECT_EQ(SubtypeCheckInfo::kAssigned, sci.GetState());  // Root is always assigned.
+  EXPECT_EQ(0u, GetPathToRoot(sci).Length());  // Root's path length is 0.
+  EXPECT_TRUE(HasNext(sci));  // Root always has a "Next".
+  EXPECT_EQ(MakeBitStringChar(1u), sci.GetNext());  // Next>=1 to disambiguate from Uninitialized.
 }
 
 TEST_F(SubtypeCheckInfoTest, CopyCleared) {