ART: Arm32 packed-switch jump tables Add jump table support to the thumb2 assembler. Jump tables are a collection of labels for the case targets, and an anchor label denoting the position of the jump. Use the jump table support to implement packed-switch support for arm32. Add tests for BindTrackedLabel and JumpTable to the thumb2 assembler test. Bug: 24092914 Change-Id: I5c84f193dfebf9e07f48678efc8bd151bb1410dd

commit: 7cffc3b0004d32faffc552c0a59286f369b21504 [log] [tgz]
author: Andreas Gampe <agampe@google.com> Mon Oct 19 21:31:53 2015 -0700
committer: Andreas Gampe <agampe@google.com> Fri Oct 30 11:15:26 2015 -0700
tree: e3838b8ba2a782ed91ef8faa381362b7a686a32a
parent: 9e1b56f0e77aa5b6c72374b86d0cef58484ddcaa [diff]
diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc
index 807beda..68e3956 100644
--- a/compiler/utils/arm/assembler_arm.cc
+++ b/compiler/utils/arm/assembler_arm.cc

@@ -16,6 +16,8 @@
 
 #include "assembler_arm.h"
 
+#include <algorithm>
+
 #include "base/bit_utils.h"
 #include "base/logging.h"
 #include "entrypoints/quick/quick_entrypoints.h"
@@ -922,5 +924,24 @@
   return value | i << 26 | imm3 << 12 | a << 7;
 }
 
+void ArmAssembler::FinalizeTrackedLabels() {
+  if (!tracked_labels_.empty()) {
+    // This array should be sorted, as assembly is generated in linearized order. It isn't
+    // technically required, but GetAdjustedPosition() used in AdjustLabelPosition() can take
+    // advantage of it. So ensure that it's actually the case.
+    DCHECK(std::is_sorted(
+        tracked_labels_.begin(),
+        tracked_labels_.end(),
+        [](const Label* lhs, const Label* rhs) { return lhs->Position() < rhs->Position(); }));
+
+    Label* last_label = nullptr;  // Track duplicates, we must not adjust twice.
+    for (Label* label : tracked_labels_) {
+      DCHECK_NE(label, last_label);
+      AdjustLabelPosition(label);
+      last_label = label;
+    }
+  }
+}
+
 }  // namespace arm
 }  // namespace art

diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h
index d59bc6b..4a6e6d7 100644
--- a/compiler/utils/arm/assembler_arm.h
+++ b/compiler/utils/arm/assembler_arm.h

@@ -77,6 +77,45 @@
   DISALLOW_COPY_AND_ASSIGN(Literal);
 };
 
+// Jump table: table of labels emitted after the literals. Similar to literals.
+class JumpTable {
+ public:
+  explicit JumpTable(std::vector<Label*>&& labels)
+      : label_(), anchor_label_(), labels_(std::move(labels)) {
+  }
+
+  uint32_t GetSize() const {
+    return static_cast<uint32_t>(labels_.size()) * sizeof(uint32_t);
+  }
+
+  const std::vector<Label*>& GetData() const {
+    return labels_;
+  }
+
+  Label* GetLabel() {
+    return &label_;
+  }
+
+  const Label* GetLabel() const {
+    return &label_;
+  }
+
+  Label* GetAnchorLabel() {
+    return &anchor_label_;
+  }
+
+  const Label* GetAnchorLabel() const {
+    return &anchor_label_;
+  }
+
+ private:
+  Label label_;
+  Label anchor_label_;
+  std::vector<Label*> labels_;
+
+  DISALLOW_COPY_AND_ASSIGN(JumpTable);
+};
+
 class ShifterOperand {
  public:
   ShifterOperand() : type_(kUnknown), rm_(kNoRegister), rs_(kNoRegister),
@@ -685,6 +724,8 @@
     AddConstant(rd, rd, value, cond, set_cc);
   }
 
+  virtual void CmpConstant(Register rn, int32_t value, Condition cond = AL) = 0;
+
   // Load and Store. May clobber IP.
   virtual void LoadImmediate(Register rd, int32_t value, Condition cond = AL) = 0;
   void LoadSImmediate(SRegister sd, float value, Condition cond = AL) {
@@ -996,11 +1037,43 @@
     b(label);
   }
 
+  // Jump table support. This is split into three functions:
+  //
+  // * CreateJumpTable creates the internal metadata to track the jump targets, and emits code to
+  // load the base address of the jump table.
+  //
+  // * EmitJumpTableDispatch emits the code to actually jump, assuming that the right table value
+  // has been loaded into a register already.
+  //
+  // * FinalizeTables emits the jump table into the literal pool. This can only be called after the
+  // labels for the jump targets have been finalized.
+
+  // Create a jump table for the given labels that will be emitted when finalizing. Create a load
+  // sequence (or placeholder) that stores the base address into the given register. When the table
+  // is emitted, offsets will be relative to the location EmitJumpTableDispatch was called on (the
+  // anchor).
+  virtual JumpTable* CreateJumpTable(std::vector<Label*>&& labels, Register base_reg) = 0;
+
+  // Emit the jump-table jump, assuming that the right value was loaded into displacement_reg.
+  virtual void EmitJumpTableDispatch(JumpTable* jump_table, Register displacement_reg) = 0;
+
+  // Bind a Label that needs to be updated by the assembler in FinalizeCode() if its position
+  // changes due to branch/literal fixup.
+  void BindTrackedLabel(Label* label) {
+    Bind(label);
+    tracked_labels_.push_back(label);
+  }
+
  protected:
   // Returns whether or not the given register is used for passing parameters.
   static int RegisterCompare(const Register* reg1, const Register* reg2) {
     return *reg1 - *reg2;
   }
+
+  void FinalizeTrackedLabels();
+
+  // Tracked labels. Use a vector, as we need to sort before adjusting.
+  std::vector<Label*> tracked_labels_;
 };
 
 // Slowpath entered when Thread::Current()->_exception is non-null

diff --git a/compiler/utils/arm/assembler_arm32.cc b/compiler/utils/arm/assembler_arm32.cc
index 6e7c828..a7dbacd 100644
--- a/compiler/utils/arm/assembler_arm32.cc
+++ b/compiler/utils/arm/assembler_arm32.cc

@@ -1385,6 +1385,21 @@
   }
 }
 
+void Arm32Assembler::CmpConstant(Register rn, int32_t value, Condition cond) {
+  ShifterOperand shifter_op;
+  if (ShifterOperandCanHoldArm32(value, &shifter_op)) {
+    cmp(rn, shifter_op, cond);
+  } else if (ShifterOperandCanHoldArm32(~value, &shifter_op)) {
+    cmn(rn, shifter_op, cond);
+  } else {
+    movw(IP, Low16Bits(value), cond);
+    uint16_t value_high = High16Bits(value);
+    if (value_high != 0) {
+      movt(IP, value_high, cond);
+    }
+    cmp(rn, ShifterOperand(IP), cond);
+  }
+}
 
 void Arm32Assembler::LoadImmediate(Register rd, int32_t value, Condition cond) {
   ShifterOperand shifter_op;
@@ -1584,6 +1599,23 @@
   b(label, NE);
 }
 
+JumpTable* Arm32Assembler::CreateJumpTable(std::vector<Label*>&& labels ATTRIBUTE_UNUSED,
+                                           Register base_reg ATTRIBUTE_UNUSED) {
+  LOG(FATAL) << "CreateJumpTable is not supported on ARM32";
+  UNREACHABLE();
+}
+
+void Arm32Assembler::EmitJumpTableDispatch(JumpTable* jump_table ATTRIBUTE_UNUSED,
+                                           Register displacement_reg ATTRIBUTE_UNUSED) {
+  LOG(FATAL) << "EmitJumpTableDispatch is not supported on ARM32";
+  UNREACHABLE();
+}
+
+void Arm32Assembler::FinalizeCode() {
+  ArmAssembler::FinalizeCode();
+  // Currently the arm32 assembler does not support fixups, and thus no tracking. We must not call
+  // FinalizeTrackedLabels(), which would lead to an abort.
+}
 
 }  // namespace arm
 }  // namespace art

diff --git a/compiler/utils/arm/assembler_arm32.h b/compiler/utils/arm/assembler_arm32.h
index 4646538..5233dcb 100644
--- a/compiler/utils/arm/assembler_arm32.h
+++ b/compiler/utils/arm/assembler_arm32.h

@@ -261,6 +261,8 @@
   void AddConstant(Register rd, Register rn, int32_t value,
                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
+  void CmpConstant(Register rn, int32_t value, Condition cond = AL) OVERRIDE;
+
   // Load and Store. May clobber IP.
   void LoadImmediate(Register rd, int32_t value, Condition cond = AL) OVERRIDE;
   void MarkExceptionHandler(Label* label) OVERRIDE;
@@ -308,6 +310,11 @@
 
   void MemoryBarrier(ManagedRegister scratch) OVERRIDE;
 
+  JumpTable* CreateJumpTable(std::vector<Label*>&& labels, Register base_reg) OVERRIDE;
+  void EmitJumpTableDispatch(JumpTable* jump_table, Register displacement_reg) OVERRIDE;
+
+  void FinalizeCode() OVERRIDE;
+
  private:
   void EmitType01(Condition cond,
                   int type,

diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index cc87856..fb3aa1e 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc

@@ -92,7 +92,7 @@
   label->BindTo(bound_pc);
 }
 
-void Thumb2Assembler::BindLiterals() {
+uint32_t Thumb2Assembler::BindLiterals() {
   // We don't add the padding here, that's done only after adjusting the Fixup sizes.
   uint32_t code_size = buffer_.Size();
   for (Literal& lit : literals_) {
@@ -100,6 +100,15 @@
     BindLabel(label, code_size);
     code_size += lit.GetSize();
   }
+  return code_size;
+}
+
+void Thumb2Assembler::BindJumpTables(uint32_t code_size) {
+  for (JumpTable& table : jump_tables_) {
+    Label* label = table.GetLabel();
+    BindLabel(label, code_size);
+    code_size += table.GetSize();
+  }
 }
 
 void Thumb2Assembler::AdjustFixupIfNeeded(Fixup* fixup, uint32_t* current_code_size,
@@ -144,7 +153,7 @@
       AdjustFixupIfNeeded(fixup, &current_code_size, &fixups_to_recalculate);
     } while (!fixups_to_recalculate.empty());
 
-    if ((current_code_size & 2) != 0 && !literals_.empty()) {
+    if ((current_code_size & 2) != 0 && (!literals_.empty() || !jump_tables_.empty())) {
       // If we need to add padding before literals, this may just push some out of range,
       // so recalculate all load literals. This makes up for the fact that we don't mark
       // load literal as a dependency of all previous Fixups even though it actually is.
@@ -173,6 +182,13 @@
       label->Reinitialize();
       label->BindTo(old_position + literals_adjustment);
     }
+    for (JumpTable& table : jump_tables_) {
+      Label* label = table.GetLabel();
+      DCHECK(label->IsBound());
+      int old_position = label->Position();
+      label->Reinitialize();
+      label->BindTo(old_position + literals_adjustment);
+    }
   }
 
   return current_code_size;
@@ -229,6 +245,43 @@
   }
 }
 
+void Thumb2Assembler::EmitJumpTables() {
+  if (!jump_tables_.empty()) {
+    // Jump tables require 4 byte alignment. (We don't support byte and half-word jump tables.)
+    uint32_t code_size = buffer_.Size();
+    DCHECK_ALIGNED(code_size, 2);
+    if ((code_size & 2u) != 0u) {
+      Emit16(0);
+    }
+    for (JumpTable& table : jump_tables_) {
+      // Bulk ensure capacity, as this may be large.
+      size_t orig_size = buffer_.Size();
+      buffer_.ExtendCapacity(orig_size + table.GetSize());
+#ifndef NDEBUG
+      buffer_.has_ensured_capacity_ = true;
+#endif
+
+      DCHECK_EQ(static_cast<size_t>(table.GetLabel()->Position()), buffer_.Size());
+      int32_t anchor_position = table.GetAnchorLabel()->Position() + 4;
+
+      for (Label* target : table.GetData()) {
+        // Ensure that the label was tracked, so that it will have the right position.
+        DCHECK(std::find(tracked_labels_.begin(), tracked_labels_.end(), target) !=
+                   tracked_labels_.end());
+
+        int32_t offset = target->Position() - anchor_position;
+        buffer_.Emit<int32_t>(offset);
+      }
+
+#ifndef NDEBUG
+      buffer_.has_ensured_capacity_ = false;
+#endif
+      size_t new_size = buffer_.Size();
+      DCHECK_LE(new_size - orig_size, table.GetSize());
+    }
+  }
+}
+
 inline int16_t Thumb2Assembler::BEncoding16(int32_t offset, Condition cond) {
   DCHECK_ALIGNED(offset, 2);
   int16_t encoding = B15 | B14;
@@ -382,12 +435,34 @@
   return B31 | B30 | B29 | B28 | B27 | B23 | B22 | B20 | (rn << 16) | (rt << 12) | offset;
 }
 
+inline int16_t Thumb2Assembler::AdrEncoding16(Register rd, int32_t offset) {
+  DCHECK(IsUint<10>(offset));
+  DCHECK(IsAligned<4>(offset));
+  DCHECK(!IsHighRegister(rd));
+  return B15 | B13 | (rd << 8) | (offset >> 2);
+}
+
+inline int32_t Thumb2Assembler::AdrEncoding32(Register rd, int32_t offset) {
+  DCHECK(IsUint<12>(offset));
+  // Bit     26: offset[11]
+  // Bits 14-12: offset[10-8]
+  // Bits   7-0: offset[7-0]
+  int32_t immediate_mask =
+      ((offset & (1 << 11)) << (26 - 11)) |
+      ((offset & (7 << 8)) << (12 - 8)) |
+      (offset & 0xFF);
+  return B31 | B30 | B29 | B28 | B25 | B19 | B18 | B17 | B16 | (rd << 8) | immediate_mask;
+}
+
 void Thumb2Assembler::FinalizeCode() {
   ArmAssembler::FinalizeCode();
-  BindLiterals();
+  uint32_t size_after_literals = BindLiterals();
+  BindJumpTables(size_after_literals);
   uint32_t adjusted_code_size = AdjustFixups();
   EmitFixups(adjusted_code_size);
   EmitLiterals();
+  FinalizeTrackedLabels();
+  EmitJumpTables();
 }
 
 bool Thumb2Assembler::ShifterOperandCanAlwaysHold(uint32_t immediate) {
@@ -1770,6 +1845,15 @@
     case kLiteralFar:
       return 14u;
 
+    case kLiteralAddr1KiB:
+      return 2u;
+    case kLiteralAddr4KiB:
+      return 4u;
+    case kLiteralAddr64KiB:
+      return 6u;
+    case kLiteralAddrFar:
+      return 10u;
+
     case kLongOrFPLiteral1KiB:
       return 4u;
     case kLongOrFPLiteral256KiB:
@@ -1831,6 +1915,8 @@
     case kLiteral1KiB:
     case kLiteral4KiB:
     case kLongOrFPLiteral1KiB:
+    case kLiteralAddr1KiB:
+    case kLiteralAddr4KiB:
       DCHECK(diff >= 0 || (GetSize() == kLiteral1KiB && diff == -2));
       diff += LiteralPoolPaddingSize(current_code_size);
       // Load literal instructions round down the PC+4 to a multiple of 4, so if the PC
@@ -1843,12 +1929,14 @@
     case kLiteral1MiB:
     case kLiteral64KiB:
     case kLongOrFPLiteral256KiB:
+    case kLiteralAddr64KiB:
       DCHECK_GE(diff, 4);  // The target must be at least 4 bytes after the ADD rX, PC.
       diff -= 4;        // One extra 32-bit MOV.
       diff += LiteralPoolPaddingSize(current_code_size);
       break;
     case kLiteralFar:
     case kLongOrFPLiteralFar:
+    case kLiteralAddrFar:
       DCHECK_GE(diff, 8);  // The target must be at least 4 bytes after the ADD rX, PC.
       diff -= 8;        // Extra MOVW+MOVT; both 32-bit.
       diff += LiteralPoolPaddingSize(current_code_size);
@@ -1929,6 +2017,29 @@
       // This encoding can reach any target.
       break;
 
+    case kLiteralAddr1KiB:
+      DCHECK(!IsHighRegister(rn_));
+      if (IsUint<10>(GetOffset(current_code_size))) {
+        break;
+      }
+      current_code_size += IncreaseSize(kLiteralAddr4KiB);
+      FALLTHROUGH_INTENDED;
+    case kLiteralAddr4KiB:
+      if (IsUint<12>(GetOffset(current_code_size))) {
+        break;
+      }
+      current_code_size += IncreaseSize(kLiteralAddr64KiB);
+      FALLTHROUGH_INTENDED;
+    case kLiteralAddr64KiB:
+      if (IsUint<16>(GetOffset(current_code_size))) {
+        break;
+      }
+      current_code_size += IncreaseSize(kLiteralAddrFar);
+      FALLTHROUGH_INTENDED;
+    case kLiteralAddrFar:
+      // This encoding can reach any target.
+      break;
+
     case kLongOrFPLiteral1KiB:
       if (IsUint<10>(GetOffset(current_code_size))) {
         break;
@@ -2055,6 +2166,42 @@
       break;
     }
 
+    case kLiteralAddr1KiB: {
+      DCHECK(type_ == kLoadLiteralAddr);
+      int16_t encoding = AdrEncoding16(rn_, GetOffset(code_size));
+      buffer->Store<int16_t>(location_, encoding);
+      break;
+    }
+    case kLiteralAddr4KiB: {
+      DCHECK(type_ == kLoadLiteralAddr);
+      int32_t encoding = AdrEncoding32(rn_, GetOffset(code_size));
+      buffer->Store<int16_t>(location_, encoding >> 16);
+      buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(encoding & 0xffff));
+      break;
+    }
+    case kLiteralAddr64KiB: {
+      DCHECK(type_ == kLoadLiteralAddr);
+      int32_t mov_encoding = MovwEncoding32(rn_, GetOffset(code_size));
+      int16_t add_pc_encoding = AddRdnRmEncoding16(rn_, PC);
+      buffer->Store<int16_t>(location_, mov_encoding >> 16);
+      buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(mov_encoding & 0xffff));
+      buffer->Store<int16_t>(location_ + 4u, add_pc_encoding);
+      break;
+    }
+    case kLiteralAddrFar: {
+      DCHECK(type_ == kLoadLiteralAddr);
+      int32_t offset = GetOffset(code_size);
+      int32_t movw_encoding = MovwEncoding32(rn_, offset & 0xffff);
+      int32_t movt_encoding = MovtEncoding32(rn_, offset & ~0xffff);
+      int16_t add_pc_encoding = AddRdnRmEncoding16(rn_, PC);
+      buffer->Store<int16_t>(location_, movw_encoding >> 16);
+      buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(movw_encoding & 0xffff));
+      buffer->Store<int16_t>(location_ + 4u, movt_encoding >> 16);
+      buffer->Store<int16_t>(location_ + 6u, static_cast<int16_t>(movt_encoding & 0xffff));
+      buffer->Store<int16_t>(location_ + 8u, add_pc_encoding);
+      break;
+    }
+
     case kLongOrFPLiteral1KiB: {
       int32_t encoding = LoadWideOrFpEncoding(PC, GetOffset(code_size));  // DCHECKs type_.
       buffer->Store<int16_t>(location_, encoding >> 16);
@@ -3260,6 +3407,25 @@
   }
 }
 
+void Thumb2Assembler::CmpConstant(Register rn, int32_t value, Condition cond) {
+  // We prefer to select the shorter code sequence rather than selecting add for
+  // positive values and sub for negatives ones, which would slightly improve
+  // the readability of generated code for some constants.
+  ShifterOperand shifter_op;
+  if (ShifterOperandCanHold(kNoRegister, rn, CMP, value, &shifter_op)) {
+    cmp(rn, shifter_op, cond);
+  } else if (ShifterOperandCanHold(kNoRegister, rn, CMN, ~value, &shifter_op)) {
+    cmn(rn, shifter_op, cond);
+  } else {
+    CHECK(rn != IP);
+    movw(IP, Low16Bits(value), cond);
+    uint16_t value_high = High16Bits(value);
+    if (value_high != 0) {
+      movt(IP, value_high, cond);
+    }
+    cmp(rn, ShifterOperand(IP), cond);
+  }
+}
 
 void Thumb2Assembler::LoadImmediate(Register rd, int32_t value, Condition cond) {
   ShifterOperand shifter_op;
@@ -3476,5 +3642,39 @@
     b(label, NE);
   }
 }
+
+JumpTable* Thumb2Assembler::CreateJumpTable(std::vector<Label*>&& labels, Register base_reg) {
+  jump_tables_.emplace_back(std::move(labels));
+  JumpTable* table = &jump_tables_.back();
+  DCHECK(!table->GetLabel()->IsBound());
+
+  bool use32bit = IsForced32Bit() || IsHighRegister(base_reg);
+  uint32_t location = buffer_.Size();
+  Fixup::Size size = use32bit ? Fixup::kLiteralAddr4KiB : Fixup::kLiteralAddr1KiB;
+  FixupId fixup_id = AddFixup(Fixup::LoadLiteralAddress(location, base_reg, size));
+  Emit16(static_cast<uint16_t>(table->GetLabel()->position_));
+  table->GetLabel()->LinkTo(fixup_id);
+  if (use32bit) {
+    Emit16(0);
+  }
+  DCHECK_EQ(location + GetFixup(fixup_id)->GetSizeInBytes(), buffer_.Size());
+
+  return table;
+}
+
+void Thumb2Assembler::EmitJumpTableDispatch(JumpTable* jump_table, Register displacement_reg) {
+  CHECK(!IsForced32Bit()) << "Forced 32-bit dispatch not implemented yet";
+  // 32-bit ADD doesn't support PC as an input, so we need a two-instruction sequence:
+  //   SUB ip, ip, #0
+  //   ADD pc, ip, reg
+  // TODO: Implement.
+
+  // The anchor's position needs to be fixed up before we can compute offsets - so make it a tracked
+  // label.
+  BindTrackedLabel(jump_table->GetAnchorLabel());
+
+  add(PC, PC, ShifterOperand(displacement_reg));
+}
+
 }  // namespace arm
 }  // namespace art

diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h
index 055b137..38fd244 100644
--- a/compiler/utils/arm/assembler_thumb2.h
+++ b/compiler/utils/arm/assembler_thumb2.h

@@ -18,6 +18,7 @@
 #define ART_COMPILER_UTILS_ARM_ASSEMBLER_THUMB2_H_
 
 #include <deque>
+#include <utility>
 #include <vector>
 
 #include "base/logging.h"
@@ -304,6 +305,8 @@
   void AddConstant(Register rd, Register rn, int32_t value,
                    Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
 
+  void CmpConstant(Register rn, int32_t value, Condition cond = AL) OVERRIDE;
+
   // Load and Store. May clobber IP.
   void LoadImmediate(Register rd, int32_t value, Condition cond = AL) OVERRIDE;
   void MarkExceptionHandler(Label* label) OVERRIDE;
@@ -358,6 +361,12 @@
     force_32bit_ = true;
   }
 
+  // Emit an ADR (or a sequence of instructions) to load the jump table address into base_reg. This
+  // will generate a fixup.
+  JumpTable* CreateJumpTable(std::vector<Label*>&& labels, Register base_reg) OVERRIDE;
+  // Emit an ADD PC, X to dispatch a jump-table jump. This will generate a fixup.
+  void EmitJumpTableDispatch(JumpTable* jump_table, Register displacement_reg) OVERRIDE;
+
  private:
   typedef uint16_t FixupId;
 
@@ -399,6 +408,7 @@
       kCompareAndBranchXZero,     // cbz/cbnz.
       kLoadLiteralNarrow,         // Load narrrow integer literal.
       kLoadLiteralWide,           // Load wide integer literal.
+      kLoadLiteralAddr,           // Load address of literal (used for jump table).
       kLoadFPLiteralSingle,       // Load FP literal single.
       kLoadFPLiteralDouble,       // Load FP literal double.
     };
@@ -429,6 +439,16 @@
       // MOV rX, imm16 + MOVT rX, imm16 + ADD rX, pc + LDR rX, [rX]; any offset; 14 bytes.
       kLiteralFar,
 
+      // Load literal base addr.
+      // ADR rX, label; X < 8; 8 bit immediate, shifted to 10 bit. 2 bytes.
+      kLiteralAddr1KiB,
+      // ADR rX, label; 4KiB offset. 4 bytes.
+      kLiteralAddr4KiB,
+      // MOV rX, imm16 + ADD rX, pc; 64KiB offset. 6 bytes.
+      kLiteralAddr64KiB,
+      // MOV rX, imm16 + MOVT rX, imm16 + ADD rX, pc; any offset; 10 bytes.
+      kLiteralAddrFar,
+
       // Load long or FP literal variants.
       // VLDR s/dX, label; 32-bit insn, up to 1KiB offset; 4 bytes.
       kLongOrFPLiteral1KiB,
@@ -457,7 +477,7 @@
     }
 
     // Load narrow literal.
-    static Fixup LoadNarrowLiteral(uint32_t location, Register rt, Size size = kLiteral1KiB) {
+    static Fixup LoadNarrowLiteral(uint32_t location, Register rt, Size size) {
       DCHECK(size == kLiteral1KiB || size == kLiteral4KiB || size == kLiteral64KiB ||
              size == kLiteral1MiB || size == kLiteralFar);
       DCHECK(!IsHighRegister(rt) || (size != kLiteral1KiB && size != kLiteral64KiB));
@@ -493,6 +513,14 @@
                    AL, kLoadFPLiteralDouble, size, location);
     }
 
+    static Fixup LoadLiteralAddress(uint32_t location, Register rt, Size size) {
+      DCHECK(size == kLiteralAddr1KiB || size == kLiteralAddr4KiB || size == kLiteralAddr64KiB ||
+             size == kLiteralAddrFar);
+      DCHECK(!IsHighRegister(rt) || size != kLiteralAddr1KiB);
+      return Fixup(rt, kNoRegister, kNoSRegister, kNoDRegister,
+                   AL, kLoadLiteralAddr, size, location);
+    }
+
     Type GetType() const {
       return type_;
     }
@@ -756,12 +784,14 @@
   }
 
   void BindLabel(Label* label, uint32_t bound_pc);
-  void BindLiterals();
+  uint32_t BindLiterals();
+  void BindJumpTables(uint32_t code_size);
   void AdjustFixupIfNeeded(Fixup* fixup, uint32_t* current_code_size,
                            std::deque<FixupId>* fixups_to_recalculate);
   uint32_t AdjustFixups();
   void EmitFixups(uint32_t adjusted_code_size);
   void EmitLiterals();
+  void EmitJumpTables();
 
   static int16_t BEncoding16(int32_t offset, Condition cond);
   static int32_t BEncoding32(int32_t offset, Condition cond);
@@ -778,6 +808,8 @@
   static int32_t VldrdEncoding32(DRegister dd, Register rn, int32_t offset);
   static int16_t LdrRtRnImm5Encoding16(Register rt, Register rn, int32_t offset);
   static int32_t LdrRtRnImm12Encoding(Register rt, Register rn, int32_t offset);
+  static int16_t AdrEncoding16(Register rd, int32_t offset);
+  static int32_t AdrEncoding32(Register rd, int32_t offset);
 
   std::vector<Fixup> fixups_;
   std::unique_ptr<FixupId[]> fixup_dependents_;
@@ -786,6 +818,9 @@
   // without invalidating pointers and references to existing elements.
   std::deque<Literal> literals_;
 
+  // Jump table list.
+  std::deque<JumpTable> jump_tables_;
+
   // Data for AdjustedPosition(), see the description there.
   uint32_t last_position_adjustment_;
   uint32_t last_old_position_;

diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc
index 9c08ce0..cb4b20b 100644
--- a/compiler/utils/arm/assembler_thumb2_test.cc
+++ b/compiler/utils/arm/assembler_thumb2_test.cc

@@ -17,6 +17,7 @@
 #include "assembler_thumb2.h"
 
 #include "base/stl_util.h"
+#include "base/stringprintf.h"
 #include "utils/assembler_test.h"
 
 namespace art {
@@ -1011,6 +1012,315 @@
             __ GetAdjustedPosition(label.Position()));
 }
 
+TEST_F(AssemblerThumb2Test, BindTrackedLabel) {
+  Label non_tracked, tracked, branch_target;
+
+  // A few dummy loads on entry.
+  constexpr size_t kLdrR0R0Count = 5;
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+
+  // A branch that will need to be fixed up.
+  __ cbz(arm::R0, &branch_target);
+
+  // Some more dummy loads.
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+
+  // Now insert tracked and untracked label.
+  __ Bind(&non_tracked);
+  __ BindTrackedLabel(&tracked);
+
+  // A lot of dummy loads, to ensure the branch needs resizing.
+  constexpr size_t kLdrR0R0CountLong = 60;
+  for (size_t i = 0; i != kLdrR0R0CountLong; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+
+  // Bind the branch target.
+  __ Bind(&branch_target);
+
+  // One more load.
+  __ ldr(arm::R0, arm::Address(arm::R0));
+
+  std::string expected =
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      "cmp r0, #0\n"                                                       // cbz r0, 1f
+      "beq.n 1f\n" +
+      RepeatInsn(kLdrR0R0Count + kLdrR0R0CountLong, "ldr r0, [r0]\n") +
+      "1:\n"
+      "ldr r0, [r0]\n";
+  DriverStr(expected, "BindTrackedLabel");
+
+  // Expectation is that the tracked label should have moved.
+  EXPECT_LT(non_tracked.Position(), tracked.Position());
+}
+
+TEST_F(AssemblerThumb2Test, JumpTable) {
+  // The jump table. Use three labels.
+  Label label1, label2, label3;
+  std::vector<Label*> labels({ &label1, &label2, &label3 });
+
+  // A few dummy loads on entry, interspersed with 2 labels.
+  constexpr size_t kLdrR0R0Count = 5;
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+  __ BindTrackedLabel(&label1);
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+  __ BindTrackedLabel(&label2);
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+
+  // Create the jump table, emit the base load.
+  arm::JumpTable* jump_table = __ CreateJumpTable(std::move(labels), arm::R1);
+
+  // Dummy computation, stand-in for the address. We're only testing the jump table here, not how
+  // it's being used.
+  __ ldr(arm::R0, arm::Address(arm::R0));
+
+  // Emit the jump
+  __ EmitJumpTableDispatch(jump_table, arm::R1);
+
+  // Some more dummy instructions.
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+  __ BindTrackedLabel(&label3);
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {          // Note: odd so there's no alignment
+    __ ldr(arm::R0, arm::Address(arm::R0));              //       necessary, as gcc as emits nops,
+  }                                                      //       whereas we emit 0 != nop.
+
+  static_assert((kLdrR0R0Count + 3) * 2 < 1 * KB, "Too much offset");
+
+  std::string expected =
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      ".L1:\n" +
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      ".L2:\n" +
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      "adr r1, .Ljump_table\n"
+      "ldr r0, [r0]\n"
+      ".Lbase:\n"
+      "add pc, r1\n" +
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      ".L3:\n" +
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      ".align 2\n"
+      ".Ljump_table:\n"
+      ".4byte (.L1 - .Lbase - 4)\n"
+      ".4byte (.L2 - .Lbase - 4)\n"
+      ".4byte (.L3 - .Lbase - 4)\n";
+  DriverStr(expected, "JumpTable");
+}
+
+// Test for >1K fixup.
+TEST_F(AssemblerThumb2Test, JumpTable4K) {
+  // The jump table. Use three labels.
+  Label label1, label2, label3;
+  std::vector<Label*> labels({ &label1, &label2, &label3 });
+
+  // A few dummy loads on entry, interspersed with 2 labels.
+  constexpr size_t kLdrR0R0Count = 5;
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+  __ BindTrackedLabel(&label1);
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+  __ BindTrackedLabel(&label2);
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+
+  // Create the jump table, emit the base load.
+  arm::JumpTable* jump_table = __ CreateJumpTable(std::move(labels), arm::R1);
+
+  // Dummy computation, stand-in for the address. We're only testing the jump table here, not how
+  // it's being used.
+  __ ldr(arm::R0, arm::Address(arm::R0));
+
+  // Emit the jump
+  __ EmitJumpTableDispatch(jump_table, arm::R1);
+
+  // Some more dummy instructions.
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+  __ BindTrackedLabel(&label3);
+  constexpr size_t kLdrR0R0Count2 = 600;               // Note: even so there's no alignment
+  for (size_t i = 0; i != kLdrR0R0Count2; ++i) {       //       necessary, as gcc as emits nops,
+    __ ldr(arm::R0, arm::Address(arm::R0));            //       whereas we emit 0 != nop.
+  }
+
+  static_assert((kLdrR0R0Count + kLdrR0R0Count2 + 3) * 2 > 1 * KB, "Not enough offset");
+  static_assert((kLdrR0R0Count + kLdrR0R0Count2 + 3) * 2 < 4 * KB, "Too much offset");
+
+  std::string expected =
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      ".L1:\n" +
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      ".L2:\n" +
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      "adr r1, .Ljump_table\n"
+      "ldr r0, [r0]\n"
+      ".Lbase:\n"
+      "add pc, r1\n" +
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      ".L3:\n" +
+      RepeatInsn(kLdrR0R0Count2, "ldr r0, [r0]\n") +
+      ".align 2\n"
+      ".Ljump_table:\n"
+      ".4byte (.L1 - .Lbase - 4)\n"
+      ".4byte (.L2 - .Lbase - 4)\n"
+      ".4byte (.L3 - .Lbase - 4)\n";
+  DriverStr(expected, "JumpTable4K");
+}
+
+// Test for >4K fixup.
+TEST_F(AssemblerThumb2Test, JumpTable64K) {
+  // The jump table. Use three labels.
+  Label label1, label2, label3;
+  std::vector<Label*> labels({ &label1, &label2, &label3 });
+
+  // A few dummy loads on entry, interspersed with 2 labels.
+  constexpr size_t kLdrR0R0Count = 5;
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+  __ BindTrackedLabel(&label1);
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+  __ BindTrackedLabel(&label2);
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+
+  // Create the jump table, emit the base load.
+  arm::JumpTable* jump_table = __ CreateJumpTable(std::move(labels), arm::R1);
+
+  // Dummy computation, stand-in for the address. We're only testing the jump table here, not how
+  // it's being used.
+  __ ldr(arm::R0, arm::Address(arm::R0));
+
+  // Emit the jump
+  __ EmitJumpTableDispatch(jump_table, arm::R1);
+
+  // Some more dummy instructions.
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+  __ BindTrackedLabel(&label3);
+  constexpr size_t kLdrR0R0Count2 = 2601;              // Note: odd so there's no alignment
+  for (size_t i = 0; i != kLdrR0R0Count2; ++i) {       //       necessary, as gcc as emits nops,
+    __ ldr(arm::R0, arm::Address(arm::R0));            //       whereas we emit 0 != nop.
+  }
+
+  static_assert((kLdrR0R0Count + kLdrR0R0Count2 + 3) * 2 > 4 * KB, "Not enough offset");
+  static_assert((kLdrR0R0Count + kLdrR0R0Count2 + 3) * 2 < 64 * KB, "Too much offset");
+
+  std::string expected =
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      ".L1:\n" +
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      ".L2:\n" +
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      // ~ adr r1, .Ljump_table, gcc as can't seem to fix up a large offset itself.
+      // (Note: have to use constants, as labels aren't accepted.
+      "movw r1, #(((3 + " + StringPrintf("%zu", kLdrR0R0Count + kLdrR0R0Count2) +
+          ") * 2 - 4) & 0xFFFF)\n"
+      "add r1, pc\n"
+      "ldr r0, [r0]\n"
+      ".Lbase:\n"
+      "add pc, r1\n" +
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      ".L3:\n" +
+      RepeatInsn(kLdrR0R0Count2, "ldr r0, [r0]\n") +
+      ".align 2\n"
+      ".Ljump_table:\n"
+      ".4byte (.L1 - .Lbase - 4)\n"
+      ".4byte (.L2 - .Lbase - 4)\n"
+      ".4byte (.L3 - .Lbase - 4)\n";
+  DriverStr(expected, "JumpTable64K");
+}
+
+// Test for >64K fixup.
+TEST_F(AssemblerThumb2Test, JumpTableFar) {
+  // The jump table. Use three labels.
+  Label label1, label2, label3;
+  std::vector<Label*> labels({ &label1, &label2, &label3 });
+
+  // A few dummy loads on entry, interspersed with 2 labels.
+  constexpr size_t kLdrR0R0Count = 5;
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+  __ BindTrackedLabel(&label1);
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+  __ BindTrackedLabel(&label2);
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+
+  // Create the jump table, emit the base load.
+  arm::JumpTable* jump_table = __ CreateJumpTable(std::move(labels), arm::R1);
+
+  // Dummy computation, stand-in for the address. We're only testing the jump table here, not how
+  // it's being used.
+  __ ldr(arm::R0, arm::Address(arm::R0));
+
+  // Emit the jump
+  __ EmitJumpTableDispatch(jump_table, arm::R1);
+
+  // Some more dummy instructions.
+  for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+    __ ldr(arm::R0, arm::Address(arm::R0));
+  }
+  __ BindTrackedLabel(&label3);
+  constexpr size_t kLdrR0R0Count2 = 70001;             // Note: odd so there's no alignment
+  for (size_t i = 0; i != kLdrR0R0Count2; ++i) {       //       necessary, as gcc as emits nops,
+    __ ldr(arm::R0, arm::Address(arm::R0));            //       whereas we emit 0 != nop.
+  }
+
+  static_assert((kLdrR0R0Count + kLdrR0R0Count2 + 3) * 2 > 64 * KB, "Not enough offset");
+
+  std::string expected =
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      ".L1:\n" +
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      ".L2:\n" +
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      // ~ adr r1, .Ljump_table, gcc as can't seem to fix up a large offset itself.
+      // (Note: have to use constants, as labels aren't accepted.
+      "movw r1, #(((3 + " + StringPrintf("%zu", kLdrR0R0Count + kLdrR0R0Count2) +
+          ") * 2 - 4) & 0xFFFF)\n"
+      "movt r1, #(((3 + " + StringPrintf("%zu", kLdrR0R0Count + kLdrR0R0Count2) +
+          ") * 2 - 4) >> 16)\n"
+      ".Lhelp:"
+      "add r1, pc\n"
+      "ldr r0, [r0]\n"
+      ".Lbase:\n"
+      "add pc, r1\n" +
+      RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+      ".L3:\n" +
+      RepeatInsn(kLdrR0R0Count2, "ldr r0, [r0]\n") +
+      ".align 2\n"
+      ".Ljump_table:\n"
+      ".4byte (.L1 - .Lbase - 4)\n"
+      ".4byte (.L2 - .Lbase - 4)\n"
+      ".4byte (.L3 - .Lbase - 4)\n";
+  DriverStr(expected, "JumpTableFar");
+}
+
 TEST_F(AssemblerThumb2Test, Clz) {
   __ clz(arm::R0, arm::R1);
 

diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h
index d97a2a4..dfe6bab 100644
--- a/compiler/utils/assembler.h
+++ b/compiler/utils/assembler.h

@@ -227,6 +227,8 @@
   // Returns the position in the instruction stream.
   int GetPosition() { return  cursor_ - contents_; }
 
+  void ExtendCapacity(size_t min_capacity = 0u);
+
  private:
   // The limit is set to kMinimumGap bytes before the end of the data area.
   // This leaves enough space for the longest possible instruction and allows
@@ -261,8 +263,6 @@
     return data + capacity - kMinimumGap;
   }
 
-  void ExtendCapacity(size_t min_capacity = 0u);
-
   friend class AssemblerFixup;
 };
commit	7cffc3b0004d32faffc552c0a59286f369b21504	[log] [tgz]
author	Andreas Gampe <agampe@google.com>	Mon Oct 19 21:31:53 2015 -0700
committer	Andreas Gampe <agampe@google.com>	Fri Oct 30 11:15:26 2015 -0700
tree	e3838b8ba2a782ed91ef8faa381362b7a686a32a
parent	9e1b56f0e77aa5b6c72374b86d0cef58484ddcaa [diff]