ART: Arm32 packed-switch jump tables
Add jump table support to the thumb2 assembler. Jump tables are
a collection of labels for the case targets, and an anchor label
denoting the position of the jump.
Use the jump table support to implement packed-switch support for
arm32.
Add tests for BindTrackedLabel and JumpTable to the thumb2 assembler
test.
Bug: 24092914
Change-Id: I5c84f193dfebf9e07f48678efc8bd151bb1410dd
diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc
index 807beda..68e3956 100644
--- a/compiler/utils/arm/assembler_arm.cc
+++ b/compiler/utils/arm/assembler_arm.cc
@@ -16,6 +16,8 @@
#include "assembler_arm.h"
+#include <algorithm>
+
#include "base/bit_utils.h"
#include "base/logging.h"
#include "entrypoints/quick/quick_entrypoints.h"
@@ -922,5 +924,24 @@
return value | i << 26 | imm3 << 12 | a << 7;
}
+// Adjusts the position of every label that was registered in tracked_labels_.
+void ArmAssembler::FinalizeTrackedLabels() {
+  if (tracked_labels_.empty()) {
+    return;  // Nothing was tracked, nothing to adjust.
+  }
+
+  // Assembly is generated in linearized order, so the labels are expected to be sorted by
+  // position already. That is not technically required, but GetAdjustedPosition() used in
+  // AdjustLabelPosition() can take advantage of it, so verify it.
+  DCHECK(std::is_sorted(
+      tracked_labels_.begin(),
+      tracked_labels_.end(),
+      [](const Label* lhs, const Label* rhs) { return lhs->Position() < rhs->Position(); }));
+
+  // A label must not be adjusted twice; remember the previous one to catch adjacent duplicates.
+  Label* last_label = nullptr;
+  for (Label* label : tracked_labels_) {
+    DCHECK_NE(label, last_label);
+    AdjustLabelPosition(label);
+    last_label = label;
+  }
+}
+
+
} // namespace arm
} // namespace art
diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h
index d59bc6b..4a6e6d7 100644
--- a/compiler/utils/arm/assembler_arm.h
+++ b/compiler/utils/arm/assembler_arm.h
@@ -77,6 +77,45 @@
DISALLOW_COPY_AND_ASSIGN(Literal);
};
+// Jump table: a table of target labels that is emitted after the literals and handled much like
+// a literal. Besides the targets it carries a label for the table itself and an anchor label
+// that marks the position of the jump.
+class JumpTable {
+ public:
+  // Takes over the vector of target labels; the Label objects themselves are not owned.
+  explicit JumpTable(std::vector<Label*>&& labels)
+      : label_(),
+        anchor_label_(),
+        labels_(std::move(labels)) {}
+
+  // The target labels, one per table entry.
+  const std::vector<Label*>& GetData() const { return labels_; }
+
+  // Size of the table in bytes: one uint32_t-sized entry per target label.
+  uint32_t GetSize() const {
+    const uint32_t entry_count = static_cast<uint32_t>(labels_.size());
+    return entry_count * sizeof(uint32_t);
+  }
+
+  // Label of the table itself.
+  Label* GetLabel() { return &label_; }
+  const Label* GetLabel() const { return &label_; }
+
+  // Anchor label, i.e. the position of the dispatching jump.
+  Label* GetAnchorLabel() { return &anchor_label_; }
+  const Label* GetAnchorLabel() const { return &anchor_label_; }
+
+ private:
+  Label label_;
+  Label anchor_label_;
+  std::vector<Label*> labels_;
+
+  DISALLOW_COPY_AND_ASSIGN(JumpTable);
+};
+
+
class ShifterOperand {
public:
ShifterOperand() : type_(kUnknown), rm_(kNoRegister), rs_(kNoRegister),
@@ -685,6 +724,8 @@
AddConstant(rd, rd, value, cond, set_cc);
}
+ virtual void CmpConstant(Register rn, int32_t value, Condition cond = AL) = 0;
+
// Load and Store. May clobber IP.
virtual void LoadImmediate(Register rd, int32_t value, Condition cond = AL) = 0;
void LoadSImmediate(SRegister sd, float value, Condition cond = AL) {
@@ -996,11 +1037,43 @@
b(label);
}
+ // Jump table support. This is split into three functions:
+ //
+ // * CreateJumpTable creates the internal metadata to track the jump targets, and emits code to
+ // load the base address of the jump table.
+ //
+ // * EmitJumpTableDispatch emits the code to actually jump, assuming that the right table value
+ // has been loaded into a register already.
+ //
+ // * FinalizeCode emits the jump table after the literal pool. This can only happen after the
+ // labels for the jump targets have been finalized.
+
+ // Create a jump table for the given labels that will be emitted when finalizing. Create a load
+ // sequence (or placeholder) that stores the base address into the given register. When the table
+ // is emitted, offsets will be relative to the location EmitJumpTableDispatch was called on (the
+ // anchor).
+ virtual JumpTable* CreateJumpTable(std::vector<Label*>&& labels, Register base_reg) = 0;
+
+ // Emit the jump-table jump, assuming that the right value was loaded into displacement_reg.
+ virtual void EmitJumpTableDispatch(JumpTable* jump_table, Register displacement_reg) = 0;
+
+ // Bind a Label that needs to be updated by the assembler in FinalizeCode() if its position
+ // changes due to branch/literal fixup. The label is bound here and recorded in tracked_labels_.
+ void BindTrackedLabel(Label* label) {
+ Bind(label);
+ tracked_labels_.push_back(label);  // Adjusted later by FinalizeTrackedLabels().
+ }
+
protected:
// Returns whether or not the given register is used for passing parameters.
static int RegisterCompare(const Register* reg1, const Register* reg2) {
return *reg1 - *reg2;
}
+
+ void FinalizeTrackedLabels();
+
+ // Tracked labels. Use a vector, as we need to sort before adjusting.
+ std::vector<Label*> tracked_labels_;
};
// Slowpath entered when Thread::Current()->_exception is non-null
diff --git a/compiler/utils/arm/assembler_arm32.cc b/compiler/utils/arm/assembler_arm32.cc
index 6e7c828..a7dbacd 100644
--- a/compiler/utils/arm/assembler_arm32.cc
+++ b/compiler/utils/arm/assembler_arm32.cc
@@ -1385,6 +1385,21 @@
}
}
+// Compares `rn` with the constant `value` under condition `cond`, as CMP rn, #value would.
+// Picks the shortest sequence: CMP with an encodable immediate, CMN with the negated immediate,
+// or materializing the constant in IP (which is then clobbered) and comparing against it.
+void Arm32Assembler::CmpConstant(Register rn, int32_t value, Condition cond) {
+  // CMN sets the flags for rn + operand, so it needs the arithmetic negation of `value` (the
+  // bitwise complement would be off by one). Negate as unsigned so INT32_MIN cannot overflow.
+  const uint32_t negated_value = 0u - static_cast<uint32_t>(value);
+  ShifterOperand shifter_op;
+  if (ShifterOperandCanHoldArm32(value, &shifter_op)) {
+    cmp(rn, shifter_op, cond);
+  } else if (ShifterOperandCanHoldArm32(negated_value, &shifter_op)) {
+    cmn(rn, shifter_op, cond);
+  } else {
+    // IP is the scratch register for the constant; comparing IP with itself would be wrong.
+    CHECK(rn != IP);
+    movw(IP, Low16Bits(value), cond);
+    uint16_t value_high = High16Bits(value);
+    if (value_high != 0) {
+      movt(IP, value_high, cond);
+    }
+    cmp(rn, ShifterOperand(IP), cond);
+  }
+}
void Arm32Assembler::LoadImmediate(Register rd, int32_t value, Condition cond) {
ShifterOperand shifter_op;
@@ -1584,6 +1599,23 @@
b(label, NE);
}
+JumpTable* Arm32Assembler::CreateJumpTable(std::vector<Label*>&& labels ATTRIBUTE_UNUSED,
+ Register base_reg ATTRIBUTE_UNUSED) {
+ LOG(FATAL) << "CreateJumpTable is not supported on ARM32";  // Unconditional failure.
+ UNREACHABLE();  // No JumpTable is ever returned from this override.
+}
+
+void Arm32Assembler::EmitJumpTableDispatch(JumpTable* jump_table ATTRIBUTE_UNUSED,
+ Register displacement_reg ATTRIBUTE_UNUSED) {
+ LOG(FATAL) << "EmitJumpTableDispatch is not supported on ARM32";  // Unconditional failure.
+ UNREACHABLE();  // No code is emitted by this override.
+}
+
+void Arm32Assembler::FinalizeCode() {
+ ArmAssembler::FinalizeCode();  // Only the base-class finalization runs for arm32.
+ // Currently the arm32 assembler does not support fixups, and thus no tracking. We must not call
+ // FinalizeTrackedLabels(), which would lead to an abort.
+}
} // namespace arm
} // namespace art
diff --git a/compiler/utils/arm/assembler_arm32.h b/compiler/utils/arm/assembler_arm32.h
index 4646538..5233dcb 100644
--- a/compiler/utils/arm/assembler_arm32.h
+++ b/compiler/utils/arm/assembler_arm32.h
@@ -261,6 +261,8 @@
void AddConstant(Register rd, Register rn, int32_t value,
Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
+ void CmpConstant(Register rn, int32_t value, Condition cond = AL) OVERRIDE;
+
// Load and Store. May clobber IP.
void LoadImmediate(Register rd, int32_t value, Condition cond = AL) OVERRIDE;
void MarkExceptionHandler(Label* label) OVERRIDE;
@@ -308,6 +310,11 @@
void MemoryBarrier(ManagedRegister scratch) OVERRIDE;
+ JumpTable* CreateJumpTable(std::vector<Label*>&& labels, Register base_reg) OVERRIDE;
+ void EmitJumpTableDispatch(JumpTable* jump_table, Register displacement_reg) OVERRIDE;
+
+ void FinalizeCode() OVERRIDE;
+
private:
void EmitType01(Condition cond,
int type,
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index cc87856..fb3aa1e 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -92,7 +92,7 @@
label->BindTo(bound_pc);
}
-void Thumb2Assembler::BindLiterals() {
+uint32_t Thumb2Assembler::BindLiterals() {
// We don't add the padding here, that's done only after adjusting the Fixup sizes.
uint32_t code_size = buffer_.Size();
for (Literal& lit : literals_) {
@@ -100,6 +100,15 @@
BindLabel(label, code_size);
code_size += lit.GetSize();
}
+ return code_size;
+}
+
+// Binds the label of each jump table to consecutive positions, the first one at `code_size` and
+// each following one right behind the previous table.
+void Thumb2Assembler::BindJumpTables(uint32_t code_size) {
+  uint32_t next_table_position = code_size;
+  for (JumpTable& table : jump_tables_) {
+    BindLabel(table.GetLabel(), next_table_position);
+    next_table_position += table.GetSize();
+  }
}
void Thumb2Assembler::AdjustFixupIfNeeded(Fixup* fixup, uint32_t* current_code_size,
@@ -144,7 +153,7 @@
AdjustFixupIfNeeded(fixup, &current_code_size, &fixups_to_recalculate);
} while (!fixups_to_recalculate.empty());
- if ((current_code_size & 2) != 0 && !literals_.empty()) {
+ if ((current_code_size & 2) != 0 && (!literals_.empty() || !jump_tables_.empty())) {
// If we need to add padding before literals, this may just push some out of range,
// so recalculate all load literals. This makes up for the fact that we don't mark
// load literal as a dependency of all previous Fixups even though it actually is.
@@ -173,6 +182,13 @@
label->Reinitialize();
label->BindTo(old_position + literals_adjustment);
}
+ for (JumpTable& table : jump_tables_) {
+ Label* label = table.GetLabel();
+ DCHECK(label->IsBound());
+ int old_position = label->Position();
+ label->Reinitialize();
+ label->BindTo(old_position + literals_adjustment);
+ }
}
return current_code_size;
@@ -229,6 +245,43 @@
}
}
+void Thumb2Assembler::EmitJumpTables() {
+ if (!jump_tables_.empty()) {  // Appends every jump table to the end of the buffer.
+ // Jump tables require 4 byte alignment. (We don't support byte and half-word jump tables.)
+ uint32_t code_size = buffer_.Size();
+ DCHECK_ALIGNED(code_size, 2);
+ if ((code_size & 2u) != 0u) {
+ Emit16(0);  // Two bytes of zero padding up to the next 4-byte boundary.
+ }
+ for (JumpTable& table : jump_tables_) {
+ // Bulk ensure capacity, as this may be large.
+ size_t orig_size = buffer_.Size();
+ buffer_.ExtendCapacity(orig_size + table.GetSize());
+#ifndef NDEBUG
+ buffer_.has_ensured_capacity_ = true;
+#endif
+
+ DCHECK_EQ(static_cast<size_t>(table.GetLabel()->Position()), buffer_.Size());
+ int32_t anchor_position = table.GetAnchorLabel()->Position() + 4;  // Entries are relative to anchor + 4.
+
+ for (Label* target : table.GetData()) {
+ // Ensure that the label was tracked, so that it will have the right position.
+ DCHECK(std::find(tracked_labels_.begin(), tracked_labels_.end(), target) !=
+ tracked_labels_.end());
+
+ int32_t offset = target->Position() - anchor_position;
+ buffer_.Emit<int32_t>(offset);  // One 32-bit signed offset per target label.
+ }
+
+#ifndef NDEBUG
+ buffer_.has_ensured_capacity_ = false;
+#endif
+ size_t new_size = buffer_.Size();
+ DCHECK_LE(new_size - orig_size, table.GetSize());  // Must stay within the reserved size.
+ }
+ }
+}
+
inline int16_t Thumb2Assembler::BEncoding16(int32_t offset, Condition cond) {
DCHECK_ALIGNED(offset, 2);
int16_t encoding = B15 | B14;
@@ -382,12 +435,34 @@
return B31 | B30 | B29 | B28 | B27 | B23 | B22 | B20 | (rn << 16) | (rt << 12) | offset;
}
+// Builds the 16-bit ADR encoding for a low register `rd` and a 4-byte aligned `offset` that
+// fits in 10 unsigned bits.
+inline int16_t Thumb2Assembler::AdrEncoding16(Register rd, int32_t offset) {
+  DCHECK(IsUint<10>(offset));
+  DCHECK(IsAligned<4>(offset));
+  DCHECK(!IsHighRegister(rd));
+  // The immediate field stores the offset divided by 4.
+  const int32_t imm8 = offset >> 2;
+  return B15 | B13 | (rd << 8) | imm8;
+}
+
+// Builds the 32-bit ADR encoding for `rd` and an `offset` that fits in 12 unsigned bits.
+inline int32_t Thumb2Assembler::AdrEncoding32(Register rd, int32_t offset) {
+  DCHECK(IsUint<12>(offset));
+  // The 12 offset bits are scattered over three instruction fields.
+  const int32_t imm_bit26 = (offset & (1 << 11)) << (26 - 11);    // Bit 26: offset[11]
+  const int32_t imm_bits14_12 = (offset & (7 << 8)) << (12 - 8);  // Bits 14-12: offset[10-8]
+  const int32_t imm_bits7_0 = offset & 0xFF;                      // Bits 7-0: offset[7-0]
+  const int32_t fixed_bits = B31 | B30 | B29 | B28 | B25 | B19 | B18 | B17 | B16;
+  return fixed_bits | (rd << 8) | imm_bit26 | imm_bits14_12 | imm_bits7_0;
+}
+
void Thumb2Assembler::FinalizeCode() {
ArmAssembler::FinalizeCode();
- BindLiterals();
+ uint32_t size_after_literals = BindLiterals();
+ BindJumpTables(size_after_literals);
uint32_t adjusted_code_size = AdjustFixups();
EmitFixups(adjusted_code_size);
EmitLiterals();
+ FinalizeTrackedLabels();
+ EmitJumpTables();
}
bool Thumb2Assembler::ShifterOperandCanAlwaysHold(uint32_t immediate) {
@@ -1770,6 +1845,15 @@
case kLiteralFar:
return 14u;
+ case kLiteralAddr1KiB:
+ return 2u;
+ case kLiteralAddr4KiB:
+ return 4u;
+ case kLiteralAddr64KiB:
+ return 6u;
+ case kLiteralAddrFar:
+ return 10u;
+
case kLongOrFPLiteral1KiB:
return 4u;
case kLongOrFPLiteral256KiB:
@@ -1831,6 +1915,8 @@
case kLiteral1KiB:
case kLiteral4KiB:
case kLongOrFPLiteral1KiB:
+ case kLiteralAddr1KiB:
+ case kLiteralAddr4KiB:
DCHECK(diff >= 0 || (GetSize() == kLiteral1KiB && diff == -2));
diff += LiteralPoolPaddingSize(current_code_size);
// Load literal instructions round down the PC+4 to a multiple of 4, so if the PC
@@ -1843,12 +1929,14 @@
case kLiteral1MiB:
case kLiteral64KiB:
case kLongOrFPLiteral256KiB:
+ case kLiteralAddr64KiB:
DCHECK_GE(diff, 4); // The target must be at least 4 bytes after the ADD rX, PC.
diff -= 4; // One extra 32-bit MOV.
diff += LiteralPoolPaddingSize(current_code_size);
break;
case kLiteralFar:
case kLongOrFPLiteralFar:
+ case kLiteralAddrFar:
DCHECK_GE(diff, 8); // The target must be at least 4 bytes after the ADD rX, PC.
diff -= 8; // Extra MOVW+MOVT; both 32-bit.
diff += LiteralPoolPaddingSize(current_code_size);
@@ -1929,6 +2017,29 @@
// This encoding can reach any target.
break;
+ case kLiteralAddr1KiB:
+ DCHECK(!IsHighRegister(rn_));
+ if (IsUint<10>(GetOffset(current_code_size))) {
+ break;
+ }
+ current_code_size += IncreaseSize(kLiteralAddr4KiB);
+ FALLTHROUGH_INTENDED;
+ case kLiteralAddr4KiB:
+ if (IsUint<12>(GetOffset(current_code_size))) {
+ break;
+ }
+ current_code_size += IncreaseSize(kLiteralAddr64KiB);
+ FALLTHROUGH_INTENDED;
+ case kLiteralAddr64KiB:
+ if (IsUint<16>(GetOffset(current_code_size))) {
+ break;
+ }
+ current_code_size += IncreaseSize(kLiteralAddrFar);
+ FALLTHROUGH_INTENDED;
+ case kLiteralAddrFar:
+ // This encoding can reach any target.
+ break;
+
case kLongOrFPLiteral1KiB:
if (IsUint<10>(GetOffset(current_code_size))) {
break;
@@ -2055,6 +2166,42 @@
break;
}
+ case kLiteralAddr1KiB: {
+ DCHECK(type_ == kLoadLiteralAddr);
+ int16_t encoding = AdrEncoding16(rn_, GetOffset(code_size));
+ buffer->Store<int16_t>(location_, encoding);
+ break;
+ }
+ case kLiteralAddr4KiB: {
+ DCHECK(type_ == kLoadLiteralAddr);
+ int32_t encoding = AdrEncoding32(rn_, GetOffset(code_size));
+ buffer->Store<int16_t>(location_, encoding >> 16);
+ buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(encoding & 0xffff));
+ break;
+ }
+ case kLiteralAddr64KiB: {
+ DCHECK(type_ == kLoadLiteralAddr);
+ int32_t mov_encoding = MovwEncoding32(rn_, GetOffset(code_size));
+ int16_t add_pc_encoding = AddRdnRmEncoding16(rn_, PC);
+ buffer->Store<int16_t>(location_, mov_encoding >> 16);
+ buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(mov_encoding & 0xffff));
+ buffer->Store<int16_t>(location_ + 4u, add_pc_encoding);
+ break;
+ }
+ case kLiteralAddrFar: {
+ DCHECK(type_ == kLoadLiteralAddr);
+ int32_t offset = GetOffset(code_size);
+ int32_t movw_encoding = MovwEncoding32(rn_, offset & 0xffff);
+ int32_t movt_encoding = MovtEncoding32(rn_, offset & ~0xffff);
+ int16_t add_pc_encoding = AddRdnRmEncoding16(rn_, PC);
+ buffer->Store<int16_t>(location_, movw_encoding >> 16);
+ buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(movw_encoding & 0xffff));
+ buffer->Store<int16_t>(location_ + 4u, movt_encoding >> 16);
+ buffer->Store<int16_t>(location_ + 6u, static_cast<int16_t>(movt_encoding & 0xffff));
+ buffer->Store<int16_t>(location_ + 8u, add_pc_encoding);
+ break;
+ }
+
case kLongOrFPLiteral1KiB: {
int32_t encoding = LoadWideOrFpEncoding(PC, GetOffset(code_size)); // DCHECKs type_.
buffer->Store<int16_t>(location_, encoding >> 16);
@@ -3260,6 +3407,25 @@
}
}
+// Compares `rn` with the constant `value` under condition `cond`, as CMP rn, #value would.
+// May clobber IP when the constant cannot be encoded as an immediate; `rn` must not be IP then.
+void Thumb2Assembler::CmpConstant(Register rn, int32_t value, Condition cond) {
+  // We prefer the shortest code sequence: a single CMP with an encodable immediate, else a
+  // single CMN with the negated immediate, else materialize the constant in IP and compare.
+  // CMN sets the flags for rn + operand, so it needs the arithmetic negation of `value` (the
+  // bitwise complement would be off by one). Negate as unsigned so INT32_MIN cannot overflow.
+  const uint32_t negated_value = 0u - static_cast<uint32_t>(value);
+  ShifterOperand shifter_op;
+  if (ShifterOperandCanHold(kNoRegister, rn, CMP, value, &shifter_op)) {
+    cmp(rn, shifter_op, cond);
+  } else if (ShifterOperandCanHold(kNoRegister, rn, CMN, negated_value, &shifter_op)) {
+    cmn(rn, shifter_op, cond);
+  } else {
+    // IP is the scratch register for the constant; comparing IP with itself would be wrong.
+    CHECK(rn != IP);
+    movw(IP, Low16Bits(value), cond);
+    uint16_t value_high = High16Bits(value);
+    if (value_high != 0) {
+      movt(IP, value_high, cond);
+    }
+    cmp(rn, ShifterOperand(IP), cond);
+  }
+}
void Thumb2Assembler::LoadImmediate(Register rd, int32_t value, Condition cond) {
ShifterOperand shifter_op;
@@ -3476,5 +3642,39 @@
b(label, NE);
}
}
+
+JumpTable* Thumb2Assembler::CreateJumpTable(std::vector<Label*>&& labels, Register base_reg) {
+ jump_tables_.emplace_back(std::move(labels));
+ JumpTable* table = &jump_tables_.back();  // jump_tables_ is a std::deque.
+ DCHECK(!table->GetLabel()->IsBound());
+
+ bool use32bit = IsForced32Bit() || IsHighRegister(base_reg);  // 16-bit ADR needs a low register.
+ uint32_t location = buffer_.Size();
+ Fixup::Size size = use32bit ? Fixup::kLiteralAddr4KiB : Fixup::kLiteralAddr1KiB;
+ FixupId fixup_id = AddFixup(Fixup::LoadLiteralAddress(location, base_reg, size));
+ Emit16(static_cast<uint16_t>(table->GetLabel()->position_));  // Placeholder half-word.
+ table->GetLabel()->LinkTo(fixup_id);
+ if (use32bit) {
+ Emit16(0);  // Second half-word of the 32-bit placeholder.
+ }
+ DCHECK_EQ(location + GetFixup(fixup_id)->GetSizeInBytes(), buffer_.Size());
+
+ return table;
+}
+
+void Thumb2Assembler::EmitJumpTableDispatch(JumpTable* jump_table, Register displacement_reg) {
+ CHECK(!IsForced32Bit()) << "Forced 32-bit dispatch not implemented yet";
+ // 32-bit ADD doesn't support PC as an input, so we need a two-instruction sequence:
+ // SUB ip, pc, #0
+ // ADD pc, ip, reg
+ // TODO: Implement.
+
+ // The anchor's position needs to be fixed up before we can compute offsets - so make it a tracked
+ // label.
+ BindTrackedLabel(jump_table->GetAnchorLabel());
+
+ add(PC, PC, ShifterOperand(displacement_reg));  // The dispatching jump, bound to the anchor.
+}
+
} // namespace arm
} // namespace art
diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h
index 055b137..38fd244 100644
--- a/compiler/utils/arm/assembler_thumb2.h
+++ b/compiler/utils/arm/assembler_thumb2.h
@@ -18,6 +18,7 @@
#define ART_COMPILER_UTILS_ARM_ASSEMBLER_THUMB2_H_
#include <deque>
+#include <utility>
#include <vector>
#include "base/logging.h"
@@ -304,6 +305,8 @@
void AddConstant(Register rd, Register rn, int32_t value,
Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE;
+ void CmpConstant(Register rn, int32_t value, Condition cond = AL) OVERRIDE;
+
// Load and Store. May clobber IP.
void LoadImmediate(Register rd, int32_t value, Condition cond = AL) OVERRIDE;
void MarkExceptionHandler(Label* label) OVERRIDE;
@@ -358,6 +361,12 @@
force_32bit_ = true;
}
+ // Emit an ADR (or a sequence of instructions) to load the jump table address into base_reg. This
+ // will generate a fixup.
+ JumpTable* CreateJumpTable(std::vector<Label*>&& labels, Register base_reg) OVERRIDE;
+ // Emit an ADD PC, X to dispatch a jump-table jump. This binds the jump table's anchor label as a
+ // tracked label.
+ void EmitJumpTableDispatch(JumpTable* jump_table, Register displacement_reg) OVERRIDE;
+
private:
typedef uint16_t FixupId;
@@ -399,6 +408,7 @@
kCompareAndBranchXZero, // cbz/cbnz.
kLoadLiteralNarrow, // Load narrrow integer literal.
kLoadLiteralWide, // Load wide integer literal.
+ kLoadLiteralAddr, // Load address of literal (used for jump table).
kLoadFPLiteralSingle, // Load FP literal single.
kLoadFPLiteralDouble, // Load FP literal double.
};
@@ -429,6 +439,16 @@
// MOV rX, imm16 + MOVT rX, imm16 + ADD rX, pc + LDR rX, [rX]; any offset; 14 bytes.
kLiteralFar,
+ // Load literal base addr.
+ // ADR rX, label; X < 8; 8 bit immediate, shifted to 10 bit. 2 bytes.
+ kLiteralAddr1KiB,
+ // ADR rX, label; 4KiB offset. 4 bytes.
+ kLiteralAddr4KiB,
+ // MOV rX, imm16 + ADD rX, pc; 64KiB offset. 6 bytes.
+ kLiteralAddr64KiB,
+ // MOV rX, imm16 + MOVT rX, imm16 + ADD rX, pc; any offset; 10 bytes.
+ kLiteralAddrFar,
+
// Load long or FP literal variants.
// VLDR s/dX, label; 32-bit insn, up to 1KiB offset; 4 bytes.
kLongOrFPLiteral1KiB,
@@ -457,7 +477,7 @@
}
// Load narrow literal.
- static Fixup LoadNarrowLiteral(uint32_t location, Register rt, Size size = kLiteral1KiB) {
+ static Fixup LoadNarrowLiteral(uint32_t location, Register rt, Size size) {
DCHECK(size == kLiteral1KiB || size == kLiteral4KiB || size == kLiteral64KiB ||
size == kLiteral1MiB || size == kLiteralFar);
DCHECK(!IsHighRegister(rt) || (size != kLiteral1KiB && size != kLiteral64KiB));
@@ -493,6 +513,14 @@
AL, kLoadFPLiteralDouble, size, location);
}
+ static Fixup LoadLiteralAddress(uint32_t location, Register rt, Size size) {
+ DCHECK(size == kLiteralAddr1KiB || size == kLiteralAddr4KiB || size == kLiteralAddr64KiB ||
+ size == kLiteralAddrFar);  // Only the address-loading sizes are valid here.
+ DCHECK(!IsHighRegister(rt) || size != kLiteralAddr1KiB);  // 2-byte ADR needs a low register.
+ return Fixup(rt, kNoRegister, kNoSRegister, kNoDRegister,
+ AL, kLoadLiteralAddr, size, location);  // Unconditional fixup of type kLoadLiteralAddr.
+ }
+
Type GetType() const {
return type_;
}
@@ -756,12 +784,14 @@
}
void BindLabel(Label* label, uint32_t bound_pc);
- void BindLiterals();
+ uint32_t BindLiterals();
+ void BindJumpTables(uint32_t code_size);
void AdjustFixupIfNeeded(Fixup* fixup, uint32_t* current_code_size,
std::deque<FixupId>* fixups_to_recalculate);
uint32_t AdjustFixups();
void EmitFixups(uint32_t adjusted_code_size);
void EmitLiterals();
+ void EmitJumpTables();
static int16_t BEncoding16(int32_t offset, Condition cond);
static int32_t BEncoding32(int32_t offset, Condition cond);
@@ -778,6 +808,8 @@
static int32_t VldrdEncoding32(DRegister dd, Register rn, int32_t offset);
static int16_t LdrRtRnImm5Encoding16(Register rt, Register rn, int32_t offset);
static int32_t LdrRtRnImm12Encoding(Register rt, Register rn, int32_t offset);
+ static int16_t AdrEncoding16(Register rd, int32_t offset);
+ static int32_t AdrEncoding32(Register rd, int32_t offset);
std::vector<Fixup> fixups_;
std::unique_ptr<FixupId[]> fixup_dependents_;
@@ -786,6 +818,9 @@
// without invalidating pointers and references to existing elements.
std::deque<Literal> literals_;
+ // Jump table list.
+ std::deque<JumpTable> jump_tables_;
+
// Data for AdjustedPosition(), see the description there.
uint32_t last_position_adjustment_;
uint32_t last_old_position_;
diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc
index 9c08ce0..cb4b20b 100644
--- a/compiler/utils/arm/assembler_thumb2_test.cc
+++ b/compiler/utils/arm/assembler_thumb2_test.cc
@@ -17,6 +17,7 @@
#include "assembler_thumb2.h"
#include "base/stl_util.h"
+#include "base/stringprintf.h"
#include "utils/assembler_test.h"
namespace art {
@@ -1011,6 +1012,315 @@
__ GetAdjustedPosition(label.Position()));
}
+TEST_F(AssemblerThumb2Test, BindTrackedLabel) {
+ Label non_tracked, tracked, branch_target;
+
+ // A few dummy loads on entry.
+ constexpr size_t kLdrR0R0Count = 5;
+ for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+ __ ldr(arm::R0, arm::Address(arm::R0));
+ }
+
+ // A branch that will need to be fixed up.
+ __ cbz(arm::R0, &branch_target);
+
+ // Some more dummy loads.
+ for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+ __ ldr(arm::R0, arm::Address(arm::R0));
+ }
+
+ // Now insert tracked and untracked label.
+ __ Bind(&non_tracked);
+ __ BindTrackedLabel(&tracked);
+
+ // A lot of dummy loads, to ensure the branch needs resizing.
+ constexpr size_t kLdrR0R0CountLong = 60;
+ for (size_t i = 0; i != kLdrR0R0CountLong; ++i) {
+ __ ldr(arm::R0, arm::Address(arm::R0));
+ }
+
+ // Bind the branch target.
+ __ Bind(&branch_target);
+
+ // One more load.
+ __ ldr(arm::R0, arm::Address(arm::R0));
+
+ std::string expected =
+ RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+ "cmp r0, #0\n" // cbz r0, 1f
+ "beq.n 1f\n" +
+ RepeatInsn(kLdrR0R0Count + kLdrR0R0CountLong, "ldr r0, [r0]\n") +
+ "1:\n"
+ "ldr r0, [r0]\n";
+ DriverStr(expected, "BindTrackedLabel");
+
+ // Expectation is that the tracked label should have moved.
+ EXPECT_LT(non_tracked.Position(), tracked.Position());
+}
+
+TEST_F(AssemblerThumb2Test, JumpTable) {
+ // The jump table. Use three labels.
+ Label label1, label2, label3;
+ std::vector<Label*> labels({ &label1, &label2, &label3 });
+
+ // A few dummy loads on entry, interspersed with 2 labels.
+ constexpr size_t kLdrR0R0Count = 5;
+ for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+ __ ldr(arm::R0, arm::Address(arm::R0));
+ }
+ __ BindTrackedLabel(&label1);
+ for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+ __ ldr(arm::R0, arm::Address(arm::R0));
+ }
+ __ BindTrackedLabel(&label2);
+ for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+ __ ldr(arm::R0, arm::Address(arm::R0));
+ }
+
+ // Create the jump table, emit the base load.
+ arm::JumpTable* jump_table = __ CreateJumpTable(std::move(labels), arm::R1);
+
+ // Dummy computation, stand-in for the address. We're only testing the jump table here, not how
+ // it's being used.
+ __ ldr(arm::R0, arm::Address(arm::R0));
+
+ // Emit the jump
+ __ EmitJumpTableDispatch(jump_table, arm::R1);
+
+ // Some more dummy instructions.
+ for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+ __ ldr(arm::R0, arm::Address(arm::R0));
+ }
+ __ BindTrackedLabel(&label3);
+ for (size_t i = 0; i != kLdrR0R0Count; ++i) { // Note: odd so there's no alignment
+ __ ldr(arm::R0, arm::Address(arm::R0)); // necessary, as gcc as emits nops,
+ } // whereas we emit 0 != nop.
+
+ static_assert((kLdrR0R0Count + 3) * 2 < 1 * KB, "Too much offset");
+
+ std::string expected =
+ RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+ ".L1:\n" +
+ RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+ ".L2:\n" +
+ RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+ "adr r1, .Ljump_table\n"
+ "ldr r0, [r0]\n"
+ ".Lbase:\n"
+ "add pc, r1\n" +
+ RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+ ".L3:\n" +
+ RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+ ".align 2\n"
+ ".Ljump_table:\n"
+ ".4byte (.L1 - .Lbase - 4)\n"
+ ".4byte (.L2 - .Lbase - 4)\n"
+ ".4byte (.L3 - .Lbase - 4)\n";
+ DriverStr(expected, "JumpTable");
+}
+
+// Test for >1K fixup.
+TEST_F(AssemblerThumb2Test, JumpTable4K) {
+ // The jump table. Use three labels.
+ Label label1, label2, label3;
+ std::vector<Label*> labels({ &label1, &label2, &label3 });
+
+ // A few dummy loads on entry, interspersed with 2 labels.
+ constexpr size_t kLdrR0R0Count = 5;
+ for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+ __ ldr(arm::R0, arm::Address(arm::R0));
+ }
+ __ BindTrackedLabel(&label1);
+ for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+ __ ldr(arm::R0, arm::Address(arm::R0));
+ }
+ __ BindTrackedLabel(&label2);
+ for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+ __ ldr(arm::R0, arm::Address(arm::R0));
+ }
+
+ // Create the jump table, emit the base load.
+ arm::JumpTable* jump_table = __ CreateJumpTable(std::move(labels), arm::R1);
+
+ // Dummy computation, stand-in for the address. We're only testing the jump table here, not how
+ // it's being used.
+ __ ldr(arm::R0, arm::Address(arm::R0));
+
+ // Emit the jump
+ __ EmitJumpTableDispatch(jump_table, arm::R1);
+
+ // Some more dummy instructions.
+ for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+ __ ldr(arm::R0, arm::Address(arm::R0));
+ }
+ __ BindTrackedLabel(&label3);
+ constexpr size_t kLdrR0R0Count2 = 600; // Note: even so there's no alignment
+ for (size_t i = 0; i != kLdrR0R0Count2; ++i) { // necessary, as gcc as emits nops,
+ __ ldr(arm::R0, arm::Address(arm::R0)); // whereas we emit 0 != nop.
+ }
+
+ static_assert((kLdrR0R0Count + kLdrR0R0Count2 + 3) * 2 > 1 * KB, "Not enough offset");
+ static_assert((kLdrR0R0Count + kLdrR0R0Count2 + 3) * 2 < 4 * KB, "Too much offset");
+
+ std::string expected =
+ RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+ ".L1:\n" +
+ RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+ ".L2:\n" +
+ RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+ "adr r1, .Ljump_table\n"
+ "ldr r0, [r0]\n"
+ ".Lbase:\n"
+ "add pc, r1\n" +
+ RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+ ".L3:\n" +
+ RepeatInsn(kLdrR0R0Count2, "ldr r0, [r0]\n") +
+ ".align 2\n"
+ ".Ljump_table:\n"
+ ".4byte (.L1 - .Lbase - 4)\n"
+ ".4byte (.L2 - .Lbase - 4)\n"
+ ".4byte (.L3 - .Lbase - 4)\n";
+ DriverStr(expected, "JumpTable4K");
+}
+
+// Test for >4K fixup.
+TEST_F(AssemblerThumb2Test, JumpTable64K) {
+ // The jump table. Use three labels.
+ Label label1, label2, label3;
+ std::vector<Label*> labels({ &label1, &label2, &label3 });
+
+ // A few dummy loads on entry, interspersed with 2 labels.
+ constexpr size_t kLdrR0R0Count = 5;
+ for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+ __ ldr(arm::R0, arm::Address(arm::R0));
+ }
+ __ BindTrackedLabel(&label1);
+ for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+ __ ldr(arm::R0, arm::Address(arm::R0));
+ }
+ __ BindTrackedLabel(&label2);
+ for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+ __ ldr(arm::R0, arm::Address(arm::R0));
+ }
+
+ // Create the jump table, emit the base load.
+ arm::JumpTable* jump_table = __ CreateJumpTable(std::move(labels), arm::R1);
+
+ // Dummy computation, stand-in for the address. We're only testing the jump table here, not how
+ // it's being used.
+ __ ldr(arm::R0, arm::Address(arm::R0));
+
+ // Emit the jump
+ __ EmitJumpTableDispatch(jump_table, arm::R1);
+
+ // Some more dummy instructions.
+ for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+ __ ldr(arm::R0, arm::Address(arm::R0));
+ }
+ __ BindTrackedLabel(&label3);
+ constexpr size_t kLdrR0R0Count2 = 2601; // Note: odd so there's no alignment
+ for (size_t i = 0; i != kLdrR0R0Count2; ++i) { // necessary, as gcc as emits nops,
+ __ ldr(arm::R0, arm::Address(arm::R0)); // whereas we emit 0 != nop.
+ }
+
+ static_assert((kLdrR0R0Count + kLdrR0R0Count2 + 3) * 2 > 4 * KB, "Not enough offset");
+ static_assert((kLdrR0R0Count + kLdrR0R0Count2 + 3) * 2 < 64 * KB, "Too much offset");
+
+ std::string expected =
+ RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+ ".L1:\n" +
+ RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+ ".L2:\n" +
+ RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+ // ~ adr r1, .Ljump_table, gcc as can't seem to fix up a large offset itself.
+ // (Note: have to use constants, as labels aren't accepted.
+ "movw r1, #(((3 + " + StringPrintf("%zu", kLdrR0R0Count + kLdrR0R0Count2) +
+ ") * 2 - 4) & 0xFFFF)\n"
+ "add r1, pc\n"
+ "ldr r0, [r0]\n"
+ ".Lbase:\n"
+ "add pc, r1\n" +
+ RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+ ".L3:\n" +
+ RepeatInsn(kLdrR0R0Count2, "ldr r0, [r0]\n") +
+ ".align 2\n"
+ ".Ljump_table:\n"
+ ".4byte (.L1 - .Lbase - 4)\n"
+ ".4byte (.L2 - .Lbase - 4)\n"
+ ".4byte (.L3 - .Lbase - 4)\n";
+ DriverStr(expected, "JumpTable64K");
+}
+
+// Test for >64K fixup.
+TEST_F(AssemblerThumb2Test, JumpTableFar) {
+ // The jump table. Use three labels.
+ Label label1, label2, label3;
+ std::vector<Label*> labels({ &label1, &label2, &label3 });
+
+ // A few dummy loads on entry, interspersed with 2 labels.
+ constexpr size_t kLdrR0R0Count = 5;
+ for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+ __ ldr(arm::R0, arm::Address(arm::R0));
+ }
+ __ BindTrackedLabel(&label1);
+ for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+ __ ldr(arm::R0, arm::Address(arm::R0));
+ }
+ __ BindTrackedLabel(&label2);
+ for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+ __ ldr(arm::R0, arm::Address(arm::R0));
+ }
+
+ // Create the jump table, emit the base load.
+ arm::JumpTable* jump_table = __ CreateJumpTable(std::move(labels), arm::R1);
+
+ // Dummy computation, stand-in for the address. We're only testing the jump table here, not how
+ // it's being used.
+ __ ldr(arm::R0, arm::Address(arm::R0));
+
+ // Emit the jump
+ __ EmitJumpTableDispatch(jump_table, arm::R1);
+
+ // Some more dummy instructions.
+ for (size_t i = 0; i != kLdrR0R0Count; ++i) {
+ __ ldr(arm::R0, arm::Address(arm::R0));
+ }
+ __ BindTrackedLabel(&label3);
+ constexpr size_t kLdrR0R0Count2 = 70001; // Note: odd so there's no alignment
+ for (size_t i = 0; i != kLdrR0R0Count2; ++i) { // necessary, as gcc as emits nops,
+ __ ldr(arm::R0, arm::Address(arm::R0)); // whereas we emit 0 != nop.
+ }
+
+ static_assert((kLdrR0R0Count + kLdrR0R0Count2 + 3) * 2 > 64 * KB, "Not enough offset");
+
+ std::string expected =
+ RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+ ".L1:\n" +
+ RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+ ".L2:\n" +
+ RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+ // ~ adr r1, .Ljump_table, gcc as can't seem to fix up a large offset itself.
+ // (Note: have to use constants, as labels aren't accepted.
+ "movw r1, #(((3 + " + StringPrintf("%zu", kLdrR0R0Count + kLdrR0R0Count2) +
+ ") * 2 - 4) & 0xFFFF)\n"
+ "movt r1, #(((3 + " + StringPrintf("%zu", kLdrR0R0Count + kLdrR0R0Count2) +
+ ") * 2 - 4) >> 16)\n"
+ ".Lhelp:"
+ "add r1, pc\n"
+ "ldr r0, [r0]\n"
+ ".Lbase:\n"
+ "add pc, r1\n" +
+ RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") +
+ ".L3:\n" +
+ RepeatInsn(kLdrR0R0Count2, "ldr r0, [r0]\n") +
+ ".align 2\n"
+ ".Ljump_table:\n"
+ ".4byte (.L1 - .Lbase - 4)\n"
+ ".4byte (.L2 - .Lbase - 4)\n"
+ ".4byte (.L3 - .Lbase - 4)\n";
+ DriverStr(expected, "JumpTableFar");
+}
+
TEST_F(AssemblerThumb2Test, Clz) {
__ clz(arm::R0, arm::R1);
diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h
index d97a2a4..dfe6bab 100644
--- a/compiler/utils/assembler.h
+++ b/compiler/utils/assembler.h
@@ -227,6 +227,8 @@
// Returns the position in the instruction stream.
int GetPosition() { return cursor_ - contents_; }
+ void ExtendCapacity(size_t min_capacity = 0u);
+
private:
// The limit is set to kMinimumGap bytes before the end of the data area.
// This leaves enough space for the longest possible instruction and allows
@@ -261,8 +263,6 @@
return data + capacity - kMinimumGap;
}
- void ExtendCapacity(size_t min_capacity = 0u);
-
friend class AssemblerFixup;
};