AArch64: Add tbz/tbnz and tst.
Since the branch offset supported by tbz/tbnz is quite small (-32k ~ +32k),
the instruction will be replaced by a tst and beq/bne pair in the fix-up
stage if the branch offset is too large.
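
For example (illustrative only), a tbz whose target lies beyond the
+/-32k range:

  tbz  w0, #3, <target>       // 14-bit imm, +/-32KB branch range

is rewritten in the fix-up stage as:

  tst  w0, #0x8               // i.e. ands wzr, w0, #(1 << 3)
  b.eq <target>               // 19-bit imm, +/-1MB branch range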
Change-Id: I4cace06bec6425e0f2e1f5f7c471eec08d06bca6
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h
index dcc67c3..63f3e64 100644
--- a/compiler/dex/compiler_enums.h
+++ b/compiler/dex/compiler_enums.h
@@ -528,6 +528,7 @@
kFixupLoad, // Mostly for immediates.
kFixupVLoad, // FP load which *may* be pc-relative.
kFixupCBxZ, // Cbz, Cbnz.
+ kFixupTBxZ, // Tbz, Tbnz.
kFixupPushPop, // Not really pc relative, but changes size based on args.
kFixupCondBranch, // Conditional branch
kFixupT1Branch, // Thumb1 Unconditional branch
diff --git a/compiler/dex/quick/arm64/arm64_lir.h b/compiler/dex/quick/arm64/arm64_lir.h
index a449cbd..d001dd6 100644
--- a/compiler/dex/quick/arm64/arm64_lir.h
+++ b/compiler/dex/quick/arm64/arm64_lir.h
@@ -116,6 +116,7 @@
#define IS_SIGNED_IMM7(value) IS_SIGNED_IMM(7, value)
#define IS_SIGNED_IMM9(value) IS_SIGNED_IMM(9, value)
#define IS_SIGNED_IMM12(value) IS_SIGNED_IMM(12, value)
+#define IS_SIGNED_IMM14(value) IS_SIGNED_IMM(14, value)
#define IS_SIGNED_IMM19(value) IS_SIGNED_IMM(19, value)
#define IS_SIGNED_IMM21(value) IS_SIGNED_IMM(21, value)
@@ -355,7 +356,10 @@
kA64Sub4rrro, // sub [s1001011000] rm[20-16] imm_6[15-10] rn[9-5] rd[4-0].
kA64Sub4RRre, // sub [s1001011001] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] rd[4-0].
kA64Subs3rRd, // subs[s111000100] imm_12[21-10] rn[9-5] rd[4-0].
+ kA64Tst2rl, // tst alias of "ands rzr, rn, #imm".
kA64Tst3rro, // tst alias of "ands rzr, arg1, arg2, arg3".
+ kA64Tbnz3rht, // tbnz imm_6_b5[31] [0110111] imm_6_b40[23-19] imm_14[18-5] rt[4-0].
+ kA64Tbz3rht, // tbz imm_6_b5[31] [0110110] imm_6_b40[23-19] imm_14[18-5] rt[4-0].
kA64Ubfm4rrdd, // ubfm[s10100110] N[22] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0].
kA64Last,
kA64NotWide = 0, // Flag used to select the first instruction variant.
@@ -400,23 +404,24 @@
enum ArmEncodingKind {
// All the formats below are encoded in the same way (as a kFmtBitBlt).
// These are grouped together, for fast handling (e.g. "if (LIKELY(fmt <= kFmtBitBlt)) ...").
- kFmtRegW = 0, // Word register (w) or wzr.
- kFmtRegX, // Extended word register (x) or xzr.
- kFmtRegR, // Register with same width as the instruction or zr.
- kFmtRegWOrSp, // Word register (w) or wsp.
- kFmtRegXOrSp, // Extended word register (x) or sp.
- kFmtRegROrSp, // Register with same width as the instruction or sp.
- kFmtRegS, // Single FP reg.
- kFmtRegD, // Double FP reg.
- kFmtRegF, // Single/double FP reg depending on the instruction width.
- kFmtBitBlt, // Bit string using end/start.
+ kFmtRegW = 0, // Word register (w) or wzr.
+ kFmtRegX, // Extended word register (x) or xzr.
+ kFmtRegR, // Register with same width as the instruction or zr.
+ kFmtRegWOrSp, // Word register (w) or wsp.
+ kFmtRegXOrSp, // Extended word register (x) or sp.
+ kFmtRegROrSp, // Register with same width as the instruction or sp.
+ kFmtRegS, // Single FP reg.
+ kFmtRegD, // Double FP reg.
+ kFmtRegF, // Single/double FP reg depending on the instruction width.
+ kFmtBitBlt, // Bit string using end/start.
// Less likely formats.
- kFmtUnused, // Unused field and marks end of formats.
- kFmtImm21, // Sign-extended immediate using [23..5,30..29].
- kFmtShift, // Register shift, 9-bit at [23..21, 15..10]..
- kFmtExtend, // Register extend, 9-bit at [23..21, 15..10].
- kFmtSkip, // Unused field, but continue to next.
+ kFmtUnused, // Unused field and marks end of formats.
+ kFmtImm6Shift, // Shift immediate, 6-bit at [31, 23..19].
+ kFmtImm21, // Sign-extended immediate using [23..5,30..29].
+ kFmtShift, // Register shift, 9-bit at [23..21, 15..10].
+ kFmtExtend, // Register extend, 9-bit at [23..21, 15..10].
+ kFmtSkip, // Unused field, but continue to next.
};
// Struct used to define the snippet positions for each A64 opcode.
diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc
index 15c89f2..5115246 100644
--- a/compiler/dex/quick/arm64/assemble_arm64.cc
+++ b/compiler/dex/quick/arm64/assemble_arm64.cc
@@ -89,6 +89,7 @@
* M -> 16-bit shift expression ("" or ", lsl #16" or ", lsl #32"...)
* B -> dmb option string (sy, st, ish, ishst, nsh, hshst)
* H -> operand shift
+ * h -> 6-bit shift immediate
* T -> register shift (either ", lsl #0" or ", lsl #12")
* e -> register extend (e.g. uxtb #1)
* o -> register shift (e.g. lsl #1) for Word registers
@@ -614,10 +615,24 @@
kFmtRegR, 4, 0, kFmtRegROrSp, 9, 5, kFmtBitBlt, 21, 10,
kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
"subs", "!0r, !1R, #!2d", kFixupNone),
- ENCODING_MAP(WIDE(kA64Tst3rro), SF_VARIANTS(0x6a000000),
+ ENCODING_MAP(WIDE(kA64Tst2rl), SF_VARIANTS(0x7200001f),
+ kFmtRegR, 9, 5, kFmtBitBlt, 22, 10, kFmtUnused, -1, -1,
+ kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | SETS_CCODES,
+ "tst", "!0r, !1l", kFixupNone),
+ ENCODING_MAP(WIDE(kA64Tst3rro), SF_VARIANTS(0x6a00001f),
kFmtRegR, 9, 5, kFmtRegR, 20, 16, kFmtShift, -1, -1,
- kFmtUnused, -1, -1, IS_QUAD_OP | REG_USE01 | SETS_CCODES,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | SETS_CCODES,
"tst", "!0r, !1r!2o", kFixupNone),
+ // NOTE: Tbz/Tbnz do not require SETS_CCODES, but they may be replaced by other LIRs
+ // which require SETS_CCODES in the fix-up stage.
+ ENCODING_MAP(WIDE(kA64Tbnz3rht), CUSTOM_VARIANTS(0x37000000, 0x37000000),
+ kFmtRegR, 4, 0, kFmtImm6Shift, -1, -1, kFmtBitBlt, 18, 5, kFmtUnused, -1, -1,
+ IS_TERTIARY_OP | REG_USE0 | IS_BRANCH | NEEDS_FIXUP | SETS_CCODES,
+ "tbnz", "!0r, #!1h, !2t", kFixupTBxZ),
+ ENCODING_MAP(WIDE(kA64Tbz3rht), CUSTOM_VARIANTS(0x36000000, 0x36000000),
+ kFmtRegR, 4, 0, kFmtImm6Shift, -1, -1, kFmtBitBlt, 18, 5, kFmtUnused, -1, -1,
+ IS_TERTIARY_OP | REG_USE0 | IS_BRANCH | NEEDS_FIXUP | SETS_CCODES,
+ "tbz", "!0r, #!1h, !2t", kFixupTBxZ),
ENCODING_MAP(WIDE(kA64Ubfm4rrdd), SF_N_VARIANTS(0x53000000),
kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 21, 16,
kFmtBitBlt, 15, 10, IS_QUAD_OP | REG_DEF0_USE1,
@@ -787,6 +802,11 @@
value |= ((operand & 0x1ffffc) >> 2) << 5;
bits |= value;
break;
+ case kFmtImm6Shift:
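+ // Split the 6-bit bit-number operand: bits [4:0] go to b40 at [23:19],
+ // and bit [5] goes to b5 at [31], per the tbz/tbnz encoding.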
+ value = (operand & 0x1f) << 19;
+ value |= ((operand & 0x20) >> 5) << 31;
+ bits |= value;
+ break;
default:
LOG(FATAL) << "Bad fmt for arg. " << i << " in " << encoder->name
<< " (" << kind << ")";
@@ -827,11 +847,6 @@
*/
int generation = 0;
while (true) {
- // TODO(Arm64): check whether passes and offset adjustments are really necessary.
- // Currently they aren't, as - in the fixups below - LIR are never inserted.
- // Things can be different if jump ranges above 1 MB need to be supported.
- // If they are not, then we can get rid of the assembler retry logic.
-
offset_adjustment = 0;
AssemblerStatus res = kSuccess; // Assume success
generation ^= 1;
@@ -839,13 +854,9 @@
lir = first_fixup_;
prev_lir = NULL;
while (lir != NULL) {
- /*
- * NOTE: the lir being considered here will be encoded following the switch (so long as
- * we're not in a retry situation). However, any new non-pc_rel instructions inserted
- * due to retry must be explicitly encoded at the time of insertion. Note that
- * inserted instructions don't need use/def flags, but do need size and pc-rel status
- * properly updated.
- */
+ // NOTE: Any new non-pc_rel instructions inserted due to retry must be explicitly encoded at
+ // the time of insertion. Note that inserted instructions don't need use/def flags, but do
+ // need size and pc-rel status properly updated.
lir->offset += offset_adjustment;
// During pass, allows us to tell whether a node has been updated with offset_adjustment yet.
lir->flags.generation = generation;
@@ -861,7 +872,8 @@
CodeOffset target = target_lir->offset +
((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
int32_t delta = target - pc;
- if (!((delta & 0x3) == 0 && IS_SIGNED_IMM19(delta >> 2))) {
+ DCHECK_EQ(delta & 0x3, 0);
+ if (!IS_SIGNED_IMM19(delta >> 2)) {
LOG(FATAL) << "Invalid jump range in kFixupT1Branch";
}
lir->operands[0] = delta >> 2;
@@ -876,12 +888,75 @@
CodeOffset target = target_lir->offset +
((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
int32_t delta = target - pc;
- if (!((delta & 0x3) == 0 && IS_SIGNED_IMM19(delta >> 2))) {
+ DCHECK_EQ(delta & 0x3, 0);
+ if (!IS_SIGNED_IMM19(delta >> 2)) {
LOG(FATAL) << "Invalid jump range in kFixupLoad";
}
lir->operands[1] = delta >> 2;
break;
}
+ case kFixupTBxZ: {
+ int16_t opcode = lir->opcode;
+ RegStorage reg(lir->operands[0] | RegStorage::kValid);
+ int32_t imm = lir->operands[1];
+ DCHECK_EQ(IS_WIDE(opcode), reg.Is64Bit());
+ DCHECK_LT(imm, 64);
+ if (imm >= 32) {
+ DCHECK(IS_WIDE(opcode));
+ } else if (kIsDebugBuild && IS_WIDE(opcode)) {
+ // "tbz/tbnz x0, #imm(<32)" is the same with "tbz/tbnz w0, #imm(<32)", but GCC/oatdump
+ // will disassemble it as "tbz/tbnz w0, #imm(<32)". So unwide the LIR to make the
+ // compiler log behave the same with those disassembler in debug build.
+ // This will also affect tst instruction if it need to be replaced, but there is no
+ // performance difference between "tst Xt" and "tst Wt".
+ lir->opcode = UNWIDE(opcode);
+ lir->operands[0] = As32BitReg(reg).GetReg();
+ }
+
+ // Fix-up branch offset.
+ LIR *target_lir = lir->target;
+ DCHECK(target_lir);
+ CodeOffset pc = lir->offset;
+ CodeOffset target = target_lir->offset +
+ ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
+ int32_t delta = target - pc;
+ DCHECK_EQ(delta & 0x3, 0);
+ // Check if branch offset can be encoded in tbz/tbnz.
+ if (!IS_SIGNED_IMM14(delta >> 2)) {
+ DexOffset dalvik_offset = lir->dalvik_offset;
+ int16_t opcode = lir->opcode;
+ LIR* target = lir->target;
+ // "tbz/tbnz Rt, #imm, label" -> "tst Rt, #(1<<imm)".
+ offset_adjustment -= lir->flags.size;
+ int32_t imm = EncodeLogicalImmediate(IS_WIDE(opcode), UINT64_C(1) << lir->operands[1]);
+ DCHECK_NE(imm, -1);
+ lir->opcode = IS_WIDE(opcode) ? WIDE(kA64Tst2rl) : kA64Tst2rl;
+ lir->operands[1] = imm;
+ lir->target = nullptr;
+ lir->flags.fixup = EncodingMap[kA64Tst2rl].fixup;
+ lir->flags.size = EncodingMap[kA64Tst2rl].size;
+ offset_adjustment += lir->flags.size;
+ // Insert "beq/bne label".
+ opcode = UNWIDE(opcode);
+ DCHECK(opcode == kA64Tbz3rht || opcode == kA64Tbnz3rht);
+ LIR* new_lir = RawLIR(dalvik_offset, kA64B2ct,
+ opcode == kA64Tbz3rht ? kArmCondEq : kArmCondNe, 0, 0, 0, 0, target);
+ InsertLIRAfter(lir, new_lir);
+ new_lir->offset = lir->offset + lir->flags.size;
+ new_lir->flags.generation = generation;
+ new_lir->flags.fixup = EncodingMap[kA64B2ct].fixup;
+ new_lir->flags.size = EncodingMap[kA64B2ct].size;
+ offset_adjustment += new_lir->flags.size;
+ // lir no longer pcrel, unlink and link in new_lir.
+ ReplaceFixup(prev_lir, lir, new_lir);
+ prev_lir = new_lir; // Continue with the new instruction.
+ lir = new_lir->u.a.pcrel_next;
+ res = kRetryAll;
+ continue;
+ }
+ lir->operands[2] = delta >> 2;
+ break;
+ }
case kFixupAdr: {
LIR* target_lir = lir->target;
int32_t delta;
@@ -910,6 +985,7 @@
}
if (res == kSuccess) {
+ DCHECK_EQ(offset_adjustment, 0);
break;
} else {
assembler_retries++;
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
index d00c57d..d1b9c81 100644
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -271,8 +271,12 @@
ArmOpcode opcode = kA64Cbz2rt;
ArmOpcode wide = reg.Is64Bit() ? WIDE(0) : UNWIDE(0);
branch = NewLIR2(opcode | wide, reg.GetReg(), 0);
+ } else if (arm_cond == kArmCondLt || arm_cond == kArmCondGe) {
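+ // A compare against zero reduces to a sign-bit test: reg < 0 iff the sign
+ // bit (bit 31 or 63) is set, so tbnz (lt) / tbz (ge) replaces cmp+branch.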
+ ArmOpcode opcode = (arm_cond == kArmCondLt) ? kA64Tbnz3rht : kA64Tbz3rht;
+ ArmOpcode wide = reg.Is64Bit() ? WIDE(0) : UNWIDE(0);
+ int value = reg.Is64Bit() ? 63 : 31;
+ branch = NewLIR3(opcode | wide, reg.GetReg(), value, 0);
}
- // TODO: Use tbz/tbnz for < 0 or >= 0.
}
if (branch == nullptr) {
@@ -856,16 +860,14 @@
OpRegRegImm(kOpLsl, rs_length, rs_length, 1);
// Copy one element.
- OpRegRegImm(kOpAnd, rs_tmp, As32BitReg(rs_length), 2);
- LIR* jmp_to_copy_two = OpCmpImmBranch(kCondEq, rs_tmp, 0, nullptr);
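+ // (length & 2) == 0 is simply "bit 1 is clear", so a single tbz replaces
+ // the former and+cmp+branch sequence.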
+ LIR* jmp_to_copy_two = NewLIR3(WIDE(kA64Tbz3rht), rs_length.GetReg(), 1, 0);
OpRegImm(kOpSub, rs_length, 2);
LoadBaseIndexed(rs_src, rs_length, rs_tmp, 0, kSignedHalf);
StoreBaseIndexed(rs_dst, rs_length, rs_tmp, 0, kSignedHalf);
// Copy two elements.
LIR *copy_two = NewLIR0(kPseudoTargetLabel);
- OpRegRegImm(kOpAnd, rs_tmp, As32BitReg(rs_length), 4);
- LIR* jmp_to_copy_four = OpCmpImmBranch(kCondEq, rs_tmp, 0, nullptr);
+ LIR* jmp_to_copy_four = NewLIR3(WIDE(kA64Tbz3rht), rs_length.GetReg(), 2, 0);
OpRegImm(kOpSub, rs_length, 4);
LoadBaseIndexed(rs_src, rs_length, rs_tmp, 0, k32);
StoreBaseIndexed(rs_dst, rs_length, rs_tmp, 0, k32);
diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc
index 9b4546a..685f8d5 100644
--- a/compiler/dex/quick/arm64/target_arm64.cc
+++ b/compiler/dex/quick/arm64/target_arm64.cc
@@ -504,6 +504,9 @@
else
strcpy(tbuf, ", DecodeError3");
break;
+ case 'h':
+ snprintf(tbuf, arraysize(tbuf), "%d", operand);
+ break;
default:
strcpy(tbuf, "DecodeError1");
break;
diff --git a/test/702-LargeBranchOffset/build b/test/702-LargeBranchOffset/build
new file mode 100644
index 0000000..eacf730
--- /dev/null
+++ b/test/702-LargeBranchOffset/build
@@ -0,0 +1,27 @@
+#!/bin/bash
+#
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Stop if something fails.
+set -e
+
+# Generate Main.java by expanding the macros in Main.java.in with the C preprocessor.
+cpp -P src/Main.java.in src/Main.java
+
+mkdir classes
+${JAVAC} -d classes src/*.java
+
+${DX} --debug --dex --output=classes.dex classes
+zip $TEST_NAME.jar classes.dex
diff --git a/test/702-LargeBranchOffset/expected.txt b/test/702-LargeBranchOffset/expected.txt
new file mode 100644
index 0000000..130678f
--- /dev/null
+++ b/test/702-LargeBranchOffset/expected.txt
@@ -0,0 +1,5 @@
+0
+0
+2
+1
+512
diff --git a/test/702-LargeBranchOffset/info.txt b/test/702-LargeBranchOffset/info.txt
new file mode 100644
index 0000000..747263e
--- /dev/null
+++ b/test/702-LargeBranchOffset/info.txt
@@ -0,0 +1 @@
+Simple test to check that large branch offsets work correctly.
diff --git a/test/702-LargeBranchOffset/src/Main.java.in b/test/702-LargeBranchOffset/src/Main.java.in
new file mode 100644
index 0000000..270d766
--- /dev/null
+++ b/test/702-LargeBranchOffset/src/Main.java.in
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define DO_2_TIMES(x) x x
+#define DO_4_TIMES(x) DO_2_TIMES(DO_2_TIMES(x))
+#define DO_16_TIMES(x) DO_4_TIMES(DO_4_TIMES(x))
+#define DO_256_TIMES(x) DO_16_TIMES(DO_16_TIMES(x))
+#define DO_512_TIMES(x) DO_256_TIMES(DO_2_TIMES(x))
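+
+// The nested macros expand the statement below 512 times, making foo() large
+// enough that a branch over the expanded block exceeds the +/-32KB tbz/tbnz
+// range and must be rewritten as tst + b.eq/b.ne in the fix-up stage.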
+
+
+public class Main {
+ public static void main(String[] args) {
+ Main m = new Main();
+ System.out.println(m.foo(-1, -1));
+ System.out.println(m.foo(-1, +1));
+ System.out.println(m.foo(+1, -1));
+ System.out.println(m.foo(+1, +1));
+ System.out.println(m.value);
+ }
+
+ public int foo(int a, int b) {
+ if ( a >= 0 ) {
+ if ( b < 0 ) {
+ DO_512_TIMES( synchronized(lock) { value++; } )
+ return 2;
+ }
+ return 1;
+ }
+ return 0;
+ }
+
+ Object lock = new Object();
+ int value = 0;
+}