ARM: VIXL32: Use 16-bit encoding for B(cond,...) where possible.
If a conditional branch's target is known to be not far from the branch
(within a range of 254 bytes), the 16-bit encoding can be used.
However, we can't assume this by default for branches to a basic
block or a slow path; if we do and the range requirement is not met,
a veneer pool will be emitted.
Test: ART_USE_VIXL_ARM_BACKEND=true m test-art-host
Test: ART_USE_VIXL_ARM_BACKEND=true m test-art-target
Change-Id: I2fbe6d1a43bc2d1b54472c2c3fe05a575e5634f2
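For context, a minimal usage sketch (illustrative only, not part of the patch),
written in the style of the hunks below; it assumes the usual `__` assembler
macro, a hypothetical `slow_path`, and a nearby, not-yet-bound label:

    // Request the narrow (16-bit T1) encoding for a short forward branch.
    // If the label ends up outside the ~254-byte range, VIXL emits a veneer.
    vixl32::Label done;
    __ Cmp(temp1_, ref_reg);
    __ B(eq, &done, /* far_target */ false);  // hint: target is known to be close
    // ... short instruction sequence ...
    __ Bind(&done);

    // Branches to slow paths keep the default (far_target = true) and thus the
    // 32-bit T3 encoding, since slow paths may be emitted far from the fast path.
    __ B(ne, slow_path->GetEntryLabel());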
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 3d6415d..2167f4b 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -802,7 +802,7 @@
// as-is.
vixl32::Label done;
__ Cmp(temp1_, ref_reg);
- __ B(eq, &done);
+ __ B(eq, &done, /* far_target */ false);
// Update the holder's field atomically. This may fail if
// mutator updates before us, but it's OK. This is achieved
@@ -854,11 +854,11 @@
__ clrex(ne);
}
- __ B(ne, &exit_loop);
+ __ B(ne, &exit_loop, /* far_target */ false);
__ Strex(tmp, value, MemOperand(tmp_ptr));
__ Cmp(tmp, 1);
- __ B(eq, &loop_head);
+ __ B(eq, &loop_head, /* far_target */ false);
__ Bind(&exit_loop);
@@ -3641,7 +3641,7 @@
__ And(shift_right, RegisterFrom(rhs), 0x1F);
__ Lsrs(shift_left, RegisterFrom(rhs), 6);
__ Rsb(LeaveFlags, shift_left, shift_right, Operand::From(kArmBitsPerWord));
- __ B(cc, &shift_by_32_plus_shift_right);
+ __ B(cc, &shift_by_32_plus_shift_right, /* far_target */ false);
// out_reg_hi = (reg_hi << shift_left) | (reg_lo >> shift_right).
// out_reg_lo = (reg_lo << shift_left) | (reg_hi >> shift_right).
@@ -4113,8 +4113,8 @@
}
case Primitive::kPrimLong: {
__ Cmp(HighRegisterFrom(left), HighRegisterFrom(right)); // Signed compare.
- __ B(lt, &less);
- __ B(gt, &greater);
+ __ B(lt, &less, /* far_target */ false);
+ __ B(gt, &greater, /* far_target */ false);
// Emit move to `out` before the last `Cmp`, as `Mov` might affect the status flags.
__ Mov(out, 0);
__ Cmp(LowRegisterFrom(left), LowRegisterFrom(right)); // Unsigned compare.
@@ -4135,8 +4135,8 @@
UNREACHABLE();
}
- __ B(eq, &done);
- __ B(less_cond, &less);
+ __ B(eq, &done, /* far_target */ false);
+ __ B(less_cond, &less, /* far_target */ false);
__ Bind(&greater);
__ Mov(out, 1);
@@ -4933,7 +4933,7 @@
__ Lsrs(length, length, 1u); // LSRS has a 16-bit encoding, TST (immediate) does not.
static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
"Expecting 0=compressed, 1=uncompressed");
- __ B(cs, &uncompressed_load);
+ __ B(cs, &uncompressed_load, /* far_target */ false);
GetAssembler()->LoadFromOffset(kLoadUnsignedByte,
RegisterFrom(out_loc),
obj,
@@ -4972,7 +4972,7 @@
__ Lsrs(length, length, 1u); // LSRS has a 16-bit encoding, TST (immediate) does not.
static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
"Expecting 0=compressed, 1=uncompressed");
- __ B(cs, &uncompressed_load);
+ __ B(cs, &uncompressed_load, /* far_target */ false);
__ Ldrb(RegisterFrom(out_loc), MemOperand(temp, RegisterFrom(index), vixl32::LSL, 0));
__ B(&done);
__ Bind(&uncompressed_load);
@@ -5271,7 +5271,7 @@
if (instruction->StaticTypeOfArrayIsObjectArray()) {
vixl32::Label do_put;
- __ B(eq, &do_put);
+ __ B(eq, &do_put, /* far_target */ false);
// If heap poisoning is enabled, the `temp1` reference has
// not been unpoisoned yet; unpoison it now.
GetAssembler()->MaybeUnpoisonHeapReference(temp1);
@@ -6212,7 +6212,7 @@
kCompilerReadBarrierOption);
__ Cmp(out, cls);
// Classes must be equal for the instanceof to succeed.
- __ B(ne, &zero);
+ __ B(ne, &zero, /* far_target */ false);
__ Mov(out, 1);
__ B(&done);
break;
@@ -6239,7 +6239,7 @@
// If `out` is null, we use it for the result, and jump to `done`.
__ CompareAndBranchIfZero(out, &done, /* far_target */ false);
__ Cmp(out, cls);
- __ B(ne, &loop);
+ __ B(ne, &loop, /* far_target */ false);
__ Mov(out, 1);
if (zero.IsReferenced()) {
__ B(&done);
@@ -6259,7 +6259,7 @@
vixl32::Label loop, success;
__ Bind(&loop);
__ Cmp(out, cls);
- __ B(eq, &success);
+ __ B(eq, &success, /* far_target */ false);
// /* HeapReference<Class> */ out = out->super_class_
GenerateReferenceLoadOneRegister(instruction,
out_loc,
@@ -6288,7 +6288,7 @@
// Do an exact check.
vixl32::Label exact_check;
__ Cmp(out, cls);
- __ B(eq, &exact_check);
+ __ B(eq, &exact_check, /* far_target */ false);
// Otherwise, we need to check that the object's class is a non-primitive array.
// /* HeapReference<Class> */ out = out->component_type_
GenerateReferenceLoadOneRegister(instruction,
@@ -6490,7 +6490,7 @@
// Otherwise, compare the classes.
__ Cmp(temp, cls);
- __ B(ne, &loop);
+ __ B(ne, &loop, /* far_target */ false);
break;
}
@@ -6507,7 +6507,7 @@
vixl32::Label loop;
__ Bind(&loop);
__ Cmp(temp, cls);
- __ B(eq, &done);
+ __ B(eq, &done, /* far_target */ false);
// /* HeapReference<Class> */ temp = temp->super_class_
GenerateReferenceLoadOneRegister(instruction,
@@ -6535,7 +6535,7 @@
// Do an exact check.
__ Cmp(temp, cls);
- __ B(eq, &done);
+ __ B(eq, &done, /* far_target */ false);
// Otherwise, we need to check that the object's class is a non-primitive array.
// /* HeapReference<Class> */ temp = temp->component_type_
@@ -6599,7 +6599,7 @@
__ Sub(RegisterFrom(maybe_temp2_loc), RegisterFrom(maybe_temp2_loc), 2);
// Compare the classes and continue the loop if they do not match.
__ Cmp(cls, RegisterFrom(maybe_temp3_loc));
- __ B(ne, &start_loop);
+ __ B(ne, &start_loop, /* far_target */ false);
break;
}
}
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index 95551c8..fc69f79 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -187,7 +187,7 @@
assembler->MaybePoisonHeapReference(tmp);
__ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
__ Cmp(src_curr_addr, src_stop_addr);
- __ B(ne, &loop);
+ __ B(ne, &loop, /* far_target */ false);
__ B(GetExitLabel());
}
@@ -851,7 +851,7 @@
__ Ldrexd(temp_lo, temp_hi, MemOperand(temp_reg));
__ Strexd(temp_lo, value_lo, value_hi, MemOperand(temp_reg));
__ Cmp(temp_lo, 0);
- __ B(ne, &loop_head);
+ __ B(ne, &loop_head, /* far_target */ false);
} else {
__ Strd(value_lo, value_hi, MemOperand(base, offset));
}
@@ -1062,7 +1062,7 @@
__ cmp(eq, tmp, 1);
}
- __ B(eq, &loop_head);
+ __ B(eq, &loop_head, /* far_target */ false);
__ Dmb(vixl32::ISH);
@@ -1238,23 +1238,23 @@
__ Ldr(temp_reg, MemOperand(str, temp1));
__ Ldr(temp2, MemOperand(arg, temp1));
__ Cmp(temp_reg, temp2);
- __ B(ne, &find_char_diff);
+ __ B(ne, &find_char_diff, /* far_target */ false);
__ Add(temp1, temp1, char_size * 2);
__ Ldr(temp_reg, MemOperand(str, temp1));
__ Ldr(temp2, MemOperand(arg, temp1));
__ Cmp(temp_reg, temp2);
- __ B(ne, &find_char_diff_2nd_cmp);
+ __ B(ne, &find_char_diff_2nd_cmp, /* far_target */ false);
__ Add(temp1, temp1, char_size * 2);
// With string compression, we have compared 8 bytes, otherwise 4 chars.
__ Subs(temp0, temp0, (mirror::kUseStringCompression ? 8 : 4));
- __ B(hi, &loop);
+ __ B(hi, &loop, /* far_target */ false);
__ B(&end);
__ Bind(&find_char_diff_2nd_cmp);
if (mirror::kUseStringCompression) {
__ Subs(temp0, temp0, 4); // 4 bytes previously compared.
- __ B(ls, &end); // Was the second comparison fully beyond the end?
+ __ B(ls, &end, /* far_target */ false); // Was the second comparison fully beyond the end?
} else {
// Without string compression, we can start treating temp0 as signed
// and rely on the signed comparison below.
@@ -1282,7 +1282,7 @@
// the remaining string data, so just return length diff (out).
// The comparison is unsigned for string compression, otherwise signed.
__ Cmp(temp0, Operand(temp1, vixl32::LSR, (mirror::kUseStringCompression ? 3 : 4)));
- __ B((mirror::kUseStringCompression ? ls : le), &end);
+ __ B((mirror::kUseStringCompression ? ls : le), &end, /* far_target */ false);
// Extract the characters and calculate the difference.
if (mirror::kUseStringCompression) {
@@ -1349,9 +1349,9 @@
__ Ldrb(temp_reg, MemOperand(temp1, c_char_size, PostIndex));
__ Ldrh(temp3, MemOperand(temp2, char_size, PostIndex));
__ Cmp(temp_reg, temp3);
- __ B(ne, &different_compression_diff);
+ __ B(ne, &different_compression_diff, /* far_target */ false);
__ Subs(temp0, temp0, 2);
- __ B(hi, &different_compression_loop);
+ __ B(hi, &different_compression_loop, /* far_target */ false);
__ B(&end);
// Calculate the difference.
@@ -1427,7 +1427,7 @@
// Reference equality check, return true if same reference.
__ Cmp(str, arg);
- __ B(eq, &return_true);
+ __ B(eq, &return_true, /* far_target */ false);
if (!optimizations.GetArgumentIsString()) {
// Instanceof check for the argument by comparing class fields.
@@ -1437,7 +1437,7 @@
__ Ldr(temp, MemOperand(str, class_offset));
__ Ldr(temp1, MemOperand(arg, class_offset));
__ Cmp(temp, temp1);
- __ B(ne, &return_false);
+ __ B(ne, &return_false, /* far_target */ false);
}
// Load `count` fields of this and argument strings.
@@ -1446,7 +1446,7 @@
// Check if `count` fields are equal, return false if they're not.
// Also compares the compression style, if differs return false.
__ Cmp(temp, temp1);
- __ B(ne, &return_false);
+ __ B(ne, &return_false, /* far_target */ false);
// Return true if both strings are empty. Even with string compression `count == 0` means empty.
static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
"Expecting 0=compressed, 1=uncompressed");
@@ -1477,10 +1477,10 @@
__ Ldr(temp2, MemOperand(arg, temp1));
__ Add(temp1, temp1, Operand::From(sizeof(uint32_t)));
__ Cmp(out, temp2);
- __ B(ne, &return_false);
+ __ B(ne, &return_false, /* far_target */ false);
// With string compression, we have compared 4 bytes, otherwise 2 chars.
__ Subs(temp, temp, mirror::kUseStringCompression ? 4 : 2);
- __ B(hi, &loop);
+ __ B(hi, &loop, /* far_target */ false);
// Return true and exit the function.
// If loop does not result in returning false, we return true.
@@ -1800,7 +1800,7 @@
} else {
if (!optimizations.GetDestinationIsSource()) {
__ Cmp(src, dest);
- __ B(ne, &conditions_on_positions_validated);
+ __ B(ne, &conditions_on_positions_validated, /* far_target */ false);
}
__ Cmp(RegisterFrom(dest_pos), src_pos_constant);
__ B(gt, intrinsic_slow_path->GetEntryLabel());
@@ -1808,7 +1808,7 @@
} else {
if (!optimizations.GetDestinationIsSource()) {
__ Cmp(src, dest);
- __ B(ne, &conditions_on_positions_validated);
+ __ B(ne, &conditions_on_positions_validated, /* far_target */ false);
}
if (dest_pos.IsConstant()) {
int32_t dest_pos_constant = Int32ConstantFrom(dest_pos);
@@ -1916,7 +1916,7 @@
if (optimizations.GetDestinationIsTypedObjectArray()) {
vixl32::Label do_copy;
- __ B(eq, &do_copy);
+ __ B(eq, &do_copy, /* far_target */ false);
// /* HeapReference<Class> */ temp1 = temp1->component_type_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
@@ -1976,7 +1976,7 @@
if (optimizations.GetDestinationIsTypedObjectArray()) {
vixl32::Label do_copy;
- __ B(eq, &do_copy);
+ __ B(eq, &do_copy, /* far_target */ false);
if (!did_unpoison) {
assembler->MaybeUnpoisonHeapReference(temp1);
}
@@ -2069,7 +2069,7 @@
// Don't enter copy loop if `length == 0`.
__ Cmp(temp1, temp3);
- __ B(eq, &done);
+ __ B(eq, &done, /* far_target */ false);
// /* int32_t */ monitor = src->monitor_
__ Ldr(temp2, MemOperand(src, monitor_offset));
@@ -2122,7 +2122,7 @@
}
__ Cmp(temp1, temp3);
- __ B(ne, &loop);
+ __ B(ne, &loop, /* far_target */ false);
__ Bind(read_barrier_slow_path->GetExitLabel());
__ Bind(&done);
@@ -2142,7 +2142,7 @@
// poison/unpoison.
vixl32::Label loop, done;
__ Cmp(temp1, temp3);
- __ B(eq, &done);
+ __ B(eq, &done, /* far_target */ false);
__ Bind(&loop);
{
@@ -2154,7 +2154,7 @@
}
__ Cmp(temp1, temp3);
- __ B(ne, &loop);
+ __ B(ne, &loop, /* far_target */ false);
__ Bind(&done);
}
@@ -2560,7 +2560,7 @@
__ Subs(num_chr, srcEnd, srcBegin);
// Early out for valid zero-length retrievals.
- __ B(eq, &done);
+ __ B(eq, &done, /* far_target */ false);
// src range to copy.
__ Add(src_ptr, srcObj, value_offset);
@@ -2576,7 +2576,7 @@
__ Ldr(temp, MemOperand(srcObj, count_offset));
__ Tst(temp, 1);
temps.Release(temp);
- __ B(eq, &compressed_string_preloop);
+ __ B(eq, &compressed_string_preloop, /* far_target */ false);
}
__ Add(src_ptr, src_ptr, Operand(srcBegin, vixl32::LSL, 1));
@@ -2586,7 +2586,7 @@
temp = temps.Acquire();
// Save repairing the value of num_chr on the < 4 character path.
__ Subs(temp, num_chr, 4);
- __ B(lt, &remainder);
+ __ B(lt, &remainder, /* far_target */ false);
// Keep the result of the earlier subs, we are going to fetch at least 4 characters.
__ Mov(num_chr, temp);
@@ -2601,10 +2601,10 @@
__ Ldr(temp, MemOperand(src_ptr, char_size * 4, PostIndex));
__ Str(temp, MemOperand(dst_ptr, char_size * 4, PostIndex));
temps.Release(temp);
- __ B(ge, &loop);
+ __ B(ge, &loop, /* far_target */ false);
__ Adds(num_chr, num_chr, 4);
- __ B(eq, &done);
+ __ B(eq, &done, /* far_target */ false);
// Main loop for < 4 character case and remainder handling. Loads and stores one
// 16-bit Java character at a time.
@@ -2614,7 +2614,7 @@
__ Subs(num_chr, num_chr, 1);
__ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
temps.Release(temp);
- __ B(gt, &remainder);
+ __ B(gt, &remainder, /* far_target */ false);
if (mirror::kUseStringCompression) {
__ B(&done);
@@ -2630,7 +2630,7 @@
__ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
temps.Release(temp);
__ Subs(num_chr, num_chr, 1);
- __ B(gt, &compressed_string_loop);
+ __ B(gt, &compressed_string_loop, /* far_target */ false);
}
__ Bind(&done);
diff --git a/compiler/utils/arm/assembler_arm_vixl.cc b/compiler/utils/arm/assembler_arm_vixl.cc
index 453c90a..e5eef37 100644
--- a/compiler/utils/arm/assembler_arm_vixl.cc
+++ b/compiler/utils/arm/assembler_arm_vixl.cc
@@ -444,7 +444,7 @@
return;
}
Cmp(rn, 0);
- B(eq, label);
+ B(eq, label, is_far_target);
}
void ArmVIXLMacroAssembler::CompareAndBranchIfNonZero(vixl32::Register rn,
@@ -455,16 +455,16 @@
return;
}
Cmp(rn, 0);
- B(ne, label);
+ B(ne, label, is_far_target);
}
void ArmVIXLMacroAssembler::B(vixl32::Label* label) {
if (!label->IsBound()) {
// Try to use 16-bit T2 encoding of B instruction.
DCHECK(OutsideITBlock());
- ExactAssemblyScope ass(this,
- kMaxInstructionSizeInBytes,
- CodeBufferCheckScope::kMaximumSize);
+ ExactAssemblyScope guard(this,
+ k16BitT32InstructionSizeInBytes,
+ CodeBufferCheckScope::kMaximumSize);
b(al, Narrow, label);
AddBranchLabel(label);
return;
@@ -472,7 +472,17 @@
MacroAssembler::B(label);
}
-void ArmVIXLMacroAssembler::B(vixl32::Condition cond, vixl32::Label* label) {
+void ArmVIXLMacroAssembler::B(vixl32::Condition cond, vixl32::Label* label, bool is_far_target) {
+ if (!label->IsBound() && !is_far_target) {
+ // Try to use 16-bit T1 encoding of B instruction.
+ DCHECK(OutsideITBlock());
+ ExactAssemblyScope guard(this,
+ k16BitT32InstructionSizeInBytes,
+ CodeBufferCheckScope::kMaximumSize);
+ b(cond, Narrow, label);
+ AddBranchLabel(label);
+ return;
+ }
// To further reduce the Bcc encoding size and use the 16-bit T1 encoding,
// we can provide a hint to this function, i.e. far_target=false.
// By default this function uses 'EncodingSizeType::Best', which generates a 32-bit T3 encoding.
diff --git a/compiler/utils/arm/assembler_arm_vixl.h b/compiler/utils/arm/assembler_arm_vixl.h
index 5661249..3cf6a2e 100644
--- a/compiler/utils/arm/assembler_arm_vixl.h
+++ b/compiler/utils/arm/assembler_arm_vixl.h
@@ -114,7 +114,7 @@
// TODO: Remove when MacroAssembler::Add(FlagsUpdate, Condition, Register, Register, Operand)
// makes the right decision about 16-bit encodings.
void Add(vixl32::Register rd, vixl32::Register rn, const vixl32::Operand& operand) {
- if (rd.Is(rn)) {
+ if (rd.Is(rn) && operand.IsPlainRegister()) {
MacroAssembler::Add(rd, rn, operand);
} else {
MacroAssembler::Add(vixl32::DontCare, rd, rn, operand);
@@ -124,7 +124,10 @@
// These interfaces try to use 16-bit T2 encoding of B instruction.
void B(vixl32::Label* label);
- void B(vixl32::Condition cond, vixl32::Label* label);
+ // For B(label), we always try to use the Narrow encoding, because the 16-bit T2 encoding
+ // supports jumping within a 2KB range. For B(cond, label), the supported branch range is only
+ // 256 bytes, so we use the far_target hint to try to use the 16-bit T1 encoding for short range jumps.
+ void B(vixl32::Condition cond, vixl32::Label* label, bool is_far_target = true);
};
class ArmVIXLAssembler FINAL : public Assembler {
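For reference, a standalone sketch (not part of the patch) of where the 254-byte
figure in the commit message comes from: the 16-bit T1 B<cond> encoding holds a
signed 8-bit immediate, and the PC-relative byte offset is that immediate shifted
left by one (in Thumb, PC reads as the branch address + 4).

    #include <cstdint>
    #include <cstdio>

    // Standalone illustration, independent of ART/VIXL.
    int main() {
      const int32_t min_imm8 = -128;
      const int32_t max_imm8 = 127;
      // offset = SignExtend(imm8:'0'), i.e. imm8 * 2.
      std::printf("T1 B<cond> PC-relative offset range: [%d, %d] bytes\n",
                  min_imm8 * 2, max_imm8 * 2);  // prints [-256, 254]
      return 0;
    }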