ARM: VIXL32: Improve codegen on CBZ/CBNZ
This patch improves codegen on CBZ/CBNZ.
CompareAndBranchIfZero/CompareAndBranchIfNonZero are introduced.
These two functions generate either CMP+Bcc or Cbz/Cbnz.
CMP+Bcc is generated by default.
If the caller hints that the target is near (is_far_target = false) and both
rn and the label can be encoded in Cbz/Cbnz, then Cbz/Cbnz is generated instead.
Prefer these two interfaces over calling vixl32::MacroAssembler::Cbz/Cbnz directly.
In T32, Cbz/Cbnz instructions have the following limitations:
- Far targets, which are over 126 bytes away, are not supported.
- Only low registers can be encoded.
- Backward branches are not supported.
Test: ART_USE_VIXL_ARM_BACKEND=true m test-art-host
Test: ART_USE_VIXL_ARM_BACKEND=true m test-art-target
Change-Id: I5d2ada19ea4f83dab78baf0cf78c72e99e58d946
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index 110430f..7a1ec9f 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -303,7 +303,7 @@
vixl32::Register in_reg_hi = HighRegisterFrom(in);
vixl32::Label end;
__ Clz(out, in_reg_hi);
- __ Cbnz(in_reg_hi, &end);
+ __ CompareAndBranchIfNonZero(in_reg_hi, &end, /* far_target */ false);
__ Clz(out, in_reg_lo);
__ Add(out, out, 32);
__ Bind(&end);
@@ -345,7 +345,7 @@
vixl32::Label end;
__ Rbit(out, in_reg_lo);
__ Clz(out, out);
- __ Cbnz(in_reg_lo, &end);
+ __ CompareAndBranchIfNonZero(in_reg_lo, &end, /* far_target */ false);
__ Rbit(out, in_reg_hi);
__ Clz(out, out);
__ Add(out, out, 32);
@@ -1158,7 +1158,7 @@
if (can_slow_path) {
slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
codegen_->AddSlowPath(slow_path);
- __ Cbz(arg, slow_path->GetEntryLabel());
+ __ CompareAndBranchIfZero(arg, slow_path->GetEntryLabel());
}
// Reference equality check, return 0 if same reference.
@@ -1191,7 +1191,9 @@
}
// Shorter string is empty?
- __ Cbz(temp0, &end);
+ // Note that mirror::kUseStringCompression==true introduces lots of instructions,
+ // which makes &end label far away from this branch and makes it not 'CBZ-encodable'.
+ __ CompareAndBranchIfZero(temp0, &end, mirror::kUseStringCompression);
if (mirror::kUseStringCompression) {
// Check if both strings using same compression style to use this comparison loop.
@@ -1414,7 +1416,7 @@
StringEqualsOptimizations optimizations(invoke);
if (!optimizations.GetArgumentNotNull()) {
// Check if input is null, return false if it is.
- __ Cbz(arg, &return_false);
+ __ CompareAndBranchIfZero(arg, &return_false, /* far_target */ false);
}
// Reference equality check, return true if same reference.
@@ -1442,7 +1444,7 @@
// Return true if both strings are empty. Even with string compression `count == 0` means empty.
static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
"Expecting 0=compressed, 1=uncompressed");
- __ Cbz(temp, &return_true);
+ __ CompareAndBranchIfZero(temp, &return_true, /* far_target */ false);
// Assertions that must hold in order to compare strings 4 bytes at a time.
DCHECK_ALIGNED(value_offset, 4);
@@ -1718,7 +1720,7 @@
} else if (length_is_input_length) {
// The only way the copy can succeed is if pos is zero.
vixl32::Register pos_reg = RegisterFrom(pos);
- __ Cbnz(pos_reg, slow_path->GetEntryLabel());
+ __ CompareAndBranchIfNonZero(pos_reg, slow_path->GetEntryLabel());
} else {
// Check that pos >= 0.
vixl32::Register pos_reg = RegisterFrom(pos);
@@ -1815,12 +1817,12 @@
if (!optimizations.GetSourceIsNotNull()) {
// Bail out if the source is null.
- __ Cbz(src, intrinsic_slow_path->GetEntryLabel());
+ __ CompareAndBranchIfZero(src, intrinsic_slow_path->GetEntryLabel());
}
if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
// Bail out if the destination is null.
- __ Cbz(dest, intrinsic_slow_path->GetEntryLabel());
+ __ CompareAndBranchIfZero(dest, intrinsic_slow_path->GetEntryLabel());
}
// If the length is negative, bail out.
@@ -1865,13 +1867,13 @@
// /* HeapReference<Class> */ temp1 = temp1->component_type_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
- __ Cbz(temp1, intrinsic_slow_path->GetEntryLabel());
+ __ CompareAndBranchIfZero(temp1, intrinsic_slow_path->GetEntryLabel());
// If heap poisoning is enabled, `temp1` has been unpoisoned
// by the the previous call to GenerateFieldLoadWithBakerReadBarrier.
// /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_);
__ Ldrh(temp1, MemOperand(temp1, primitive_offset));
static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
- __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
+ __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
}
// /* HeapReference<Class> */ temp1 = dest->klass_
@@ -1889,13 +1891,13 @@
// /* HeapReference<Class> */ temp2 = temp1->component_type_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
invoke, temp2_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
- __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
+ __ CompareAndBranchIfZero(temp2, intrinsic_slow_path->GetEntryLabel());
// If heap poisoning is enabled, `temp2` has been unpoisoned
// by the the previous call to GenerateFieldLoadWithBakerReadBarrier.
// /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
__ Ldrh(temp2, MemOperand(temp2, primitive_offset));
static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
- __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
+ __ CompareAndBranchIfNonZero(temp2, intrinsic_slow_path->GetEntryLabel());
}
// For the same reason given earlier, `temp1` is not trashed by the
@@ -1918,7 +1920,7 @@
// comparison with null below, and this reference is not
// kept afterwards.
__ Ldr(temp1, MemOperand(temp1, super_offset));
- __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
+ __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
__ Bind(&do_copy);
} else {
__ B(ne, intrinsic_slow_path->GetEntryLabel());
@@ -1944,24 +1946,24 @@
// Bail out if the destination is not a non primitive array.
// /* HeapReference<Class> */ temp3 = temp1->component_type_
__ Ldr(temp3, MemOperand(temp1, component_offset));
- __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
+ __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
assembler->MaybeUnpoisonHeapReference(temp3);
// /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
__ Ldrh(temp3, MemOperand(temp3, primitive_offset));
static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
- __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
+ __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
}
if (!optimizations.GetSourceIsNonPrimitiveArray()) {
// Bail out if the source is not a non primitive array.
// /* HeapReference<Class> */ temp3 = temp2->component_type_
__ Ldr(temp3, MemOperand(temp2, component_offset));
- __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
+ __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
assembler->MaybeUnpoisonHeapReference(temp3);
// /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
__ Ldrh(temp3, MemOperand(temp3, primitive_offset));
static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
- __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
+ __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
}
__ Cmp(temp1, temp2);
@@ -1978,7 +1980,7 @@
// /* HeapReference<Class> */ temp1 = temp1->super_class_
__ Ldr(temp1, MemOperand(temp1, super_offset));
// No need to unpoison the result, we're comparing against null.
- __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
+ __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
__ Bind(&do_copy);
} else {
__ B(ne, intrinsic_slow_path->GetEntryLabel());
@@ -1994,7 +1996,7 @@
// /* HeapReference<Class> */ temp3 = temp1->component_type_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
invoke, temp3_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
- __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
+ __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
// If heap poisoning is enabled, `temp3` has been unpoisoned
// by the the previous call to GenerateFieldLoadWithBakerReadBarrier.
} else {
@@ -2003,13 +2005,13 @@
assembler->MaybeUnpoisonHeapReference(temp1);
// /* HeapReference<Class> */ temp3 = temp1->component_type_
__ Ldr(temp3, MemOperand(temp1, component_offset));
- __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
+ __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
assembler->MaybeUnpoisonHeapReference(temp3);
}
// /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
__ Ldrh(temp3, MemOperand(temp3, primitive_offset));
static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
- __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
+ __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
}
int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);