ARM: Avoid branches to branches

Generally speaking, this optimization applies to all code
generation visitors ending with a call to Bind(), which
includes intrinsics with kNoCall CallKind. However, no
changes are done for slow paths (which frequently end with
a branch to an exit label that is bound at the end of a
visitor).

Test: m test-art-target
Change-Id: Ie1a0c8c54ef76b01e7f0b23962c56c29ca8984a9
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 28095c4..ab83898 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -228,9 +228,11 @@
   locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
 }
 
-static void GenNumberOfLeadingZeros(LocationSummary* locations,
+static void GenNumberOfLeadingZeros(HInvoke* invoke,
                                     Primitive::Type type,
-                                    ArmAssembler* assembler) {
+                                    CodeGeneratorARM* codegen) {
+  ArmAssembler* assembler = codegen->GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
   Location in = locations->InAt(0);
   Register out = locations->Out().AsRegister<Register>();
 
@@ -240,11 +242,14 @@
     Register in_reg_lo = in.AsRegisterPairLow<Register>();
     Register in_reg_hi = in.AsRegisterPairHigh<Register>();
     Label end;
+    Label* final_label = codegen->GetFinalLabel(invoke, &end);
     __ clz(out, in_reg_hi);
-    __ CompareAndBranchIfNonZero(in_reg_hi, &end);
+    __ CompareAndBranchIfNonZero(in_reg_hi, final_label);
     __ clz(out, in_reg_lo);
     __ AddConstant(out, 32);
-    __ Bind(&end);
+    if (end.IsLinked()) {
+      __ Bind(&end);
+    }
   } else {
     __ clz(out, in.AsRegister<Register>());
   }
@@ -255,7 +260,7 @@
 }
 
 void IntrinsicCodeGeneratorARM::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
-  GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+  GenNumberOfLeadingZeros(invoke, Primitive::kPrimInt, codegen_);
 }
 
 void IntrinsicLocationsBuilderARM::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
@@ -267,27 +272,32 @@
 }
 
 void IntrinsicCodeGeneratorARM::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
-  GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+  GenNumberOfLeadingZeros(invoke, Primitive::kPrimLong, codegen_);
 }
 
-static void GenNumberOfTrailingZeros(LocationSummary* locations,
+static void GenNumberOfTrailingZeros(HInvoke* invoke,
                                      Primitive::Type type,
-                                     ArmAssembler* assembler) {
+                                     CodeGeneratorARM* codegen) {
   DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong));
 
+  ArmAssembler* assembler = codegen->GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
   Register out = locations->Out().AsRegister<Register>();
 
   if (type == Primitive::kPrimLong) {
     Register in_reg_lo = locations->InAt(0).AsRegisterPairLow<Register>();
     Register in_reg_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
     Label end;
+    Label* final_label = codegen->GetFinalLabel(invoke, &end);
     __ rbit(out, in_reg_lo);
     __ clz(out, out);
-    __ CompareAndBranchIfNonZero(in_reg_lo, &end);
+    __ CompareAndBranchIfNonZero(in_reg_lo, final_label);
     __ rbit(out, in_reg_hi);
     __ clz(out, out);
     __ AddConstant(out, 32);
-    __ Bind(&end);
+    if (end.IsLinked()) {
+      __ Bind(&end);
+    }
   } else {
     Register in = locations->InAt(0).AsRegister<Register>();
     __ rbit(out, in);
@@ -304,7 +314,7 @@
 }
 
 void IntrinsicCodeGeneratorARM::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
-  GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+  GenNumberOfTrailingZeros(invoke, Primitive::kPrimInt, codegen_);
 }
 
 void IntrinsicLocationsBuilderARM::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
@@ -316,7 +326,7 @@
 }
 
 void IntrinsicCodeGeneratorARM::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
-  GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+  GenNumberOfTrailingZeros(invoke, Primitive::kPrimLong, codegen_);
 }
 
 static void MathAbsFP(LocationSummary* locations, bool is64bit, ArmAssembler* assembler) {
@@ -1313,6 +1323,7 @@
   Label end;
   Label return_true;
   Label return_false;
+  Label* final_label = codegen_->GetFinalLabel(invoke, &end);
 
   // Get offsets of count, value, and class fields within a string object.
   const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
@@ -1386,12 +1397,15 @@
   // If loop does not result in returning false, we return true.
   __ Bind(&return_true);
   __ LoadImmediate(out, 1);
-  __ b(&end);
+  __ b(final_label);
 
   // Return false and exit the function.
   __ Bind(&return_false);
   __ LoadImmediate(out, 0);
-  __ Bind(&end);
+
+  if (end.IsLinked()) {
+    __ Bind(&end);
+  }
 }
 
 static void GenerateVisitStringIndexOf(HInvoke* invoke,
@@ -2474,13 +2488,14 @@
   Register dst_ptr = locations->GetTemp(2).AsRegister<Register>();
 
   Label done, compressed_string_loop;
+  Label* final_label = codegen_->GetFinalLabel(invoke, &done);
   // dst to be copied.
   __ add(dst_ptr, dstObj, ShifterOperand(data_offset));
   __ add(dst_ptr, dst_ptr, ShifterOperand(dstBegin, LSL, 1));
 
   __ subs(num_chr, srcEnd, ShifterOperand(srcBegin));
   // Early out for valid zero-length retrievals.
-  __ b(&done, EQ);
+  __ b(final_label, EQ);
 
   // src range to copy.
   __ add(src_ptr, srcObj, ShifterOperand(value_offset));
@@ -2517,7 +2532,7 @@
   __ b(&loop, GE);
 
   __ adds(num_chr, num_chr, ShifterOperand(4));
-  __ b(&done, EQ);
+  __ b(final_label, EQ);
 
   // Main loop for < 4 character case and remainder handling. Loads and stores one
   // 16-bit Java character at a time.
@@ -2528,7 +2543,7 @@
   __ b(&remainder, GT);
 
   if (mirror::kUseStringCompression) {
-    __ b(&done);
+    __ b(final_label);
 
     const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
     DCHECK_EQ(c_char_size, 1u);
@@ -2542,7 +2557,9 @@
     __ b(&compressed_string_loop, GT);
   }
 
-  __ Bind(&done);
+  if (done.IsLinked()) {
+    __ Bind(&done);
+  }
 }
 
 void IntrinsicLocationsBuilderARM::VisitFloatIsInfinite(HInvoke* invoke) {