Merge "ARM: Improve String.equals() intrinsic for const strings."
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 69aaee2..1ed1b75 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -566,14 +566,6 @@
   MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
 }
 
-static void CreateIntToInt(ArenaAllocator* arena, HInvoke* invoke) {
-  LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
-                                                           kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-}
-
 static void GenAbsInteger(LocationSummary* locations,
                           bool is64bit,
                           MacroAssembler* masm) {
@@ -588,7 +580,7 @@
 }
 
 void IntrinsicLocationsBuilderARM64::VisitMathAbsInt(HInvoke* invoke) {
-  CreateIntToInt(arena_, invoke);
+  CreateIntToIntLocations(arena_, invoke);
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMathAbsInt(HInvoke* invoke) {
@@ -596,7 +588,7 @@
 }
 
 void IntrinsicLocationsBuilderARM64::VisitMathAbsLong(HInvoke* invoke) {
-  CreateIntToInt(arena_, invoke);
+  CreateIntToIntLocations(arena_, invoke);
 }
 
 void IntrinsicCodeGeneratorARM64::VisitMathAbsLong(HInvoke* invoke) {
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index 7c43f2e..76c1410 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -331,6 +331,14 @@
   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
 }
 
+static void CreateIntToIntLocationsWithOverlap(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+}
+
 static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
   LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
@@ -2915,6 +2923,137 @@
   GenBitCount(invoke, Primitive::kPrimLong, GetAssembler());
 }
 
+static void GenHighestOneBit(HInvoke* invoke,
+                             Primitive::Type type,
+                             CodeGeneratorARMVIXL* codegen) {
+  DCHECK(Primitive::IsIntOrLongType(type));
+
+  ArmVIXLAssembler* assembler = codegen->GetAssembler();
+  UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
+  const vixl32::Register temp = temps.Acquire();
+
+  if (type == Primitive::kPrimLong) {
+    LocationSummary* locations = invoke->GetLocations();
+    Location in = locations->InAt(0);
+    Location out = locations->Out();
+
+    vixl32::Register in_reg_lo = LowRegisterFrom(in);
+    vixl32::Register in_reg_hi = HighRegisterFrom(in);
+    vixl32::Register out_reg_lo = LowRegisterFrom(out);
+    vixl32::Register out_reg_hi = HighRegisterFrom(out);
+
+    __ Mov(temp, 0x80000000);  // Modified immediate.
+    __ Clz(out_reg_lo, in_reg_lo);
+    __ Clz(out_reg_hi, in_reg_hi);
+    __ Lsr(out_reg_lo, temp, out_reg_lo);
+    __ Lsrs(out_reg_hi, temp, out_reg_hi);
+
+    // Discard result for lowest 32 bits if highest 32 bits are not zero.
+    // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
+    // we check that the output is in a low register, so that a 16-bit MOV
+    // encoding can be used. If output is in a high register, then we generate
+    // 4 more bytes of code to avoid a branch.
+    Operand mov_src(0);
+    if (!out_reg_lo.IsLow()) {
+      __ Mov(LeaveFlags, temp, 0);
+      mov_src = Operand(temp);
+    }
+    ExactAssemblyScope it_scope(codegen->GetVIXLAssembler(),
+                                  2 * vixl32::k16BitT32InstructionSizeInBytes,
+                                  CodeBufferCheckScope::kExactSize);
+    __ it(ne);
+    __ mov(ne, out_reg_lo, mov_src);
+  } else {
+    vixl32::Register out = OutputRegister(invoke);
+    vixl32::Register in = InputRegisterAt(invoke, 0);
+
+    __ Mov(temp, 0x80000000);  // Modified immediate.
+    __ Clz(out, in);
+    __ Lsr(out, temp, out);
+  }
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitIntegerHighestOneBit(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitIntegerHighestOneBit(HInvoke* invoke) {
+  GenHighestOneBit(invoke, Primitive::kPrimInt, codegen_);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitLongHighestOneBit(HInvoke* invoke) {
+  CreateIntToIntLocationsWithOverlap(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitLongHighestOneBit(HInvoke* invoke) {
+  GenHighestOneBit(invoke, Primitive::kPrimLong, codegen_);
+}
+
+static void GenLowestOneBit(HInvoke* invoke,
+                            Primitive::Type type,
+                            CodeGeneratorARMVIXL* codegen) {
+  DCHECK(Primitive::IsIntOrLongType(type));
+
+  ArmVIXLAssembler* assembler = codegen->GetAssembler();
+  UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
+  const vixl32::Register temp = temps.Acquire();
+
+  if (type == Primitive::kPrimLong) {
+    LocationSummary* locations = invoke->GetLocations();
+    Location in = locations->InAt(0);
+    Location out = locations->Out();
+
+    vixl32::Register in_reg_lo = LowRegisterFrom(in);
+    vixl32::Register in_reg_hi = HighRegisterFrom(in);
+    vixl32::Register out_reg_lo = LowRegisterFrom(out);
+    vixl32::Register out_reg_hi = HighRegisterFrom(out);
+
+    __ Rsb(out_reg_hi, in_reg_hi, 0);
+    __ Rsb(out_reg_lo, in_reg_lo, 0);
+    __ And(out_reg_hi, out_reg_hi, in_reg_hi);
+    // The result of this operation is 0 iff in_reg_lo is 0
+    __ Ands(out_reg_lo, out_reg_lo, in_reg_lo);
+
+    // Discard result for highest 32 bits if lowest 32 bits are not zero.
+    // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
+    // we check that the output is in a low register, so that a 16-bit MOV
+    // encoding can be used. If output is in a high register, then we generate
+    // 4 more bytes of code to avoid a branch.
+    Operand mov_src(0);
+    if (!out_reg_lo.IsLow()) {
+      __ Mov(LeaveFlags, temp, 0);
+      mov_src = Operand(temp);
+    }
+    ExactAssemblyScope it_scope(codegen->GetVIXLAssembler(),
+                                  2 * vixl32::k16BitT32InstructionSizeInBytes,
+                                  CodeBufferCheckScope::kExactSize);
+    __ it(ne);
+    __ mov(ne, out_reg_hi, mov_src);
+  } else {
+    vixl32::Register out = OutputRegister(invoke);
+    vixl32::Register in = InputRegisterAt(invoke, 0);
+
+    __ Rsb(temp, in, 0);
+    __ And(out, temp, in);
+  }
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitIntegerLowestOneBit(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitIntegerLowestOneBit(HInvoke* invoke) {
+  GenLowestOneBit(invoke, Primitive::kPrimInt, codegen_);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitLongLowestOneBit(HInvoke* invoke) {
+  CreateIntToIntLocationsWithOverlap(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitLongLowestOneBit(HInvoke* invoke) {
+  GenLowestOneBit(invoke, Primitive::kPrimLong, codegen_);
+}
+
 void IntrinsicLocationsBuilderARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                             LocationSummary::kNoCall,
@@ -3212,10 +3351,6 @@
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong)     // High register pressure.
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar)
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, ReferenceGetReferent)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerHighestOneBit)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, LongHighestOneBit)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerLowestOneBit)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, LongLowestOneBit)
 
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOf);
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOfAfter);
diff --git a/tools/jfuzz/README.md b/tools/jfuzz/README.md
index c87e714..10d175b 100644
--- a/tools/jfuzz/README.md
+++ b/tools/jfuzz/README.md
@@ -48,6 +48,7 @@
                           [--report_script=SCRIPT]
                           [--jfuzz_arg=ARG]
                           [--true_divergence]
+                          [--use_dx]
 
 where
 
@@ -63,6 +64,7 @@
     --report_script   : path to script called for each divergence
     --jfuzz_arg       : argument for jfuzz
     --true_divergence : don't bisect timeout divergences
+    --use_dx          : use dx (rather than jack)
 
 How to start JFuzz nightly testing
 ==================================
@@ -83,12 +85,14 @@
                           [--num_tests=NUM_TESTS]
                           [--num_inputs=NUM_INPUTS]
                           [--device=DEVICE]
+                          [--use_dx]
 
 where
 
     --num_tests : number of tests to run (10000 by default)
     --num_inputs: number of JFuzz programs to generate
     --device    : target device serial number (passed to adb -s)
+    --use_dx    : use dx (rather than jack)
 
 Background
 ==========
diff --git a/tools/jfuzz/run_dex_fuzz_test.py b/tools/jfuzz/run_dex_fuzz_test.py
index c1d2e4f..ca0aec0 100755
--- a/tools/jfuzz/run_dex_fuzz_test.py
+++ b/tools/jfuzz/run_dex_fuzz_test.py
@@ -19,6 +19,7 @@
 import shutil
 import sys
 
+from glob import glob
 from subprocess import call
 from tempfile import mkdtemp
 
@@ -40,13 +41,14 @@
 class DexFuzzTester(object):
   """Tester that feeds JFuzz programs into DexFuzz testing."""
 
-  def  __init__(self, num_tests, num_inputs, device):
+  def  __init__(self, num_tests, num_inputs, device, use_dx):
     """Constructor for the tester.
 
     Args:
       num_tests: int, number of tests to run
       num_inputs: int, number of JFuzz programs to generate
       device: string, target device serial number (or None)
+      use_dx: boolean, if True use dx rather than jack
     """
     self._num_tests = num_tests
     self._num_inputs = num_inputs
@@ -56,6 +58,7 @@
     self._dexfuzz_dir = None
     self._inputs_dir = None
     self._dexfuzz_env = None
+    self._use_dx = use_dx
 
   def __enter__(self):
     """On entry, enters new temp directory after saving current directory.
@@ -100,6 +103,35 @@
     self.GenerateJFuzzPrograms()
     self.RunDexFuzz()
 
+  def CompileOnHost(self):
+    """Compiles Test.java into classes.dex using either javac/dx or jack.
+
+    Raises:
+      FatalError: error when compilation fails
+    """
+    if self._use_dx:
+      if RunCommand(['javac', 'Test.java'],
+                    out=None, err='jerr.txt', timeout=30) != RetCode.SUCCESS:
+        print('Unexpected error while running javac')
+        raise FatalError('Unexpected error while running javac')
+      cfiles = glob('*.class')
+      if RunCommand(['dx', '--dex', '--output=classes.dex'] + cfiles,
+                    out=None, err='dxerr.txt', timeout=30) != RetCode.SUCCESS:
+        print('Unexpected error while running dx')
+        raise FatalError('Unexpected error while running dx')
+      # Cleanup on success (nothing to see).
+      for cfile in cfiles:
+        os.unlink(cfile)
+      os.unlink('jerr.txt')
+      os.unlink('dxerr.txt')
+    else:
+      jack_args = ['-cp', GetJackClassPath(), '--output-dex', '.', 'Test.java']
+      if RunCommand(['jack'] + jack_args, out=None, err='jackerr.txt',
+                    timeout=30) != RetCode.SUCCESS:
+        print('Unexpected error while running Jack')
+        raise FatalError('Unexpected error while running Jack')
+      # Cleanup on success (nothing to see).
+      os.unlink('jackerr.txt')
 
   def GenerateJFuzzPrograms(self):
     """Generates JFuzz programs.
@@ -109,17 +141,12 @@
     """
     os.chdir(self._inputs_dir)
     for i in range(1, self._num_inputs + 1):
-      jack_args = ['-cp', GetJackClassPath(), '--output-dex', '.', 'Test.java']
       if RunCommand(['jfuzz'], out='Test.java', err=None) != RetCode.SUCCESS:
         print('Unexpected error while running JFuzz')
         raise FatalError('Unexpected error while running JFuzz')
-      if RunCommand(['jack'] + jack_args, out=None, err='jackerr.txt',
-                    timeout=30) != RetCode.SUCCESS:
-        print('Unexpected error while running Jack')
-        raise FatalError('Unexpected error while running Jack')
+      self.CompileOnHost()
       shutil.move('Test.java', '../Test' + str(i) + '.java')
       shutil.move('classes.dex', 'classes' + str(i) + '.dex')
-    os.unlink('jackerr.txt')
 
   def RunDexFuzz(self):
     """Starts the DexFuzz testing."""
@@ -152,10 +179,12 @@
                       type=int, help='number of tests to run')
   parser.add_argument('--num_inputs', default=10,
                       type=int, help='number of JFuzz program to generate')
+  parser.add_argument('--use_dx', default=False, action='store_true',
+                      help='use dx (rather than jack)')
   parser.add_argument('--device', help='target device serial number')
   args = parser.parse_args()
   # Run the DexFuzz tester.
-  with DexFuzzTester(args.num_tests, args.num_inputs, args.device) as fuzzer:
+  with DexFuzzTester(args.num_tests, args.num_inputs, args.device, args.use_dx) as fuzzer:
     fuzzer.Run()
 
 if __name__ == '__main__':
diff --git a/tools/jfuzz/run_jfuzz_test.py b/tools/jfuzz/run_jfuzz_test.py
index 58bc737..dac1c79 100755
--- a/tools/jfuzz/run_jfuzz_test.py
+++ b/tools/jfuzz/run_jfuzz_test.py
@@ -606,7 +606,7 @@
   parser.add_argument('--true_divergence', default=False, action='store_true',
                       help='don\'t bisect timeout divergences')
   parser.add_argument('--use_dx', default=False, action='store_true',
-                      help='use old-style dx (rather than jack)')
+                      help='use dx (rather than jack)')
   args = parser.parse_args()
   if args.mode1 == args.mode2:
     raise FatalError('Identical execution modes given')