AArch64: Add memcmp16() for Arm64; ensure xSELF not clobbered
This patch replaces the Arm64 memcmp() assembly with memcmp16(). Note
that this implementation of memcmp16() is based on bionic's memcmp(),
modified to respect the new String.compareTo() behavior introduced by
a recent specification change.

A test for memcmp16() has been added, and the string_compareto test in
stub_test now exercises __memcmp16 through the assembly stubs.

Add artIsAssignableFromCode to the list of native downcalls so that
xSELF (x18) is stored and reloaded across the call. Remove
CheckSuspendFromCode, as it is unused.
Signed-off-by: Serban Constantinescu <serban.constantinescu@arm.com>
Change-Id: Ie0b5425ecfb62906d29a5d02e84c7e07ffb34a11
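
For reference, the contract memcmp16() must satisfy (mirrored by the
generic comparator added in runtime/arch/memcmp16_test.cc) is: compare
count 16-bit code units and return the signed difference of the first
mismatching pair. A byte-wise memcmp() cannot be substituted on
little-endian AArch64, because it inspects the low byte of each code
unit first. A minimal C++ sketch of the intended behavior (names here
are illustrative, not ART API):

    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    // Reference model of memcmp16(): String.compareTo() ordering.
    int32_t RefMemCmp16(const uint16_t* s0, const uint16_t* s1, size_t count) {
      for (size_t i = 0; i < count; ++i) {
        if (s0[i] != s1[i]) {
          return static_cast<int32_t>(s0[i]) - static_cast<int32_t>(s1[i]);
        }
      }
      return 0;
    }

    int main() {
      const uint16_t a[] = { 0x0100 };  // U+0100, little-endian bytes 00 01.
      const uint16_t b[] = { 0x00FF };  // U+00FF, little-endian bytes FF 00.
      assert(RefMemCmp16(a, b, 1) > 0);     // Half-word order: a > b.
      assert(memcmp(a, b, sizeof(a)) < 0);  // Byte order disagrees on LE.
      return 0;
    }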
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 45bdbcb..28a91c1 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -66,6 +66,7 @@
RUNTIME_GTEST_COMMON_SRC_FILES := \
runtime/arch/arch_test.cc \
+ runtime/arch/memcmp16_test.cc \
runtime/arch/stub_test.cc \
runtime/barrier_test.cc \
runtime/base/bit_field_test.cc \
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 0b7272c..254faac 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -180,7 +180,7 @@
EXPECT_EQ(80U, sizeof(OatHeader));
EXPECT_EQ(8U, sizeof(OatMethodOffsets));
EXPECT_EQ(24U, sizeof(OatQuickMethodHeader));
- EXPECT_EQ(78 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints));
+ EXPECT_EQ(77 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints));
}
TEST_F(OatTest, OatHeaderIsValid) {
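
The drop from 78 to 77 reflects the removal of pCheckSuspend:
QuickEntryPoints is a plain struct of function pointers, so its size is
the number of entry points times the pointer size. A toy illustration
(hypothetical struct, not the real QuickEntryPoints):

    // Each entry contributes exactly one pointer to sizeof(), which is
    // why deleting pCheckSuspend lowers the expected multiplier by one.
    struct ToyEntryPoints {
      void (*pTestSuspend)();
      void (*pDeliverException)(void*);
    };

    static_assert(sizeof(ToyEntryPoints) == 2 * sizeof(void*),
                  "each entry is one pointer");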
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 3ce053c..7f5cf0c 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -223,6 +223,7 @@
arch/arm64/context_arm64.cc \
arch/arm64/entrypoints_init_arm64.cc \
arch/arm64/jni_entrypoints_arm64.S \
+ arch/arm64/memcmp16_arm64.S \
arch/arm64/portable_entrypoints_arm64.S \
arch/arm64/quick_entrypoints_arm64.S \
arch/arm64/thread_arm64.cc \
diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc
index ebceb63..3fa09cb 100644
--- a/runtime/arch/arm/entrypoints_init_arm.cc
+++ b/runtime/arch/arm/entrypoints_init_arm.cc
@@ -46,9 +46,6 @@
extern "C" void* art_quick_initialize_type_and_verify_access(uint32_t, void*);
extern "C" void* art_quick_resolve_string(void*, uint32_t);
-// Exception entrypoints.
-extern "C" void* GetAndClearException(Thread*);
-
// Field entrypoints.
extern "C" int art_quick_set32_instance(uint32_t, void*, int32_t);
extern "C" int art_quick_set32_static(uint32_t, int32_t);
@@ -116,7 +113,6 @@
extern "C" void art_quick_invoke_virtual_trampoline_with_access_check(uint32_t, void*);
// Thread entrypoints.
-extern void CheckSuspendFromCode(Thread* thread);
extern "C" void art_quick_test_suspend();
// Throw entrypoints.
@@ -226,7 +222,6 @@
qpoints->pInvokeVirtualTrampolineWithAccessCheck = art_quick_invoke_virtual_trampoline_with_access_check;
// Thread
- qpoints->pCheckSuspend = CheckSuspendFromCode;
qpoints->pTestSuspend = art_quick_test_suspend;
// Throws
diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc
index cbb2c27..c19b79e 100644
--- a/runtime/arch/arm64/entrypoints_init_arm64.cc
+++ b/runtime/arch/arm64/entrypoints_init_arm64.cc
@@ -35,7 +35,7 @@
extern "C" void art_portable_to_interpreter_bridge(mirror::ArtMethod*);
// Cast entrypoints.
-extern "C" uint32_t artIsAssignableFromCode(const mirror::Class* klass,
+extern "C" uint32_t art_quick_assignable_from_code(const mirror::Class* klass,
const mirror::Class* ref_class);
extern "C" void art_quick_check_cast(void*, void*);
@@ -45,9 +45,6 @@
extern "C" void* art_quick_initialize_type_and_verify_access(uint32_t, void*);
extern "C" void* art_quick_resolve_string(void*, uint32_t);
-// Exception entrypoints.
-extern "C" void* GetAndClearException(Thread*);
-
// Field entrypoints.
extern "C" int art_quick_set32_instance(uint32_t, void*, int32_t);
extern "C" int art_quick_set32_static(uint32_t, int32_t);
@@ -96,7 +93,6 @@
extern "C" void art_quick_invoke_virtual_trampoline_with_access_check(uint32_t, void*);
// Thread entrypoints.
-extern void CheckSuspendFromCode(Thread* thread);
extern "C" void art_quick_test_suspend();
// Throw entrypoints.
@@ -129,7 +125,7 @@
ResetQuickAllocEntryPoints(qpoints);
// Cast
- qpoints->pInstanceofNonTrivial = artIsAssignableFromCode;
+ qpoints->pInstanceofNonTrivial = art_quick_assignable_from_code;
qpoints->pCheckCast = art_quick_check_cast;
// DexCache
@@ -209,7 +205,6 @@
qpoints->pInvokeVirtualTrampolineWithAccessCheck = art_quick_invoke_virtual_trampoline_with_access_check;
// Thread
- qpoints->pCheckSuspend = CheckSuspendFromCode;
qpoints->pTestSuspend = art_quick_test_suspend;
// Throws
diff --git a/runtime/arch/arm64/memcmp.S b/runtime/arch/arm64/memcmp.S
deleted file mode 100644
index d73fb67..0000000
--- a/runtime/arch/arm64/memcmp.S
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/* Assumptions:
- *
- * ARMv8-a, AArch64
- */
-
-#include <private/bionic_asm.h>
-
-/* Parameters and result. */
-#define src1 x0
-#define src2 x1
-#define limit x2
-#define result x0
-
-/* Internal variables. */
-#define data1 x3
-#define data1w w3
-#define data2 x4
-#define data2w w4
-#define has_nul x5
-#define diff x6
-#define endloop x7
-#define tmp1 x8
-#define tmp2 x9
-#define tmp3 x10
-#define pos x11
-#define limit_wd x12
-#define mask x13
-
-ENTRY(memcmp)
- cbz limit, .Lret0
- eor tmp1, src1, src2
- tst tmp1, #7
- b.ne .Lmisaligned8
- ands tmp1, src1, #7
- b.ne .Lmutual_align
- add limit_wd, limit, #7
- lsr limit_wd, limit_wd, #3
- /* Start of performance-critical section -- one 64B cache line. */
-.Lloop_aligned:
- ldr data1, [src1], #8
- ldr data2, [src2], #8
-.Lstart_realigned:
- subs limit_wd, limit_wd, #1
- eor diff, data1, data2 /* Non-zero if differences found. */
- csinv endloop, diff, xzr, ne /* Last Dword or differences. */
- cbz endloop, .Lloop_aligned
- /* End of performance-critical section -- one 64B cache line. */
-
- /* Not reached the limit, must have found a diff. */
- cbnz limit_wd, .Lnot_limit
-
- /* Limit % 8 == 0 => all bytes significant. */
- ands limit, limit, #7
- b.eq .Lnot_limit
-
- lsl limit, limit, #3 /* Bits -> bytes. */
- mov mask, #~0
-#ifdef __AARCH64EB__
- lsr mask, mask, limit
-#else
- lsl mask, mask, limit
-#endif
- bic data1, data1, mask
- bic data2, data2, mask
-
- orr diff, diff, mask
-.Lnot_limit:
-
-#ifndef __AARCH64EB__
- rev diff, diff
- rev data1, data1
- rev data2, data2
-#endif
- /* The MS-non-zero bit of DIFF marks either the first bit
- that is different, or the end of the significant data.
- Shifting left now will bring the critical information into the
- top bits. */
- clz pos, diff
- lsl data1, data1, pos
- lsl data2, data2, pos
- /* But we need to zero-extend (char is unsigned) the value and then
- perform a signed 32-bit subtraction. */
- lsr data1, data1, #56
- sub result, data1, data2, lsr #56
- ret
-
-.Lmutual_align:
- /* Sources are mutually aligned, but are not currently at an
- alignment boundary. Round down the addresses and then mask off
- the bytes that precede the start point. */
- bic src1, src1, #7
- bic src2, src2, #7
- add limit, limit, tmp1 /* Adjust the limit for the extra. */
- lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */
- ldr data1, [src1], #8
- neg tmp1, tmp1 /* Bits to alignment -64. */
- ldr data2, [src2], #8
- mov tmp2, #~0
-#ifdef __AARCH64EB__
- /* Big-endian. Early bytes are at MSB. */
- lsl tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
-#else
- /* Little-endian. Early bytes are at LSB. */
- lsr tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
-#endif
- add limit_wd, limit, #7
- orr data1, data1, tmp2
- orr data2, data2, tmp2
- lsr limit_wd, limit_wd, #3
- b .Lstart_realigned
-
-.Lret0:
- mov result, #0
- ret
-
- .p2align 6
-.Lmisaligned8:
- sub limit, limit, #1
-1:
- /* Perhaps we can do better than this. */
- ldrb data1w, [src1], #1
- ldrb data2w, [src2], #1
- subs limit, limit, #1
- ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */
- b.eq 1b
- sub result, data1, data2
- ret
-END(memcmp)
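
The aligned loop's core trick, which the new file below keeps, is to
XOR each pair of 64-bit words: any set bit in the result marks a
difference, so one compare-and-branch covers eight bytes at a time. In
C, roughly (a sketch of the idea, not the exact control flow):

    #include <cstdint>
    #include <cstring>

    // Returns true if the 8-byte blocks at p1/p2 differ.
    bool Block8Differs(const void* p1, const void* p2) {
      uint64_t w1, w2;
      std::memcpy(&w1, p1, sizeof(w1));  // memcpy sidesteps alignment UB.
      std::memcpy(&w2, p2, sizeof(w2));
      return (w1 ^ w2) != 0;  // Mirrors "eor diff, data1, data2".
    }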
diff --git a/runtime/arch/arm64/memcmp16_arm64.S b/runtime/arch/arm64/memcmp16_arm64.S
new file mode 100644
index 0000000..582940a
--- /dev/null
+++ b/runtime/arch/arm64/memcmp16_arm64.S
@@ -0,0 +1,143 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64
+ */
+
+#ifndef ART_RUNTIME_ARCH_ARM64_MEMCMP16_ARM64_S_
+#define ART_RUNTIME_ARCH_ARM64_MEMCMP16_ARM64_S_
+
+#include "asm_support_arm64.S"
+
+/* Parameters and result. */
+#define src1 x0
+#define src2 x1
+#define limit x2
+#define result x0
+
+/* Internal variables. */
+#define data1 x3
+#define data1w w3
+#define data2 x4
+#define data2w w4
+#define has_nul x5
+#define diff x6
+#define endloop x7
+#define tmp1 x8
+#define tmp2 x9
+#define tmp3 x10
+#define limit_wd x12
+#define mask x13
+
+// WARNING: If you change this code to use x14 and x15, you must also change
+// art_quick_string_compareto, which relies on these temps being unused.
+
+ENTRY __memcmp16
+ cbz limit, .Lret0
+ lsl limit, limit, #1 /* Half-words to bytes. */
+ eor tmp1, src1, src2
+ tst tmp1, #7
+ b.ne .Lmisaligned8
+ ands tmp1, src1, #7
+ b.ne .Lmutual_align
+ add limit_wd, limit, #7
+ lsr limit_wd, limit_wd, #3
+ /* Start of performance-critical section -- one 64B cache line. */
+.Lloop_aligned:
+ ldr data1, [src1], #8
+ ldr data2, [src2], #8
+.Lstart_realigned:
+ subs limit_wd, limit_wd, #1
+ eor diff, data1, data2 /* Non-zero if differences found. */
+ csinv endloop, diff, xzr, ne /* Last Dword or differences. */
+ cbz endloop, .Lloop_aligned
+ /* End of performance-critical section -- one 64B cache line. */
+
+ /* Not reached the limit, must have found a diff. */
+ cbnz limit_wd, .Lnot_limit
+
+ /* Limit % 8 == 0 => all bytes significant. */
+ ands limit, limit, #7
+ b.eq .Lnot_limit
+
+ lsl limit, limit, #3 /* Bytes -> bits. */
+ mov mask, #~0
+ lsl mask, mask, limit
+ bic data1, data1, mask
+ bic data2, data2, mask
+
+.Lnot_limit:
+
+ // Byte-swap diff. An exact bit reversal is not needed; we only need to locate
+ // the first differing half-word.
+ rev diff, diff
+ // The most significant set bit of DIFF now marks the least significant
+ // differing byte of DATA1/DATA2.
+ clz diff, diff
+ // Clear the low four bits to round the shift amount down to a half-word
+ // boundary. Why does ARM64 not have BIC with immediate?!?!
+ bfi diff, xzr, #0, #4
+ // Create a 16b mask
+ mov mask, #0xFFFF
+ // Shift to the right half-word.
+ lsr data1, data1, diff
+ lsr data2, data2, diff
+ // Mask the lowest half-word.
+ and data1, data1, mask
+ and data2, data2, mask
+ // Compute difference.
+ sub result, data1, data2
+ ret
+
+.Lmutual_align:
+ /* Sources are mutually aligned, but are not currently at an
+ alignment boundary. Round down the addresses and then mask off
+ the bytes that precede the start point. */
+ bic src1, src1, #7
+ bic src2, src2, #7
+ add limit, limit, tmp1 /* Adjust the limit for the extra. */
+ lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */
+ ldr data1, [src1], #8
+ neg tmp1, tmp1 /* Bits to alignment -64. */
+ ldr data2, [src2], #8
+ mov tmp2, #~0
+ /* Little-endian. Early bytes are at LSB. */
+ lsr tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */
+ add limit_wd, limit, #7
+ orr data1, data1, tmp2
+ orr data2, data2, tmp2
+ lsr limit_wd, limit_wd, #3
+ b .Lstart_realigned
+
+.Lret0:
+ mov result, #0
+ ret
+
+ .p2align 6
+.Lmisaligned8:
+ sub limit, limit, #1
+1:
+ /* Perhaps we can do better than this. */
+ ldrh data1w, [src1], #2
+ ldrh data2w, [src2], #2
+ subs limit, limit, #2
+ ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */
+ b.eq 1b
+ sub result, data1, data2
+ ret
+END __memcmp16
+
+#endif // ART_RUNTIME_ARCH_ARM64_MEMCMP16_ARM64_S_
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 2e60b93..e088751 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1632,6 +1632,8 @@
ldr x0, [sp], 16 // Restore integer result, and drop stack area.
.cfi_adjust_cfa_offset 16
+ // Need to restore x18.
+ ldr xSELF, [sp, #72]
POP_REF_ONLY_CALLEE_SAVE_FRAME
br x9 // Tail-call out.
@@ -1647,6 +1649,7 @@
mov x0, xSELF // Pass thread.
mov x1, sp // Pass SP.
bl artDeoptimize // artDeoptimize(Thread*, SP)
+ brk 0 // Unreachable: artDeoptimize should never return.
END art_quick_deoptimize
@@ -1757,7 +1760,7 @@
* x1: comp object pointer
*
*/
- .extern memcmp16_generic_static
+ .extern __memcmp16
ENTRY art_quick_string_compareto
mov x2, x0 // x0 is return, use x2 for first input.
sub x0, x2, x1 // Same string object?
@@ -1850,16 +1853,17 @@
ret
.Ldo_memcmp16:
- str x0, [sp,#-16]! // Save x0
+ mov x14, x0 // Save x0 and LR. __memcmp16 does not use these temps.
+ mov x15, xLR // TODO: Codify and check that?
mov x0, x2
uxtw x2, w3
- bl memcmp16_generic_static
+ bl __memcmp16
- ldr x1, [sp], #16 // Restore old x0 = length diff
+ mov xLR, x15 // Restore LR.
- cmp x0, #0 // Check the memcmp difference
- csel x0, x0, x1, ne // x0 := x0 != 0 ? x0 : x1
+ cmp x0, #0 // Check the memcmp difference.
+ csel x0, x0, x14, ne // x0 := (x0 != 0) ? x0 : x14 (x14 = saved length diff).
ret
END art_quick_string_compareto
@@ -1869,11 +1873,9 @@
.macro NATIVE_DOWNCALL name, entrypoint
.extern \entrypoint
ENTRY \name
- sub sp, sp, #16
- stp xSELF, xLR, [sp]
+ stp xSELF, xLR, [sp, #-16]!
bl \entrypoint
- ldp xSELF, xLR, [sp]
- add sp, sp, #16
+ ldp xSELF, xLR, [sp], #16
ret
END \name
.endm
@@ -1881,3 +1883,4 @@
NATIVE_DOWNCALL art_quick_fmod fmod
NATIVE_DOWNCALL art_quick_fmodf fmodf
NATIVE_DOWNCALL art_quick_memcpy memcpy
+NATIVE_DOWNCALL art_quick_assignable_from_code artIsAssignableFromCode
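
In C terms, the rewritten tail of art_quick_string_compareto selects
between the two candidate results: the first-difference value returned
by __memcmp16 wins, and the length difference parked in x14 before the
call is the fallback. A sketch of that selection (illustrative only,
not ART API):

    #include <cstdint>

    // Models "csel x0, x0, x14, ne": x0 = (x0 != 0) ? x0 : x14.
    int32_t CompareToResult(int32_t memcmp16_result, int32_t length_diff) {
      return memcmp16_result != 0 ? memcmp16_result : length_diff;
    }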
diff --git a/runtime/arch/memcmp16.h b/runtime/arch/memcmp16.h
index ad58588..1144c8c 100644
--- a/runtime/arch/memcmp16.h
+++ b/runtime/arch/memcmp16.h
@@ -30,7 +30,7 @@
//
// In both cases, MemCmp16 is declared.
-#if defined(__arm__) || defined(__mips)
+#if defined(__aarch64__) || defined(__arm__) || defined(__mips)
extern "C" uint32_t __memcmp16(const uint16_t* s0, const uint16_t* s1, size_t count);
#define MemCmp16 __memcmp16
diff --git a/runtime/arch/memcmp16_test.cc b/runtime/arch/memcmp16_test.cc
new file mode 100644
index 0000000..5747c67
--- /dev/null
+++ b/runtime/arch/memcmp16_test.cc
@@ -0,0 +1,166 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "gtest/gtest.h"
+#include "memcmp16.h"
+
+class RandGen {
+ public:
+ explicit RandGen(uint32_t seed) : val_(seed) {}
+
+ uint32_t next() {
+ val_ = val_ * 48271 % 2147483647 + 13;
+ return val_;
+ }
+
+ uint32_t val_;
+};
+
+class MemCmp16Test : public testing::Test {
+};
+
+// A simple implementation to compare against.
+// Note: this version is equivalent to the generic one used when no optimized version is available.
+int32_t memcmp16_compare(const uint16_t* s0, const uint16_t* s1, size_t count) {
+ for (size_t i = 0; i < count; i++) {
+ if (s0[i] != s1[i]) {
+ return static_cast<int32_t>(s0[i]) - static_cast<int32_t>(s1[i]);
+ }
+ }
+ return 0;
+}
+
+static constexpr size_t kMemCmp16Rounds = 100000;
+
+static void CheckSeparate(size_t max_length, size_t min_length) {
+ RandGen r(0x1234);
+ size_t range_of_tests = 7; // All four test kinds active at first; cases 0-3 weight the first kind.
+
+ for (size_t round = 0; round < kMemCmp16Rounds; ++round) {
+ size_t type = r.next() % range_of_tests;
+ size_t count1, count2;
+ uint16_t *s1, *s2; // Raw pointers so bogus addresses can be substituted below.
+
+ switch (type) {
+ case 0: // random, non-zero lengths of both strings
+ case 1:
+ case 2:
+ case 3:
+ count1 = (r.next() % max_length) + min_length;
+ count2 = (r.next() % max_length) + min_length;
+ break;
+
+ case 4: // random non-zero length of first, second is zero
+ count1 = (r.next() % max_length) + min_length;
+ count2 = 0U;
+ break;
+
+ case 5: // random non-zero length of second, first is zero
+ count1 = 0U;
+ count2 = (r.next() % max_length) + min_length;
+ break;
+
+ case 6: // both zero-length
+ count1 = 0U;
+ count2 = 0U;
+ range_of_tests = 6; // Don't do zero-zero again.
+ break;
+
+ default:
+ ASSERT_TRUE(false) << "Should not get here.";
+ continue;
+ }
+
+ if (count1 > 0U) {
+ s1 = new uint16_t[count1];
+ } else {
+ // Leave a bogus pointer; it should not be touched.
+ s1 = reinterpret_cast<uint16_t*>(0xebad1001);
+ }
+
+ if (count2 > 0U) {
+ s2 = new uint16_t[count2];
+ } else {
+ // Leave a bogus pointer; it should not be touched.
+ s2 = reinterpret_cast<uint16_t*>(0xebad2002);
+ }
+
+ size_t min = count1 < count2 ? count1 : count2;
+ bool fill_same = r.next() % 2 == 1; // 50% chance of a shared prefix.
+
+ if (fill_same) {
+ for (size_t i = 0; i < min; ++i) {
+ s1[i] = static_cast<uint16_t>(r.next() & 0xFFFF);
+ s2[i] = s1[i];
+ }
+ for (size_t i = min; i < count1; ++i) {
+ s1[i] = static_cast<uint16_t>(r.next() & 0xFFFF);
+ }
+ for (size_t i = min; i < count2; ++i) {
+ s2[i] = static_cast<uint16_t>(r.next() & 0xFFFF);
+ }
+ } else {
+ for (size_t i = 0; i < count1; ++i) {
+ s1[i] = static_cast<uint16_t>(r.next() & 0xFFFF);
+ }
+ for (size_t i = 0; i < count2; ++i) {
+ s2[i] = static_cast<uint16_t>(r.next() & 0xFFFF);
+ }
+ }
+
+ uint16_t* s1_pot_unaligned = s1;
+ uint16_t* s2_pot_unaligned = s2;
+ size_t c1_mod = count1;
+ size_t c2_mod = count2;
+
+ if (!fill_same) { // Don't waste a good "long" test.
+ if (count1 > 1 && r.next() % 10 == 0) {
+ c1_mod--;
+ s1_pot_unaligned++;
+ }
+ if (count2 > 1 && r.next() % 10 == 0) {
+ c2_mod--;
+ s2_pot_unaligned++;
+ }
+ }
+ size_t mod_min = c1_mod < c2_mod ? c1_mod : c2_mod;
+
+ int32_t expected = memcmp16_compare(s1_pot_unaligned, s2_pot_unaligned, mod_min);
+ int32_t computed = MemCmp16(s1_pot_unaligned, s2_pot_unaligned, mod_min);
+
+ ASSERT_EQ(expected, computed) << "Run " << round << ", c1=" << count1 << " c2=" << count2;
+
+ if (count1 > 0U) {
+ delete[] s1;
+ }
+ if (count2 > 0U) {
+ delete[] s2;
+ }
+ }
+}
+
+TEST_F(MemCmp16Test, RandomSeparateShort) {
+ CheckSeparate(5U, 1U);
+}
+
+TEST_F(MemCmp16Test, RandomSeparateLong) {
+ CheckSeparate(64U, 32U);
+}
+
+// TODO: What's a good test for overlapping memory? Is it important?
+// TEST_F(MemCmp16Test, RandomOverlay) {
+//
+// }
diff --git a/runtime/arch/mips/entrypoints_init_mips.cc b/runtime/arch/mips/entrypoints_init_mips.cc
index 08caa80..70a9619 100644
--- a/runtime/arch/mips/entrypoints_init_mips.cc
+++ b/runtime/arch/mips/entrypoints_init_mips.cc
@@ -45,9 +45,6 @@
extern "C" void* art_quick_initialize_type_and_verify_access(uint32_t, void*);
extern "C" void* art_quick_resolve_string(void*, uint32_t);
-// Exception entrypoints.
-extern "C" void* GetAndClearException(Thread*);
-
// Field entrypoints.
extern "C" int art_quick_set32_instance(uint32_t, void*, int32_t);
extern "C" int art_quick_set32_static(uint32_t, int32_t);
@@ -117,7 +114,6 @@
extern "C" void art_quick_invoke_virtual_trampoline_with_access_check(uint32_t, void*);
// Thread entrypoints.
-extern void CheckSuspendFromCode(Thread* thread);
extern "C" void art_quick_test_suspend();
// Throw entrypoints.
@@ -229,7 +225,6 @@
qpoints->pInvokeVirtualTrampolineWithAccessCheck = art_quick_invoke_virtual_trampoline_with_access_check;
// Thread
- qpoints->pCheckSuspend = CheckSuspendFromCode;
qpoints->pTestSuspend = art_quick_test_suspend;
// Throws
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index a31c08b..eb490eb 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -1222,8 +1222,12 @@
// Use array so we can index into it and use a matrix for expected results
// Setup: The first half is standard. The second half uses a non-zero offset.
// TODO: Shared backing arrays.
- static constexpr size_t kBaseStringCount = 7;
- const char* c[kBaseStringCount] = { "", "", "a", "aa", "ab", "aac", "aac" , };
+ static constexpr size_t kBaseStringCount = 8;
+ const char* c[kBaseStringCount] = { "", "", "a", "aa", "ab",
+ "aacaacaacaacaacaac", // This one's under the default limit to go to __memcmp16.
+ "aacaacaacaacaacaacaacaacaacaacaacaac", // This one's over.
+ "aacaacaacaacaacaacaacaacaacaacaacaaca" }; // As is this one. We need a separate one to
+ // defeat object-equal optimizations.
static constexpr size_t kStringCount = 2 * kBaseStringCount;
diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc
index a85e250..b217cd6 100644
--- a/runtime/arch/x86/entrypoints_init_x86.cc
+++ b/runtime/arch/x86/entrypoints_init_x86.cc
@@ -93,7 +93,6 @@
extern "C" void art_quick_invoke_virtual_trampoline_with_access_check(uint32_t, void*);
// Thread entrypoints.
-extern void CheckSuspendFromCode(Thread* thread);
extern "C" void art_quick_test_suspend();
// Throw entrypoints.
@@ -205,7 +204,6 @@
qpoints->pInvokeVirtualTrampolineWithAccessCheck = art_quick_invoke_virtual_trampoline_with_access_check;
// Thread
- qpoints->pCheckSuspend = CheckSuspendFromCode;
qpoints->pTestSuspend = art_quick_test_suspend;
// Throws
diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
index b6f51f7..609d1c6 100644
--- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc
+++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
@@ -94,7 +94,6 @@
extern "C" void art_quick_invoke_virtual_trampoline_with_access_check(uint32_t, void*);
// Thread entrypoints.
-extern void CheckSuspendFromCode(Thread* thread);
extern "C" void art_quick_test_suspend();
// Throw entrypoints.
@@ -209,7 +208,6 @@
qpoints->pInvokeVirtualTrampolineWithAccessCheck = art_quick_invoke_virtual_trampoline_with_access_check;
// Thread
- qpoints->pCheckSuspend = CheckSuspendFromCode;
qpoints->pTestSuspend = art_quick_test_suspend;
// Throws
diff --git a/runtime/entrypoints/quick/quick_entrypoints.h b/runtime/entrypoints/quick/quick_entrypoints.h
index 469d373..032f6be 100644
--- a/runtime/entrypoints/quick/quick_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_entrypoints.h
@@ -129,7 +129,6 @@
void (*pInvokeVirtualTrampolineWithAccessCheck)(uint32_t, void*);
// Thread
- void (*pCheckSuspend)(Thread*); // Stub that is called when the suspend count is non-zero
void (*pTestSuspend)(); // Stub that is periodically called to test the suspend count
// Throws
diff --git a/runtime/entrypoints/quick/quick_thread_entrypoints.cc b/runtime/entrypoints/quick/quick_thread_entrypoints.cc
index f61c754..5c48fc7 100644
--- a/runtime/entrypoints/quick/quick_thread_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_thread_entrypoints.cc
@@ -21,13 +21,6 @@
namespace art {
-void CheckSuspendFromCode(Thread* thread)
- SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
- // Called when thread->suspend_count_ != 0 on JNI return. JNI method acts as callee-save frame.
- thread->VerifyStack();
- CheckSuspend(thread);
-}
-
extern "C" void artTestSuspendFromCode(Thread* thread, StackReference<mirror::ArtMethod>* sp)
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
// Called when suspend count check value is 0 and thread->suspend_count_ != 0
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index 0dd33cf..c572baf 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -251,8 +251,7 @@
EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pInvokeSuperTrampolineWithAccessCheck,
pInvokeVirtualTrampolineWithAccessCheck, kPointerSize);
EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pInvokeVirtualTrampolineWithAccessCheck,
- pCheckSuspend, kPointerSize);
- EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pCheckSuspend, pTestSuspend, kPointerSize);
+ pTestSuspend, kPointerSize);
EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pTestSuspend, pDeliverException, kPointerSize);
EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pDeliverException, pThrowArrayBounds, kPointerSize);
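
With pCheckSuspend gone, pTestSuspend must sit exactly one pointer
after the invoke trampolines, which is what the updated expectation
encodes. The check reduces to offsetof arithmetic; a toy version
(hypothetical struct, assuming EXPECT_OFFSET_DIFFNP compares field
offsets):

    #include <cstddef>

    struct ToyPoints {
      void (*pInvokeVirtualTrampolineWithAccessCheck)(unsigned, void*);
      void (*pTestSuspend)();
    };

    // Adjacent function-pointer fields differ by one pointer size.
    static_assert(offsetof(ToyPoints, pTestSuspend) -
                      offsetof(ToyPoints, pInvokeVirtualTrampolineWithAccessCheck) ==
                  sizeof(void*),
                  "entry points are adjacent");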
diff --git a/runtime/oat.cc b/runtime/oat.cc
index f4721f2..857c0a2 100644
--- a/runtime/oat.cc
+++ b/runtime/oat.cc
@@ -22,7 +22,7 @@
namespace art {
const uint8_t OatHeader::kOatMagic[] = { 'o', 'a', 't', '\n' };
-const uint8_t OatHeader::kOatVersion[] = { '0', '3', '5', '\0' };
+const uint8_t OatHeader::kOatVersion[] = { '0', '3', '6', '\0' };
OatHeader::OatHeader() {
memset(this, 0, sizeof(*this));
diff --git a/runtime/thread.cc b/runtime/thread.cc
index ca8c2d7..d3487d0 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -1881,7 +1881,6 @@
QUICK_ENTRY_POINT_INFO(pInvokeStaticTrampolineWithAccessCheck)
QUICK_ENTRY_POINT_INFO(pInvokeSuperTrampolineWithAccessCheck)
QUICK_ENTRY_POINT_INFO(pInvokeVirtualTrampolineWithAccessCheck)
- QUICK_ENTRY_POINT_INFO(pCheckSuspend)
QUICK_ENTRY_POINT_INFO(pTestSuspend)
QUICK_ENTRY_POINT_INFO(pDeliverException)
QUICK_ENTRY_POINT_INFO(pThrowArrayBounds)