Implement art_quick_aput_object stubs for X86-64 and ARM64
Implement the aput_object stubs for 64b architectures and enable
their testing in stub_test.
Fix missing @PLT for x86.
Add automatic _local labels in function definitions in x86-64 so we
can make local jumps (instead of PLT hoops).
Change-Id: I614b88fd5966acd8a564b87c47d4c50ee605320c
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 2083051..85a2c9e 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -970,7 +970,21 @@
br xLR
END art_quick_do_long_jump
-UNIMPLEMENTED art_quick_handle_fill_data
+ /*
+ * Entry from managed code that calls artHandleFillArrayDataFromCode and delivers exception on
+ * failure.
+ */
+ .extern artHandleFillArrayDataFromCode
+// TODO: xSELF -> x19.
+ENTRY art_quick_handle_fill_data
+ SETUP_REF_ONLY_CALLEE_SAVE_FRAME // Save callee saves in case exception allocation triggers GC.
+ mov x2, xSELF // Pass Thread::Current.
+ mov x3, sp // Pass SP.
+ bl artHandleFillArrayDataFromCode // (Array*, const DexFile::Payload*, Thread*, SP)
+ RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+ RETURN_IF_RESULT_IS_ZERO
+ DELIVER_PENDING_EXCEPTION
+END art_quick_handle_fill_data
UNIMPLEMENTED art_quick_lock_object
UNIMPLEMENTED art_quick_unlock_object
@@ -1026,9 +1040,116 @@
brk 0 // We should not return here...
END art_quick_check_cast
-UNIMPLEMENTED art_quick_aput_obj_with_null_and_bound_check
-UNIMPLEMENTED art_quick_aput_obj_with_bound_check
-UNIMPLEMENTED art_quick_aput_obj
+ /*
+ * Entry from managed code for array put operations of objects where the value being stored
+ * needs to be checked for compatibility.
+ * x0 = array, x1 = index, x2 = value
+ *
+ * Currently all values should fit into w0/w1/w2, and w1 always will as indices are 32b. We
+ * assume, though, that the upper 32b are zeroed out. At least for x1/w1 we can do better by
+ * using index-zero-extension in load/stores.
+ *
+ * Temporaries: x3, x4
+ * TODO: x4 OK? ip seems wrong here.
+ */
+ENTRY art_quick_aput_obj_with_null_and_bound_check
+ tst x0, x0
+ bne art_quick_aput_obj_with_bound_check
+ b art_quick_throw_null_pointer_exception
+END art_quick_aput_obj_with_null_and_bound_check
+
+ENTRY art_quick_aput_obj_with_bound_check
+ ldr w3, [x0, #ARRAY_LENGTH_OFFSET]
+ cmp w3, w1
+ bhi art_quick_aput_obj
+ mov x0, x1
+ mov x1, x3
+ b art_quick_throw_array_bounds
+END art_quick_aput_obj_with_bound_check
+
+ENTRY art_quick_aput_obj
+ cbz x2, .Ldo_aput_null
+ ldr w3, [x0, #CLASS_OFFSET] // Heap reference = 32b
+ // This also zero-extends to x3
+ ldr w4, [x2, #CLASS_OFFSET] // Heap reference = 32b
+ // This also zero-extends to x4
+ ldr w3, [x3, #CLASS_COMPONENT_TYPE_OFFSET] // Heap reference = 32b
+ // This also zero-extends to x3
+ cmp w3, w4 // value's type == array's component type - trivial assignability
+ bne .Lcheck_assignability
+.Ldo_aput:
+ add x3, x0, #OBJECT_ARRAY_DATA_OFFSET
+ // "Compress" = do nothing
+ str w2, [x3, x1, lsl #2] // Heap reference = 32b
+ ldr x3, [xSELF, #THREAD_CARD_TABLE_OFFSET]
+ lsr x0, x0, #7
+ strb w3, [x3, x0]
+ ret
+.Ldo_aput_null:
+ add x3, x0, #OBJECT_ARRAY_DATA_OFFSET
+ // "Compress" = do nothing
+ str w2, [x3, x1, lsl #2] // Heap reference = 32b
+ ret
+.Lcheck_assignability:
+ // Store arguments and link register
+ sub sp, sp, #48 // Stack needs to be 16b aligned on calls
+ .cfi_adjust_cfa_offset 48
+ stp x0, x1, [sp]
+ .cfi_rel_offset x0, 0
+ .cfi_rel_offset x1, 8
+ stp x2, xSELF, [sp, #16]
+ .cfi_rel_offset x2, 16
+ .cfi_rel_offset x18, 24
+ str xLR, [sp, #32]
+ .cfi_rel_offset x30, 32
+
+ // Call runtime code
+ mov x0, x3 // Heap reference, 32b, "uncompress" = do nothing, already zero-extended
+ mov x1, x4 // Heap reference, 32b, "uncompress" = do nothing, already zero-extended
+ bl artIsAssignableFromCode
+
+ // Check for exception
+ cbz x0, .Lthrow_array_store_exception
+
+ // Restore
+ ldp x0, x1, [sp]
+ .cfi_restore x0
+ .cfi_restore x1
+ ldp x2, xSELF, [sp, #16]
+ .cfi_restore x2
+ .cfi_restore x18
+ ldr xLR, [sp, #32]
+ .cfi_restore x30
+ add sp, sp, #48
+ .cfi_adjust_cfa_offset -48
+
+ add x3, x0, #OBJECT_ARRAY_DATA_OFFSET
+ // "Compress" = do nothing
+ str w2, [x3, x1, lsl #2] // Heap reference = 32b
+ ldr x3, [xSELF, #THREAD_CARD_TABLE_OFFSET]
+ lsr x0, x0, #7
+ strb w3, [x3, x0]
+ ret
+.Lthrow_array_store_exception:
+ ldp x0, x1, [sp]
+ .cfi_restore x0
+ .cfi_restore x1
+ ldp x2, xSELF, [sp, #16]
+ .cfi_restore x2
+ .cfi_restore x18
+ ldr xLR, [sp, #32]
+ .cfi_restore x30
+ add sp, sp, #48
+ .cfi_adjust_cfa_offset -48
+
+ SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
+ mov x1, x2 // Pass value.
+ mov x2, xSELF // Pass Thread::Current.
+ mov x3, sp // Pass SP.
+ b artThrowArrayStoreException // (Object*, Object*, Thread*, SP).
+ brk 0 // Unreached.
+END art_quick_aput_obj
+
UNIMPLEMENTED art_quick_initialize_static_storage
UNIMPLEMENTED art_quick_initialize_type
UNIMPLEMENTED art_quick_initialize_type_and_verify_access
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index 20dc53b..7027b32 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -122,13 +122,13 @@
"pushq $0\n\t" // 16B alignment padding
".cfi_adjust_cfa_offset 16\n\t"
"call *%%rax\n\t" // Call the stub
- "addq $16, %%rsp" // Pop nullptr and padding
- // ".cfi_adjust_cfa_offset -16\n\t"
+ "addq $16, %%rsp\n\t" // Pop nullptr and padding
+ ".cfi_adjust_cfa_offset -16\n\t"
: "=a" (result)
// Use the result from rax
: "D"(arg0), "S"(arg1), "d"(arg2), "a"(code)
// This places arg0 into rdi, arg1 into rsi, arg2 into rdx, and code into rax
- : "rcx", "rbp", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"); // clobber all
+ : "rbx", "rcx", "rbp", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"); // clobber all
// TODO: Should we clobber the other registers?
#else
LOG(WARNING) << "Was asked to invoke for an architecture I do not understand.";
@@ -273,7 +273,7 @@
}
-#if defined(__i386__) || defined(__arm__)
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
extern "C" void art_quick_aput_obj_with_null_and_bound_check(void);
// Do not check non-checked ones, we'd need handlers and stuff...
#endif
@@ -281,7 +281,7 @@
TEST_F(StubTest, APutObj) {
TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING();
-#if defined(__i386__) || defined(__arm__)
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
Thread* self = Thread::Current();
// Create an object
ScopedObjectAccess soa(self);
@@ -296,7 +296,7 @@
// Build a string array of size 1
SirtRef<mirror::ObjectArray<mirror::Object> > array(soa.Self(),
- mirror::ObjectArray<mirror::Object>::Alloc(soa.Self(), ca.get(), 1));
+ mirror::ObjectArray<mirror::Object>::Alloc(soa.Self(), ca.get(), 10));
// Build a string -> should be assignable
SirtRef<mirror::Object> str_obj(soa.Self(),
@@ -308,7 +308,7 @@
// Play with it...
// 1) Success cases
- // 1.1) Assign str_obj to array[0]
+ // 1.1) Assign str_obj to array[0..3]
EXPECT_FALSE(self->IsExceptionPending());
@@ -316,13 +316,51 @@
reinterpret_cast<uintptr_t>(&art_quick_aput_obj_with_null_and_bound_check), self);
EXPECT_FALSE(self->IsExceptionPending());
+ EXPECT_EQ(str_obj.get(), array->Get(0));
- // 1.2) Assign null to array[0]
+ Invoke3(reinterpret_cast<size_t>(array.get()), 1U, reinterpret_cast<size_t>(str_obj.get()),
+ reinterpret_cast<uintptr_t>(&art_quick_aput_obj_with_null_and_bound_check), self);
+
+ EXPECT_FALSE(self->IsExceptionPending());
+ EXPECT_EQ(str_obj.get(), array->Get(1));
+
+ Invoke3(reinterpret_cast<size_t>(array.get()), 2U, reinterpret_cast<size_t>(str_obj.get()),
+ reinterpret_cast<uintptr_t>(&art_quick_aput_obj_with_null_and_bound_check), self);
+
+ EXPECT_FALSE(self->IsExceptionPending());
+ EXPECT_EQ(str_obj.get(), array->Get(2));
+
+ Invoke3(reinterpret_cast<size_t>(array.get()), 3U, reinterpret_cast<size_t>(str_obj.get()),
+ reinterpret_cast<uintptr_t>(&art_quick_aput_obj_with_null_and_bound_check), self);
+
+ EXPECT_FALSE(self->IsExceptionPending());
+ EXPECT_EQ(str_obj.get(), array->Get(3));
+
+ // 1.2) Assign null to array[0..3]
Invoke3(reinterpret_cast<size_t>(array.get()), 0U, reinterpret_cast<size_t>(nullptr),
reinterpret_cast<uintptr_t>(&art_quick_aput_obj_with_null_and_bound_check), self);
EXPECT_FALSE(self->IsExceptionPending());
+ EXPECT_EQ(nullptr, array->Get(0));
+
+ Invoke3(reinterpret_cast<size_t>(array.get()), 1U, reinterpret_cast<size_t>(nullptr),
+ reinterpret_cast<uintptr_t>(&art_quick_aput_obj_with_null_and_bound_check), self);
+
+ EXPECT_FALSE(self->IsExceptionPending());
+ EXPECT_EQ(nullptr, array->Get(1));
+
+ Invoke3(reinterpret_cast<size_t>(array.get()), 2U, reinterpret_cast<size_t>(nullptr),
+ reinterpret_cast<uintptr_t>(&art_quick_aput_obj_with_null_and_bound_check), self);
+
+ EXPECT_FALSE(self->IsExceptionPending());
+ EXPECT_EQ(nullptr, array->Get(2));
+
+ Invoke3(reinterpret_cast<size_t>(array.get()), 3U, reinterpret_cast<size_t>(nullptr),
+ reinterpret_cast<uintptr_t>(&art_quick_aput_obj_with_null_and_bound_check), self);
+
+ EXPECT_FALSE(self->IsExceptionPending());
+ EXPECT_EQ(nullptr, array->Get(3));
// TODO: Check _which_ exception is thrown. Then make 3) check that it's the right check order.
@@ -347,7 +385,7 @@
// 2.3) Index > 0
- Invoke3(reinterpret_cast<size_t>(array.get()), 1U, reinterpret_cast<size_t>(str_obj.get()),
+ Invoke3(reinterpret_cast<size_t>(array.get()), 10U, reinterpret_cast<size_t>(str_obj.get()),
reinterpret_cast<uintptr_t>(&art_quick_aput_obj_with_null_and_bound_check), self);
EXPECT_TRUE(self->IsExceptionPending());
diff --git a/runtime/arch/x86/asm_support_x86.S b/runtime/arch/x86/asm_support_x86.S
index 642d9a3..72e6db4 100644
--- a/runtime/arch/x86/asm_support_x86.S
+++ b/runtime/arch/x86/asm_support_x86.S
@@ -86,7 +86,7 @@
// Symbols.
#if !defined(__APPLE__)
#define SYMBOL(name) name
- #define PLT_SYMBOL(name) name
+ #define PLT_SYMBOL(name) name ## @PLT
#else
// Mac OS' symbols have an _ prefix.
#define SYMBOL(name) _ ## name
diff --git a/runtime/arch/x86_64/asm_support_x86_64.S b/runtime/arch/x86_64/asm_support_x86_64.S
index ad65033..34c8b82 100644
--- a/runtime/arch/x86_64/asm_support_x86_64.S
+++ b/runtime/arch/x86_64/asm_support_x86_64.S
@@ -103,6 +103,8 @@
.globl VAR(c_name, 0)
ALIGN_FUNCTION_ENTRY
VAR(c_name, 0):
+ // Have a local entrypoint that's not globl
+VAR(c_name, 0)_local:
CFI_STARTPROC
// Ensure we get a sane starting CFA.
CFI_DEF_CFA(rsp, 8)
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index bc9907b..4fefd20 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -174,7 +174,6 @@
MACRO2(NO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
DEFINE_FUNCTION VAR(c_name, 0)
- UNTESTED
SETUP_SAVE_ALL_CALLEE_SAVE_FRAME // save all registers as basis for long jump context
// Outgoing argument set up
movq %rsp, %rsi // pass SP
@@ -197,7 +196,6 @@
MACRO2(TWO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
DEFINE_FUNCTION VAR(c_name, 0)
- UNTESTED
SETUP_SAVE_ALL_CALLEE_SAVE_FRAME // save all registers as basis for long jump context
// Outgoing argument set up
movq %rsp, %rcx // pass SP
@@ -696,14 +694,112 @@
int3 // unreached
END_FUNCTION art_quick_check_cast
+
/*
* Entry from managed code for array put operations of objects where the value being stored
* needs to be checked for compatibility.
- * eax = array, ecx = index, edx = value
+ *
+ * Currently all the parameters should fit into the 32b portions of the registers. Index always
+ * will. So we optimize for a tighter encoding. The 64b versions are in comments.
+ *
+ * rdi(edi) = array, rsi(esi) = index, rdx(edx) = value
*/
-UNIMPLEMENTED art_quick_aput_obj_with_null_and_bound_check
-UNIMPLEMENTED art_quick_aput_obj_with_bound_check
-UNIMPLEMENTED art_quick_aput_obj
+DEFINE_FUNCTION art_quick_aput_obj_with_null_and_bound_check
+ testl %edi, %edi
+// testq %rdi, %rdi
+ jnz art_quick_aput_obj_with_bound_check_local
+ jmp art_quick_throw_null_pointer_exception_local
+END_FUNCTION art_quick_aput_obj_with_null_and_bound_check
+
+
+DEFINE_FUNCTION art_quick_aput_obj_with_bound_check
+ movl ARRAY_LENGTH_OFFSET(%edi), %ebx
+// movl ARRAY_LENGTH_OFFSET(%rdi), %ebx // This zero-extends, so value(%rbx)=value(%ebx)
+ cmpl %ebx, %esi
+ jb art_quick_aput_obj_local
+ mov %esi, %edi
+// mov %rsi, %rdi
+ mov %ebx, %esi
+// mov %rbx, %rsi
+ jmp art_quick_throw_array_bounds_local
+END_FUNCTION art_quick_aput_obj_with_bound_check
+
+
+DEFINE_FUNCTION art_quick_aput_obj
+ testl %edx, %edx // store of null
+// test %rdx, %rdx
+ jz .Ldo_aput_null
+ movl CLASS_OFFSET(%edi), %ebx
+// movq CLASS_OFFSET(%rdi), %rbx
+ movl CLASS_COMPONENT_TYPE_OFFSET(%ebx), %ebx
+// movq CLASS_COMPONENT_TYPE_OFFSET(%rbx), %rbx
+ cmpl CLASS_OFFSET(%edx), %ebx // value's type == array's component type - trivial assignability
+// cmpq CLASS_OFFSET(%rdx), %rbx
+ jne .Lcheck_assignability
+.Ldo_aput:
+ movl %edx, OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
+// movq %rdx, OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
+ movq %gs:THREAD_CARD_TABLE_OFFSET, %rdx
+ shrl LITERAL(7), %edi
+// shrq LITERAL(7), %rdi
+ movb %dl, (%rdx, %rdi) // Note: this assumes that top 32b of %rdi are zero
+ ret
+.Ldo_aput_null:
+ movl %edx, OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
+// movq %rdx, OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
+ ret
+.Lcheck_assignability:
+ // Save arguments.
+ PUSH rdi
+ PUSH rsi
+ PUSH rdx
+ subq LITERAL(8), %rsp // Alignment padding.
+ CFI_ADJUST_CFA_OFFSET(8)
+
+ // "Uncompress" = do nothing, as already zero-extended on load.
+ movl CLASS_OFFSET(%edx), %esi // Pass arg2 = value's class.
+ movq %rbx, %rdi // Pass arg1 = array's component type.
+
+ call PLT_SYMBOL(artIsAssignableFromCode) // (Class* a, Class* b)
+
+ // Exception?
+ testq %rax, %rax
+ jz .Lthrow_array_store_exception
+
+ // Restore arguments.
+ addq LITERAL(8), %rsp
+ CFI_ADJUST_CFA_OFFSET(-8)
+ POP rdx
+ POP rsi
+ POP rdi
+
+ movl %edx, OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
+// movq %rdx, OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
+ movq %gs:THREAD_CARD_TABLE_OFFSET, %rdx
+ shrl LITERAL(7), %edi
+// shrq LITERAL(7), %rdi
+ movb %dl, (%rdx, %rdi) // Note: this assumes that top 32b of %rdi are zero
+// movb %dl, (%rdx, %rdi)
+ ret
+.Lthrow_array_store_exception:
+ // Restore arguments.
+ addq LITERAL(8), %rsp
+ CFI_ADJUST_CFA_OFFSET(-8)
+ POP rdx
+ POP rsi
+ POP rdi
+
+ SETUP_SAVE_ALL_CALLEE_SAVE_FRAME // Save all registers as basis for long jump context.
+
+ // Outgoing argument set up.
+ movq %rsp, %rcx // Pass arg 4 = SP.
+ movq %rdx, %rsi // Pass arg 2 = value.
+ movq %gs:THREAD_SELF_OFFSET, %rdx // Pass arg 3 = Thread::Current().
+ // Pass arg 1 = array.
+
+ call PLT_SYMBOL(artThrowArrayStoreException) // (array, value, Thread*, SP)
+ int3 // unreached
+END_FUNCTION art_quick_aput_obj
// TODO: This is quite silly on X86_64 now.
DEFINE_FUNCTION art_quick_memcpy