diff options
author | 2017-01-23 22:50:24 +0000 | |
---|---|---|
committer | 2017-01-26 09:56:36 +0000 | |
commit | b048cb74b742b03eb6dd5f1d6dd49e559f730b36 (patch) | |
tree | b1f663cbb343488a548cce4db352dbc4af720a89 | |
parent | f34077c96af3389e8eae65252d4c5d51cf630039 (diff) |
Add per-array-size allocation entrypoints.
- Update architectures that have fast paths for
array allocation to use them.
- Will add more fast paths in follow-up CLs.
Test: test-art-target test-art-host.
Change-Id: I138cccd16464a85de22a8ed31c915f876e78fb04
-rw-r--r-- | compiler/oat_test.cc | 2 | ||||
-rw-r--r-- | compiler/optimizing/code_generator.cc | 18 | ||||
-rw-r--r-- | compiler/optimizing/code_generator.h | 2 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_arm64.cc | 4 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_x86_64.cc | 4 | ||||
-rw-r--r-- | compiler/utils/assembler_thumb_test_expected.cc.inc | 2 | ||||
-rw-r--r-- | runtime/arch/arm64/quick_entrypoints_arm64.S | 89 | ||||
-rw-r--r-- | runtime/arch/quick_alloc_entrypoints.S | 65 | ||||
-rw-r--r-- | runtime/arch/x86_64/quick_entrypoints_x86_64.S | 137 | ||||
-rw-r--r-- | runtime/asm_support.h | 2 | ||||
-rw-r--r-- | runtime/entrypoints/quick/quick_alloc_entrypoints.cc | 23 | ||||
-rw-r--r-- | runtime/entrypoints/quick/quick_entrypoints_list.h | 4 | ||||
-rw-r--r-- | runtime/entrypoints_order_test.cc | 10 | ||||
-rw-r--r-- | runtime/oat.h | 2 | ||||
-rw-r--r-- | runtime/thread.cc | 4 |
15 files changed, 281 insertions, 87 deletions
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc index 34b33a13a3..d5842a8c9d 100644 --- a/compiler/oat_test.cc +++ b/compiler/oat_test.cc @@ -487,7 +487,7 @@ TEST_F(OatTest, OatHeaderSizeCheck) { EXPECT_EQ(72U, sizeof(OatHeader)); EXPECT_EQ(4U, sizeof(OatMethodOffsets)); EXPECT_EQ(20U, sizeof(OatQuickMethodHeader)); - EXPECT_EQ(157 * static_cast<size_t>(GetInstructionSetPointerSize(kRuntimeISA)), + EXPECT_EQ(161 * static_cast<size_t>(GetInstructionSetPointerSize(kRuntimeISA)), sizeof(QuickEntryPoints)); } diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 99427f05da..d68aa51b1b 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -1417,4 +1417,22 @@ void CodeGenerator::EmitJitRoots(uint8_t* code, EmitJitRootPatches(code, roots_data); } +QuickEntrypointEnum CodeGenerator::GetArrayAllocationEntrypoint(Handle<mirror::Class> array_klass) { + ScopedObjectAccess soa(Thread::Current()); + if (array_klass.Get() == nullptr) { + // This can only happen for non-primitive arrays, as primitive arrays can always + // be resolved. 
+ return kQuickAllocArrayResolved32; + } + + switch (array_klass->GetComponentSize()) { + case 1: return kQuickAllocArrayResolved8; + case 2: return kQuickAllocArrayResolved16; + case 4: return kQuickAllocArrayResolved32; + case 8: return kQuickAllocArrayResolved64; + } + LOG(FATAL) << "Unreachable"; + return kQuickAllocArrayResolved; +} + } // namespace art diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 2d129aff22..b912672792 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -573,6 +573,8 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { uint32_t GetReferenceSlowFlagOffset() const; uint32_t GetReferenceDisableFlagOffset() const; + static QuickEntrypointEnum GetArrayAllocationEntrypoint(Handle<mirror::Class> array_klass); + protected: // Patch info used for recording locations of required linker patches and their targets, // i.e. target method, string, type or code identified by their dex file and index. diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 9762ee81b1..1e89ba590c 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -4762,7 +4762,9 @@ void LocationsBuilderARM64::VisitNewArray(HNewArray* instruction) { void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) { // Note: if heap poisoning is enabled, the entry point takes cares // of poisoning the reference. 
- codegen_->InvokeRuntime(kQuickAllocArrayResolved, instruction, instruction->GetDexPc()); + QuickEntrypointEnum entrypoint = + CodeGenerator::GetArrayAllocationEntrypoint(instruction->GetLoadClass()->GetClass()); + codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>(); } diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index c4caf4bf9d..abd8246325 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -4096,7 +4096,9 @@ void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) { void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) { // Note: if heap poisoning is enabled, the entry point takes cares // of poisoning the reference. - codegen_->InvokeRuntime(kQuickAllocArrayResolved, instruction, instruction->GetDexPc()); + QuickEntrypointEnum entrypoint = + CodeGenerator::GetArrayAllocationEntrypoint(instruction->GetLoadClass()->GetClass()); + codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>(); DCHECK(!codegen_->IsLeafMethod()); } diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc index f132e2737d..071cd575e1 100644 --- a/compiler/utils/assembler_thumb_test_expected.cc.inc +++ b/compiler/utils/assembler_thumb_test_expected.cc.inc @@ -5610,7 +5610,7 @@ const char* const VixlJniHelpersResults[] = { " 214: ecbd 8a10 vpop {s16-s31}\n", " 218: e8bd 8de0 ldmia.w sp!, {r5, r6, r7, r8, sl, fp, pc}\n", " 21c: 4660 mov r0, ip\n", - " 21e: f8d9 c2a4 ldr.w ip, [r9, #676] ; 0x2a4\n", + " 21e: f8d9 c2b4 ldr.w ip, [r9, #692] ; 0x2b4\n", " 222: 47e0 blx ip\n", nullptr }; diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S 
b/runtime/arch/arm64/quick_entrypoints_arm64.S index 00518e16be..3bd6fe275b 100644 --- a/runtime/arch/arm64/quick_entrypoints_arm64.S +++ b/runtime/arch/arm64/quick_entrypoints_arm64.S @@ -1646,6 +1646,10 @@ GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_REGION_TLAB_ALLOCATORS // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB) // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB) +// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_region_tlab, RegionTLAB) +// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_region_tlab, RegionTLAB) +// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_region_tlab, RegionTLAB) +// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_region_tlab, RegionTLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB) @@ -1731,29 +1735,7 @@ ENTRY art_quick_alloc_object_resolved_rosalloc RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER END art_quick_alloc_object_resolved_rosalloc - -// The common fast path code for art_quick_alloc_array_region_tlab. -.macro ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED slowPathLabel, xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2 - // Array classes are never finalizable or uninitialized, no need to check. - ldr \wTemp0, [\xClass, #MIRROR_CLASS_COMPONENT_TYPE_OFFSET] // Load component type - UNPOISON_HEAP_REF \wTemp0 - ldr \wTemp0, [\xTemp0, #MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET] - lsr \xTemp0, \xTemp0, #PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT // Component size shift is in high 16 - // bits. - // xCount is holding a 32 bit value, - // it can not overflow. - lsl \xTemp1, \xCount, \xTemp0 // Calculate data size - // Add array data offset and alignment. 
- add \xTemp1, \xTemp1, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK) -#if MIRROR_LONG_ARRAY_DATA_OFFSET != MIRROR_INT_ARRAY_DATA_OFFSET + 4 -#error Long array data offset must be 4 greater than int array data offset. -#endif - - add \xTemp0, \xTemp0, #1 // Add 4 to the length only if the - // component size shift is 3 - // (for 64 bit alignment). - and \xTemp0, \xTemp0, #4 - add \xTemp1, \xTemp1, \xTemp0 +.macro ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE slowPathLabel, xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2 and \xTemp1, \xTemp1, #OBJECT_ALIGNMENT_MASK_TOGGLED64 // Apply alignemnt mask // (addr + 7) & ~7. The mask must // be 64 bits to keep high bits in @@ -1868,8 +1850,7 @@ GENERATE_ALLOC_OBJECT_RESOLVED_REGION_TLAB art_quick_alloc_object_initialized_re // TODO: We could use this macro for the normal tlab allocator too. -// The common code for art_quick_alloc_array_*region_tlab -.macro GENERATE_ALLOC_ARRAY_REGION_TLAB name, entrypoint, fast_path +.macro GENERATE_ALLOC_ARRAY_REGION_TLAB name, entrypoint, size_setup ENTRY \name // Fast path array allocation for region tlab allocation. // x0: mirror::Class* type @@ -1880,7 +1861,8 @@ ENTRY \name ret // Return -1. #endif mov x3, x0 - \fast_path .Lslow_path\name, x3, w3, x1, w1, x4, w4, x5, w5, x6, w6 + \size_setup x3, w3, x1, w1, x4, w4, x5, w5, x6, w6 + ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE .Lslow_path\name, x3, w3, x1, w1, x4, w4, x5, w5, x6, w6 .Lslow_path\name: // x0: mirror::Class* klass // x1: int32_t component_count @@ -1893,7 +1875,60 @@ ENTRY \name END \name .endm -GENERATE_ALLOC_ARRAY_REGION_TLAB art_quick_alloc_array_resolved_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED +.macro COMPUTE_ARRAY_SIZE_UNKNOWN xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2 + // Array classes are never finalizable or uninitialized, no need to check. 
+ ldr \wTemp0, [\xClass, #MIRROR_CLASS_COMPONENT_TYPE_OFFSET] // Load component type + UNPOISON_HEAP_REF \wTemp0 + ldr \wTemp0, [\xTemp0, #MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET] + lsr \xTemp0, \xTemp0, #PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT // Component size shift is in high 16 + // bits. + // xCount is holding a 32 bit value, + // it can not overflow. + lsl \xTemp1, \xCount, \xTemp0 // Calculate data size + // Add array data offset and alignment. + add \xTemp1, \xTemp1, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK) +#if MIRROR_LONG_ARRAY_DATA_OFFSET != MIRROR_INT_ARRAY_DATA_OFFSET + 4 +#error Long array data offset must be 4 greater than int array data offset. +#endif + + add \xTemp0, \xTemp0, #1 // Add 4 to the length only if the + // component size shift is 3 + // (for 64 bit alignment). + and \xTemp0, \xTemp0, #4 + add \xTemp1, \xTemp1, \xTemp0 +.endm + +.macro COMPUTE_ARRAY_SIZE_8 xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2 + // Add array data offset and alignment. + add \xTemp1, \xCount, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK) +.endm + +.macro COMPUTE_ARRAY_SIZE_16 xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2 + lsl \xTemp1, \xCount, #1 + // Add array data offset and alignment. + add \xTemp1, \xTemp1, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK) +.endm + +.macro COMPUTE_ARRAY_SIZE_32 xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2 + lsl \xTemp1, \xCount, #2 + // Add array data offset and alignment. + add \xTemp1, \xTemp1, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK) +.endm + +.macro COMPUTE_ARRAY_SIZE_64 xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2 + lsl \xTemp1, \xCount, #3 + // Add array data offset and alignment. + // Add 4 to the size for 64 bit alignment. 
+ add \xTemp1, \xTemp1, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK + 4) +.endm + +# TODO(ngeoffray): art_quick_alloc_array_resolved_region_tlab is not used for arm64, remove +# the entrypoint once all backends have been updated to use the size variants. +GENERATE_ALLOC_ARRAY_REGION_TLAB art_quick_alloc_array_resolved_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN +GENERATE_ALLOC_ARRAY_REGION_TLAB art_quick_alloc_array_resolved8_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_8 +GENERATE_ALLOC_ARRAY_REGION_TLAB art_quick_alloc_array_resolved16_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_16 +GENERATE_ALLOC_ARRAY_REGION_TLAB art_quick_alloc_array_resolved32_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_32 +GENERATE_ALLOC_ARRAY_REGION_TLAB art_quick_alloc_array_resolved64_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_64 /* * Called by managed code when the thread has been asked to suspend. 
diff --git a/runtime/arch/quick_alloc_entrypoints.S b/runtime/arch/quick_alloc_entrypoints.S index e79dc6029b..9204d85279 100644 --- a/runtime/arch/quick_alloc_entrypoints.S +++ b/runtime/arch/quick_alloc_entrypoints.S @@ -30,6 +30,11 @@ FOUR_ARG_DOWNCALL art_quick_alloc_string_from_bytes\c_suffix, artAllocStringFrom THREE_ARG_DOWNCALL art_quick_alloc_string_from_chars\c_suffix, artAllocStringFromCharsFromCode\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER // Called by managed code to allocate a string from string ONE_ARG_DOWNCALL art_quick_alloc_string_from_string\c_suffix, artAllocStringFromStringFromCode\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER + +TWO_ARG_DOWNCALL art_quick_alloc_array_resolved8\c_suffix, artAllocArrayFromCodeResolved\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER +TWO_ARG_DOWNCALL art_quick_alloc_array_resolved16\c_suffix, artAllocArrayFromCodeResolved\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER +TWO_ARG_DOWNCALL art_quick_alloc_array_resolved32\c_suffix, artAllocArrayFromCodeResolved\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER +TWO_ARG_DOWNCALL art_quick_alloc_array_resolved64\c_suffix, artAllocArrayFromCodeResolved\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER .endm .macro GENERATE_ALL_ALLOC_ENTRYPOINTS @@ -56,14 +61,22 @@ GENERATE_ALLOC_ENTRYPOINTS _region_tlab_instrumented, RegionTLABInstrumented ONE_ARG_DOWNCALL art_quick_alloc_object_initialized ## c_suffix, artAllocObjectFromCodeInitialized ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER #define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(c_suffix, cxx_suffix) \ ONE_ARG_DOWNCALL art_quick_alloc_object_with_checks ## c_suffix, artAllocObjectFromCodeWithChecks ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER -#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(c_suffix, cxx_suffix) \ - TWO_ARG_DOWNCALL art_quick_alloc_array_resolved ## c_suffix, artAllocArrayFromCodeResolved ## cxx_suffix, 
RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER #define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(c_suffix, cxx_suffix) \ FOUR_ARG_DOWNCALL art_quick_alloc_string_from_bytes ## c_suffix, artAllocStringFromBytesFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER #define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(c_suffix, cxx_suffix) \ THREE_ARG_DOWNCALL art_quick_alloc_string_from_chars ## c_suffix, artAllocStringFromCharsFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER #define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(c_suffix, cxx_suffix) \ ONE_ARG_DOWNCALL art_quick_alloc_string_from_string ## c_suffix, artAllocStringFromStringFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER +#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(c_suffix, cxx_suffix) \ + TWO_ARG_DOWNCALL art_quick_alloc_array_resolved ## c_suffix, artAllocArrayFromCodeResolved ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER +#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(c_suffix, cxx_suffix) \ + TWO_ARG_DOWNCALL art_quick_alloc_array_resolved8 ## c_suffix, artAllocArrayFromCodeResolved ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER +#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(c_suffix, cxx_suffix) \ + TWO_ARG_DOWNCALL art_quick_alloc_array_resolved16 ## c_suffix, artAllocArrayFromCodeResolved ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER +#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(c_suffix, cxx_suffix) \ + TWO_ARG_DOWNCALL art_quick_alloc_array_resolved32 ## c_suffix, artAllocArrayFromCodeResolved ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER +#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(c_suffix, cxx_suffix) \ + TWO_ARG_DOWNCALL art_quick_alloc_array_resolved64 ## c_suffix, artAllocArrayFromCodeResolved ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER .macro GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR 
GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_REGION_TLAB_ALLOCATORS @@ -76,6 +89,10 @@ GENERATE_ALLOC_ENTRYPOINTS_FOR_REGION_TLAB_ALLOCATOR GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_region_tlab, RegionTLAB) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_region_tlab, RegionTLAB) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_region_tlab, RegionTLAB) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_region_tlab, RegionTLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB) @@ -87,6 +104,10 @@ GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_tlab, TLAB) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_tlab, TLAB) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_tlab, TLAB) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_tlab, TLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB) @@ -102,6 +123,10 @@ GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_dlmalloc, DlMalloc) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_dlmalloc, DlMalloc) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_dlmalloc, DlMalloc) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_dlmalloc, DlMalloc) 
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_dlmalloc, DlMalloc) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_dlmalloc, DlMalloc) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_dlmalloc, DlMalloc) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_dlmalloc, DlMalloc) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_dlmalloc, DlMalloc) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_dlmalloc, DlMalloc) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_dlmalloc, DlMalloc) @@ -110,6 +135,10 @@ GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_dlmalloc_instrumented, DlMallo GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_dlmalloc_instrumented, DlMallocInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_dlmalloc_instrumented, DlMallocInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_dlmalloc_instrumented, DlMallocInstrumented) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_dlmalloc_instrumented, DlMallocInstrumented) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_dlmalloc_instrumented, DlMallocInstrumented) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_dlmalloc_instrumented, DlMallocInstrumented) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_dlmalloc_instrumented, DlMallocInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_dlmalloc_instrumented, DlMallocInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_dlmalloc_instrumented, DlMallocInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_dlmalloc_instrumented, DlMallocInstrumented) @@ -119,6 +148,10 @@ GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_dlmalloc_instrumented, DlMa GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_rosalloc, RosAlloc) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_rosalloc, RosAlloc) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_rosalloc, RosAlloc) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_rosalloc, RosAlloc) 
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_rosalloc, RosAlloc) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_rosalloc, RosAlloc) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_rosalloc, RosAlloc) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_rosalloc, RosAlloc) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_rosalloc, RosAlloc) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_rosalloc, RosAlloc) @@ -127,6 +160,10 @@ GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc_instrumented, RosAllo GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_rosalloc_instrumented, RosAllocInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_rosalloc_instrumented, RosAllocInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_rosalloc_instrumented, RosAllocInstrumented) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_rosalloc_instrumented, RosAllocInstrumented) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_rosalloc_instrumented, RosAllocInstrumented) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_rosalloc_instrumented, RosAllocInstrumented) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_rosalloc_instrumented, RosAllocInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_rosalloc_instrumented, RosAllocInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_rosalloc_instrumented, RosAllocInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_rosalloc_instrumented, RosAllocInstrumented) @@ -135,6 +172,10 @@ GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_bump_pointer, BumpPointer) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_bump_pointer, BumpPointer) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_bump_pointer, BumpPointer) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_bump_pointer, BumpPointer) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_bump_pointer, BumpPointer) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_bump_pointer, 
BumpPointer) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_bump_pointer, BumpPointer) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_bump_pointer, BumpPointer) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_bump_pointer, BumpPointer) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_bump_pointer, BumpPointer) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_bump_pointer, BumpPointer) @@ -143,6 +184,10 @@ GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_bump_pointer_instrumented, Bum GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_bump_pointer_instrumented, BumpPointerInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_bump_pointer_instrumented, BumpPointerInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_bump_pointer_instrumented, BumpPointerInstrumented) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_bump_pointer_instrumented, BumpPointerInstrumented) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_bump_pointer_instrumented, BumpPointerInstrumented) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_bump_pointer_instrumented, BumpPointerInstrumented) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_bump_pointer_instrumented, BumpPointerInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_bump_pointer_instrumented, BumpPointerInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_bump_pointer_instrumented, BumpPointerInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_bump_pointer_instrumented, BumpPointerInstrumented) @@ -151,6 +196,10 @@ GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab_instrumented, TLABInstrum GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab_instrumented, TLABInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab_instrumented, TLABInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab_instrumented, TLABInstrumented) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_tlab_instrumented, 
TLABInstrumented) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_tlab_instrumented, TLABInstrumented) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_tlab_instrumented, TLABInstrumented) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_tlab_instrumented, TLABInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab_instrumented, TLABInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab_instrumented, TLABInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab_instrumented, TLABInstrumented) @@ -159,6 +208,10 @@ GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region, Region) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region, Region) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region, Region) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region, Region) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_region, Region) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_region, Region) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_region, Region) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_region, Region) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region, Region) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region, Region) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region, Region) @@ -167,6 +220,10 @@ GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_instrumented, RegionIns GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_instrumented, RegionInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_instrumented, RegionInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_instrumented, RegionInstrumented) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_region_instrumented, RegionInstrumented) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_region_instrumented, RegionInstrumented) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_region_instrumented, RegionInstrumented) 
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_region_instrumented, RegionInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_instrumented, RegionInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_instrumented, RegionInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_instrumented, RegionInstrumented) @@ -175,6 +232,10 @@ GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab_instrumented, Regi GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab_instrumented, RegionTLABInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab_instrumented, RegionTLABInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab_instrumented, RegionTLABInstrumented) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_region_tlab_instrumented, RegionTLABInstrumented) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_region_tlab_instrumented, RegionTLABInstrumented) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_region_tlab_instrumented, RegionTLABInstrumented) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_region_tlab_instrumented, RegionTLABInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab_instrumented, RegionTLABInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab_instrumented, RegionTLABInstrumented) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab_instrumented, RegionTLABInstrumented) diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S index 46bee399f4..f3d4256ea3 100644 --- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S +++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S @@ -987,6 +987,10 @@ GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB) // 
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB) +// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_region_tlab, RegionTLAB) +// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_region_tlab, RegionTLAB) +// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_region_tlab, RegionTLAB) +// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_region_tlab, RegionTLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB) @@ -995,6 +999,10 @@ GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB) // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB) // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB) +// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_tlab, TLAB) +// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_tlab, TLAB) +// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_tlab, TLAB) +// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_tlab, TLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB) @@ -1112,26 +1120,11 @@ MACRO1(ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH, slowPathLabel) END_MACRO // The fast path code for art_quick_alloc_array_region_tlab. -// Inputs: RDI: the class, RSI: int32_t component_count -// Free temps: RCX, RDX, R8, R9 +// Inputs: RDI: the class, RSI: int32_t component_count, R9: total_size +// Free temps: RCX, RDX, R8 // Output: RAX: return value. -MACRO1(ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED, slowPathLabel) - movl MIRROR_CLASS_COMPONENT_TYPE_OFFSET(%rdi), %ecx // Load component type. 
- UNPOISON_HEAP_REF ecx - movl MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET(%rcx), %ecx // Load primitive type. - shrq LITERAL(PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT), %rcx // Get component size shift. - movq %rsi, %r9 - salq %cl, %r9 // Calculate array count shifted. - // Add array header + alignment rounding. - addq LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9 - // Add 4 extra bytes if we are doing a long array. - addq LITERAL(1), %rcx - andq LITERAL(4), %rcx - addq %rcx, %r9 +MACRO1(ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE, slowPathLabel) movq %gs:THREAD_SELF_OFFSET, %rcx // rcx = thread -#if MIRROR_LONG_ARRAY_DATA_OFFSET != MIRROR_INT_ARRAY_DATA_OFFSET + 4 -#error Long array data offset must be 4 greater than int array data offset. -#endif // Mask out the unaligned part to make sure we are 8 byte aligned. andq LITERAL(OBJECT_ALIGNMENT_MASK_TOGGLED64), %r9 movq THREAD_LOCAL_POS_OFFSET(%rcx), %rax @@ -1149,7 +1142,6 @@ MACRO1(ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED, slowPathLabel) ret // Fast path succeeded. END_MACRO - // The common slow path code for art_quick_alloc_object_{resolved, initialized}_tlab // and art_quick_alloc_object_{resolved, initialized}_region_tlab. MACRO1(ALLOC_OBJECT_TLAB_SLOW_PATH, cxx_name) @@ -1161,16 +1153,6 @@ MACRO1(ALLOC_OBJECT_TLAB_SLOW_PATH, cxx_name) RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER // return or deliver exception END_MACRO -// The slow path code for art_quick_alloc_array_region_tlab. -MACRO1(ALLOC_ARRAY_TLAB_SLOW_PATH, cxx_name) - SETUP_SAVE_REFS_ONLY_FRAME // save ref containing registers for GC - // Outgoing argument set up - movq %gs:THREAD_SELF_OFFSET, %rdx // pass Thread::Current() - call CALLVAR(cxx_name) // cxx_name(arg0, arg1, Thread*) - RESTORE_SAVE_REFS_ONLY_FRAME // restore frame up to return address - RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER // return or deliver exception -END_MACRO - // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB). 
May be // called with CC if the GC is not active. DEFINE_FUNCTION art_quick_alloc_object_resolved_tlab @@ -1191,25 +1173,92 @@ DEFINE_FUNCTION art_quick_alloc_object_initialized_tlab ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeInitializedTLAB END_FUNCTION art_quick_alloc_object_initialized_tlab -// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB). -DEFINE_FUNCTION art_quick_alloc_array_resolved_tlab +MACRO0(COMPUTE_ARRAY_SIZE_UNKNOWN) + movl MIRROR_CLASS_COMPONENT_TYPE_OFFSET(%rdi), %ecx // Load component type. + UNPOISON_HEAP_REF ecx + movl MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET(%rcx), %ecx // Load primitive type. + shrq LITERAL(PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT), %rcx // Get component size shift. + movq %rsi, %r9 + salq %cl, %r9 // Calculate array count shifted. + // Add array header + alignment rounding. + addq LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9 + // Add 4 extra bytes if we are doing a long array. + addq LITERAL(1), %rcx + andq LITERAL(4), %rcx +#if MIRROR_LONG_ARRAY_DATA_OFFSET != MIRROR_INT_ARRAY_DATA_OFFSET + 4 +#error Long array data offset must be 4 greater than int array data offset. +#endif + addq %rcx, %r9 +END_MACRO + +MACRO0(COMPUTE_ARRAY_SIZE_8) // RDI: mirror::Class* klass, RSI: int32_t component_count // RDX, RCX, R8, R9: free. RAX: return val. - ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED .Lart_quick_alloc_array_resolved_tlab_slow_path -.Lart_quick_alloc_array_resolved_tlab_slow_path: - ALLOC_ARRAY_TLAB_SLOW_PATH artAllocArrayFromCodeResolvedTLAB -END_FUNCTION art_quick_alloc_array_resolved_tlab + movq %rsi, %r9 + // Add array header + alignment rounding. + addq LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9 +END_MACRO -// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB). -DEFINE_FUNCTION art_quick_alloc_array_resolved_region_tlab - // Fast path region tlab allocation. 
+MACRO0(COMPUTE_ARRAY_SIZE_16) // RDI: mirror::Class* klass, RSI: int32_t component_count - // RCX, RDX, R8, R9: free. RAX: return val. - ASSERT_USE_READ_BARRIER - ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED .Lart_quick_alloc_array_resolved_region_tlab_slow_path -.Lart_quick_alloc_array_resolved_region_tlab_slow_path: - ALLOC_ARRAY_TLAB_SLOW_PATH artAllocArrayFromCodeResolvedRegionTLAB -END_FUNCTION art_quick_alloc_array_resolved_region_tlab + // RDX, RCX, R8, R9: free. RAX: return val. + movq %rsi, %r9 + salq LITERAL(1), %r9 + // Add array header + alignment rounding. + addq LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9 +END_MACRO + +MACRO0(COMPUTE_ARRAY_SIZE_32) + // RDI: mirror::Class* klass, RSI: int32_t component_count + // RDX, RCX, R8, R9: free. RAX: return val. + movq %rsi, %r9 + salq LITERAL(2), %r9 + // Add array header + alignment rounding. + addq LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9 +END_MACRO + +MACRO0(COMPUTE_ARRAY_SIZE_64) + // RDI: mirror::Class* klass, RSI: int32_t component_count + // RDX, RCX, R8, R9: free. RAX: return val. + movq %rsi, %r9 + salq LITERAL(3), %r9 + // Add array header + alignment rounding. + // Add 4 extra bytes for array data alignment + addq LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK + 4), %r9 +END_MACRO + +// The slow path code for art_quick_alloc_array_*tlab. +MACRO1(ALLOC_ARRAY_TLAB_SLOW_PATH, cxx_name) +END_MACRO + +MACRO3(GENERATE_ALLOC_ARRAY_TLAB, c_entrypoint, cxx_name, size_setup) + DEFINE_FUNCTION VAR(c_entrypoint) + // RDI: mirror::Class* klass, RSI: int32_t component_count + // RDX, RCX, R8, R9: free. RAX: return val. 
+ CALL_MACRO(size_setup) + ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE .Lslow_path\c_entrypoint +.Lslow_path\c_entrypoint: + SETUP_SAVE_REFS_ONLY_FRAME // save ref containing registers for GC + // Outgoing argument set up + movq %gs:THREAD_SELF_OFFSET, %rdx // pass Thread::Current() + call CALLVAR(cxx_name) // cxx_name(arg0, arg1, Thread*) + RESTORE_SAVE_REFS_ONLY_FRAME // restore frame up to return address + RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER // return or deliver exception + END_FUNCTION VAR(c_entrypoint) +END_MACRO + + +GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN +GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_8 +GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_16 +GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_32 +GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_64 + +GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN +GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_8 +GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_16 +GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_32 +GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_64 // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB). 
DEFINE_FUNCTION art_quick_alloc_object_resolved_region_tlab diff --git a/runtime/asm_support.h b/runtime/asm_support.h index ed83f1c57f..46f2c08663 100644 --- a/runtime/asm_support.h +++ b/runtime/asm_support.h @@ -104,7 +104,7 @@ ADD_TEST_EQ(THREAD_LOCAL_OBJECTS_OFFSET, // Offset of field Thread::tlsPtr_.mterp_current_ibase. #define THREAD_CURRENT_IBASE_OFFSET \ - (THREAD_LOCAL_OBJECTS_OFFSET + __SIZEOF_SIZE_T__ + (1 + 157) * __SIZEOF_POINTER__) + (THREAD_LOCAL_OBJECTS_OFFSET + __SIZEOF_SIZE_T__ + (1 + 161) * __SIZEOF_POINTER__) ADD_TEST_EQ(THREAD_CURRENT_IBASE_OFFSET, art::Thread::MterpCurrentIBaseOffset<POINTER_SIZE>().Int32Value()) // Offset of field Thread::tlsPtr_.mterp_default_ibase. diff --git a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc index e9f09b2bd9..582f0cff48 100644 --- a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc @@ -129,29 +129,34 @@ GENERATE_ENTRYPOINTS_FOR_ALLOCATOR(RegionTLAB, gc::kAllocatorTypeRegionTLAB) #define GENERATE_ENTRYPOINTS(suffix) \ extern "C" void* art_quick_alloc_array_resolved##suffix(mirror::Class* klass, int32_t); \ +extern "C" void* art_quick_alloc_array_resolved8##suffix(mirror::Class* klass, int32_t); \ +extern "C" void* art_quick_alloc_array_resolved16##suffix(mirror::Class* klass, int32_t); \ +extern "C" void* art_quick_alloc_array_resolved32##suffix(mirror::Class* klass, int32_t); \ +extern "C" void* art_quick_alloc_array_resolved64##suffix(mirror::Class* klass, int32_t); \ extern "C" void* art_quick_alloc_object_resolved##suffix(mirror::Class* klass); \ extern "C" void* art_quick_alloc_object_initialized##suffix(mirror::Class* klass); \ extern "C" void* art_quick_alloc_object_with_checks##suffix(mirror::Class* klass); \ -extern "C" void* art_quick_check_and_alloc_array##suffix(uint32_t, int32_t, ArtMethod* ref); \ -extern "C" void* 
art_quick_check_and_alloc_array_with_access_check##suffix(uint32_t, int32_t, ArtMethod* ref); \ extern "C" void* art_quick_alloc_string_from_bytes##suffix(void*, int32_t, int32_t, int32_t); \ extern "C" void* art_quick_alloc_string_from_chars##suffix(int32_t, int32_t, void*); \ extern "C" void* art_quick_alloc_string_from_string##suffix(void*); \ -extern "C" void* art_quick_alloc_array##suffix##_instrumented(uint32_t, int32_t, ArtMethod* ref); \ extern "C" void* art_quick_alloc_array_resolved##suffix##_instrumented(mirror::Class* klass, int32_t); \ -extern "C" void* art_quick_alloc_array_with_access_check##suffix##_instrumented(uint32_t, int32_t, ArtMethod* ref); \ -extern "C" void* art_quick_alloc_object##suffix##_instrumented(uint32_t type_idx, ArtMethod* ref); \ +extern "C" void* art_quick_alloc_array_resolved8##suffix##_instrumented(mirror::Class* klass, int32_t); \ +extern "C" void* art_quick_alloc_array_resolved16##suffix##_instrumented(mirror::Class* klass, int32_t); \ +extern "C" void* art_quick_alloc_array_resolved32##suffix##_instrumented(mirror::Class* klass, int32_t); \ +extern "C" void* art_quick_alloc_array_resolved64##suffix##_instrumented(mirror::Class* klass, int32_t); \ extern "C" void* art_quick_alloc_object_resolved##suffix##_instrumented(mirror::Class* klass); \ extern "C" void* art_quick_alloc_object_initialized##suffix##_instrumented(mirror::Class* klass); \ extern "C" void* art_quick_alloc_object_with_checks##suffix##_instrumented(mirror::Class* klass); \ -extern "C" void* art_quick_check_and_alloc_array##suffix##_instrumented(uint32_t, int32_t, ArtMethod* ref); \ -extern "C" void* art_quick_check_and_alloc_array_with_access_check##suffix##_instrumented(uint32_t, int32_t, ArtMethod* ref); \ extern "C" void* art_quick_alloc_string_from_bytes##suffix##_instrumented(void*, int32_t, int32_t, int32_t); \ extern "C" void* art_quick_alloc_string_from_chars##suffix##_instrumented(int32_t, int32_t, void*); \ extern "C" void* 
art_quick_alloc_string_from_string##suffix##_instrumented(void*); \ void SetQuickAllocEntryPoints##suffix(QuickEntryPoints* qpoints, bool instrumented) { \ if (instrumented) { \ qpoints->pAllocArrayResolved = art_quick_alloc_array_resolved##suffix##_instrumented; \ + qpoints->pAllocArrayResolved8 = art_quick_alloc_array_resolved8##suffix##_instrumented; \ + qpoints->pAllocArrayResolved16 = art_quick_alloc_array_resolved16##suffix##_instrumented; \ + qpoints->pAllocArrayResolved32 = art_quick_alloc_array_resolved32##suffix##_instrumented; \ + qpoints->pAllocArrayResolved64 = art_quick_alloc_array_resolved64##suffix##_instrumented; \ qpoints->pAllocObjectResolved = art_quick_alloc_object_resolved##suffix##_instrumented; \ qpoints->pAllocObjectInitialized = art_quick_alloc_object_initialized##suffix##_instrumented; \ qpoints->pAllocObjectWithChecks = art_quick_alloc_object_with_checks##suffix##_instrumented; \ @@ -160,6 +165,10 @@ void SetQuickAllocEntryPoints##suffix(QuickEntryPoints* qpoints, bool instrument qpoints->pAllocStringFromString = art_quick_alloc_string_from_string##suffix##_instrumented; \ } else { \ qpoints->pAllocArrayResolved = art_quick_alloc_array_resolved##suffix; \ + qpoints->pAllocArrayResolved8 = art_quick_alloc_array_resolved8##suffix; \ + qpoints->pAllocArrayResolved16 = art_quick_alloc_array_resolved16##suffix; \ + qpoints->pAllocArrayResolved32 = art_quick_alloc_array_resolved32##suffix; \ + qpoints->pAllocArrayResolved64 = art_quick_alloc_array_resolved64##suffix; \ qpoints->pAllocObjectResolved = art_quick_alloc_object_resolved##suffix; \ qpoints->pAllocObjectInitialized = art_quick_alloc_object_initialized##suffix; \ qpoints->pAllocObjectWithChecks = art_quick_alloc_object_with_checks##suffix; \ diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h index 22b0f921c7..e0a2e3cf8f 100644 --- a/runtime/entrypoints/quick/quick_entrypoints_list.h +++ 
b/runtime/entrypoints/quick/quick_entrypoints_list.h @@ -21,6 +21,10 @@ #define QUICK_ENTRYPOINT_LIST(V) \ V(AllocArrayResolved, void*, mirror::Class*, int32_t) \ + V(AllocArrayResolved8, void*, mirror::Class*, int32_t) \ + V(AllocArrayResolved16, void*, mirror::Class*, int32_t) \ + V(AllocArrayResolved32, void*, mirror::Class*, int32_t) \ + V(AllocArrayResolved64, void*, mirror::Class*, int32_t) \ V(AllocObjectResolved, void*, mirror::Class*) \ V(AllocObjectInitialized, void*, mirror::Class*) \ V(AllocObjectWithChecks, void*, mirror::Class*) \ diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc index 8e84d76e83..d0687ce7b0 100644 --- a/runtime/entrypoints_order_test.cc +++ b/runtime/entrypoints_order_test.cc @@ -152,7 +152,15 @@ class EntrypointsOrderTest : public CommonRuntimeTest { void CheckQuickEntryPoints() { CHECKED(OFFSETOF_MEMBER(QuickEntryPoints, pAllocArrayResolved) == 0, QuickEntryPoints_start_with_allocarray_resoved); - EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAllocArrayResolved, pAllocObjectResolved, + EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAllocArrayResolved, pAllocArrayResolved8, + sizeof(void*)); + EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAllocArrayResolved8, pAllocArrayResolved16, + sizeof(void*)); + EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAllocArrayResolved16, pAllocArrayResolved32, + sizeof(void*)); + EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAllocArrayResolved32, pAllocArrayResolved64, + sizeof(void*)); + EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAllocArrayResolved64, pAllocObjectResolved, sizeof(void*)); EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAllocObjectResolved, pAllocObjectInitialized, sizeof(void*)); diff --git a/runtime/oat.h b/runtime/oat.h index 29821a2eea..4a68036e00 100644 --- a/runtime/oat.h +++ b/runtime/oat.h @@ -32,7 +32,7 @@ class InstructionSetFeatures; class PACKED(4) OatHeader { public: static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' }; - static constexpr uint8_t kOatVersion[] = { '1', 
'0', '3', '\0' }; // Native pc change + static constexpr uint8_t kOatVersion[] = { '1', '0', '4', '\0' }; // Array allocation entrypoints static constexpr const char* kImageLocationKey = "image-location"; static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline"; diff --git a/runtime/thread.cc b/runtime/thread.cc index d93eab10a0..66a03a6826 100644 --- a/runtime/thread.cc +++ b/runtime/thread.cc @@ -2663,6 +2663,10 @@ void Thread::DumpThreadOffset(std::ostream& os, uint32_t offset) { return; \ } QUICK_ENTRY_POINT_INFO(pAllocArrayResolved) + QUICK_ENTRY_POINT_INFO(pAllocArrayResolved8) + QUICK_ENTRY_POINT_INFO(pAllocArrayResolved16) + QUICK_ENTRY_POINT_INFO(pAllocArrayResolved32) + QUICK_ENTRY_POINT_INFO(pAllocArrayResolved64) QUICK_ENTRY_POINT_INFO(pAllocObjectResolved) QUICK_ENTRY_POINT_INFO(pAllocObjectInitialized) QUICK_ENTRY_POINT_INFO(pAllocObjectWithChecks) |