67 files changed, 3620 insertions, 696 deletions
diff --git a/build/Android.common_test.mk b/build/Android.common_test.mk index 449502c771..d2e3371551 100644 --- a/build/Android.common_test.mk +++ b/build/Android.common_test.mk @@ -23,7 +23,10 @@ include art/build/Android.common_path.mk ifneq ($(TMPDIR),) ART_HOST_TEST_DIR := $(TMPDIR)/test-art-$(shell echo $$PPID) else -ART_HOST_TEST_DIR := /tmp/$(USER)/test-art-$(shell echo $$PPID) +# Use a BSD checksum calculated from ANDROID_BUILD_TOP and USER as one of the +# path components for the test output. This should allow us to run tests from multiple +# repositories at the same time. +ART_HOST_TEST_DIR := /tmp/test-art-$(shell echo ${ANDROID_BUILD_TOP}-${USER} | sum | cut -d ' ' -f1) endif # List of known broken tests that we won't attempt to execute. The test name must be the full diff --git a/build/Android.oat.mk b/build/Android.oat.mk index e297b4f531..3b273a2202 100644 --- a/build/Android.oat.mk +++ b/build/Android.oat.mk @@ -215,9 +215,24 @@ define create-core-oat-target-rules $(4)TARGET_CORE_IMAGE_$(1)_$(2)_64 := $$(core_image_name) else $(4)TARGET_CORE_IMAGE_$(1)_$(2)_32 := $$(core_image_name) + ifdef ART_USE_VIXL_ARM_BACKEND + ifeq ($(1),optimizing) + # TODO(VIXL): The ARM VIXL backend is still work in progress. Therefore for now we do not + # compile the core image with the Optimizing backend when ART_USE_VIXL_ARM_BACKEND is + # defined. + core_compile_options += --compiler-filter=interpret-only + endif + endif endif else $(4)TARGET_CORE_IMAGE_$(1)_$(2)_32 := $$(core_image_name) + ifdef ART_USE_VIXL_ARM_BACKEND + ifeq ($(1),optimizing) + # TODO(VIXL): The ARM VIXL backend is still work in progress. Therefore for now we do not + # compile the core image with the Optimizing backend when ART_USE_VIXL_ARM_BACKEND is defined. + core_compile_options += --compiler-filter=interpret-only + endif + endif endif $(4)TARGET_CORE_IMG_OUTS += $$(core_image_name) $(4)TARGET_CORE_OAT_OUTS += $$(core_oat_name) diff --git a/build/art.go b/build/art.go index 1164cbc553..ccaa11dfe7 100644 --- a/build/art.go +++ b/build/art.go @@ -74,6 +74,12 @@ func globalFlags(ctx android.BaseContext) ([]string, []string) { cflags = append(cflags, "-fstack-protector") } + if envTrue(ctx, "ART_USE_VIXL_ARM_BACKEND") { + // Used to enable the new VIXL-based ARM code generator. + cflags = append(cflags, "-DART_USE_VIXL_ARM_BACKEND=1") + asflags = append(asflags, "-DART_USE_VIXL_ARM_BACKEND=1") + } + return cflags, asflags } diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc index afb8fce8d7..ca1dc693eb 100644 --- a/compiler/jni/jni_compiler_test.cc +++ b/compiler/jni/jni_compiler_test.cc @@ -387,8 +387,7 @@ jobject JniCompilerTest::class_loader_; // Test the normal compiler and normal generic JNI only. // The following features are unsupported in @FastNative: // 1) JNI stubs (lookup via dlsym) when methods aren't explicitly registered -// 2) Returning objects from the JNI function -// 3) synchronized keyword +// 2) synchronized keyword // -- TODO: We can support (1) if we remove the mutator lock assert during stub lookup. # define JNI_TEST_NORMAL_ONLY(TestName) \ TEST_F(JniCompilerTest, TestName ## NormalCompiler) { \ @@ -826,8 +825,7 @@ void JniCompilerTest::CompileAndRunIntObjectObjectMethodImpl() { gJava_MyClassNatives_fooIOO_calls[gCurrentJni] = 0; } -// TODO: Maybe. @FastNative support for returning Objects? 
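The Android.common_test.mk hunk above keys the host test directory off a BSD checksum of ${ANDROID_BUILD_TOP}-${USER} so that parallel checkouts do not collide in /tmp. A minimal standalone sketch of that checksum follows, assuming the classic BSD rotate-and-add algorithm that coreutils `sum` prints by default; the path and user below are made-up examples, and the make rule itself of course shells out to `sum` rather than compiling anything:

#include <cstdint>
#include <cstdio>
#include <string>

// Classic BSD `sum` checksum: rotate the 16-bit accumulator right by one, add the
// next byte, keep the low 16 bits. This is the value the new rule takes from the
// first field of `echo ${ANDROID_BUILD_TOP}-${USER} | sum`.
uint16_t BsdSum(const std::string& data) {
  uint32_t checksum = 0;
  for (unsigned char c : data) {
    checksum = (checksum >> 1) + ((checksum & 1u) << 15);  // 16-bit rotate right.
    checksum = (checksum + c) & 0xffffu;
  }
  return static_cast<uint16_t>(checksum);
}

int main() {
  // Hypothetical build top and user; `echo` appends the trailing newline that the
  // shell pipeline also feeds into `sum`.
  std::string input = "/home/someuser/aosp-master-someuser\n";
  std::printf("ART_HOST_TEST_DIR would be /tmp/test-art-%u\n",
              static_cast<unsigned>(BsdSum(input)));
  return 0;
}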
-JNI_TEST_NORMAL_ONLY(CompileAndRunIntObjectObjectMethod) +JNI_TEST(CompileAndRunIntObjectObjectMethod) int gJava_MyClassNatives_fooSII_calls[kJniKindCount] = {}; jint Java_MyClassNatives_fooSII(JNIEnv* env ATTRIBUTE_UNUSED, @@ -1047,8 +1045,7 @@ void JniCompilerTest::CompileAndRunStaticIntObjectObjectMethodImpl() { gJava_MyClassNatives_fooSIOO_calls[gCurrentJni] = 0; } -// TODO: Maybe. @FastNative support for returning Objects? -JNI_TEST_NORMAL_ONLY(CompileAndRunStaticIntObjectObjectMethod) +JNI_TEST(CompileAndRunStaticIntObjectObjectMethod) int gJava_MyClassNatives_fooSSIOO_calls[kJniKindCount] = {}; jobject Java_MyClassNatives_fooSSIOO(JNIEnv*, jclass klass, jint x, jobject y, jobject z) { @@ -1216,8 +1213,7 @@ void JniCompilerTest::ReturnGlobalRefImpl() { EXPECT_TRUE(env_->IsSameObject(result, jobj_)); } -// TODO: Maybe. @FastNative support for returning objects? -JNI_TEST_NORMAL_ONLY(ReturnGlobalRef) +JNI_TEST(ReturnGlobalRef) jint local_ref_test(JNIEnv* env, jobject thisObj, jint x) { // Add 10 local references @@ -1357,8 +1353,7 @@ void JniCompilerTest::UpcallReturnTypeChecking_InstanceImpl() { CurrentJniStringSuffix() + "() with CallStaticObjectMethodV"); } -// TODO: Maybe support returning objects for @FastNative? -JNI_TEST_NORMAL_ONLY(UpcallReturnTypeChecking_Instance) +JNI_TEST(UpcallReturnTypeChecking_Instance) void JniCompilerTest::UpcallReturnTypeChecking_StaticImpl() { SetUpForTest(true, "staticMethodThatShouldReturnClass", "()Ljava/lang/Class;", @@ -1385,8 +1380,7 @@ void JniCompilerTest::UpcallReturnTypeChecking_StaticImpl() { CurrentJniStringSuffix() + "() with CallObjectMethodV"); } -// TODO: Maybe support returning objects for @FastNative? -JNI_TEST_NORMAL_ONLY(UpcallReturnTypeChecking_Static) +JNI_TEST(UpcallReturnTypeChecking_Static) // This should take jclass, but we're imitating a bug pattern. void Java_MyClassNatives_instanceMethodThatShouldTakeClass(JNIEnv*, jobject, jclass) { diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc index e17144192a..3bd290da17 100644 --- a/compiler/jni/quick/jni_compiler.cc +++ b/compiler/jni/quick/jni_compiler.cc @@ -70,6 +70,47 @@ static std::unique_ptr<JNIMacroAssembler<kPointerSize>> GetMacroAssembler( return JNIMacroAssembler<kPointerSize>::Create(arena, isa, features); } +enum class JniEntrypoint { + kStart, + kEnd +}; + +template <PointerSize kPointerSize> +static ThreadOffset<kPointerSize> GetJniEntrypointThreadOffset(JniEntrypoint which, + bool reference_return, + bool is_synchronized, + bool is_fast_native) { + if (which == JniEntrypoint::kStart) { // JniMethodStart + ThreadOffset<kPointerSize> jni_start = + is_synchronized + ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodStartSynchronized) + : (is_fast_native + ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodFastStart) + : QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodStart)); + + return jni_start; + } else { // JniMethodEnd + ThreadOffset<kPointerSize> jni_end(-1); + if (reference_return) { + // Pass result. + jni_end = is_synchronized + ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEndWithReferenceSynchronized) + : (is_fast_native + ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodFastEndWithReference) + : QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEndWithReference)); + } else { + jni_end = is_synchronized + ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEndSynchronized) + : (is_fast_native + ? 
QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodFastEnd) + : QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEnd)); + } + + return jni_end; + } +} + + // Generate the JNI bridge for the given method, general contract: // - Arguments are in the managed runtime format, either on stack or in // registers, a reference to the method object is supplied as part of this @@ -345,13 +386,11 @@ static CompiledMethod* ArtJniCompileMethodInternal(CompilerDriver* driver, FrameOffset locked_object_handle_scope_offset(0xBEEFDEAD); if (LIKELY(!is_critical_native)) { // Skip this for @CriticalNative methods. They do not call JniMethodStart. - ThreadOffset<kPointerSize> jni_start = - is_synchronized - ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodStartSynchronized) - : ((is_fast_native && !reference_return) // TODO: support @FastNative returning obj - ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodFastStart) - : QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodStart)); - + ThreadOffset<kPointerSize> jni_start( + GetJniEntrypointThreadOffset<kPointerSize>(JniEntrypoint::kStart, + reference_return, + is_synchronized, + is_fast_native).SizeValue()); main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size)); locked_object_handle_scope_offset = FrameOffset(0); if (is_synchronized) { @@ -543,20 +582,15 @@ static CompiledMethod* ArtJniCompileMethodInternal(CompilerDriver* driver, if (LIKELY(!is_critical_native)) { // 12. Call JniMethodEnd - ThreadOffset<kPointerSize> jni_end(-1); + ThreadOffset<kPointerSize> jni_end( + GetJniEntrypointThreadOffset<kPointerSize>(JniEntrypoint::kEnd, + reference_return, + is_synchronized, + is_fast_native).SizeValue()); if (reference_return) { // Pass result. - jni_end = is_synchronized - ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEndWithReferenceSynchronized) - : QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEndWithReference); SetNativeParameter(jni_asm.get(), end_jni_conv.get(), end_jni_conv->ReturnRegister()); end_jni_conv->Next(); - } else { - jni_end = is_synchronized - ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEndSynchronized) - : (is_fast_native - ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodFastEnd) - : QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEnd)); } // Pass saved local reference state. if (end_jni_conv->IsCurrentParamOnStack()) { diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc index 3b7788068e..4a9de7f3d1 100644 --- a/compiler/linker/arm64/relative_patcher_arm64.cc +++ b/compiler/linker/arm64/relative_patcher_arm64.cc @@ -214,7 +214,7 @@ void Arm64RelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code, DCHECK(patch.GetType() == LinkerPatch::Type::kStringRelative || patch.GetType() == LinkerPatch::Type::kTypeRelative) << patch.GetType(); } else { - // With the read barrier (non-baker) enabled, it could be kDexCacheArray in the + // With the read barrier (non-Baker) enabled, it could be kDexCacheArray in the // HLoadString::LoadKind::kDexCachePcRelative case of VisitLoadString(). 
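For the jni_compiler.cc change above, the JniMethodStart/JniMethodEnd entrypoint choice that used to be spelled out inline is now centralized in GetJniEntrypointThreadOffset. The precedence it encodes (synchronized wins over @FastNative, and a reference return only affects the end entrypoint) can be mirrored in a small standalone sketch; the enums and function names below are illustrative stand-ins, not the real ART entrypoint tables:

#include <cassert>
#include <cstdio>

// Illustrative mirror of the selection logic in GetJniEntrypointThreadOffset().
// The enum values stand in for the pJniMethod* quick entrypoints.
enum class StartStub { kStartSynchronized, kFastStart, kStart };
enum class EndStub {
  kEndWithReferenceSynchronized, kFastEndWithReference, kEndWithReference,
  kEndSynchronized, kFastEnd, kEnd
};

StartStub SelectStart(bool is_synchronized, bool is_fast_native) {
  // Synchronized wins over @FastNative; reference returns do not matter on entry.
  return is_synchronized ? StartStub::kStartSynchronized
                         : (is_fast_native ? StartStub::kFastStart : StartStub::kStart);
}

EndStub SelectEnd(bool reference_return, bool is_synchronized, bool is_fast_native) {
  if (reference_return) {
    return is_synchronized ? EndStub::kEndWithReferenceSynchronized
                           : (is_fast_native ? EndStub::kFastEndWithReference
                                             : EndStub::kEndWithReference);
  }
  return is_synchronized ? EndStub::kEndSynchronized
                         : (is_fast_native ? EndStub::kFastEnd : EndStub::kEnd);
}

int main() {
  // A @FastNative method returning an object now gets the fast start/end stubs,
  // which is why the JNI_TEST_NORMAL_ONLY tests above could become plain JNI_TEST.
  assert(SelectStart(/* sync */ false, /* fast */ true) == StartStub::kFastStart);
  assert(SelectEnd(/* ref */ true, /* sync */ false, /* fast */ true) ==
         EndStub::kFastEndWithReference);
  std::puts("entrypoint selection matches the refactored helper");
  return 0;
}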
DCHECK(patch.GetType() == LinkerPatch::Type::kStringRelative || patch.GetType() == LinkerPatch::Type::kTypeRelative || diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc index 593d8e92f9..ffeff760c6 100644 --- a/compiler/oat_test.cc +++ b/compiler/oat_test.cc @@ -462,7 +462,7 @@ TEST_F(OatTest, OatHeaderSizeCheck) { EXPECT_EQ(72U, sizeof(OatHeader)); EXPECT_EQ(4U, sizeof(OatMethodOffsets)); EXPECT_EQ(20U, sizeof(OatQuickMethodHeader)); - EXPECT_EQ(163 * static_cast<size_t>(GetInstructionSetPointerSize(kRuntimeISA)), + EXPECT_EQ(164 * static_cast<size_t>(GetInstructionSetPointerSize(kRuntimeISA)), sizeof(QuickEntryPoints)); } diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 00530d8140..9f92b20929 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -107,7 +107,7 @@ static size_t SaveContiguousSRegisterList(size_t first, size_t number_of_d_regs = (last - first + 1) / 2; if (number_of_d_regs == 1) { - __ StoreDToOffset(d_reg, SP, stack_offset); + __ StoreDToOffset(d_reg, SP, stack_offset); } else if (number_of_d_regs > 1) { __ add(IP, SP, ShifterOperand(stack_offset)); __ vstmiad(IP, d_reg, number_of_d_regs); @@ -429,33 +429,49 @@ class LoadStringSlowPathARM : public SlowPathCodeARM { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); + HLoadString* load = instruction_->AsLoadString(); + const uint32_t string_index = load->GetStringIndex(); + Register out = locations->Out().AsRegister<Register>(); + Register temp = locations->GetTemp(0).AsRegister<Register>(); + constexpr bool call_saves_everything_except_r0 = (!kUseReadBarrier || kUseBakerReadBarrier); CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; - HLoadString* load = instruction_->AsLoadString(); - const uint32_t string_index = load->GetStringIndex(); + // In the unlucky case that the `temp` is R0, we preserve the address in `out` across + // the kSaveEverything call (or use `out` for the address after non-kSaveEverything call). + bool temp_is_r0 = (temp == calling_convention.GetRegisterAt(0)); + Register entry_address = temp_is_r0 ? out : temp; + DCHECK_NE(entry_address, calling_convention.GetRegisterAt(0)); + if (call_saves_everything_except_r0 && temp_is_r0) { + __ mov(entry_address, ShifterOperand(temp)); + } + __ LoadImmediate(calling_convention.GetRegisterAt(0), string_index); arm_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); - arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0)); - RestoreLiveRegisters(codegen, locations); + // Store the resolved String to the .bss entry. + if (call_saves_everything_except_r0) { + // The string entry address was preserved in `entry_address` thanks to kSaveEverything. + __ str(R0, Address(entry_address)); + } else { + // For non-Baker read barrier, we need to re-calculate the address of the string entry. 
+ CodeGeneratorARM::PcRelativePatchInfo* labels = + arm_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index); + __ BindTrackedLabel(&labels->movw_label); + __ movw(entry_address, /* placeholder */ 0u); + __ BindTrackedLabel(&labels->movt_label); + __ movt(entry_address, /* placeholder */ 0u); + __ BindTrackedLabel(&labels->add_pc_label); + __ add(entry_address, entry_address, ShifterOperand(PC)); + __ str(R0, Address(entry_address)); + } - // Store the resolved String to the BSS entry. - // TODO: Change art_quick_resolve_string to kSaveEverything and use a temporary for the - // .bss entry address in the fast path, so that we can avoid another calculation here. - CodeGeneratorARM::PcRelativePatchInfo* labels = - arm_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index); - __ BindTrackedLabel(&labels->movw_label); - __ movw(IP, /* placeholder */ 0u); - __ BindTrackedLabel(&labels->movt_label); - __ movt(IP, /* placeholder */ 0u); - __ BindTrackedLabel(&labels->add_pc_label); - __ add(IP, IP, ShifterOperand(PC)); - __ str(locations->Out().AsRegister<Register>(), Address(IP)); + arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0)); + RestoreLiveRegisters(codegen, locations); __ b(GetExitLabel()); } @@ -5694,10 +5710,25 @@ void LocationsBuilderARM::VisitLoadString(HLoadString* load) { HLoadString::LoadKind load_kind = load->GetLoadKind(); if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) { - locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RegisterLocation(R0)); } else { locations->SetOut(Location::RequiresRegister()); + if (load_kind == HLoadString::LoadKind::kBssEntry) { + if (!kUseReadBarrier || kUseBakerReadBarrier) { + // Rely on the pResolveString and/or marking to save everything, including temps. + // Note that IP may theoretically be clobbered by saving/restoring the live register + // (only one thanks to the custom calling convention), so we request a different temp. + locations->AddTemp(Location::RequiresRegister()); + RegisterSet caller_saves = RegisterSet::Empty(); + InvokeRuntimeCallingConvention calling_convention; + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK() + // that the the kPrimNot result register is the same as the first argument register. + locations->SetCustomSlowPathCallerSaves(caller_saves); + } else { + // For non-Baker read barrier we have a temp-clobbering call. 
+ } + } } } @@ -5733,15 +5764,16 @@ void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) { } case HLoadString::LoadKind::kBssEntry: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + Register temp = locations->GetTemp(0).AsRegister<Register>(); CodeGeneratorARM::PcRelativePatchInfo* labels = codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex()); __ BindTrackedLabel(&labels->movw_label); - __ movw(out, /* placeholder */ 0u); + __ movw(temp, /* placeholder */ 0u); __ BindTrackedLabel(&labels->movt_label); - __ movt(out, /* placeholder */ 0u); + __ movt(temp, /* placeholder */ 0u); __ BindTrackedLabel(&labels->add_pc_label); - __ add(out, out, ShifterOperand(PC)); - GenerateGcRootFieldLoad(load, out_loc, out, 0); + __ add(temp, temp, ShifterOperand(PC)); + GenerateGcRootFieldLoad(load, out_loc, temp, 0); SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM(load); codegen_->AddSlowPath(slow_path); __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel()); @@ -5755,6 +5787,7 @@ void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) { // TODO: Consider re-adding the compiler code to do string dex cache lookup again. DCHECK(load_kind == HLoadString::LoadKind::kDexCacheViaMethod); InvokeRuntimeCallingConvention calling_convention; + DCHECK_EQ(calling_convention.GetRegisterAt(0), out); __ LoadImmediate(calling_convention.GetRegisterAt(0), load->GetStringIndex()); codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc()); CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index f02b028541..9e59d8cc38 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -331,13 +331,20 @@ class LoadClassSlowPathARM64 : public SlowPathCodeARM64 { class LoadStringSlowPathARM64 : public SlowPathCodeARM64 { public: - explicit LoadStringSlowPathARM64(HLoadString* instruction) : SlowPathCodeARM64(instruction) {} + LoadStringSlowPathARM64(HLoadString* instruction, Register temp, vixl::aarch64::Label* adrp_label) + : SlowPathCodeARM64(instruction), + temp_(temp), + adrp_label_(adrp_label) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); + // temp_ is a scratch register. Make sure it's not used for saving/restoring registers. + UseScratchRegisterScope temps(arm64_codegen->GetVIXLAssembler()); + temps.Exclude(temp_); + __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); @@ -352,21 +359,21 @@ class LoadStringSlowPathARM64 : public SlowPathCodeARM64 { RestoreLiveRegisters(codegen, locations); // Store the resolved String to the BSS entry. - UseScratchRegisterScope temps(arm64_codegen->GetVIXLAssembler()); - Register temp = temps.AcquireX(); const DexFile& dex_file = instruction_->AsLoadString()->GetDexFile(); - // TODO: Change art_quick_resolve_string to kSaveEverything and use a temporary - // for the ADRP in the fast path, so that we can avoid the ADRP here. 
- vixl::aarch64::Label* adrp_label = - arm64_codegen->NewPcRelativeStringPatch(dex_file, string_index); - arm64_codegen->EmitAdrpPlaceholder(adrp_label, temp); + if (!kUseReadBarrier || kUseBakerReadBarrier) { + // The string entry page address was preserved in temp_ thanks to kSaveEverything. + } else { + // For non-Baker read barrier, we need to re-calculate the address of the string entry page. + adrp_label_ = arm64_codegen->NewPcRelativeStringPatch(dex_file, string_index); + arm64_codegen->EmitAdrpPlaceholder(adrp_label_, temp_); + } vixl::aarch64::Label* strp_label = - arm64_codegen->NewPcRelativeStringPatch(dex_file, string_index, adrp_label); + arm64_codegen->NewPcRelativeStringPatch(dex_file, string_index, adrp_label_); { SingleEmissionCheckScope guard(arm64_codegen->GetVIXLAssembler()); __ Bind(strp_label); __ str(RegisterFrom(locations->Out(), Primitive::kPrimNot), - MemOperand(temp, /* offset placeholder */ 0)); + MemOperand(temp_, /* offset placeholder */ 0)); } __ B(GetExitLabel()); @@ -375,6 +382,9 @@ class LoadStringSlowPathARM64 : public SlowPathCodeARM64 { const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathARM64"; } private: + const Register temp_; + vixl::aarch64::Label* adrp_label_; + DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARM64); }; @@ -4246,11 +4256,24 @@ void LocationsBuilderARM64::VisitLoadString(HLoadString* load) { : LocationSummary::kNoCall; LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); if (load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) { - locations->SetInAt(0, Location::RequiresRegister()); InvokeRuntimeCallingConvention calling_convention; locations->SetOut(calling_convention.GetReturnLocation(load->GetType())); } else { locations->SetOut(Location::RequiresRegister()); + if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) { + if (!kUseReadBarrier || kUseBakerReadBarrier) { + // Rely on the pResolveString and/or marking to save everything, including temps. + RegisterSet caller_saves = RegisterSet::Empty(); + InvokeRuntimeCallingConvention calling_convention; + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode())); + DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(), + RegisterFrom(calling_convention.GetReturnLocation(Primitive::kPrimNot), + Primitive::kPrimNot).GetCode()); + locations->SetCustomSlowPathCallerSaves(caller_saves); + } else { + // For non-Baker read barrier we have a temp-clobbering call. + } + } } } @@ -4285,18 +4308,21 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) { const DexFile& dex_file = load->GetDexFile(); uint32_t string_index = load->GetStringIndex(); DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + UseScratchRegisterScope temps(codegen_->GetVIXLAssembler()); + Register temp = temps.AcquireX(); vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index); - codegen_->EmitAdrpPlaceholder(adrp_label, out.X()); + codegen_->EmitAdrpPlaceholder(adrp_label, temp); // Add LDR with its PC-relative String patch. 
vixl::aarch64::Label* ldr_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index, adrp_label); // /* GcRoot<mirror::Class> */ out = *(base_address + offset) /* PC-relative */ GenerateGcRootFieldLoad(load, load->GetLocations()->Out(), - out.X(), + temp, /* placeholder */ 0u, ldr_label); - SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load); + SlowPathCodeARM64* slow_path = + new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load, temp, adrp_label); codegen_->AddSlowPath(slow_path); __ Cbz(out.X(), slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); @@ -4308,6 +4334,7 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) { // TODO: Re-add the compiler code to do string dex cache lookup again. InvokeRuntimeCallingConvention calling_convention; + DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(), out.GetCode()); __ Mov(calling_convention.GetRegisterAt(0).W(), load->GetStringIndex()); codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc()); CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index b522e48a6f..32287a0f2a 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -39,11 +39,20 @@ using namespace vixl32; // NOLINT(build/namespaces) using helpers::DWARFReg; using helpers::FromLowSToD; -using helpers::OutputRegister; -using helpers::InputRegisterAt; +using helpers::HighDRegisterFrom; +using helpers::HighRegisterFrom; using helpers::InputOperandAt; -using helpers::OutputSRegister; +using helpers::InputRegisterAt; using helpers::InputSRegisterAt; +using helpers::InputVRegisterAt; +using helpers::LocationFrom; +using helpers::LowRegisterFrom; +using helpers::LowSRegisterFrom; +using helpers::OutputRegister; +using helpers::OutputSRegister; +using helpers::OutputVRegister; +using helpers::RegisterFrom; +using helpers::SRegisterFrom; using RegisterList = vixl32::RegisterList; @@ -58,10 +67,6 @@ static constexpr size_t kArmInstrMaxSizeInBytes = 4u; #error "ARM Codegen VIXL macro-assembler macro already defined." #endif -// TODO: Remove with later pop when codegen complete. -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wunused-parameter" - // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. #define __ down_cast<CodeGeneratorARMVIXL*>(codegen)->GetVIXLAssembler()-> // NOLINT #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, x).Int32Value() @@ -69,22 +74,222 @@ static constexpr size_t kArmInstrMaxSizeInBytes = 4u; // Marker that code is yet to be, and must, be implemented. #define TODO_VIXL32(level) LOG(level) << __PRETTY_FUNCTION__ << " unimplemented " -class DivZeroCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL { +// SaveLiveRegisters and RestoreLiveRegisters from SlowPathCodeARM operate on sets of S registers, +// for each live D registers they treat two corresponding S registers as live ones. +// +// Two following functions (SaveContiguousSRegisterList, RestoreContiguousSRegisterList) build +// from a list of contiguous S registers a list of contiguous D registers (processing first/last +// S registers corner cases) and save/restore this new list treating them as D registers. 
+// - decreasing code size +// - avoiding hazards on Cortex-A57, when a pair of S registers for an actual live D register is +// restored and then used in regular non SlowPath code as D register. +// +// For the following example (v means the S register is live): +// D names: | D0 | D1 | D2 | D4 | ... +// S names: | S0 | S1 | S2 | S3 | S4 | S5 | S6 | S7 | ... +// Live? | | v | v | v | v | v | v | | ... +// +// S1 and S6 will be saved/restored independently; D registers list (D1, D2) will be processed +// as D registers. +// +// TODO(VIXL): All this code should be unnecessary once the VIXL AArch32 backend provides helpers +// for lists of floating-point registers. +static size_t SaveContiguousSRegisterList(size_t first, + size_t last, + CodeGenerator* codegen, + size_t stack_offset) { + static_assert(kSRegSizeInBytes == kArmWordSize, "Broken assumption on reg/word sizes."); + static_assert(kDRegSizeInBytes == 2 * kArmWordSize, "Broken assumption on reg/word sizes."); + DCHECK_LE(first, last); + if ((first == last) && (first == 0)) { + __ Vstr(vixl32::SRegister(first), MemOperand(sp, stack_offset)); + return stack_offset + kSRegSizeInBytes; + } + if (first % 2 == 1) { + __ Vstr(vixl32::SRegister(first++), MemOperand(sp, stack_offset)); + stack_offset += kSRegSizeInBytes; + } + + bool save_last = false; + if (last % 2 == 0) { + save_last = true; + --last; + } + + if (first < last) { + vixl32::DRegister d_reg = vixl32::DRegister(first / 2); + DCHECK_EQ((last - first + 1) % 2, 0u); + size_t number_of_d_regs = (last - first + 1) / 2; + + if (number_of_d_regs == 1) { + __ Vstr(d_reg, MemOperand(sp, stack_offset)); + } else if (number_of_d_regs > 1) { + UseScratchRegisterScope temps(down_cast<CodeGeneratorARMVIXL*>(codegen)->GetVIXLAssembler()); + vixl32::Register base = sp; + if (stack_offset != 0) { + base = temps.Acquire(); + __ Add(base, sp, stack_offset); + } + __ Vstm(F64, base, NO_WRITE_BACK, DRegisterList(d_reg, number_of_d_regs)); + } + stack_offset += number_of_d_regs * kDRegSizeInBytes; + } + + if (save_last) { + __ Vstr(vixl32::SRegister(last + 1), MemOperand(sp, stack_offset)); + stack_offset += kSRegSizeInBytes; + } + + return stack_offset; +} + +static size_t RestoreContiguousSRegisterList(size_t first, + size_t last, + CodeGenerator* codegen, + size_t stack_offset) { + static_assert(kSRegSizeInBytes == kArmWordSize, "Broken assumption on reg/word sizes."); + static_assert(kDRegSizeInBytes == 2 * kArmWordSize, "Broken assumption on reg/word sizes."); + DCHECK_LE(first, last); + if ((first == last) && (first == 0)) { + __ Vldr(vixl32::SRegister(first), MemOperand(sp, stack_offset)); + return stack_offset + kSRegSizeInBytes; + } + if (first % 2 == 1) { + __ Vldr(vixl32::SRegister(first++), MemOperand(sp, stack_offset)); + stack_offset += kSRegSizeInBytes; + } + + bool restore_last = false; + if (last % 2 == 0) { + restore_last = true; + --last; + } + + if (first < last) { + vixl32::DRegister d_reg = vixl32::DRegister(first / 2); + DCHECK_EQ((last - first + 1) % 2, 0u); + size_t number_of_d_regs = (last - first + 1) / 2; + if (number_of_d_regs == 1) { + __ Vldr(d_reg, MemOperand(sp, stack_offset)); + } else if (number_of_d_regs > 1) { + UseScratchRegisterScope temps(down_cast<CodeGeneratorARMVIXL*>(codegen)->GetVIXLAssembler()); + vixl32::Register base = sp; + if (stack_offset != 0) { + base = temps.Acquire(); + __ Add(base, sp, stack_offset); + } + __ Vldm(F64, base, NO_WRITE_BACK, DRegisterList(d_reg, number_of_d_regs)); + } + stack_offset += number_of_d_regs * kDRegSizeInBytes; + } 
+ + if (restore_last) { + __ Vldr(vixl32::SRegister(last + 1), MemOperand(sp, stack_offset)); + stack_offset += kSRegSizeInBytes; + } + + return stack_offset; +} + +void SlowPathCodeARMVIXL::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) { + size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath(); + size_t orig_offset = stack_offset; + + const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true); + for (uint32_t i : LowToHighBits(core_spills)) { + // If the register holds an object, update the stack mask. + if (locations->RegisterContainsObject(i)) { + locations->SetStackBit(stack_offset / kVRegSize); + } + DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); + DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); + saved_core_stack_offsets_[i] = stack_offset; + stack_offset += kArmWordSize; + } + + CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); + arm_codegen->GetAssembler()->StoreRegisterList(core_spills, orig_offset); + + uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false); + orig_offset = stack_offset; + for (uint32_t i : LowToHighBits(fp_spills)) { + DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); + saved_fpu_stack_offsets_[i] = stack_offset; + stack_offset += kArmWordSize; + } + + stack_offset = orig_offset; + while (fp_spills != 0u) { + uint32_t begin = CTZ(fp_spills); + uint32_t tmp = fp_spills + (1u << begin); + fp_spills &= tmp; // Clear the contiguous range of 1s. + uint32_t end = (tmp == 0u) ? 32u : CTZ(tmp); // CTZ(0) is undefined. + stack_offset = SaveContiguousSRegisterList(begin, end - 1, codegen, stack_offset); + } + DCHECK_LE(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); +} + +void SlowPathCodeARMVIXL::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) { + size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath(); + size_t orig_offset = stack_offset; + + const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true); + for (uint32_t i : LowToHighBits(core_spills)) { + DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); + DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); + stack_offset += kArmWordSize; + } + + // TODO(VIXL): Check the coherency of stack_offset after this with a test. + CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); + arm_codegen->GetAssembler()->LoadRegisterList(core_spills, orig_offset); + + uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false); + while (fp_spills != 0u) { + uint32_t begin = CTZ(fp_spills); + uint32_t tmp = fp_spills + (1u << begin); + fp_spills &= tmp; // Clear the contiguous range of 1s. + uint32_t end = (tmp == 0u) ? 32u : CTZ(tmp); // CTZ(0) is undefined. 
+ stack_offset = RestoreContiguousSRegisterList(begin, end - 1, codegen, stack_offset); + } + DCHECK_LE(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); +} + +class NullCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL { public: - explicit DivZeroCheckSlowPathARMVIXL(HDivZeroCheck* instruction) - : SlowPathCodeARMVIXL(instruction) {} + explicit NullCheckSlowPathARMVIXL(HNullCheck* instruction) : SlowPathCodeARMVIXL(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { - CodeGeneratorARMVIXL* armvixl_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); + CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); __ Bind(GetEntryLabel()); if (instruction_->CanThrowIntoCatchBlock()) { // Live registers will be restored in the catch block if caught. SaveLiveRegisters(codegen, instruction_->GetLocations()); } - armvixl_codegen->InvokeRuntime(kQuickThrowDivZero, - instruction_, - instruction_->GetDexPc(), - this); + arm_codegen->InvokeRuntime(kQuickThrowNullPointer, + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes<kQuickThrowNullPointer, void, void>(); + } + + bool IsFatal() const OVERRIDE { return true; } + + const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathARMVIXL"; } + + private: + DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARMVIXL); +}; + +class DivZeroCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL { + public: + explicit DivZeroCheckSlowPathARMVIXL(HDivZeroCheck* instruction) + : SlowPathCodeARMVIXL(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); + __ Bind(GetEntryLabel()); + arm_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickThrowDivZero, void, void>(); } @@ -96,6 +301,98 @@ class DivZeroCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL { DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARMVIXL); }; +class SuspendCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL { + public: + SuspendCheckSlowPathARMVIXL(HSuspendCheck* instruction, HBasicBlock* successor) + : SlowPathCodeARMVIXL(instruction), successor_(successor) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); + __ Bind(GetEntryLabel()); + arm_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickTestSuspend, void, void>(); + if (successor_ == nullptr) { + __ B(GetReturnLabel()); + } else { + __ B(arm_codegen->GetLabelOf(successor_)); + } + } + + vixl32::Label* GetReturnLabel() { + DCHECK(successor_ == nullptr); + return &return_label_; + } + + HBasicBlock* GetSuccessor() const { + return successor_; + } + + const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathARMVIXL"; } + + private: + // If not null, the block to branch to after the suspend check. + HBasicBlock* const successor_; + + // If `successor_` is null, the label to branch to after the suspend check. 
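As an aside on the SaveLiveRegisters/RestoreLiveRegisters code above: the floating-point spill mask is walked one contiguous run of live S registers at a time (the add-then-mask trick clears a whole run of 1s), and each run is then split so that an odd-numbered leading S register and an even-numbered trailing S register are stored individually while the aligned middle is handled as D registers. Below is a minimal standalone sketch of that decomposition, using plain integers instead of VIXL registers and assuming GCC/Clang's __builtin_ctz:

#include <cstdint>
#include <cstdio>

// Count trailing zeros, standing in for ART's CTZ() helper.
static uint32_t Ctz(uint32_t x) { return static_cast<uint32_t>(__builtin_ctz(x)); }

// Decompose one contiguous run [first, last] of live S registers the way
// SaveContiguousSRegisterList does: peel an odd-numbered first S register and an
// even-numbered last S register, and treat the aligned middle as D registers.
static void DescribeRun(uint32_t first, uint32_t last) {
  if (first == last) { std::printf("  single S%u\n", first); return; }
  if (first % 2 == 1) { std::printf("  single S%u\n", first++); }
  bool peel_last = (last % 2 == 0);
  if (peel_last) { --last; }
  if (first < last) {
    std::printf("  D%u..D%u (one vstm/vldm block)\n", first / 2, last / 2);
  }
  if (peel_last) { std::printf("  single S%u\n", last + 1); }
}

int main() {
  // Live S registers S1..S6, matching the worked example in the comment above:
  // S1 and S6 are handled individually, S2..S5 become D1 and D2.
  uint32_t fp_spills = 0b0111'1110;  // Bits 1..6 set.
  while (fp_spills != 0u) {
    uint32_t begin = Ctz(fp_spills);
    uint32_t tmp = fp_spills + (1u << begin);     // Carry propagates across the run of 1s.
    fp_spills &= tmp;                             // Clears that contiguous run.
    uint32_t end = (tmp == 0u) ? 32u : Ctz(tmp);  // First bit after the run; CTZ(0) is undefined.
    std::printf("run S%u..S%u:\n", begin, end - 1);
    DescribeRun(begin, end - 1);
  }
  return 0;
}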
+ vixl32::Label return_label_; + + DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathARMVIXL); +}; + +class LoadClassSlowPathARMVIXL : public SlowPathCodeARMVIXL { + public: + LoadClassSlowPathARMVIXL(HLoadClass* cls, HInstruction* at, uint32_t dex_pc, bool do_clinit) + : SlowPathCodeARMVIXL(at), cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) { + DCHECK(at->IsLoadClass() || at->IsClinitCheck()); + } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = at_->GetLocations(); + + CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConventionARMVIXL calling_convention; + __ Mov(calling_convention.GetRegisterAt(0), cls_->GetTypeIndex()); + QuickEntrypointEnum entrypoint = do_clinit_ ? kQuickInitializeStaticStorage + : kQuickInitializeType; + arm_codegen->InvokeRuntime(entrypoint, at_, dex_pc_, this); + if (do_clinit_) { + CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>(); + } else { + CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>(); + } + + // Move the class to the desired location. + Location out = locations->Out(); + if (out.IsValid()) { + DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg())); + arm_codegen->Move32(locations->Out(), LocationFrom(r0)); + } + RestoreLiveRegisters(codegen, locations); + __ B(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathARMVIXL"; } + + private: + // The class this slow path will load. + HLoadClass* const cls_; + + // The instruction where this slow path is happening. + // (Might be the load class or an initialization check). + HInstruction* const at_; + + // The dex PC of `at_`. + const uint32_t dex_pc_; + + // Whether to initialize the class. 
+ const bool do_clinit_; + + DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARMVIXL); +}; + inline vixl32::Condition ARMCondition(IfCondition cond) { switch (cond) { case kCondEQ: return eq; @@ -151,16 +448,6 @@ inline vixl32::Condition ARMFPCondition(IfCondition cond, bool gt_bias) { } } -void SlowPathCodeARMVIXL::SaveLiveRegisters(CodeGenerator* codegen ATTRIBUTE_UNUSED, - LocationSummary* locations ATTRIBUTE_UNUSED) { - TODO_VIXL32(FATAL); -} - -void SlowPathCodeARMVIXL::RestoreLiveRegisters(CodeGenerator* codegen ATTRIBUTE_UNUSED, - LocationSummary* locations ATTRIBUTE_UNUSED) { - TODO_VIXL32(FATAL); -} - void CodeGeneratorARMVIXL::DumpCoreRegister(std::ostream& stream, int reg) const { stream << vixl32::Register(reg); } @@ -169,7 +456,7 @@ void CodeGeneratorARMVIXL::DumpFloatingPointRegister(std::ostream& stream, int r stream << vixl32::SRegister(reg); } -static uint32_t ComputeSRegisterMask(const SRegisterList& regs) { +static uint32_t ComputeSRegisterListMask(const SRegisterList& regs) { uint32_t mask = 0; for (uint32_t i = regs.GetFirstSRegister().GetCode(); i <= regs.GetLastSRegister().GetCode(); @@ -190,7 +477,7 @@ CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph, kNumberOfSRegisters, kNumberOfRegisterPairs, kCoreCalleeSaves.GetList(), - ComputeSRegisterMask(kFpuCalleeSaves), + ComputeSRegisterListMask(kFpuCalleeSaves), compiler_options, stats), block_labels_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), @@ -234,11 +521,6 @@ void CodeGeneratorARMVIXL::SetupBlockedRegisters() const { } } -void InstructionCodeGeneratorARMVIXL::GenerateSuspendCheck(HSuspendCheck* instruction, - HBasicBlock* successor) { - TODO_VIXL32(FATAL); -} - InstructionCodeGeneratorARMVIXL::InstructionCodeGeneratorARMVIXL(HGraph* graph, CodeGeneratorARMVIXL* codegen) : InstructionCodeGenerator(graph, codegen), @@ -275,18 +557,16 @@ void CodeGeneratorARMVIXL::GenerateFrameEntry() { return; } - UseScratchRegisterScope temps(GetVIXLAssembler()); - vixl32::Register temp = temps.Acquire(); if (!skip_overflow_check) { + UseScratchRegisterScope temps(GetVIXLAssembler()); + vixl32::Register temp = temps.Acquire(); __ Sub(temp, sp, static_cast<int32_t>(GetStackOverflowReservedBytes(kArm))); // The load must immediately precede RecordPcInfo. 
- { - AssemblerAccurateScope aas(GetVIXLAssembler(), - kArmInstrMaxSizeInBytes, - CodeBufferCheckScope::kMaximumSize); - __ ldr(temp, MemOperand(temp)); - RecordPcInfo(nullptr, 0); - } + AssemblerAccurateScope aas(GetVIXLAssembler(), + kArmInstrMaxSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + __ ldr(temp, MemOperand(temp)); + RecordPcInfo(nullptr, 0); } __ Push(RegisterList(core_spill_mask_)); @@ -303,10 +583,7 @@ void CodeGeneratorARMVIXL::GenerateFrameEntry() { __ Vpush(SRegisterList(vixl32::SRegister(first), POPCOUNT(fpu_spill_mask_))); GetAssembler()->cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(fpu_spill_mask_)); - GetAssembler()->cfi().RelOffsetForMany(DWARFReg(s0), - 0, - fpu_spill_mask_, - kArmWordSize); + GetAssembler()->cfi().RelOffsetForMany(DWARFReg(s0), 0, fpu_spill_mask_, kArmWordSize); } int adjust = GetFrameSize() - FrameEntrySpillSize(); __ Sub(sp, sp, adjust); @@ -332,8 +609,7 @@ void CodeGeneratorARMVIXL::GenerateFrameExit() { __ Vpop(SRegisterList(vixl32::SRegister(first), POPCOUNT(fpu_spill_mask_))); GetAssembler()->cfi().AdjustCFAOffset( -static_cast<int>(kArmWordSize) * POPCOUNT(fpu_spill_mask_)); - GetAssembler()->cfi().RestoreMany(DWARFReg(vixl32::SRegister(0)), - fpu_spill_mask_); + GetAssembler()->cfi().RestoreMany(DWARFReg(vixl32::SRegister(0)), fpu_spill_mask_); } // Pop LR into PC to return. DCHECK_NE(core_spill_mask_ & (1 << kLrCode), 0U); @@ -347,28 +623,63 @@ void CodeGeneratorARMVIXL::Bind(HBasicBlock* block) { __ Bind(GetLabelOf(block)); } -void CodeGeneratorARMVIXL::MoveConstant(Location destination, int32_t value) { - TODO_VIXL32(FATAL); -} - -void CodeGeneratorARMVIXL::MoveLocation(Location dst, Location src, Primitive::Type dst_type) { - TODO_VIXL32(FATAL); -} - -void CodeGeneratorARMVIXL::AddLocationAsTemp(Location location, LocationSummary* locations) { - TODO_VIXL32(FATAL); +void CodeGeneratorARMVIXL::Move32(Location destination, Location source) { + if (source.Equals(destination)) { + return; + } + if (destination.IsRegister()) { + if (source.IsRegister()) { + __ Mov(RegisterFrom(destination), RegisterFrom(source)); + } else if (source.IsFpuRegister()) { + __ Vmov(RegisterFrom(destination), SRegisterFrom(source)); + } else { + GetAssembler()->LoadFromOffset(kLoadWord, + RegisterFrom(destination), + sp, + source.GetStackIndex()); + } + } else if (destination.IsFpuRegister()) { + if (source.IsRegister()) { + __ Vmov(SRegisterFrom(destination), RegisterFrom(source)); + } else if (source.IsFpuRegister()) { + __ Vmov(SRegisterFrom(destination), SRegisterFrom(source)); + } else { + GetAssembler()->LoadSFromOffset(SRegisterFrom(destination), sp, source.GetStackIndex()); + } + } else { + DCHECK(destination.IsStackSlot()) << destination; + if (source.IsRegister()) { + GetAssembler()->StoreToOffset(kStoreWord, + RegisterFrom(source), + sp, + destination.GetStackIndex()); + } else if (source.IsFpuRegister()) { + GetAssembler()->StoreSToOffset(SRegisterFrom(source), sp, destination.GetStackIndex()); + } else { + DCHECK(source.IsStackSlot()) << source; + UseScratchRegisterScope temps(GetVIXLAssembler()); + vixl32::Register temp = temps.Acquire(); + GetAssembler()->LoadFromOffset(kLoadWord, temp, sp, source.GetStackIndex()); + GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex()); + } + } } -uintptr_t CodeGeneratorARMVIXL::GetAddressOf(HBasicBlock* block) { +void CodeGeneratorARMVIXL::MoveConstant(Location destination ATTRIBUTE_UNUSED, + int32_t value ATTRIBUTE_UNUSED) { TODO_VIXL32(FATAL); - return 0; } -void 
CodeGeneratorARMVIXL::GenerateImplicitNullCheck(HNullCheck* null_check) { - TODO_VIXL32(FATAL); +void CodeGeneratorARMVIXL::MoveLocation(Location dst, Location src, Primitive::Type dst_type) { + // TODO(VIXL): Maybe refactor to have the 'move' implementation here and use it in + // `ParallelMoveResolverARMVIXL::EmitMove`, as is done in the `arm64` backend. + HParallelMove move(GetGraph()->GetArena()); + move.AddMove(src, dst, dst_type, nullptr); + GetMoveResolver()->EmitNativeCode(&move); } -void CodeGeneratorARMVIXL::GenerateExplicitNullCheck(HNullCheck* null_check) { +void CodeGeneratorARMVIXL::AddLocationAsTemp(Location location ATTRIBUTE_UNUSED, + LocationSummary* locations ATTRIBUTE_UNUSED) { TODO_VIXL32(FATAL); } @@ -379,6 +690,8 @@ void CodeGeneratorARMVIXL::InvokeRuntime(QuickEntrypointEnum entrypoint, ValidateInvokeRuntime(entrypoint, instruction, slow_path); GenerateInvokeRuntime(GetThreadOffset<kArmPointerSize>(entrypoint).Int32Value()); if (EntrypointRequiresStackMap(entrypoint)) { + // TODO(VIXL): If necessary, use a scope to ensure we record the pc info immediately after the + // previous instruction. RecordPcInfo(instruction, dex_pc, slow_path); } } @@ -395,44 +708,107 @@ void CodeGeneratorARMVIXL::GenerateInvokeRuntime(int32_t entry_point_offset) { __ Blx(lr); } +void LocationsBuilderARMVIXL::VisitClinitCheck(HClinitCheck* check) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(check, LocationSummary::kCallOnSlowPath); + locations->SetInAt(0, Location::RequiresRegister()); + if (check->HasUses()) { + locations->SetOut(Location::SameAsFirstInput()); + } +} + +void InstructionCodeGeneratorARMVIXL::VisitClinitCheck(HClinitCheck* check) { + // We assume the class is not null. + LoadClassSlowPathARMVIXL* slow_path = + new (GetGraph()->GetArena()) LoadClassSlowPathARMVIXL(check->GetLoadClass(), + check, + check->GetDexPc(), + /* do_clinit */ true); + codegen_->AddSlowPath(slow_path); + GenerateClassInitializationCheck(slow_path, InputRegisterAt(check, 0)); +} + +void InstructionCodeGeneratorARMVIXL::GenerateClassInitializationCheck( + LoadClassSlowPathARMVIXL* slow_path, vixl32::Register class_reg) { + UseScratchRegisterScope temps(GetVIXLAssembler()); + vixl32::Register temp = temps.Acquire(); + GetAssembler()->LoadFromOffset(kLoadWord, + temp, + class_reg, + mirror::Class::StatusOffset().Int32Value()); + __ Cmp(temp, mirror::Class::kStatusInitialized); + __ B(lt, slow_path->GetEntryLabel()); + // Even if the initialized flag is set, we may be in a situation where caches are not synced + // properly. Therefore, we do a memory fence. + __ Dmb(ISH); + __ Bind(slow_path->GetExitLabel()); +} + // Check if the desired_string_load_kind is supported. If it is, return it, // otherwise return a fall-back kind that should be used instead. HLoadString::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadStringKind( - HLoadString::LoadKind desired_string_load_kind) { - TODO_VIXL32(FATAL); - return desired_string_load_kind; + HLoadString::LoadKind desired_string_load_kind ATTRIBUTE_UNUSED) { + // TODO(VIXL): Implement optimized code paths. For now we always use the simpler fallback code. + return HLoadString::LoadKind::kDexCacheViaMethod; +} + +void LocationsBuilderARMVIXL::VisitLoadString(HLoadString* load) { + LocationSummary::CallKind call_kind = load->NeedsEnvironment() + ? 
LocationSummary::kCallOnMainOnly + : LocationSummary::kNoCall; + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); + + // TODO(VIXL): Implement optimized code paths. + // See InstructionCodeGeneratorARMVIXL::VisitLoadString. + HLoadString::LoadKind load_kind = load->GetLoadKind(); + if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) { + locations->SetInAt(0, Location::RequiresRegister()); + // TODO(VIXL): Use InvokeRuntimeCallingConventionARMVIXL instead. + locations->SetOut(LocationFrom(r0)); + } else { + locations->SetOut(Location::RequiresRegister()); + } +} + +void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) { + // TODO(VIXL): Implement optimized code paths. + // We implemented the simplest solution to get first ART tests passing, we deferred the + // optimized path until later, we should implement it using ARM64 implementation as a + // reference. The same related to LocationsBuilderARMVIXL::VisitLoadString. + + // TODO: Re-add the compiler code to do string dex cache lookup again. + DCHECK_EQ(load->GetLoadKind(), HLoadString::LoadKind::kDexCacheViaMethod); + InvokeRuntimeCallingConventionARMVIXL calling_convention; + __ Mov(calling_convention.GetRegisterAt(0), load->GetStringIndex()); + codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc()); + CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); } // Check if the desired_class_load_kind is supported. If it is, return it, // otherwise return a fall-back kind that should be used instead. HLoadClass::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadClassKind( - HLoadClass::LoadKind desired_class_load_kind) { - TODO_VIXL32(FATAL); - return desired_class_load_kind; + HLoadClass::LoadKind desired_class_load_kind ATTRIBUTE_UNUSED) { + // TODO(VIXL): Implement optimized code paths. + return HLoadClass::LoadKind::kDexCacheViaMethod; } // Check if the desired_dispatch_info is supported. If it is, return it, // otherwise return a fall-back info that should be used instead. HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARMVIXL::GetSupportedInvokeStaticOrDirectDispatch( - const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - HInvokeStaticOrDirect* invoke) { - TODO_VIXL32(FATAL); - return desired_dispatch_info; -} - -// Generate a call to a static or direct method. -void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, - Location temp) { - TODO_VIXL32(FATAL); -} - -// Generate a call to a virtual method. -void CodeGeneratorARMVIXL::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) { - TODO_VIXL32(FATAL); + const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info ATTRIBUTE_UNUSED, + HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) { + // TODO(VIXL): Implement optimized code paths. + return { + HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod, + HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod, + 0u, + 0u + }; } // Copy the result of a call into the given target. 
-void CodeGeneratorARMVIXL::MoveFromReturnRegister(Location trg, Primitive::Type type) { +void CodeGeneratorARMVIXL::MoveFromReturnRegister(Location trg ATTRIBUTE_UNUSED, + Primitive::Type type ATTRIBUTE_UNUSED) { TODO_VIXL32(FATAL); } @@ -463,6 +839,17 @@ void InstructionCodeGeneratorARMVIXL::VisitGoto(HGoto* got) { HandleGoto(got, got->GetSuccessor()); } +void LocationsBuilderARMVIXL::VisitTryBoundary(HTryBoundary* try_boundary) { + try_boundary->SetLocations(nullptr); +} + +void InstructionCodeGeneratorARMVIXL::VisitTryBoundary(HTryBoundary* try_boundary) { + HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor(); + if (!successor->IsExitBlock()) { + HandleGoto(try_boundary, successor); + } +} + void LocationsBuilderARMVIXL::VisitExit(HExit* exit) { exit->SetLocations(nullptr); } @@ -495,16 +882,14 @@ void InstructionCodeGeneratorARMVIXL::GenerateVcmp(HInstruction* instruction) { __ Vcmp(F32, InputSRegisterAt(instruction, 0), 0.0); } else { DCHECK_EQ(type, Primitive::kPrimDouble); - __ Vcmp(F64, FromLowSToD(lhs_loc.AsFpuRegisterPairLow<vixl32::SRegister>()), 0.0); + __ Vcmp(F64, FromLowSToD(LowSRegisterFrom(lhs_loc)), 0.0); } } else { if (type == Primitive::kPrimFloat) { - __ Vcmp(F32, InputSRegisterAt(instruction, 0), InputSRegisterAt(instruction, 1)); + __ Vcmp(InputSRegisterAt(instruction, 0), InputSRegisterAt(instruction, 1)); } else { DCHECK_EQ(type, Primitive::kPrimDouble); - __ Vcmp(F64, - FromLowSToD(lhs_loc.AsFpuRegisterPairLow<vixl32::SRegister>()), - FromLowSToD(rhs_loc.AsFpuRegisterPairLow<vixl32::SRegister>())); + __ Vcmp(FromLowSToD(LowSRegisterFrom(lhs_loc)), FromLowSToD(LowSRegisterFrom(rhs_loc))); } } } @@ -525,8 +910,8 @@ void InstructionCodeGeneratorARMVIXL::GenerateLongComparesAndJumps(HCondition* c Location right = locations->InAt(1); IfCondition if_cond = cond->GetCondition(); - vixl32::Register left_high = left.AsRegisterPairHigh<vixl32::Register>(); - vixl32::Register left_low = left.AsRegisterPairLow<vixl32::Register>(); + vixl32::Register left_high = HighRegisterFrom(left); + vixl32::Register left_low = LowRegisterFrom(left); IfCondition true_high_cond = if_cond; IfCondition false_high_cond = cond->GetOppositeCondition(); vixl32::Condition final_condition = ARMUnsignedCondition(if_cond); // unsigned on lower part @@ -581,8 +966,8 @@ void InstructionCodeGeneratorARMVIXL::GenerateLongComparesAndJumps(HCondition* c // Must be equal high, so compare the lows. __ Cmp(left_low, val_low); } else { - vixl32::Register right_high = right.AsRegisterPairHigh<vixl32::Register>(); - vixl32::Register right_low = right.AsRegisterPairLow<vixl32::Register>(); + vixl32::Register right_high = HighRegisterFrom(right); + vixl32::Register right_low = LowRegisterFrom(right); __ Cmp(left_high, right_high); if (if_cond == kCondNE) { @@ -723,15 +1108,39 @@ void LocationsBuilderARMVIXL::VisitIf(HIf* if_instr) { void InstructionCodeGeneratorARMVIXL::VisitIf(HIf* if_instr) { HBasicBlock* true_successor = if_instr->IfTrueSuccessor(); HBasicBlock* false_successor = if_instr->IfFalseSuccessor(); - vixl32::Label* true_target = - codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ? - nullptr : codegen_->GetLabelOf(true_successor); - vixl32::Label* false_target = - codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ? - nullptr : codegen_->GetLabelOf(false_successor); + vixl32::Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ? 
+ nullptr : codegen_->GetLabelOf(true_successor); + vixl32::Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ? + nullptr : codegen_->GetLabelOf(false_successor); GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target); } +void LocationsBuilderARMVIXL::VisitSelect(HSelect* select) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select); + if (Primitive::IsFloatingPointType(select->GetType())) { + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + } else { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + } + if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) { + locations->SetInAt(2, Location::RequiresRegister()); + } + locations->SetOut(Location::SameAsFirstInput()); +} + +void InstructionCodeGeneratorARMVIXL::VisitSelect(HSelect* select) { + LocationSummary* locations = select->GetLocations(); + vixl32::Label false_target; + GenerateTestAndBranch(select, + /* condition_input_index */ 2, + /* true_target */ nullptr, + &false_target); + codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType()); + __ Bind(&false_target); +} + void CodeGeneratorARMVIXL::GenerateNop() { __ Nop(); } @@ -749,7 +1158,7 @@ void LocationsBuilderARMVIXL::HandleCondition(HCondition* cond) { } break; - // TODO: https://android-review.googlesource.com/#/c/252265/ + // TODO(VIXL): https://android-review.googlesource.com/#/c/252265/ case Primitive::kPrimFloat: case Primitive::kPrimDouble: locations->SetInAt(0, Location::RequiresFpuRegister()); @@ -773,28 +1182,19 @@ void InstructionCodeGeneratorARMVIXL::HandleCondition(HCondition* cond) { return; } - LocationSummary* locations = cond->GetLocations(); - Location right = locations->InAt(1); vixl32::Register out = OutputRegister(cond); vixl32::Label true_label, false_label; switch (cond->InputAt(0)->GetType()) { default: { // Integer case. - if (right.IsRegister()) { - __ Cmp(InputRegisterAt(cond, 0), InputRegisterAt(cond, 1)); - } else { - DCHECK(right.IsConstant()); - __ Cmp(InputRegisterAt(cond, 0), CodeGenerator::GetInt32ValueOf(right.GetConstant())); - } - { - AssemblerAccurateScope aas(GetVIXLAssembler(), - kArmInstrMaxSizeInBytes * 3u, - CodeBufferCheckScope::kMaximumSize); - __ ite(ARMCondition(cond->GetCondition())); - __ mov(ARMCondition(cond->GetCondition()), OutputRegister(cond), 1); - __ mov(ARMCondition(cond->GetOppositeCondition()), OutputRegister(cond), 0); - } + __ Cmp(InputRegisterAt(cond, 0), InputOperandAt(cond, 1)); + AssemblerAccurateScope aas(GetVIXLAssembler(), + kArmInstrMaxSizeInBytes * 3u, + CodeBufferCheckScope::kMaximumSize); + __ ite(ARMCondition(cond->GetCondition())); + __ mov(ARMCondition(cond->GetCondition()), OutputRegister(cond), 1); + __ mov(ARMCondition(cond->GetOppositeCondition()), OutputRegister(cond), 0); return; } case Primitive::kPrimLong: @@ -911,6 +1311,16 @@ void InstructionCodeGeneratorARMVIXL::VisitIntConstant(HIntConstant* constant AT // Will be generated at use site. } +void LocationsBuilderARMVIXL::VisitNullConstant(HNullConstant* constant) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + locations->SetOut(Location::ConstantLocation(constant)); +} + +void InstructionCodeGeneratorARMVIXL::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) { + // Will be generated at use site. 
+} + void LocationsBuilderARMVIXL::VisitLongConstant(HLongConstant* constant) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); @@ -947,6 +1357,52 @@ void InstructionCodeGeneratorARMVIXL::VisitReturn(HReturn* ret ATTRIBUTE_UNUSED) codegen_->GenerateFrameExit(); } +void LocationsBuilderARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { + // Explicit clinit checks triggered by static invokes must have been pruned by + // art::PrepareForRegisterAllocation. + DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); + + // TODO(VIXL): TryDispatch + + HandleInvoke(invoke); +} + +void InstructionCodeGeneratorARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { + // Explicit clinit checks triggered by static invokes must have been pruned by + // art::PrepareForRegisterAllocation. + DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); + + // TODO(VIXL): TryGenerateIntrinsicCode + + LocationSummary* locations = invoke->GetLocations(); + DCHECK(locations->HasTemps()); + codegen_->GenerateStaticOrDirectCall(invoke, locations->GetTemp(0)); + // TODO(VIXL): If necessary, use a scope to ensure we record the pc info immediately after the + // previous instruction. + codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); +} + +void LocationsBuilderARMVIXL::HandleInvoke(HInvoke* invoke) { + InvokeDexCallingConventionVisitorARM calling_convention_visitor; + CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor); +} + +void LocationsBuilderARMVIXL::VisitInvokeVirtual(HInvokeVirtual* invoke) { + // TODO(VIXL): TryDispatch + + HandleInvoke(invoke); +} + +void InstructionCodeGeneratorARMVIXL::VisitInvokeVirtual(HInvokeVirtual* invoke) { + // TODO(VIXL): TryGenerateIntrinsicCode + + codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); + DCHECK(!codegen_->IsLeafMethod()); + // TODO(VIXL): If necessary, use a scope to ensure we record the pc info immediately after the + // previous instruction. + codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); +} + void LocationsBuilderARMVIXL::VisitTypeConversion(HTypeConversion* conversion) { Primitive::Type result_type = conversion->GetResultType(); Primitive::Type input_type = conversion->GetInputType(); @@ -1050,20 +1506,18 @@ void LocationsBuilderARMVIXL::VisitTypeConversion(HTypeConversion* conversion) { case Primitive::kPrimFloat: { // Processing a Dex `float-to-long' instruction. - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(0, Location::FpuRegisterLocation( - calling_convention.GetFpuRegisterAt(0))); - locations->SetOut(Location::RegisterPairLocation(R0, R1)); + InvokeRuntimeCallingConventionARMVIXL calling_convention; + locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0))); + locations->SetOut(LocationFrom(r0, r1)); break; } case Primitive::kPrimDouble: { // Processing a Dex `double-to-long' instruction. 
- InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(0, Location::FpuRegisterPairLocation( - calling_convention.GetFpuRegisterAt(0), - calling_convention.GetFpuRegisterAt(1))); - locations->SetOut(Location::RegisterPairLocation(R0, R1)); + InvokeRuntimeCallingConventionARMVIXL calling_convention; + locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0), + calling_convention.GetFpuRegisterAt(1))); + locations->SetOut(LocationFrom(r0, r1)); break; } @@ -1108,10 +1562,10 @@ void LocationsBuilderARMVIXL::VisitTypeConversion(HTypeConversion* conversion) { case Primitive::kPrimLong: { // Processing a Dex `long-to-float' instruction. - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(0, Location::RegisterPairLocation( - calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1))); - locations->SetOut(Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); + InvokeRuntimeCallingConventionARMVIXL calling_convention; + locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0), + calling_convention.GetRegisterAt(1))); + locations->SetOut(LocationFrom(calling_convention.GetFpuRegisterAt(0))); break; } @@ -1178,7 +1632,7 @@ void InstructionCodeGeneratorARMVIXL::VisitTypeConversion(HTypeConversion* conve switch (input_type) { case Primitive::kPrimLong: // Type conversion from long to byte is a result of code transformations. - __ Sbfx(OutputRegister(conversion), in.AsRegisterPairLow<vixl32::Register>(), 0, 8); + __ Sbfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 8); break; case Primitive::kPrimBoolean: // Boolean input is a result of code transformations. @@ -1199,7 +1653,7 @@ void InstructionCodeGeneratorARMVIXL::VisitTypeConversion(HTypeConversion* conve switch (input_type) { case Primitive::kPrimLong: // Type conversion from long to short is a result of code transformations. - __ Sbfx(OutputRegister(conversion), in.AsRegisterPairLow<vixl32::Register>(), 0, 16); + __ Sbfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 16); break; case Primitive::kPrimBoolean: // Boolean input is a result of code transformations. @@ -1222,7 +1676,7 @@ void InstructionCodeGeneratorARMVIXL::VisitTypeConversion(HTypeConversion* conve // Processing a Dex `long-to-int' instruction. DCHECK(out.IsRegister()); if (in.IsRegisterPair()) { - __ Mov(OutputRegister(conversion), in.AsRegisterPairLow<vixl32::Register>()); + __ Mov(OutputRegister(conversion), LowRegisterFrom(in)); } else if (in.IsDoubleStackSlot()) { GetAssembler()->LoadFromOffset(kLoadWord, OutputRegister(conversion), @@ -1238,7 +1692,7 @@ void InstructionCodeGeneratorARMVIXL::VisitTypeConversion(HTypeConversion* conve case Primitive::kPrimFloat: { // Processing a Dex `float-to-int' instruction. - vixl32::SRegister temp = locations->GetTemp(0).AsFpuRegisterPairLow<vixl32::SRegister>(); + vixl32::SRegister temp = LowSRegisterFrom(locations->GetTemp(0)); __ Vcvt(I32, F32, temp, InputSRegisterAt(conversion, 0)); __ Vmov(OutputRegister(conversion), temp); break; @@ -1246,9 +1700,8 @@ void InstructionCodeGeneratorARMVIXL::VisitTypeConversion(HTypeConversion* conve case Primitive::kPrimDouble: { // Processing a Dex `double-to-int' instruction. 
- vixl32::SRegister temp_s = - locations->GetTemp(0).AsFpuRegisterPairLow<vixl32::SRegister>(); - __ Vcvt(I32, F64, temp_s, FromLowSToD(in.AsFpuRegisterPairLow<vixl32::SRegister>())); + vixl32::SRegister temp_s = LowSRegisterFrom(locations->GetTemp(0)); + __ Vcvt(I32, F64, temp_s, FromLowSToD(LowSRegisterFrom(in))); __ Vmov(OutputRegister(conversion), temp_s); break; } @@ -1270,11 +1723,9 @@ void InstructionCodeGeneratorARMVIXL::VisitTypeConversion(HTypeConversion* conve // Processing a Dex `int-to-long' instruction. DCHECK(out.IsRegisterPair()); DCHECK(in.IsRegister()); - __ Mov(out.AsRegisterPairLow<vixl32::Register>(), InputRegisterAt(conversion, 0)); + __ Mov(LowRegisterFrom(out), InputRegisterAt(conversion, 0)); // Sign extension. - __ Asr(out.AsRegisterPairHigh<vixl32::Register>(), - out.AsRegisterPairLow<vixl32::Register>(), - 31); + __ Asr(HighRegisterFrom(out), LowRegisterFrom(out), 31); break; case Primitive::kPrimFloat: @@ -1299,7 +1750,7 @@ void InstructionCodeGeneratorARMVIXL::VisitTypeConversion(HTypeConversion* conve switch (input_type) { case Primitive::kPrimLong: // Type conversion from long to char is a result of code transformations. - __ Ubfx(OutputRegister(conversion), in.AsRegisterPairLow<vixl32::Register>(), 0, 16); + __ Ubfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 16); break; case Primitive::kPrimBoolean: // Boolean input is a result of code transformations. @@ -1338,10 +1789,7 @@ void InstructionCodeGeneratorARMVIXL::VisitTypeConversion(HTypeConversion* conve case Primitive::kPrimDouble: // Processing a Dex `double-to-float' instruction. - __ Vcvt(F32, - F64, - OutputSRegister(conversion), - FromLowSToD(in.AsFpuRegisterPairLow<vixl32::SRegister>())); + __ Vcvt(F32, F64, OutputSRegister(conversion), FromLowSToD(LowSRegisterFrom(in))); break; default: @@ -1359,37 +1807,30 @@ void InstructionCodeGeneratorARMVIXL::VisitTypeConversion(HTypeConversion* conve case Primitive::kPrimInt: case Primitive::kPrimChar: { // Processing a Dex `int-to-double' instruction. - __ Vmov(out.AsFpuRegisterPairLow<vixl32::SRegister>(), InputRegisterAt(conversion, 0)); - __ Vcvt(F64, - I32, - FromLowSToD(out.AsFpuRegisterPairLow<vixl32::SRegister>()), - out.AsFpuRegisterPairLow<vixl32::SRegister>()); + __ Vmov(LowSRegisterFrom(out), InputRegisterAt(conversion, 0)); + __ Vcvt(F64, I32, FromLowSToD(LowSRegisterFrom(out)), LowSRegisterFrom(out)); break; } case Primitive::kPrimLong: { // Processing a Dex `long-to-double' instruction. 
- vixl32::Register low = in.AsRegisterPairLow<vixl32::Register>(); - vixl32::Register high = in.AsRegisterPairHigh<vixl32::Register>(); + vixl32::Register low = LowRegisterFrom(in); + vixl32::Register high = HighRegisterFrom(in); - vixl32::SRegister out_s = out.AsFpuRegisterPairLow<vixl32::SRegister>(); + vixl32::SRegister out_s = LowSRegisterFrom(out); vixl32::DRegister out_d = FromLowSToD(out_s); - vixl32::SRegister temp_s = - locations->GetTemp(0).AsFpuRegisterPairLow<vixl32::SRegister>(); + vixl32::SRegister temp_s = LowSRegisterFrom(locations->GetTemp(0)); vixl32::DRegister temp_d = FromLowSToD(temp_s); - vixl32::SRegister constant_s = - locations->GetTemp(1).AsFpuRegisterPairLow<vixl32::SRegister>(); + vixl32::SRegister constant_s = LowSRegisterFrom(locations->GetTemp(1)); vixl32::DRegister constant_d = FromLowSToD(constant_s); // temp_d = int-to-double(high) __ Vmov(temp_s, high); __ Vcvt(F64, I32, temp_d, temp_s); // constant_d = k2Pow32EncodingForDouble - __ Vmov(F64, - constant_d, - vixl32::DOperand(bit_cast<double, int64_t>(k2Pow32EncodingForDouble))); + __ Vmov(constant_d, bit_cast<double, int64_t>(k2Pow32EncodingForDouble)); // out_d = unsigned-to-double(low) __ Vmov(out_s, low); __ Vcvt(F64, U32, out_d, out_s); @@ -1400,10 +1841,7 @@ void InstructionCodeGeneratorARMVIXL::VisitTypeConversion(HTypeConversion* conve case Primitive::kPrimFloat: // Processing a Dex `float-to-double' instruction. - __ Vcvt(F64, - F32, - FromLowSToD(out.AsFpuRegisterPairLow<vixl32::SRegister>()), - InputSRegisterAt(conversion, 0)); + __ Vcvt(F64, F32, FromLowSToD(LowSRegisterFrom(out)), InputSRegisterAt(conversion, 0)); break; default: @@ -1429,7 +1867,7 @@ void LocationsBuilderARMVIXL::VisitAdd(HAdd* add) { break; } - // TODO: https://android-review.googlesource.com/#/c/254144/ + // TODO(VIXL): https://android-review.googlesource.com/#/c/254144/ case Primitive::kPrimLong: { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); @@ -1462,28 +1900,17 @@ void InstructionCodeGeneratorARMVIXL::VisitAdd(HAdd* add) { } break; - // TODO: https://android-review.googlesource.com/#/c/254144/ + // TODO(VIXL): https://android-review.googlesource.com/#/c/254144/ case Primitive::kPrimLong: { DCHECK(second.IsRegisterPair()); - __ Adds(out.AsRegisterPairLow<vixl32::Register>(), - first.AsRegisterPairLow<vixl32::Register>(), - Operand(second.AsRegisterPairLow<vixl32::Register>())); - __ Adc(out.AsRegisterPairHigh<vixl32::Register>(), - first.AsRegisterPairHigh<vixl32::Register>(), - second.AsRegisterPairHigh<vixl32::Register>()); + __ Adds(LowRegisterFrom(out), LowRegisterFrom(first), LowRegisterFrom(second)); + __ Adc(HighRegisterFrom(out), HighRegisterFrom(first), HighRegisterFrom(second)); break; } - case Primitive::kPrimFloat: { - __ Vadd(F32, OutputSRegister(add), InputSRegisterAt(add, 0), InputSRegisterAt(add, 1)); - } - break; - + case Primitive::kPrimFloat: case Primitive::kPrimDouble: - __ Vadd(F64, - FromLowSToD(out.AsFpuRegisterPairLow<vixl32::SRegister>()), - FromLowSToD(first.AsFpuRegisterPairLow<vixl32::SRegister>()), - FromLowSToD(second.AsFpuRegisterPairLow<vixl32::SRegister>())); + __ Vadd(OutputVRegister(add), InputVRegisterAt(add, 0), InputVRegisterAt(add, 1)); break; default: @@ -1502,7 +1929,7 @@ void LocationsBuilderARMVIXL::VisitSub(HSub* sub) { break; } - // TODO: https://android-review.googlesource.com/#/c/254144/ + // TODO(VIXL): https://android-review.googlesource.com/#/c/254144/ case Primitive::kPrimLong: { locations->SetInAt(0, 
Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); @@ -1528,40 +1955,22 @@ void InstructionCodeGeneratorARMVIXL::VisitSub(HSub* sub) { Location second = locations->InAt(1); switch (sub->GetResultType()) { case Primitive::kPrimInt: { - if (second.IsRegister()) { - __ Sub(OutputRegister(sub), InputRegisterAt(sub, 0), InputRegisterAt(sub, 1)); - } else { - __ Sub(OutputRegister(sub), - InputRegisterAt(sub, 0), - second.GetConstant()->AsIntConstant()->GetValue()); - } + __ Sub(OutputRegister(sub), InputRegisterAt(sub, 0), InputOperandAt(sub, 1)); break; } - // TODO: https://android-review.googlesource.com/#/c/254144/ + // TODO(VIXL): https://android-review.googlesource.com/#/c/254144/ case Primitive::kPrimLong: { DCHECK(second.IsRegisterPair()); - __ Subs(out.AsRegisterPairLow<vixl32::Register>(), - first.AsRegisterPairLow<vixl32::Register>(), - Operand(second.AsRegisterPairLow<vixl32::Register>())); - __ Sbc(out.AsRegisterPairHigh<vixl32::Register>(), - first.AsRegisterPairHigh<vixl32::Register>(), - Operand(second.AsRegisterPairHigh<vixl32::Register>())); + __ Subs(LowRegisterFrom(out), LowRegisterFrom(first), LowRegisterFrom(second)); + __ Sbc(HighRegisterFrom(out), HighRegisterFrom(first), HighRegisterFrom(second)); break; } - case Primitive::kPrimFloat: { - __ Vsub(F32, OutputSRegister(sub), InputSRegisterAt(sub, 0), InputSRegisterAt(sub, 1)); + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + __ Vsub(OutputVRegister(sub), InputVRegisterAt(sub, 0), InputVRegisterAt(sub, 1)); break; - } - - case Primitive::kPrimDouble: { - __ Vsub(F64, - FromLowSToD(out.AsFpuRegisterPairLow<vixl32::SRegister>()), - FromLowSToD(first.AsFpuRegisterPairLow<vixl32::SRegister>()), - FromLowSToD(second.AsFpuRegisterPairLow<vixl32::SRegister>())); - break; - } default: LOG(FATAL) << "Unexpected sub type " << sub->GetResultType(); @@ -1604,12 +2013,12 @@ void InstructionCodeGeneratorARMVIXL::VisitMul(HMul* mul) { break; } case Primitive::kPrimLong: { - vixl32::Register out_hi = out.AsRegisterPairHigh<vixl32::Register>(); - vixl32::Register out_lo = out.AsRegisterPairLow<vixl32::Register>(); - vixl32::Register in1_hi = first.AsRegisterPairHigh<vixl32::Register>(); - vixl32::Register in1_lo = first.AsRegisterPairLow<vixl32::Register>(); - vixl32::Register in2_hi = second.AsRegisterPairHigh<vixl32::Register>(); - vixl32::Register in2_lo = second.AsRegisterPairLow<vixl32::Register>(); + vixl32::Register out_hi = HighRegisterFrom(out); + vixl32::Register out_lo = LowRegisterFrom(out); + vixl32::Register in1_hi = HighRegisterFrom(first); + vixl32::Register in1_lo = LowRegisterFrom(first); + vixl32::Register in2_hi = HighRegisterFrom(second); + vixl32::Register in2_lo = LowRegisterFrom(second); // Extra checks to protect caused by the existence of R1_R2. 
// The algorithm is wrong if out.hi is either in1.lo or in2.lo:
@@ -1632,28 +2041,100 @@ void InstructionCodeGeneratorARMVIXL::VisitMul(HMul* mul) {
 // out.lo <- (in1.lo * in2.lo)[31:0];
 __ Umull(out_lo, temp, in1_lo, in2_lo);
 // out.hi <- in2.hi * in1.lo + in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
- __ Add(out_hi, out_hi, Operand(temp));
- break;
- }
-
- case Primitive::kPrimFloat: {
- __ Vmul(F32, OutputSRegister(mul), InputSRegisterAt(mul, 0), InputSRegisterAt(mul, 1));
+ __ Add(out_hi, out_hi, temp);
 break;
 }
- case Primitive::kPrimDouble: {
- __ Vmul(F64,
- FromLowSToD(out.AsFpuRegisterPairLow<vixl32::SRegister>()),
- FromLowSToD(first.AsFpuRegisterPairLow<vixl32::SRegister>()),
- FromLowSToD(second.AsFpuRegisterPairLow<vixl32::SRegister>()));
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ __ Vmul(OutputVRegister(mul), InputVRegisterAt(mul, 0), InputVRegisterAt(mul, 1));
 break;
- }
 default:
 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
 }
 }
+void LocationsBuilderARMVIXL::VisitNewArray(HNewArray* instruction) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
+ InvokeRuntimeCallingConventionARMVIXL calling_convention;
+ locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
+ locations->SetOut(LocationFrom(r0));
+ locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1)));
+ locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(2)));
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitNewArray(HNewArray* instruction) {
+ InvokeRuntimeCallingConventionARMVIXL calling_convention;
+ __ Mov(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex());
+ // Note: if heap poisoning is enabled, the entry point takes care
+ // of poisoning the reference.
+ codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
+ CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>();
+}
+
+void LocationsBuilderARMVIXL::VisitNewInstance(HNewInstance* instruction) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
+ if (instruction->IsStringAlloc()) {
+ locations->AddTemp(LocationFrom(kMethodRegister));
+ } else {
+ InvokeRuntimeCallingConventionARMVIXL calling_convention;
+ locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
+ locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
+ }
+ locations->SetOut(LocationFrom(r0));
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitNewInstance(HNewInstance* instruction) {
+ // Note: if heap poisoning is enabled, the entry point takes care
+ // of poisoning the reference.
+ if (instruction->IsStringAlloc()) {
+ // String is allocated through StringFactory. Call NewEmptyString entry point.
+ vixl32::Register temp = RegisterFrom(instruction->GetLocations()->GetTemp(0)); + MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize); + GetAssembler()->LoadFromOffset(kLoadWord, temp, tr, QUICK_ENTRY_POINT(pNewEmptyString)); + GetAssembler()->LoadFromOffset(kLoadWord, lr, temp, code_offset.Int32Value()); + AssemblerAccurateScope aas(GetVIXLAssembler(), + kArmInstrMaxSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + __ blx(lr); + codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); + } else { + codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); + CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>(); + } +} + +void LocationsBuilderARMVIXL::VisitParameterValue(HParameterValue* instruction) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + Location location = parameter_visitor_.GetNextLocation(instruction->GetType()); + if (location.IsStackSlot()) { + location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); + } else if (location.IsDoubleStackSlot()) { + location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); + } + locations->SetOut(location); +} + +void InstructionCodeGeneratorARMVIXL::VisitParameterValue( + HParameterValue* instruction ATTRIBUTE_UNUSED) { + // Nothing to do, the parameter is already at its location. +} + +void LocationsBuilderARMVIXL::VisitCurrentMethod(HCurrentMethod* instruction) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetOut(LocationFrom(kMethodRegister)); +} + +void InstructionCodeGeneratorARMVIXL::VisitCurrentMethod( + HCurrentMethod* instruction ATTRIBUTE_UNUSED) { + // Nothing to do, the method is already at its location. +} + void LocationsBuilderARMVIXL::VisitNot(HNot* not_) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(not_, LocationSummary::kNoCall); @@ -1671,10 +2152,8 @@ void InstructionCodeGeneratorARMVIXL::VisitNot(HNot* not_) { break; case Primitive::kPrimLong: - __ Mvn(out.AsRegisterPairLow<vixl32::Register>(), - Operand(in.AsRegisterPairLow<vixl32::Register>())); - __ Mvn(out.AsRegisterPairHigh<vixl32::Register>(), - Operand(in.AsRegisterPairHigh<vixl32::Register>())); + __ Mvn(LowRegisterFrom(out), LowRegisterFrom(in)); + __ Mvn(HighRegisterFrom(out), HighRegisterFrom(in)); break; default: @@ -1682,6 +2161,19 @@ void InstructionCodeGeneratorARMVIXL::VisitNot(HNot* not_) { } } +void LocationsBuilderARMVIXL::VisitPhi(HPhi* instruction) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) { + locations->SetInAt(i, Location::Any()); + } + locations->SetOut(Location::Any()); +} + +void InstructionCodeGeneratorARMVIXL::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) { + LOG(FATAL) << "Unreachable"; +} + void CodeGeneratorARMVIXL::GenerateMemoryBarrier(MemBarrierKind kind) { // TODO (ported from quick): revisit ARM barrier kinds. DmbOptions flavor = DmbOptions::ISH; // Quiet C++ warnings. 
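// Editor's note (illustrative sketch, not part of the patch): the next hunk adds
// GenerateWideAtomicLoad/GenerateWideAtomicStore, which emit the LDREXD/STREXD
// retry loop that ARMv7 needs for atomic 64-bit accesses when LDRD/STRD are not
// guaranteed to be single-copy atomic. The same shape in plain C++, assuming a
// Thumb-2 target where the compiler lowers a 64-bit atomic store to an exclusive
// load/store loop (function and parameter names are hypothetical):
#include <atomic>
#include <cstdint>

void StoreWideAtomic(std::atomic<uint64_t>* addr, uint64_t value) {
  // Roughly:  retry: ldrexd r2, r3, [addr]      ; claim exclusive access
  //                  strexd r1, lo, hi, [addr]  ; fails if the claim was lost
  //                  cbnz   r1, retry
  addr->store(value, std::memory_order_relaxed);
}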
@@ -1702,12 +2194,47 @@ void CodeGeneratorARMVIXL::GenerateMemoryBarrier(MemBarrierKind kind) { __ Dmb(flavor); } +void InstructionCodeGeneratorARMVIXL::GenerateWideAtomicLoad(vixl32::Register addr, + uint32_t offset, + vixl32::Register out_lo, + vixl32::Register out_hi) { + UseScratchRegisterScope temps(GetVIXLAssembler()); + if (offset != 0) { + vixl32::Register temp = temps.Acquire(); + __ Add(temp, addr, offset); + addr = temp; + } + __ Ldrexd(out_lo, out_hi, addr); +} + +void InstructionCodeGeneratorARMVIXL::GenerateWideAtomicStore(vixl32::Register addr, + uint32_t offset, + vixl32::Register value_lo, + vixl32::Register value_hi, + vixl32::Register temp1, + vixl32::Register temp2, + HInstruction* instruction) { + UseScratchRegisterScope temps(GetVIXLAssembler()); + vixl32::Label fail; + if (offset != 0) { + vixl32::Register temp = temps.Acquire(); + __ Add(temp, addr, offset); + addr = temp; + } + __ Bind(&fail); + // We need a load followed by store. (The address used in a STREX instruction must + // be the same as the address in the most recently executed LDREX instruction.) + __ Ldrexd(temp1, temp2, addr); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ Strexd(temp1, value_lo, value_hi, addr); + __ Cbnz(temp1, &fail); +} + void InstructionCodeGeneratorARMVIXL::DivRemOneOrMinusOne(HBinaryOperation* instruction) { DCHECK(instruction->IsDiv() || instruction->IsRem()); DCHECK(instruction->GetResultType() == Primitive::kPrimInt); - LocationSummary* locations = instruction->GetLocations(); - Location second = locations->InAt(1); + Location second = instruction->GetLocations()->InAt(1); DCHECK(second.IsConstant()); vixl32::Register out = OutputRegister(instruction); @@ -1736,7 +2263,7 @@ void InstructionCodeGeneratorARMVIXL::DivRemByPowerOfTwo(HBinaryOperation* instr vixl32::Register out = OutputRegister(instruction); vixl32::Register dividend = InputRegisterAt(instruction, 0); - vixl32::Register temp = locations->GetTemp(0).AsRegister<vixl32::Register>(); + vixl32::Register temp = RegisterFrom(locations->GetTemp(0)); int32_t imm = second.GetConstant()->AsIntConstant()->GetValue(); uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm)); int ctz_imm = CTZ(abs_imm); @@ -1747,16 +2274,16 @@ void InstructionCodeGeneratorARMVIXL::DivRemByPowerOfTwo(HBinaryOperation* instr __ Asr(temp, dividend, 31); __ Lsr(temp, temp, 32 - ctz_imm); } - __ Add(out, temp, Operand(dividend)); + __ Add(out, temp, dividend); if (instruction->IsDiv()) { __ Asr(out, out, ctz_imm); if (imm < 0) { - __ Rsb(out, out, Operand(0)); + __ Rsb(out, out, 0); } } else { __ Ubfx(out, out, 0, ctz_imm); - __ Sub(out, out, Operand(temp)); + __ Sub(out, out, temp); } } @@ -1770,8 +2297,8 @@ void InstructionCodeGeneratorARMVIXL::GenerateDivRemWithAnyConstant(HBinaryOpera vixl32::Register out = OutputRegister(instruction); vixl32::Register dividend = InputRegisterAt(instruction, 0); - vixl32::Register temp1 = locations->GetTemp(0).AsRegister<vixl32::Register>(); - vixl32::Register temp2 = locations->GetTemp(1).AsRegister<vixl32::Register>(); + vixl32::Register temp1 = RegisterFrom(locations->GetTemp(0)); + vixl32::Register temp2 = RegisterFrom(locations->GetTemp(1)); int64_t imm = second.GetConstant()->AsIntConstant()->GetValue(); int64_t magic; @@ -1782,9 +2309,9 @@ void InstructionCodeGeneratorARMVIXL::GenerateDivRemWithAnyConstant(HBinaryOpera __ Smull(temp2, temp1, dividend, temp1); if (imm > 0 && magic < 0) { - __ Add(temp1, temp1, Operand(dividend)); + __ Add(temp1, temp1, dividend); } else if (imm < 0 && magic > 
0) { - __ Sub(temp1, temp1, Operand(dividend)); + __ Sub(temp1, temp1, dividend); } if (shift != 0) { @@ -1806,8 +2333,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateDivRemConstantIntegral( DCHECK(instruction->IsDiv() || instruction->IsRem()); DCHECK(instruction->GetResultType() == Primitive::kPrimInt); - LocationSummary* locations = instruction->GetLocations(); - Location second = locations->InAt(1); + Location second = instruction->GetLocations()->InAt(1); DCHECK(second.IsConstant()); int32_t imm = second.GetConstant()->AsIntConstant()->GetValue(); @@ -1880,14 +2406,11 @@ void LocationsBuilderARMVIXL::VisitDiv(HDiv* div) { } void InstructionCodeGeneratorARMVIXL::VisitDiv(HDiv* div) { - LocationSummary* locations = div->GetLocations(); - Location out = locations->Out(); - Location first = locations->InAt(0); - Location second = locations->InAt(1); + Location rhs = div->GetLocations()->InAt(1); switch (div->GetResultType()) { case Primitive::kPrimInt: { - if (second.IsConstant()) { + if (rhs.IsConstant()) { GenerateDivRemConstantIntegral(div); } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) { __ Sdiv(OutputRegister(div), InputRegisterAt(div, 0), InputRegisterAt(div, 1)); @@ -1902,18 +2425,10 @@ void InstructionCodeGeneratorARMVIXL::VisitDiv(HDiv* div) { break; } - case Primitive::kPrimFloat: { - __ Vdiv(F32, OutputSRegister(div), InputSRegisterAt(div, 0), InputSRegisterAt(div, 1)); + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + __ Vdiv(OutputVRegister(div), InputVRegisterAt(div, 0), InputVRegisterAt(div, 1)); break; - } - - case Primitive::kPrimDouble: { - __ Vdiv(F64, - FromLowSToD(out.AsFpuRegisterPairLow<vixl32::SRegister>()), - FromLowSToD(first.AsFpuRegisterPairLow<vixl32::SRegister>()), - FromLowSToD(second.AsFpuRegisterPairLow<vixl32::SRegister>())); - break; - } default: LOG(FATAL) << "Unexpected div type " << div->GetResultType(); @@ -1921,6 +2436,7 @@ void InstructionCodeGeneratorARMVIXL::VisitDiv(HDiv* div) { } void LocationsBuilderARMVIXL::VisitDivZeroCheck(HDivZeroCheck* instruction) { + // TODO(VIXL): https://android-review.googlesource.com/#/c/275337/ LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock() ? 
LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
@@ -1959,9 +2475,7 @@ void InstructionCodeGeneratorARMVIXL::VisitDivZeroCheck(HDivZeroCheck* instructi
 if (value.IsRegisterPair()) {
 UseScratchRegisterScope temps(GetVIXLAssembler());
 vixl32::Register temp = temps.Acquire();
- __ Orrs(temp,
- value.AsRegisterPairLow<vixl32::Register>(),
- Operand(value.AsRegisterPairHigh<vixl32::Register>()));
+ __ Orrs(temp, LowRegisterFrom(value), HighRegisterFrom(value));
 __ B(eq, slow_path->GetEntryLabel());
 } else {
 DCHECK(value.IsConstant()) << value;
@@ -1976,6 +2490,412 @@ void InstructionCodeGeneratorARMVIXL::VisitDivZeroCheck(HDivZeroCheck* instructi
 }
 }
+void LocationsBuilderARMVIXL::HandleFieldSet(
+ HInstruction* instruction, const FieldInfo& field_info) {
+ DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
+
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+ locations->SetInAt(0, Location::RequiresRegister());
+
+ Primitive::Type field_type = field_info.GetFieldType();
+ if (Primitive::IsFloatingPointType(field_type)) {
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ } else {
+ locations->SetInAt(1, Location::RequiresRegister());
+ }
+
+ bool is_wide = field_type == Primitive::kPrimLong || field_type == Primitive::kPrimDouble;
+ bool generate_volatile = field_info.IsVolatile()
+ && is_wide
+ && !codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
+ bool needs_write_barrier =
+ CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
+ // Temporary registers for the write barrier.
+ // TODO: consider renaming StoreNeedsWriteBarrier to StoreNeedsGCMark.
+ if (needs_write_barrier) {
+ locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
+ locations->AddTemp(Location::RequiresRegister());
+ } else if (generate_volatile) {
+ // The ARM encoding has some additional constraints for ldrexd/strexd:
+ // - registers need to be consecutive
+ // - the first register should be even but not R14.
+ // We don't test for ARM yet, and the assertion makes sure that we
+ // revisit this if we ever enable ARM encoding.
+ DCHECK_EQ(InstructionSet::kThumb2, codegen_->GetInstructionSet());
+
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+ if (field_type == Primitive::kPrimDouble) {
+ // For doubles we need two more registers to copy the value.
+ locations->AddTemp(LocationFrom(r2)); + locations->AddTemp(LocationFrom(r3)); + } + } +} + +void InstructionCodeGeneratorARMVIXL::HandleFieldSet(HInstruction* instruction, + const FieldInfo& field_info, + bool value_can_be_null) { + DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); + + LocationSummary* locations = instruction->GetLocations(); + vixl32::Register base = InputRegisterAt(instruction, 0); + Location value = locations->InAt(1); + + bool is_volatile = field_info.IsVolatile(); + bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd(); + Primitive::Type field_type = field_info.GetFieldType(); + uint32_t offset = field_info.GetFieldOffset().Uint32Value(); + bool needs_write_barrier = + CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1)); + + if (is_volatile) { + codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore); + } + + switch (field_type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: { + GetAssembler()->StoreToOffset(kStoreByte, RegisterFrom(value), base, offset); + break; + } + + case Primitive::kPrimShort: + case Primitive::kPrimChar: { + GetAssembler()->StoreToOffset(kStoreHalfword, RegisterFrom(value), base, offset); + break; + } + + case Primitive::kPrimInt: + case Primitive::kPrimNot: { + if (kPoisonHeapReferences && needs_write_barrier) { + // Note that in the case where `value` is a null reference, + // we do not enter this block, as a null reference does not + // need poisoning. + DCHECK_EQ(field_type, Primitive::kPrimNot); + vixl32::Register temp = RegisterFrom(locations->GetTemp(0)); + __ Mov(temp, RegisterFrom(value)); + GetAssembler()->PoisonHeapReference(temp); + GetAssembler()->StoreToOffset(kStoreWord, temp, base, offset); + } else { + GetAssembler()->StoreToOffset(kStoreWord, RegisterFrom(value), base, offset); + } + break; + } + + case Primitive::kPrimLong: { + if (is_volatile && !atomic_ldrd_strd) { + GenerateWideAtomicStore(base, + offset, + LowRegisterFrom(value), + HighRegisterFrom(value), + RegisterFrom(locations->GetTemp(0)), + RegisterFrom(locations->GetTemp(1)), + instruction); + } else { + GetAssembler()->StoreToOffset(kStoreWordPair, LowRegisterFrom(value), base, offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + } + break; + } + + case Primitive::kPrimFloat: { + GetAssembler()->StoreSToOffset(SRegisterFrom(value), base, offset); + break; + } + + case Primitive::kPrimDouble: { + vixl32::DRegister value_reg = FromLowSToD(LowSRegisterFrom(value)); + if (is_volatile && !atomic_ldrd_strd) { + vixl32::Register value_reg_lo = RegisterFrom(locations->GetTemp(0)); + vixl32::Register value_reg_hi = RegisterFrom(locations->GetTemp(1)); + + __ Vmov(value_reg_lo, value_reg_hi, value_reg); + + GenerateWideAtomicStore(base, + offset, + value_reg_lo, + value_reg_hi, + RegisterFrom(locations->GetTemp(2)), + RegisterFrom(locations->GetTemp(3)), + instruction); + } else { + GetAssembler()->StoreDToOffset(value_reg, base, offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + } + break; + } + + case Primitive::kPrimVoid: + LOG(FATAL) << "Unreachable type " << field_type; + UNREACHABLE(); + } + + // Longs and doubles are handled in the switch. 
+ if (field_type != Primitive::kPrimLong && field_type != Primitive::kPrimDouble) {
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ }
+
+ if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
+ vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
+ vixl32::Register card = RegisterFrom(locations->GetTemp(1));
+ codegen_->MarkGCCard(temp, card, base, RegisterFrom(value), value_can_be_null);
+ }
+
+ if (is_volatile) {
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+ }
+}
+
+void LocationsBuilderARMVIXL::HandleFieldGet(HInstruction* instruction,
+ const FieldInfo& field_info) {
+ DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
+
+ bool object_field_get_with_read_barrier =
+ kEmitCompilerReadBarrier && (field_info.GetFieldType() == Primitive::kPrimNot);
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(instruction,
+ object_field_get_with_read_barrier ?
+ LocationSummary::kCallOnSlowPath :
+ LocationSummary::kNoCall);
+ if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
+ locations->SetInAt(0, Location::RequiresRegister());
+
+ bool volatile_for_double = field_info.IsVolatile()
+ && (field_info.GetFieldType() == Primitive::kPrimDouble)
+ && !codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
+ // The output overlaps in case of volatile long: we don't want the
+ // code generated by GenerateWideAtomicLoad to overwrite the
+ // object's location. Likewise, in the case of an object field get
+ // with read barriers enabled, we do not want the load to overwrite
+ // the object's location, as we need it to emit the read barrier.
+ bool overlap = (field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimLong)) ||
+ object_field_get_with_read_barrier;
+
+ if (Primitive::IsFloatingPointType(instruction->GetType())) {
+ locations->SetOut(Location::RequiresFpuRegister());
+ } else {
+ locations->SetOut(Location::RequiresRegister(),
+ (overlap ? Location::kOutputOverlap : Location::kNoOutputOverlap));
+ }
+ if (volatile_for_double) {
+ // The ARM encoding has some additional constraints for ldrexd/strexd:
+ // - registers need to be consecutive
+ // - the first register should be even but not R14.
+ // We don't test for ARM yet, and the assertion makes sure that we
+ // revisit this if we ever enable ARM encoding.
+ DCHECK_EQ(InstructionSet::kThumb2, codegen_->GetInstructionSet());
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+ } else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
+ // We need a temporary register for the read barrier marking slow
+ // path in CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier.
+ locations->AddTemp(Location::RequiresRegister()); + } +} + +void InstructionCodeGeneratorARMVIXL::HandleFieldGet(HInstruction* instruction, + const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); + + LocationSummary* locations = instruction->GetLocations(); + vixl32::Register base = InputRegisterAt(instruction, 0); + Location out = locations->Out(); + bool is_volatile = field_info.IsVolatile(); + bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd(); + Primitive::Type field_type = field_info.GetFieldType(); + uint32_t offset = field_info.GetFieldOffset().Uint32Value(); + + switch (field_type) { + case Primitive::kPrimBoolean: + GetAssembler()->LoadFromOffset(kLoadUnsignedByte, RegisterFrom(out), base, offset); + break; + + case Primitive::kPrimByte: + GetAssembler()->LoadFromOffset(kLoadSignedByte, RegisterFrom(out), base, offset); + break; + + case Primitive::kPrimShort: + GetAssembler()->LoadFromOffset(kLoadSignedHalfword, RegisterFrom(out), base, offset); + break; + + case Primitive::kPrimChar: + GetAssembler()->LoadFromOffset(kLoadUnsignedHalfword, RegisterFrom(out), base, offset); + break; + + case Primitive::kPrimInt: + GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(out), base, offset); + break; + + case Primitive::kPrimNot: { + // /* HeapReference<Object> */ out = *(base + offset) + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + TODO_VIXL32(FATAL); + } else { + GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(out), base, offset); + // TODO(VIXL): Scope to guarantee the position immediately after the load. + codegen_->MaybeRecordImplicitNullCheck(instruction); + if (is_volatile) { + codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + } + // If read barriers are enabled, emit read barriers other than + // Baker's using a slow path (and also unpoison the loaded + // reference, if heap poisoning is enabled). + codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, locations->InAt(0), offset); + } + break; + } + + case Primitive::kPrimLong: + if (is_volatile && !atomic_ldrd_strd) { + GenerateWideAtomicLoad(base, offset, LowRegisterFrom(out), HighRegisterFrom(out)); + } else { + GetAssembler()->LoadFromOffset(kLoadWordPair, LowRegisterFrom(out), base, offset); + } + break; + + case Primitive::kPrimFloat: + GetAssembler()->LoadSFromOffset(SRegisterFrom(out), base, offset); + break; + + case Primitive::kPrimDouble: { + vixl32::DRegister out_dreg = FromLowSToD(LowSRegisterFrom(out)); + if (is_volatile && !atomic_ldrd_strd) { + vixl32::Register lo = RegisterFrom(locations->GetTemp(0)); + vixl32::Register hi = RegisterFrom(locations->GetTemp(1)); + GenerateWideAtomicLoad(base, offset, lo, hi); + // TODO(VIXL): Do we need to be immediately after the ldrexd instruction? If so we need a + // scope. + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ Vmov(out_dreg, lo, hi); + } else { + GetAssembler()->LoadDFromOffset(out_dreg, base, offset); + // TODO(VIXL): Scope to guarantee the position immediately after the load. + codegen_->MaybeRecordImplicitNullCheck(instruction); + } + break; + } + + case Primitive::kPrimVoid: + LOG(FATAL) << "Unreachable type " << field_type; + UNREACHABLE(); + } + + if (field_type == Primitive::kPrimNot || field_type == Primitive::kPrimDouble) { + // Potential implicit null checks, in the case of reference or + // double fields, are handled in the previous switch statement. 
+ } else { + // Address cases other than reference and double that may require an implicit null check. + codegen_->MaybeRecordImplicitNullCheck(instruction); + } + + if (is_volatile) { + if (field_type == Primitive::kPrimNot) { + // Memory barriers, in the case of references, are also handled + // in the previous switch statement. + } else { + codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + } + } +} + +void LocationsBuilderARMVIXL::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo()); +} + +void InstructionCodeGeneratorARMVIXL::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { + HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); +} + +void LocationsBuilderARMVIXL::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); +} + +void InstructionCodeGeneratorARMVIXL::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); +} + +void LocationsBuilderARMVIXL::VisitStaticFieldGet(HStaticFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); +} + +void InstructionCodeGeneratorARMVIXL::VisitStaticFieldGet(HStaticFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); +} + +void LocationsBuilderARMVIXL::VisitNullCheck(HNullCheck* instruction) { + // TODO(VIXL): https://android-review.googlesource.com/#/c/275337/ + LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock() + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall; + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + locations->SetInAt(0, Location::RequiresRegister()); + if (instruction->HasUses()) { + locations->SetOut(Location::SameAsFirstInput()); + } +} + +void CodeGeneratorARMVIXL::GenerateImplicitNullCheck(HNullCheck* instruction) { + if (CanMoveNullCheckToUser(instruction)) { + return; + } + + UseScratchRegisterScope temps(GetVIXLAssembler()); + AssemblerAccurateScope aas(GetVIXLAssembler(), + kArmInstrMaxSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + __ ldr(temps.Acquire(), MemOperand(InputRegisterAt(instruction, 0))); + RecordPcInfo(instruction, instruction->GetDexPc()); +} + +void CodeGeneratorARMVIXL::GenerateExplicitNullCheck(HNullCheck* instruction) { + NullCheckSlowPathARMVIXL* slow_path = + new (GetGraph()->GetArena()) NullCheckSlowPathARMVIXL(instruction); + AddSlowPath(slow_path); + __ Cbz(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel()); +} + +void InstructionCodeGeneratorARMVIXL::VisitNullCheck(HNullCheck* instruction) { + codegen_->GenerateNullCheck(instruction); +} + +void LocationsBuilderARMVIXL::VisitArrayLength(HArrayLength* instruction) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void InstructionCodeGeneratorARMVIXL::VisitArrayLength(HArrayLength* instruction) { + uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction); + vixl32::Register obj = InputRegisterAt(instruction, 0); + vixl32::Register out = OutputRegister(instruction); + GetAssembler()->LoadFromOffset(kLoadWord, out, obj, offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + // TODO(VIXL): 
https://android-review.googlesource.com/#/c/272625/ +} + +void CodeGeneratorARMVIXL::MarkGCCard(vixl32::Register temp, + vixl32::Register card, + vixl32::Register object, + vixl32::Register value, + bool can_be_null) { + vixl32::Label is_null; + if (can_be_null) { + __ Cbz(value, &is_null); + } + GetAssembler()->LoadFromOffset( + kLoadWord, card, tr, Thread::CardTableOffset<kArmPointerSize>().Int32Value()); + __ Lsr(temp, object, gc::accounting::CardTable::kCardShift); + __ Strb(card, MemOperand(card, temp)); + if (can_be_null) { + __ Bind(&is_null); + } +} + void LocationsBuilderARMVIXL::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) { LOG(FATAL) << "Unreachable"; } @@ -1984,49 +2904,107 @@ void InstructionCodeGeneratorARMVIXL::VisitParallelMove(HParallelMove* instructi codegen_->GetMoveResolver()->EmitNativeCode(instruction); } +void LocationsBuilderARMVIXL::VisitSuspendCheck(HSuspendCheck* instruction) { + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath); + // TODO(VIXL): https://android-review.googlesource.com/#/c/275337/ and related. +} + +void InstructionCodeGeneratorARMVIXL::VisitSuspendCheck(HSuspendCheck* instruction) { + HBasicBlock* block = instruction->GetBlock(); + if (block->GetLoopInformation() != nullptr) { + DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction); + // The back edge will generate the suspend check. + return; + } + if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) { + // The goto will generate the suspend check. + return; + } + GenerateSuspendCheck(instruction, nullptr); +} + +void InstructionCodeGeneratorARMVIXL::GenerateSuspendCheck(HSuspendCheck* instruction, + HBasicBlock* successor) { + SuspendCheckSlowPathARMVIXL* slow_path = + down_cast<SuspendCheckSlowPathARMVIXL*>(instruction->GetSlowPath()); + if (slow_path == nullptr) { + slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathARMVIXL(instruction, successor); + instruction->SetSlowPath(slow_path); + codegen_->AddSlowPath(slow_path); + if (successor != nullptr) { + DCHECK(successor->IsLoopHeader()); + codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction); + } + } else { + DCHECK_EQ(slow_path->GetSuccessor(), successor); + } + + UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler()); + vixl32::Register temp = temps.Acquire(); + GetAssembler()->LoadFromOffset( + kLoadUnsignedHalfword, temp, tr, Thread::ThreadFlagsOffset<kArmPointerSize>().Int32Value()); + if (successor == nullptr) { + __ Cbnz(temp, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetReturnLabel()); + } else { + __ Cbz(temp, codegen_->GetLabelOf(successor)); + __ B(slow_path->GetEntryLabel()); + } +} + ArmVIXLAssembler* ParallelMoveResolverARMVIXL::GetAssembler() const { return codegen_->GetAssembler(); } void ParallelMoveResolverARMVIXL::EmitMove(size_t index) { + UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler()); MoveOperands* move = moves_[index]; Location source = move->GetSource(); Location destination = move->GetDestination(); if (source.IsRegister()) { if (destination.IsRegister()) { - __ Mov(destination.AsRegister<vixl32::Register>(), source.AsRegister<vixl32::Register>()); + __ Mov(RegisterFrom(destination), RegisterFrom(source)); } else if (destination.IsFpuRegister()) { - __ Vmov(destination.AsFpuRegister<vixl32::SRegister>(), - source.AsRegister<vixl32::Register>()); + __ Vmov(SRegisterFrom(destination), RegisterFrom(source)); } else { DCHECK(destination.IsStackSlot()); 
GetAssembler()->StoreToOffset(kStoreWord, - source.AsRegister<vixl32::Register>(), + RegisterFrom(source), sp, destination.GetStackIndex()); } } else if (source.IsStackSlot()) { - TODO_VIXL32(FATAL); + if (destination.IsRegister()) { + GetAssembler()->LoadFromOffset(kLoadWord, + RegisterFrom(destination), + sp, + source.GetStackIndex()); + } else if (destination.IsFpuRegister()) { + GetAssembler()->LoadSFromOffset(SRegisterFrom(destination), sp, source.GetStackIndex()); + } else { + DCHECK(destination.IsStackSlot()); + vixl32::Register temp = temps.Acquire(); + GetAssembler()->LoadFromOffset(kLoadWord, temp, sp, source.GetStackIndex()); + GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex()); + } } else if (source.IsFpuRegister()) { TODO_VIXL32(FATAL); } else if (source.IsDoubleStackSlot()) { TODO_VIXL32(FATAL); } else if (source.IsRegisterPair()) { if (destination.IsRegisterPair()) { - __ Mov(destination.AsRegisterPairLow<vixl32::Register>(), - source.AsRegisterPairLow<vixl32::Register>()); - __ Mov(destination.AsRegisterPairHigh<vixl32::Register>(), - source.AsRegisterPairHigh<vixl32::Register>()); + __ Mov(LowRegisterFrom(destination), LowRegisterFrom(source)); + __ Mov(HighRegisterFrom(destination), HighRegisterFrom(source)); } else if (destination.IsFpuRegisterPair()) { - __ Vmov(FromLowSToD(destination.AsFpuRegisterPairLow<vixl32::SRegister>()), - source.AsRegisterPairLow<vixl32::Register>(), - source.AsRegisterPairHigh<vixl32::Register>()); + __ Vmov(FromLowSToD(LowSRegisterFrom(destination)), + LowRegisterFrom(source), + HighRegisterFrom(source)); } else { DCHECK(destination.IsDoubleStackSlot()) << destination; DCHECK(ExpectedPairLayout(source)); GetAssembler()->StoreToOffset(kStoreWordPair, - source.AsRegisterPairLow<vixl32::Register>(), + LowRegisterFrom(source), sp, destination.GetStackIndex()); } @@ -2038,10 +3016,9 @@ void ParallelMoveResolverARMVIXL::EmitMove(size_t index) { if (constant->IsIntConstant() || constant->IsNullConstant()) { int32_t value = CodeGenerator::GetInt32ValueOf(constant); if (destination.IsRegister()) { - __ Mov(destination.AsRegister<vixl32::Register>(), value); + __ Mov(RegisterFrom(destination), value); } else { DCHECK(destination.IsStackSlot()); - UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler()); vixl32::Register temp = temps.Acquire(); __ Mov(temp, value); GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex()); @@ -2049,11 +3026,10 @@ void ParallelMoveResolverARMVIXL::EmitMove(size_t index) { } else if (constant->IsLongConstant()) { int64_t value = constant->AsLongConstant()->GetValue(); if (destination.IsRegisterPair()) { - __ Mov(destination.AsRegisterPairLow<vixl32::Register>(), Low32Bits(value)); - __ Mov(destination.AsRegisterPairHigh<vixl32::Register>(), High32Bits(value)); + __ Mov(LowRegisterFrom(destination), Low32Bits(value)); + __ Mov(HighRegisterFrom(destination), High32Bits(value)); } else { DCHECK(destination.IsDoubleStackSlot()) << destination; - UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler()); vixl32::Register temp = temps.Acquire(); __ Mov(temp, Low32Bits(value)); GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex()); @@ -2066,15 +3042,14 @@ void ParallelMoveResolverARMVIXL::EmitMove(size_t index) { } else if (constant->IsDoubleConstant()) { double value = constant->AsDoubleConstant()->GetValue(); if (destination.IsFpuRegisterPair()) { - __ Vmov(F64, FromLowSToD(destination.AsFpuRegisterPairLow<vixl32::SRegister>()), 
value); + __ Vmov(FromLowSToD(LowSRegisterFrom(destination)), value); } else { DCHECK(destination.IsDoubleStackSlot()) << destination; uint64_t int_value = bit_cast<uint64_t, double>(value); - UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler()); vixl32::Register temp = temps.Acquire(); - GetAssembler()->LoadImmediate(temp, Low32Bits(int_value)); + __ Mov(temp, Low32Bits(int_value)); GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex()); - GetAssembler()->LoadImmediate(temp, High32Bits(int_value)); + __ Mov(temp, High32Bits(int_value)); GetAssembler()->StoreToOffset(kStoreWord, temp, sp, @@ -2084,27 +3059,28 @@ void ParallelMoveResolverARMVIXL::EmitMove(size_t index) { DCHECK(constant->IsFloatConstant()) << constant->DebugName(); float value = constant->AsFloatConstant()->GetValue(); if (destination.IsFpuRegister()) { - __ Vmov(F32, destination.AsFpuRegister<vixl32::SRegister>(), value); + __ Vmov(SRegisterFrom(destination), value); } else { DCHECK(destination.IsStackSlot()); - UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler()); vixl32::Register temp = temps.Acquire(); - GetAssembler()->LoadImmediate(temp, bit_cast<int32_t, float>(value)); + __ Mov(temp, bit_cast<int32_t, float>(value)); GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex()); } } } } -void ParallelMoveResolverARMVIXL::Exchange(Register reg, int mem) { +void ParallelMoveResolverARMVIXL::Exchange(Register reg ATTRIBUTE_UNUSED, + int mem ATTRIBUTE_UNUSED) { TODO_VIXL32(FATAL); } -void ParallelMoveResolverARMVIXL::Exchange(int mem1, int mem2) { +void ParallelMoveResolverARMVIXL::Exchange(int mem1 ATTRIBUTE_UNUSED, + int mem2 ATTRIBUTE_UNUSED) { TODO_VIXL32(FATAL); } -void ParallelMoveResolverARMVIXL::EmitSwap(size_t index) { +void ParallelMoveResolverARMVIXL::EmitSwap(size_t index ATTRIBUTE_UNUSED) { TODO_VIXL32(FATAL); } @@ -2116,9 +3092,274 @@ void ParallelMoveResolverARMVIXL::RestoreScratch(int reg ATTRIBUTE_UNUSED) { TODO_VIXL32(FATAL); } +void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) { + if (cls->NeedsAccessCheck()) { + InvokeRuntimeCallingConventionARMVIXL calling_convention; + CodeGenerator::CreateLoadClassLocationSummary( + cls, + LocationFrom(calling_convention.GetRegisterAt(0)), + LocationFrom(r0), + /* code_generator_supports_read_barrier */ true); + return; + } -// TODO: Remove when codegen complete. -#pragma GCC diagnostic pop + // TODO(VIXL): read barrier code. + LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier) + ? 
LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall; + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind); + HLoadClass::LoadKind load_kind = cls->GetLoadKind(); + if (load_kind == HLoadClass::LoadKind::kReferrersClass || + load_kind == HLoadClass::LoadKind::kDexCacheViaMethod || + load_kind == HLoadClass::LoadKind::kDexCachePcRelative) { + locations->SetInAt(0, Location::RequiresRegister()); + } + locations->SetOut(Location::RequiresRegister()); +} + +void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) { + LocationSummary* locations = cls->GetLocations(); + if (cls->NeedsAccessCheck()) { + codegen_->MoveConstant(locations->GetTemp(0), cls->GetTypeIndex()); + codegen_->InvokeRuntime(kQuickInitializeTypeAndVerifyAccess, cls, cls->GetDexPc()); + CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>(); + return; + } + + Location out_loc = locations->Out(); + vixl32::Register out = OutputRegister(cls); + + // TODO(VIXL): read barrier code. + bool generate_null_check = false; + switch (cls->GetLoadKind()) { + case HLoadClass::LoadKind::kReferrersClass: { + DCHECK(!cls->CanCallRuntime()); + DCHECK(!cls->MustGenerateClinitCheck()); + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + vixl32::Register current_method = InputRegisterAt(cls, 0); + GenerateGcRootFieldLoad(cls, + out_loc, + current_method, + ArtMethod::DeclaringClassOffset().Int32Value()); + break; + } + case HLoadClass::LoadKind::kDexCacheViaMethod: { + // /* GcRoot<mirror::Class>[] */ out = + // current_method.ptr_sized_fields_->dex_cache_resolved_types_ + vixl32::Register current_method = InputRegisterAt(cls, 0); + const int32_t resolved_types_offset = + ArtMethod::DexCacheResolvedTypesOffset(kArmPointerSize).Int32Value(); + GetAssembler()->LoadFromOffset(kLoadWord, out, current_method, resolved_types_offset); + // /* GcRoot<mirror::Class> */ out = out[type_index] + size_t offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex()); + GenerateGcRootFieldLoad(cls, out_loc, out, offset); + generate_null_check = !cls->IsInDexCache(); + break; + } + default: + TODO_VIXL32(FATAL); + } + + if (generate_null_check || cls->MustGenerateClinitCheck()) { + DCHECK(cls->CanCallRuntime()); + LoadClassSlowPathARMVIXL* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARMVIXL( + cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); + codegen_->AddSlowPath(slow_path); + if (generate_null_check) { + __ Cbz(out, slow_path->GetEntryLabel()); + } + if (cls->MustGenerateClinitCheck()) { + GenerateClassInitializationCheck(slow_path, out); + } else { + __ Bind(slow_path->GetExitLabel()); + } + } +} + +void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad( + HInstruction* instruction ATTRIBUTE_UNUSED, + Location root, + vixl32::Register obj, + uint32_t offset, + bool requires_read_barrier) { + vixl32::Register root_reg = RegisterFrom(root); + if (requires_read_barrier) { + TODO_VIXL32(FATAL); + } else { + // Plain GC root load with no read barrier. + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + GetAssembler()->LoadFromOffset(kLoadWord, root_reg, obj, offset); + // Note that GC roots are not affected by heap poisoning, thus we + // do not have to unpoison `root_reg` here. 
+ } +} + +vixl32::Register CodeGeneratorARMVIXL::GetInvokeStaticOrDirectExtraParameter( + HInvokeStaticOrDirect* invoke, vixl32::Register temp) { + DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u); + Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); + if (!invoke->GetLocations()->Intrinsified()) { + return RegisterFrom(location); + } + // For intrinsics we allow any location, so it may be on the stack. + if (!location.IsRegister()) { + GetAssembler()->LoadFromOffset(kLoadWord, temp, sp, location.GetStackIndex()); + return temp; + } + // For register locations, check if the register was saved. If so, get it from the stack. + // Note: There is a chance that the register was saved but not overwritten, so we could + // save one load. However, since this is just an intrinsic slow path we prefer this + // simple and more robust approach rather that trying to determine if that's the case. + SlowPathCode* slow_path = GetCurrentSlowPath(); + DCHECK(slow_path != nullptr); // For intrinsified invokes the call is emitted on the slow path. + if (slow_path->IsCoreRegisterSaved(RegisterFrom(location).GetCode())) { + int stack_offset = slow_path->GetStackOffsetOfCoreRegister(RegisterFrom(location).GetCode()); + GetAssembler()->LoadFromOffset(kLoadWord, temp, sp, stack_offset); + return temp; + } + return RegisterFrom(location); +} + +void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall( + HInvokeStaticOrDirect* invoke, Location temp) { + Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp. + vixl32::Register temp_reg = RegisterFrom(temp); + + switch (invoke->GetMethodLoadKind()) { + case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: { + uint32_t offset = + GetThreadOffset<kArmPointerSize>(invoke->GetStringInitEntryPoint()).Int32Value(); + // temp = thread->string_init_entrypoint + GetAssembler()->LoadFromOffset(kLoadWord, temp_reg, tr, offset); + break; + } + case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { + Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); + vixl32::Register method_reg; + if (current_method.IsRegister()) { + method_reg = RegisterFrom(current_method); + } else { + TODO_VIXL32(FATAL); + } + // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_; + GetAssembler()->LoadFromOffset( + kLoadWord, + temp_reg, + method_reg, + ArtMethod::DexCacheResolvedMethodsOffset(kArmPointerSize).Int32Value()); + // temp = temp[index_in_cache]; + // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file. + uint32_t index_in_cache = invoke->GetDexMethodIndex(); + GetAssembler()->LoadFromOffset( + kLoadWord, temp_reg, temp_reg, CodeGenerator::GetCachePointerOffset(index_in_cache)); + break; + } + default: + TODO_VIXL32(FATAL); + } + + // TODO(VIXL): Support `CodePtrLocation` values other than `kCallArtMethod`. 
+ if (invoke->GetCodePtrLocation() != HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod) { + TODO_VIXL32(FATAL); + } + + // LR = callee_method->entry_point_from_quick_compiled_code_ + GetAssembler()->LoadFromOffset( + kLoadWord, + lr, + RegisterFrom(callee_method), + ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value()); + // LR() + __ Blx(lr); + + DCHECK(!IsLeafMethod()); +} + +void CodeGeneratorARMVIXL::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_location) { + vixl32::Register temp = RegisterFrom(temp_location); + uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset( + invoke->GetVTableIndex(), kArmPointerSize).Uint32Value(); + + // Use the calling convention instead of the location of the receiver, as + // intrinsics may have put the receiver in a different register. In the intrinsics + // slow path, the arguments have been moved to the right place, so here we are + // guaranteed that the receiver is the first register of the calling convention. + InvokeDexCallingConventionARMVIXL calling_convention; + vixl32::Register receiver = calling_convention.GetRegisterAt(0); + uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); + // /* HeapReference<Class> */ temp = receiver->klass_ + GetAssembler()->LoadFromOffset(kLoadWord, temp, receiver, class_offset); + MaybeRecordImplicitNullCheck(invoke); + // Instead of simply (possibly) unpoisoning `temp` here, we should + // emit a read barrier for the previous class reference load. + // However this is not required in practice, as this is an + // intermediate/temporary reference and because the current + // concurrent copying collector keeps the from-space memory + // intact/accessible until the end of the marking phase (the + // concurrent copying collector may not in the future). 
+ GetAssembler()->MaybeUnpoisonHeapReference(temp); + + // temp = temp->GetMethodAt(method_offset); + uint32_t entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset( + kArmPointerSize).Int32Value(); + GetAssembler()->LoadFromOffset(kLoadWord, temp, temp, method_offset); + // LR = temp->GetEntryPoint(); + GetAssembler()->LoadFromOffset(kLoadWord, lr, temp, entry_point); + // LR(); + __ Blx(lr); +} + +static int32_t GetExceptionTlsOffset() { + return Thread::ExceptionOffset<kArmPointerSize>().Int32Value(); +} + +void LocationsBuilderARMVIXL::VisitLoadException(HLoadException* load) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kNoCall); + locations->SetOut(Location::RequiresRegister()); +} + +void InstructionCodeGeneratorARMVIXL::VisitLoadException(HLoadException* load) { + vixl32::Register out = OutputRegister(load); + GetAssembler()->LoadFromOffset(kLoadWord, out, tr, GetExceptionTlsOffset()); +} + +void LocationsBuilderARMVIXL::VisitClearException(HClearException* clear) { + new (GetGraph()->GetArena()) LocationSummary(clear, LocationSummary::kNoCall); +} + +void InstructionCodeGeneratorARMVIXL::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) { + UseScratchRegisterScope temps(GetVIXLAssembler()); + vixl32::Register temp = temps.Acquire(); + __ Mov(temp, 0); + GetAssembler()->StoreToOffset(kStoreWord, temp, tr, GetExceptionTlsOffset()); +} + +void LocationsBuilderARMVIXL::VisitThrow(HThrow* instruction) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly); + InvokeRuntimeCallingConventionARMVIXL calling_convention; + locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); +} + +void InstructionCodeGeneratorARMVIXL::VisitThrow(HThrow* instruction) { + codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc()); + CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); +} + +void CodeGeneratorARMVIXL::MaybeGenerateReadBarrierSlow(HInstruction* instruction ATTRIBUTE_UNUSED, + Location out, + Location ref ATTRIBUTE_UNUSED, + Location obj ATTRIBUTE_UNUSED, + uint32_t offset ATTRIBUTE_UNUSED, + Location index ATTRIBUTE_UNUSED) { + if (kEmitCompilerReadBarrier) { + DCHECK(!kUseBakerReadBarrier); + TODO_VIXL32(FATAL); + } else if (kPoisonHeapReferences) { + GetAssembler()->UnpoisonHeapReference(RegisterFrom(out)); + } +} #undef __ #undef QUICK_ENTRY_POINT diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h index 39913465c0..c749f8620a 100644 --- a/compiler/optimizing/code_generator_arm_vixl.h +++ b/compiler/optimizing/code_generator_arm_vixl.h @@ -29,7 +29,7 @@ #pragma GCC diagnostic pop // True if VIXL32 should be used for codegen on ARM. 
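VisitLoadException and VisitClearException above are just a word-sized read and a zero store against a fixed slot in the current thread's TLS block, reached through the `tr` register. A plain C++ model of that contract, with a placeholder struct instead of art::Thread:

struct ThreadTlsSketch {
  void* exception;  // corresponds to the slot at Thread::ExceptionOffset()
};

inline void* LoadPendingExceptionSketch(ThreadTlsSketch* self) {
  return self->exception;     // VisitLoadException: ldr out, [tr, #offset]
}

inline void ClearPendingExceptionSketch(ThreadTlsSketch* self) {
  self->exception = nullptr;  // VisitClearException: str temp(=0), [tr, #offset]
}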
-#ifdef USE_VIXL_ARM_BACKEND +#ifdef ART_USE_VIXL_ARM_BACKEND static constexpr bool kArmUseVIXL32 = true; #else static constexpr bool kArmUseVIXL32 = false; @@ -38,20 +38,79 @@ static constexpr bool kArmUseVIXL32 = false; namespace art { namespace arm { +static const vixl::aarch32::Register kParameterCoreRegistersVIXL[] = { + vixl::aarch32::r1, + vixl::aarch32::r2, + vixl::aarch32::r3 +}; +static const size_t kParameterCoreRegistersLengthVIXL = arraysize(kParameterCoreRegisters); +static const vixl::aarch32::SRegister kParameterFpuRegistersVIXL[] = { + vixl::aarch32::s0, + vixl::aarch32::s1, + vixl::aarch32::s2, + vixl::aarch32::s3, + vixl::aarch32::s4, + vixl::aarch32::s5, + vixl::aarch32::s6, + vixl::aarch32::s7, + vixl::aarch32::s8, + vixl::aarch32::s9, + vixl::aarch32::s10, + vixl::aarch32::s11, + vixl::aarch32::s12, + vixl::aarch32::s13, + vixl::aarch32::s14, + vixl::aarch32::s15 +}; +static const size_t kParameterFpuRegistersLengthVIXL = arraysize(kParameterFpuRegisters); + static const vixl::aarch32::Register kMethodRegister = vixl::aarch32::r0; + static const vixl::aarch32::Register kCoreAlwaysSpillRegister = vixl::aarch32::r5; -static const vixl::aarch32::RegisterList kCoreCalleeSaves = vixl::aarch32::RegisterList( - (1 << R5) | (1 << R6) | (1 << R7) | (1 << R8) | (1 << R10) | (1 << R11) | (1 << LR)); -// Callee saves s16 to s31 inc. + +// Callee saves core registers r5, r6, r7, r8, r10, r11, and lr. +static const vixl::aarch32::RegisterList kCoreCalleeSaves = vixl::aarch32::RegisterList::Union( + vixl::aarch32::RegisterList(vixl::aarch32::r5, + vixl::aarch32::r6, + vixl::aarch32::r7, + vixl::aarch32::r8), + vixl::aarch32::RegisterList(vixl::aarch32::r10, + vixl::aarch32::r11, + vixl::aarch32::lr)); + +// Callee saves FP registers s16 to s31 inclusive. static const vixl::aarch32::SRegisterList kFpuCalleeSaves = vixl::aarch32::SRegisterList(vixl::aarch32::s16, 16); +static const vixl::aarch32::Register kRuntimeParameterCoreRegistersVIXL[] = { + vixl::aarch32::r0, + vixl::aarch32::r1, + vixl::aarch32::r2, + vixl::aarch32::r3 +}; +static const size_t kRuntimeParameterCoreRegistersLengthVIXL = + arraysize(kRuntimeParameterCoreRegisters); +static const vixl::aarch32::SRegister kRuntimeParameterFpuRegistersVIXL[] = { + vixl::aarch32::s0, + vixl::aarch32::s1, + vixl::aarch32::s2, + vixl::aarch32::s3 +}; +static const size_t kRuntimeParameterFpuRegistersLengthVIXL = + arraysize(kRuntimeParameterFpuRegisters); + +class LoadClassSlowPathARMVIXL; + #define FOR_EACH_IMPLEMENTED_INSTRUCTION(M) \ M(Above) \ M(AboveOrEqual) \ M(Add) \ + M(ArrayLength) \ M(Below) \ M(BelowOrEqual) \ + M(ClearException) \ + M(ClinitCheck) \ + M(CurrentMethod) \ M(Div) \ M(DivZeroCheck) \ M(Equal) \ @@ -60,69 +119,65 @@ static const vixl::aarch32::SRegisterList kFpuCalleeSaves = M(GreaterThan) \ M(GreaterThanOrEqual) \ M(If) \ + M(InstanceFieldGet) \ + M(InstanceFieldSet) \ M(IntConstant) \ + M(InvokeStaticOrDirect) \ + M(InvokeVirtual) \ M(LessThan) \ M(LessThanOrEqual) \ + M(LoadClass) \ + M(LoadException) \ + M(LoadString) \ M(LongConstant) \ M(MemoryBarrier) \ M(Mul) \ + M(NewArray) \ + M(NewInstance) \ M(Not) \ M(NotEqual) \ + M(NullCheck) \ + M(NullConstant) \ M(ParallelMove) \ + M(ParameterValue) \ + M(Phi) \ M(Return) \ M(ReturnVoid) \ + M(Select) \ + M(StaticFieldGet) \ M(Sub) \ + M(SuspendCheck) \ + M(Throw) \ + M(TryBoundary) \ M(TypeConversion) \ // TODO: Remove once the VIXL32 backend is implemented completely. 
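The FOR_EACH_IMPLEMENTED_INSTRUCTION / FOR_EACH_UNIMPLEMENTED_INSTRUCTION lists above follow the usual X-macro pattern: each client supplies its own M(...) and gets one expansion per listed instruction name. A trimmed-down, self-contained example of the pattern (the demo names below are not from the backend):

#define FOR_EACH_DEMO_INSTRUCTION(M) \
  M(Add)                             \
  M(Sub)

// One possible client: declare a visitor method per listed instruction.
#define DECLARE_DEMO_VISIT(Name) void Visit##Name();

struct DemoVisitor {
  FOR_EACH_DEMO_INSTRUCTION(DECLARE_DEMO_VISIT)  // expands to VisitAdd(); VisitSub();
};

#undef DECLARE_DEMO_VISIT
#undef FOR_EACH_DEMO_INSTRUCTION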
#define FOR_EACH_UNIMPLEMENTED_INSTRUCTION(M) \ M(And) \ M(ArrayGet) \ - M(ArrayLength) \ M(ArraySet) \ M(BooleanNot) \ M(BoundsCheck) \ M(BoundType) \ M(CheckCast) \ M(ClassTableGet) \ - M(ClearException) \ - M(ClinitCheck) \ M(Compare) \ - M(CurrentMethod) \ M(Deoptimize) \ M(DoubleConstant) \ M(FloatConstant) \ - M(InstanceFieldGet) \ - M(InstanceFieldSet) \ M(InstanceOf) \ M(InvokeInterface) \ - M(InvokeStaticOrDirect) \ M(InvokeUnresolved) \ - M(InvokeVirtual) \ - M(LoadClass) \ - M(LoadException) \ - M(LoadString) \ M(MonitorOperation) \ M(NativeDebugInfo) \ M(Neg) \ - M(NewArray) \ - M(NewInstance) \ - M(NullCheck) \ - M(NullConstant) \ M(Or) \ M(PackedSwitch) \ - M(ParameterValue) \ - M(Phi) \ M(Rem) \ M(Ror) \ - M(Select) \ M(Shl) \ M(Shr) \ - M(StaticFieldGet) \ M(StaticFieldSet) \ - M(SuspendCheck) \ - M(Throw) \ - M(TryBoundary) \ M(UnresolvedInstanceFieldGet) \ M(UnresolvedInstanceFieldSet) \ M(UnresolvedStaticFieldGet) \ @@ -132,6 +187,34 @@ static const vixl::aarch32::SRegisterList kFpuCalleeSaves = class CodeGeneratorARMVIXL; +class InvokeRuntimeCallingConventionARMVIXL + : public CallingConvention<vixl::aarch32::Register, vixl::aarch32::SRegister> { + public: + InvokeRuntimeCallingConventionARMVIXL() + : CallingConvention(kRuntimeParameterCoreRegistersVIXL, + kRuntimeParameterCoreRegistersLengthVIXL, + kRuntimeParameterFpuRegistersVIXL, + kRuntimeParameterFpuRegistersLengthVIXL, + kArmPointerSize) {} + + private: + DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConventionARMVIXL); +}; + +class InvokeDexCallingConventionARMVIXL + : public CallingConvention<vixl::aarch32::Register, vixl::aarch32::SRegister> { + public: + InvokeDexCallingConventionARMVIXL() + : CallingConvention(kParameterCoreRegistersVIXL, + kParameterCoreRegistersLengthVIXL, + kParameterFpuRegistersVIXL, + kParameterFpuRegistersLengthVIXL, + kArmPointerSize) {} + + private: + DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionARMVIXL); +}; + class SlowPathCodeARMVIXL : public SlowPathCode { public: explicit SlowPathCodeARMVIXL(HInstruction* instruction) @@ -192,7 +275,10 @@ class LocationsBuilderARMVIXL : public HGraphVisitor { LOG(FATAL) << "Unimplemented Instruction: " << instruction->DebugName(); } + void HandleInvoke(HInvoke* invoke); void HandleCondition(HCondition* condition); + void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); + void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); CodeGeneratorARMVIXL* const codegen_; InvokeDexCallingConventionVisitorARM parameter_visitor_; @@ -216,9 +302,42 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator { LOG(FATAL) << "Unimplemented Instruction: " << instruction->DebugName(); } + // Generate code for the given suspend check. If not null, `successor` + // is the block to branch to if the suspend check is not needed, and after + // the suspend call. 
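The two calling-convention classes above differ only in which register tables they hand to the shared CallingConvention template: dex-method core arguments start at r1 (r0 is kMethodRegister), while runtime calls start at r0. A small self-contained model of the GetRegisterAt() behaviour; this is not ART's CallingConvention, just an illustration:

#include <cassert>
#include <cstddef>

enum class ArmCoreRegSketch { r0, r1, r2, r3 };

class DemoDexCallingConvention {
 public:
  ArmCoreRegSketch GetRegisterAt(size_t index) const {
    static constexpr ArmCoreRegSketch kRegs[] = {
        ArmCoreRegSketch::r1, ArmCoreRegSketch::r2, ArmCoreRegSketch::r3 };
    assert(index < sizeof(kRegs) / sizeof(kRegs[0]));
    return kRegs[index];
  }
};

class DemoRuntimeCallingConvention {
 public:
  ArmCoreRegSketch GetRegisterAt(size_t index) const {
    static constexpr ArmCoreRegSketch kRegs[] = {
        ArmCoreRegSketch::r0, ArmCoreRegSketch::r1,
        ArmCoreRegSketch::r2, ArmCoreRegSketch::r3 };
    assert(index < sizeof(kRegs) / sizeof(kRegs[0]));
    return kRegs[index];
  }
};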
void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor); + void GenerateClassInitializationCheck(LoadClassSlowPathARMVIXL* slow_path, + vixl32::Register class_reg); void HandleGoto(HInstruction* got, HBasicBlock* successor); void HandleCondition(HCondition* condition); + + void GenerateWideAtomicStore(vixl::aarch32::Register addr, + uint32_t offset, + vixl::aarch32::Register value_lo, + vixl::aarch32::Register value_hi, + vixl::aarch32::Register temp1, + vixl::aarch32::Register temp2, + HInstruction* instruction); + void GenerateWideAtomicLoad(vixl::aarch32::Register addr, + uint32_t offset, + vixl::aarch32::Register out_lo, + vixl::aarch32::Register out_hi); + + void HandleFieldSet(HInstruction* instruction, + const FieldInfo& field_info, + bool value_can_be_null); + void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + + // Generate a GC root reference load: + // + // root <- *(obj + offset) + // + // while honoring read barriers if `requires_read_barrier` is true. + void GenerateGcRootFieldLoad(HInstruction* instruction, + Location root, + vixl::aarch32::Register obj, + uint32_t offset, + bool requires_read_barrier = kEmitCompilerReadBarrier); void GenerateTestAndBranch(HInstruction* instruction, size_t condition_input_index, vixl::aarch32::Label* true_target, @@ -259,7 +378,14 @@ class CodeGeneratorARMVIXL : public CodeGenerator { void GenerateFrameEntry() OVERRIDE; void GenerateFrameExit() OVERRIDE; + void Bind(HBasicBlock* block) OVERRIDE; + + vixl::aarch32::Label* GetLabelOf(HBasicBlock* block) { + block = FirstNonEmptyBlock(block); + return &(block_labels_[block->GetBlockId()]); + } + void MoveConstant(Location destination, int32_t value) OVERRIDE; void MoveLocation(Location dst, Location src, Primitive::Type dst_type) OVERRIDE; void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE; @@ -274,12 +400,16 @@ class CodeGeneratorARMVIXL : public CodeGenerator { size_t GetFloatingPointSpillSlotSize() const OVERRIDE { return vixl::aarch32::kRegSizeInBytes; } + uintptr_t GetAddressOf(HBasicBlock* block) OVERRIDE { + vixl::aarch32::Label* block_entry_label = GetLabelOf(block); + DCHECK(block_entry_label->IsBound()); + return block_entry_label->GetLocation(); + } + HGraphVisitor* GetLocationBuilder() OVERRIDE { return &location_builder_; } HGraphVisitor* GetInstructionVisitor() OVERRIDE { return &instruction_visitor_; } - uintptr_t GetAddressOf(HBasicBlock* block) OVERRIDE; - void GenerateMemoryBarrier(MemBarrierKind kind); void Finalize(CodeAllocator* allocator) OVERRIDE; void SetupBlockedRegisters() const OVERRIDE; @@ -289,6 +419,9 @@ class CodeGeneratorARMVIXL : public CodeGenerator { InstructionSet GetInstructionSet() const OVERRIDE { return InstructionSet::kThumb2; } + // Helper method to move a 32-bit value between two locations. + void Move32(Location destination, Location source); + const ArmInstructionSetFeatures& GetInstructionSetFeatures() const { return isa_features_; } vixl::aarch32::Label* GetFrameEntryLabel() { return &frame_entry_label_; } @@ -346,6 +479,23 @@ class CodeGeneratorARMVIXL : public CodeGenerator { void GenerateInvokeRuntime(int32_t entry_point_offset); + // Emit a write barrier. + void MarkGCCard(vixl::aarch32::Register temp, + vixl::aarch32::Register card, + vixl::aarch32::Register object, + vixl::aarch32::Register value, + bool can_be_null); + + // If read barriers are enabled, generate a read barrier for a heap + // reference using a slow path. 
If heap poisoning is enabled, also + // unpoison the reference in `out`. + void MaybeGenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + // Check if the desired_string_load_kind is supported. If it is, return it, // otherwise return a fall-back kind that should be used instead. HLoadString::LoadKind GetSupportedLoadStringKind( @@ -369,12 +519,10 @@ class CodeGeneratorARMVIXL : public CodeGenerator { void GenerateNop() OVERRIDE; - vixl::aarch32::Label* GetLabelOf(HBasicBlock* block) { - block = FirstNonEmptyBlock(block); - return &(block_labels_[block->GetBlockId()]); - } - private: + vixl::aarch32::Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, + vixl::aarch32::Register temp); + // Labels for each block that will be compiled. // We use a deque so that the `vixl::aarch32::Label` objects do not move in memory. ArenaDeque<vixl::aarch32::Label> block_labels_; // Indexed by block id. diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index e74e60514d..02c1c3b69f 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -6048,8 +6048,7 @@ void LocationsBuilderX86::VisitLoadString(HLoadString* load) { : LocationSummary::kNoCall; LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); HLoadString::LoadKind load_kind = load->GetLoadKind(); - if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod || - load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative || + if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative || load_kind == HLoadString::LoadKind::kBssEntry) { locations->SetInAt(0, Location::RequiresRegister()); } @@ -6057,6 +6056,17 @@ void LocationsBuilderX86::VisitLoadString(HLoadString* load) { locations->SetOut(Location::RegisterLocation(EAX)); } else { locations->SetOut(Location::RequiresRegister()); + if (load_kind == HLoadString::LoadKind::kBssEntry) { + if (!kUseReadBarrier || kUseBakerReadBarrier) { + // Rely on the pResolveString and/or marking to save everything. + RegisterSet caller_saves = RegisterSet::Empty(); + InvokeRuntimeCallingConvention calling_convention; + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetCustomSlowPathCallerSaves(caller_saves); + } else { + // For non-Baker read barrier we have a temp-clobbering call. + } + } } } @@ -6103,6 +6113,7 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) { // TODO: Re-add the compiler code to do string dex cache lookup again. 
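The kBssEntry handling above leans on the comment "rely on the pResolveString and/or marking to save everything": since the runtime entrypoint spills every register itself, the slow path only needs to preserve registers that are both live at the call and listed in the custom caller-save set. A tiny bitmask model of that intersection; the register numbering and names are illustrative, not ART's RegisterSet API:

#include <cstdint>

using RegMaskSketch = uint32_t;

constexpr RegMaskSketch RegBitSketch(int reg) { return 1u << reg; }

// Registers the slow path actually has to spill around the runtime call.
inline RegMaskSketch SlowPathSpillsSketch(RegMaskSketch live_at_call,
                                          RegMaskSketch custom_caller_saves) {
  return live_at_call & custom_caller_saves;
}

// E.g. for kQuickResolveString only the argument/result register is listed.
constexpr RegMaskSketch kCustomCallerSavesSketch = RegBitSketch(0);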
InvokeRuntimeCallingConvention calling_convention; + DCHECK_EQ(calling_convention.GetRegisterAt(0), out); __ movl(calling_convention.GetRegisterAt(0), Immediate(load->GetStringIndex())); codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc()); CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 5cabc8fa06..4b64c1b6ff 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -299,9 +299,9 @@ class LoadStringSlowPathX86_64 : public SlowPathCode { __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); - InvokeRuntimeCallingConvention calling_convention; const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex(); - __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(string_index)); + // Custom calling convention: RAX serves as both input and output. + __ movl(CpuRegister(RAX), Immediate(string_index)); x86_64_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), @@ -5456,10 +5456,20 @@ void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) { : LocationSummary::kNoCall; LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); if (load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) { - locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RegisterLocation(RAX)); } else { locations->SetOut(Location::RequiresRegister()); + if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) { + if (!kUseReadBarrier || kUseBakerReadBarrier) { + // Rely on the pResolveString and/or marking to save everything. + // Custom calling convention: RAX serves as both input and output. + RegisterSet caller_saves = RegisterSet::Empty(); + caller_saves.Add(Location::RegisterLocation(RAX)); + locations->SetCustomSlowPathCallerSaves(caller_saves); + } else { + // For non-Baker read barrier we have a temp-clobbering call. + } + } } } @@ -5499,9 +5509,8 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) { } // TODO: Re-add the compiler code to do string dex cache lookup again. - InvokeRuntimeCallingConvention calling_convention; - __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), - Immediate(load->GetStringIndex())); + // Custom calling convention: RAX serves as both input and output. 
+ __ movl(CpuRegister(RAX), Immediate(load->GetStringIndex())); codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc()); diff --git a/compiler/optimizing/common_arm.h b/compiler/optimizing/common_arm.h index 853541754d..5d92bfd9cc 100644 --- a/compiler/optimizing/common_arm.h +++ b/compiler/optimizing/common_arm.h @@ -42,6 +42,26 @@ inline vixl::aarch32::DRegister FromLowSToD(vixl::aarch32::SRegister reg) { return vixl::aarch32::DRegister(reg.GetCode() / 2); } +inline vixl::aarch32::Register HighRegisterFrom(Location location) { + DCHECK(location.IsRegisterPair()) << location; + return vixl::aarch32::Register(location.AsRegisterPairHigh<vixl32::Register>()); +} + +inline vixl::aarch32::DRegister HighDRegisterFrom(Location location) { + DCHECK(location.IsFpuRegisterPair()) << location; + return vixl::aarch32::DRegister(location.AsFpuRegisterPairHigh<vixl32::DRegister>()); +} + +inline vixl::aarch32::Register LowRegisterFrom(Location location) { + DCHECK(location.IsRegisterPair()) << location; + return vixl::aarch32::Register(location.AsRegisterPairLow<vixl32::Register>()); +} + +inline vixl::aarch32::SRegister LowSRegisterFrom(Location location) { + DCHECK(location.IsFpuRegisterPair()) << location; + return vixl::aarch32::SRegister(location.AsFpuRegisterPairLow<vixl32::SRegister>()); +} + inline vixl::aarch32::Register RegisterFrom(Location location) { DCHECK(location.IsRegister()) << location; return vixl::aarch32::Register(location.reg()); @@ -53,8 +73,10 @@ inline vixl::aarch32::Register RegisterFrom(Location location, Primitive::Type t } inline vixl::aarch32::DRegister DRegisterFrom(Location location) { - DCHECK(location.IsFpuRegister()) << location; - return vixl::aarch32::DRegister(location.reg()); + DCHECK(location.IsFpuRegisterPair()) << location; + int reg_code = location.low(); + DCHECK_EQ(reg_code % 2, 0) << reg_code; + return vixl::aarch32::DRegister(reg_code / 2); } inline vixl::aarch32::SRegister SRegisterFrom(Location location) { @@ -74,6 +96,15 @@ inline vixl::aarch32::DRegister OutputDRegister(HInstruction* instr) { return DRegisterFrom(instr->GetLocations()->Out()); } +inline vixl::aarch32::VRegister OutputVRegister(HInstruction* instr) { + Primitive::Type type = instr->GetType(); + if (type == Primitive::kPrimFloat) { + return OutputSRegister(instr); + } else { + return OutputDRegister(instr); + } +} + inline vixl::aarch32::SRegister InputSRegisterAt(HInstruction* instr, int input_index) { Primitive::Type type = instr->InputAt(input_index)->GetType(); DCHECK_EQ(type, Primitive::kPrimFloat) << type; @@ -86,6 +117,15 @@ inline vixl::aarch32::DRegister InputDRegisterAt(HInstruction* instr, int input_ return DRegisterFrom(instr->GetLocations()->InAt(input_index)); } +inline vixl::aarch32::VRegister InputVRegisterAt(HInstruction* instr, int input_index) { + Primitive::Type type = instr->InputAt(input_index)->GetType(); + if (type == Primitive::kPrimFloat) { + return InputSRegisterAt(instr, input_index); + } else { + return InputDRegisterAt(instr, input_index); + } +} + inline vixl::aarch32::Register OutputRegister(HInstruction* instr) { return RegisterFrom(instr->GetLocations()->Out(), instr->GetType()); } @@ -120,6 +160,24 @@ inline vixl::aarch32::Operand InputOperandAt(HInstruction* instr, int input_inde instr->InputAt(input_index)->GetType()); } +inline Location LocationFrom(const vixl::aarch32::Register& reg) { + return Location::RegisterLocation(reg.GetCode()); +} + +inline Location LocationFrom(const vixl::aarch32::SRegister& reg) { + return 
Location::FpuRegisterLocation(reg.GetCode()); +} + +inline Location LocationFrom(const vixl::aarch32::Register& low, + const vixl::aarch32::Register& high) { + return Location::RegisterPairLocation(low.GetCode(), high.GetCode()); +} + +inline Location LocationFrom(const vixl::aarch32::SRegister& low, + const vixl::aarch32::SRegister& high) { + return Location::FpuRegisterPairLocation(low.GetCode(), high.GetCode()); +} + } // namespace helpers } // namespace arm } // namespace art diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc index a1e923bd73..bacf9940ca 100644 --- a/compiler/optimizing/optimizing_cfi_test.cc +++ b/compiler/optimizing/optimizing_cfi_test.cc @@ -158,7 +158,8 @@ class OptimizingCFITest : public CFITest { TestImpl(isa, #isa, expected_asm, expected_cfi); \ } -#ifdef ART_ENABLE_CODEGEN_arm +// TODO(VIXL): Support this test for the VIXL backend. +#if defined(ART_ENABLE_CODEGEN_arm) && !defined(ART_USE_VIXL_ARM_BACKEND) TEST_ISA(kThumb2) #endif #ifdef ART_ENABLE_CODEGEN_arm64 @@ -177,7 +178,8 @@ TEST_ISA(kMips) TEST_ISA(kMips64) #endif -#ifdef ART_ENABLE_CODEGEN_arm +// TODO(VIXL): Support this test for the VIXL backend. +#if defined(ART_ENABLE_CODEGEN_arm) && !defined(ART_USE_VIXL_ARM_BACKEND) TEST_F(OptimizingCFITest, kThumb2Adjust) { std::vector<uint8_t> expected_asm( expected_asm_kThumb2_adjust, diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 6ba0963720..7972387536 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -602,8 +602,13 @@ void OptimizingCompiler::RunArchOptimizations(InstructionSet instruction_set, UNUSED(codegen); // To avoid compilation error when compiling for svelte OptimizingCompilerStats* stats = compilation_stats_.get(); ArenaAllocator* arena = graph->GetArena(); +#ifdef ART_USE_VIXL_ARM_BACKEND + UNUSED(arena); + UNUSED(pass_observer); + UNUSED(stats); +#endif switch (instruction_set) { -#ifdef ART_ENABLE_CODEGEN_arm +#if defined(ART_ENABLE_CODEGEN_arm) && !defined(ART_USE_VIXL_ARM_BACKEND) case kThumb2: case kArm: { arm::DexCacheArrayFixups* fixups = diff --git a/compiler/utils/arm/assembler_arm_vixl.cc b/compiler/utils/arm/assembler_arm_vixl.cc index 3c5973ebe6..8045bd2c6a 100644 --- a/compiler/utils/arm/assembler_arm_vixl.cc +++ b/compiler/utils/arm/assembler_arm_vixl.cc @@ -346,6 +346,51 @@ void ArmVIXLAssembler::LoadDFromOffset(vixl32::DRegister reg, ___ Vldr(reg, MemOperand(base, offset)); } +// Prefer Str to Add/Stm in ArmVIXLAssembler::StoreRegisterList and +// ArmVIXLAssembler::LoadRegisterList where this generates less code (size). 
+static constexpr int kRegListThreshold = 4; + +void ArmVIXLAssembler::StoreRegisterList(RegList regs, size_t stack_offset) { + int number_of_regs = POPCOUNT(static_cast<uint32_t>(regs)); + if (number_of_regs != 0) { + if (number_of_regs > kRegListThreshold) { + UseScratchRegisterScope temps(GetVIXLAssembler()); + vixl32::Register base = sp; + if (stack_offset != 0) { + base = temps.Acquire(); + DCHECK_EQ(regs & (1u << base.GetCode()), 0u); + ___ Add(base, sp, stack_offset); + } + ___ Stm(base, NO_WRITE_BACK, RegisterList(regs)); + } else { + for (uint32_t i : LowToHighBits(static_cast<uint32_t>(regs))) { + ___ Str(vixl32::Register(i), MemOperand(sp, stack_offset)); + stack_offset += kRegSizeInBytes; + } + } + } +} + +void ArmVIXLAssembler::LoadRegisterList(RegList regs, size_t stack_offset) { + int number_of_regs = POPCOUNT(static_cast<uint32_t>(regs)); + if (number_of_regs != 0) { + if (number_of_regs > kRegListThreshold) { + UseScratchRegisterScope temps(GetVIXLAssembler()); + vixl32::Register base = sp; + if (stack_offset != 0) { + base = temps.Acquire(); + ___ Add(base, sp, stack_offset); + } + ___ Ldm(base, NO_WRITE_BACK, RegisterList(regs)); + } else { + for (uint32_t i : LowToHighBits(static_cast<uint32_t>(regs))) { + ___ Ldr(vixl32::Register(i), MemOperand(sp, stack_offset)); + stack_offset += kRegSizeInBytes; + } + } + } +} + void ArmVIXLAssembler::AddConstant(vixl32::Register rd, int32_t value) { AddConstant(rd, rd, value); } diff --git a/compiler/utils/arm/assembler_arm_vixl.h b/compiler/utils/arm/assembler_arm_vixl.h index c8f3a9b863..c5575faaa1 100644 --- a/compiler/utils/arm/assembler_arm_vixl.h +++ b/compiler/utils/arm/assembler_arm_vixl.h @@ -90,6 +90,9 @@ class ArmVIXLAssembler FINAL : public Assembler { void LoadSFromOffset(vixl32::SRegister reg, vixl32::Register base, int32_t offset); void LoadDFromOffset(vixl32::DRegister reg, vixl32::Register base, int32_t offset); + void LoadRegisterList(RegList regs, size_t stack_offset); + void StoreRegisterList(RegList regs, size_t stack_offset); + bool ShifterOperandCanAlwaysHold(uint32_t immediate); bool ShifterOperandCanHold(Opcode opcode, uint32_t immediate, SetCc set_cc); bool CanSplitLoadStoreOffset(int32_t allowed_offset_bits, diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc index 86a4aa2245..10bed13dad 100644 --- a/compiler/utils/assembler_thumb_test.cc +++ b/compiler/utils/assembler_thumb_test.cc @@ -158,7 +158,7 @@ void DumpAndCheck(std::vector<uint8_t>& code, const char* testname, const char* } if (CompareIgnoringSpace(results[lineindex], testline) != 0) { LOG(FATAL) << "Output is not as expected at line: " << lineindex - << results[lineindex] << "/" << testline; + << results[lineindex] << "/" << testline << ", test name: " << testname; } ++lineindex; } diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc index 91f397087c..69e1d8f6fa 100644 --- a/compiler/utils/assembler_thumb_test_expected.cc.inc +++ b/compiler/utils/assembler_thumb_test_expected.cc.inc @@ -5544,7 +5544,7 @@ const char* const VixlJniHelpersResults[] = { " 10c: ecbd 8a10 vpop {s16-s31}\n", " 110: e8bd 8de0 ldmia.w sp!, {r5, r6, r7, r8, sl, fp, pc}\n", " 114: 4660 mov r0, ip\n", - " 116: f8d9 c2ac ldr.w ip, [r9, #684] ; 0x2ac\n", + " 116: f8d9 c2b0 ldr.w ip, [r9, #688] ; 0x2b0\n", " 11a: 47e0 blx ip\n", nullptr }; diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S index cdb4c251a8..bf70c554b1 100644 
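A note on the kRegListThreshold logic above: as the comment explains, for small register sets a run of individual STR/LDR instructions is expected to be no larger than ADD plus STM/LDM, so the block-transfer form is only chosen once more than four registers are involved. The selection predicate, expressed as a standalone sketch where the GCC/Clang builtin stands in for ART's POPCOUNT:

#include <cstdint>

constexpr int kRegListThresholdSketch = 4;  // mirrors kRegListThreshold above

// True when STM/LDM (plus an optional ADD to form the base register) is
// expected to be smaller than emitting one STR/LDR per register.
inline bool UseBlockTransferSketch(uint32_t reg_mask) {
  return __builtin_popcount(reg_mask) > kRegListThresholdSketch;
}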
--- a/runtime/arch/arm/quick_entrypoints_arm.S +++ b/runtime/arch/arm/quick_entrypoints_arm.S @@ -239,6 +239,30 @@ .cfi_adjust_cfa_offset -56 .endm +.macro RESTORE_SAVE_EVERYTHING_FRAME_KEEP_R0 + add sp, #8 @ rewind sp + .cfi_adjust_cfa_offset -8 + vpop {d0-d15} + .cfi_adjust_cfa_offset -128 + add sp, #4 @ skip r0 + .cfi_adjust_cfa_offset -4 + .cfi_restore r0 @ debugger can no longer restore caller's r0 + pop {r1-r12, lr} @ 13 words of callee saves + .cfi_restore r1 + .cfi_restore r2 + .cfi_restore r3 + .cfi_restore r5 + .cfi_restore r6 + .cfi_restore r7 + .cfi_restore r8 + .cfi_restore r9 + .cfi_restore r10 + .cfi_restore r11 + .cfi_restore r12 + .cfi_restore lr + .cfi_adjust_cfa_offset -52 +.endm + .macro RETURN_IF_RESULT_IS_ZERO cbnz r0, 1f @ result non-zero branch over bx lr @ return @@ -252,17 +276,23 @@ .endm /* - * Macro that set calls through to artDeliverPendingExceptionFromCode, where the pending - * exception is Thread::Current()->exception_ + * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending + * exception is Thread::Current()->exception_ when the runtime method frame is ready. */ -.macro DELIVER_PENDING_EXCEPTION - .fnend - .fnstart - SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r0 @ save callee saves for throw +.macro DELIVER_PENDING_EXCEPTION_FRAME_READY mov r0, r9 @ pass Thread::Current bl artDeliverPendingExceptionFromCode @ artDeliverPendingExceptionFromCode(Thread*) .endm + /* + * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending + * exception is Thread::Current()->exception_. + */ +.macro DELIVER_PENDING_EXCEPTION + SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r0 @ save callee saves for throw + DELIVER_PENDING_EXCEPTION_FRAME_READY +.endm + .macro NO_ARG_RUNTIME_EXCEPTION c_name, cxx_name .extern \cxx_name ENTRY \c_name @@ -1078,41 +1108,71 @@ END art_quick_set64_instance */ ENTRY art_quick_resolve_string - ldr r1, [sp] @ load referrer - ldr r1, [r1, #ART_METHOD_DECLARING_CLASS_OFFSET] @ load declaring class - ldr r1, [r1, #DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET] @ load string dex cache - ubfx r2, r0, #0, #STRING_DEX_CACHE_HASH_BITS - add r1, r1, r2, LSL #STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT - ldrd r2, r3, [r1] @ load index into r3 and pointer into r2 - cmp r0, r3 + push {r10-r12, lr} + .cfi_adjust_cfa_offset 16 + .cfi_rel_offset r10, 0 + .cfi_rel_offset r11, 4 + .cfi_rel_offset ip, 8 + .cfi_rel_offset lr, 12 + ldr r10, [sp, #16] @ load referrer + ldr r10, [r10, #ART_METHOD_DECLARING_CLASS_OFFSET] @ load declaring class + ldr r10, [r10, #DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET] @ load string dex cache + ubfx r11, r0, #0, #STRING_DEX_CACHE_HASH_BITS + add r10, r10, r11, LSL #STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT + ldrd r10, r11, [r10] @ load index into r11 and pointer into r10 + cmp r0, r11 bne .Lart_quick_resolve_string_slow_path #ifdef USE_READ_BARRIER - ldr r3, [rSELF, #THREAD_IS_GC_MARKING_OFFSET] - cbnz r3, .Lart_quick_resolve_string_marking + ldr r0, [rSELF, #THREAD_IS_GC_MARKING_OFFSET] + cbnz r0, .Lart_quick_resolve_string_marking +.Lart_quick_resolve_string_no_rb: #endif - mov r0, r2 - bx lr -// Slow path case, the index did not match -.Lart_quick_resolve_string_slow_path: - SETUP_SAVE_REFS_ONLY_FRAME r2 @ save callee saves in case of GC - mov r1, r9 @ pass Thread::Current - mov r3, sp - bl artResolveStringFromCode @ (uint32_t type_idx, Method* method, Thread*) - RESTORE_SAVE_REFS_ONLY_FRAME - RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER + mov r0, r10 + pop {r10-r12, pc} + +#ifdef USE_READ_BARRIER // GC is marking case, need 
to check the mark bit. .Lart_quick_resolve_string_marking: - ldr r3, [r2, MIRROR_OBJECT_LOCK_WORD_OFFSET] - tst r3, #LOCK_WORD_MARK_BIT_MASK_SHIFTED - mov r0, r2 - bne .Lart_quick_resolve_string_no_rb - push {r1, r2, r3, lr} @ Save x1, LR - .cfi_adjust_cfa_offset 16 - bl artReadBarrierMark @ Get the marked string back. - pop {r1, r2, r3, lr} @ Restore registers. + ldr r0, [r10, MIRROR_OBJECT_LOCK_WORD_OFFSET] + lsrs r0, #(LOCK_WORD_MARK_BIT_SHIFT + 1) + bcs .Lart_quick_resolve_string_no_rb + mov r0, r10 + .cfi_remember_state + pop {r10-r12, lr} .cfi_adjust_cfa_offset -16 -.Lart_quick_resolve_string_no_rb: + .cfi_restore r10 + .cfi_restore r11 + .cfi_restore r12 + .cfi_restore lr + // Note: art_quick_read_barrier_mark_reg00 clobbers IP but the .Lslow_rb_* does not. + b .Lslow_rb_art_quick_read_barrier_mark_reg00 @ Get the marked string back. + .cfi_restore_state +#endif + +// Slow path case, the index did not match +.Lart_quick_resolve_string_slow_path: + push {r0-r9} @ 10 words of callee saves and args; {r10-r12, lr} already saved. + .cfi_adjust_cfa_offset 40 + .cfi_rel_offset r0, 0 + .cfi_rel_offset r1, 4 + .cfi_rel_offset r2, 8 + .cfi_rel_offset r3, 12 + .cfi_rel_offset r4, 16 + .cfi_rel_offset r5, 20 + .cfi_rel_offset r6, 24 + .cfi_rel_offset r7, 28 + .cfi_rel_offset r8, 32 + .cfi_rel_offset r9, 36 + SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED r1 @ save callee saves in case of GC + mov r1, r9 @ pass Thread::Current + bl artResolveStringFromCode @ (uint32_t type_idx, Thread*) + cbz r0, 1f @ If result is null, deliver the OOME. + .cfi_remember_state + RESTORE_SAVE_EVERYTHING_FRAME_KEEP_R0 bx lr + .cfi_restore_state +1: + DELIVER_PENDING_EXCEPTION_FRAME_READY END art_quick_resolve_string // Generate the allocation entrypoints for each allocator. @@ -1920,6 +1980,8 @@ END art_quick_l2f * getting its argument and returning its result through register * `reg`, saving and restoring all caller-save registers. * + * IP is clobbered; `reg` must not be IP. + * * If `reg` is different from `r0`, the generated function follows a * non-standard runtime calling convention: * - register `reg` is used to pass the (sole) argument of this @@ -1936,36 +1998,71 @@ ENTRY \name SMART_CBZ \reg, .Lret_rb_\name // Check lock word for mark bit, if marked return. Use IP for scratch since it is blocked. ldr ip, [\reg, MIRROR_OBJECT_LOCK_WORD_OFFSET] - ands ip, #LOCK_WORD_MARK_BIT_MASK_SHIFTED + tst ip, #LOCK_WORD_MARK_BIT_MASK_SHIFTED beq .Lslow_rb_\name // Already marked, return right away. +.Lret_rb_\name: bx lr .Lslow_rb_\name: - push {r0-r5, r9, lr} @ save return address and core caller-save registers - @ also save callee save r5 for 16 byte alignment + // Save IP: the kSaveEverything entrypoint art_quick_resolve_string makes a tail call here. 
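For reference, the fast path that the rewritten art_quick_resolve_string implements above (mask the string index into the fixed-size dex cache, load the {pointer, index} pair, and hit only when the stored index matches) looks roughly like this in C++. The cache size and field names are assumptions for illustration:

#include <cstdint>

struct StringSlotSketch {
  uint32_t reference;     // low word (loaded into r10): the resolved String
  uint32_t string_index;  // high word (loaded into r11): the dex string index
};

constexpr uint32_t kStringCacheSizeSketch = 1024;  // assumed power-of-two size

// Returns the cached reference, or 0 to signal "take the slow path".
inline uint32_t ResolveStringFastPathSketch(const StringSlotSketch* cache,
                                            uint32_t string_idx) {
  const StringSlotSketch& slot = cache[string_idx & (kStringCacheSizeSketch - 1u)];
  return (slot.string_index == string_idx) ? slot.reference : 0u;
}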
+ push {r0-r4, r9, ip, lr} @ save return address, core caller-save registers and ip .cfi_adjust_cfa_offset 32 .cfi_rel_offset r0, 0 .cfi_rel_offset r1, 4 .cfi_rel_offset r2, 8 .cfi_rel_offset r3, 12 .cfi_rel_offset r4, 16 - .cfi_rel_offset r5, 20 - .cfi_rel_offset r9, 24 + .cfi_rel_offset r9, 20 + .cfi_rel_offset ip, 24 .cfi_rel_offset lr, 28 - vpush {s0-s15} @ save floating-point caller-save registers - .cfi_adjust_cfa_offset 64 .ifnc \reg, r0 mov r0, \reg @ pass arg1 - obj from `reg` .endif + + vpush {s0-s15} @ save floating-point caller-save registers + .cfi_adjust_cfa_offset 64 bl artReadBarrierMark @ r0 <- artReadBarrierMark(obj) - mov ip, r0 @ Save result in IP vpop {s0-s15} @ restore floating-point registers .cfi_adjust_cfa_offset -64 - pop {r0-r5, r9, lr} @ restore caller-save registers - mov \reg, ip @ copy result to reg -.Lret_rb_\name: + + .ifc \reg, r0 @ Save result to the stack slot or destination register. + str r0, [sp, #0] + .else + .ifc \reg, r1 + str r0, [sp, #4] + .else + .ifc \reg, r2 + str r0, [sp, #8] + .else + .ifc \reg, r3 + str r0, [sp, #12] + .else + .ifc \reg, r4 + str r0, [sp, #16] + .else + .ifc \reg, r9 + str r0, [sp, #20] + .else + mov \reg, r0 + .endif + .endif + .endif + .endif + .endif + .endif + + pop {r0-r4, r9, ip, lr} @ restore caller-save registers + .cfi_adjust_cfa_offset -32 + .cfi_restore r0 + .cfi_restore r1 + .cfi_restore r2 + .cfi_restore r3 + .cfi_restore r4 + .cfi_restore r9 + .cfi_restore ip + .cfi_restore lr bx lr END \name .endm diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S index 04a3cc6cae..483cee3100 100644 --- a/runtime/arch/arm64/quick_entrypoints_arm64.S +++ b/runtime/arch/arm64/quick_entrypoints_arm64.S @@ -337,7 +337,7 @@ SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP_SKIP_X29_LR .endm -.macro RESTORE_SAVE_EVERYTHING_FRAME +.macro RESTORE_SAVE_EVERYTHING_FRAME_KEEP_X0 // Restore FP registers. // For better performance, load d0 and d31 separately, so that all LDPs are 16-byte aligned. ldr d0, [sp, #8] @@ -359,7 +359,6 @@ ldr d31, [sp, #256] // Restore core registers. - RESTORE_REG x0, 264 RESTORE_TWO_REGS x1, x2, 272 RESTORE_TWO_REGS x3, x4, 288 RESTORE_TWO_REGS x5, x6, 304 @@ -379,6 +378,11 @@ DECREASE_FRAME 512 .endm +.macro RESTORE_SAVE_EVERYTHING_FRAME + RESTORE_REG x0, 264 + RESTORE_SAVE_EVERYTHING_FRAME_KEEP_X0 +.endm + .macro RETURN_IF_RESULT_IS_ZERO cbnz x0, 1f // result non-zero branch over ret // return @@ -392,11 +396,10 @@ .endm /* - * Macro that set calls through to artDeliverPendingExceptionFromCode, where the pending - * exception is Thread::Current()->exception_ + * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending + * exception is Thread::Current()->exception_ when the runtime method frame is ready. */ -.macro DELIVER_PENDING_EXCEPTION - SETUP_SAVE_ALL_CALLEE_SAVES_FRAME +.macro DELIVER_PENDING_EXCEPTION_FRAME_READY mov x0, xSELF // Point of no return. @@ -404,6 +407,15 @@ brk 0 // Unreached .endm + /* + * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending + * exception is Thread::Current()->exception_. + */ +.macro DELIVER_PENDING_EXCEPTION + SETUP_SAVE_ALL_CALLEE_SAVES_FRAME + DELIVER_PENDING_EXCEPTION_FRAME_READY +.endm + .macro RETURN_OR_DELIVER_PENDING_EXCEPTION_REG reg ldr \reg, [xSELF, # THREAD_EXCEPTION_OFFSET] // Get exception field. 
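The read-barrier fast paths above all hinge on one test: if the mark bit in the object's lock word is already set, the reference needs no further work and the call into artReadBarrierMark is skipped. A hedged sketch of that predicate; the bit position is an assumption, not copied from the runtime's lock word definition:

#include <cstdint>

constexpr uint32_t kMarkBitShiftSketch = 29;  // assumed LOCK_WORD_MARK_BIT_SHIFT

inline bool IsAlreadyMarkedSketch(uint32_t lock_word) {
  return ((lock_word >> kMarkBitShiftSketch) & 1u) != 0u;
}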
cbnz \reg, 1f @@ -1638,40 +1650,54 @@ END art_quick_set64_static */ ENTRY art_quick_resolve_string - ldr x1, [sp] // load referrer - ldr w2, [x1, #ART_METHOD_DECLARING_CLASS_OFFSET] // load declaring class - ldr x1, [x2, #DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET] // load string dex cache - ubfx x2, x0, #0, #STRING_DEX_CACHE_HASH_BITS // get masked string index into x2 - ldr x2, [x1, x2, lsl #STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT] // load dex cache pair into x2 - cmp x0, x2, lsr #32 // compare against upper 32 bits + SAVE_TWO_REGS_INCREASE_FRAME x29, xLR, 2 * __SIZEOF_POINTER__ + ldr x29, [sp, #(2 * __SIZEOF_POINTER__)] // load referrer + ldr w29, [x29, #ART_METHOD_DECLARING_CLASS_OFFSET] // load declaring class + ldr x29, [x29, #DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET] // load string dex cache + ubfx lr, x0, #0, #STRING_DEX_CACHE_HASH_BITS // get masked string index into LR + ldr x29, [x29, lr, lsl #STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT] // load dex cache pair into x29 + cmp x0, x29, lsr #32 // compare against upper 32 bits bne .Lart_quick_resolve_string_slow_path - ubfx x0, x2, #0, #32 // extract lower 32 bits into x0 + ubfx x0, x29, #0, #32 // extract lower 32 bits into x0 #ifdef USE_READ_BARRIER // Most common case: GC is not marking. - ldr w3, [xSELF, #THREAD_IS_GC_MARKING_OFFSET] - cbnz x3, .Lart_quick_resolve_string_marking + ldr w29, [xSELF, #THREAD_IS_GC_MARKING_OFFSET] + cbnz x29, .Lart_quick_resolve_string_marking +.Lart_quick_resolve_string_no_rb: #endif + .cfi_remember_state + RESTORE_TWO_REGS_DECREASE_FRAME x29, xLR, 2 * __SIZEOF_POINTER__ ret + .cfi_restore_state + .cfi_def_cfa_offset 16 // workaround for clang bug: 31975598 + +#ifdef USE_READ_BARRIER +// GC is marking case, need to check the mark bit. +.Lart_quick_resolve_string_marking: + ldr x29, [x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] + tbnz x29, #LOCK_WORD_MARK_BIT_SHIFT, .Lart_quick_resolve_string_no_rb + .cfi_remember_state + RESTORE_TWO_REGS_DECREASE_FRAME x29, xLR, 2 * __SIZEOF_POINTER__ + // Note: art_quick_read_barrier_mark_reg00 clobbers IP0 but the .Lslow_rb_* does not. + b .Lslow_rb_art_quick_read_barrier_mark_reg00 // Get the marked string back. + .cfi_restore_state + .cfi_def_cfa_offset 16 // workaround for clang bug: 31975598 +#endif // Slow path case, the index did not match. .Lart_quick_resolve_string_slow_path: - SETUP_SAVE_REFS_ONLY_FRAME // save callee saves in case of GC + INCREASE_FRAME (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__) + SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP_SKIP_X29_LR // save callee saves in case of GC mov x1, xSELF // pass Thread::Current bl artResolveStringFromCode // (int32_t string_idx, Thread* self) - RESTORE_SAVE_REFS_ONLY_FRAME - RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER - -// GC is marking case, need to check the mark bit. -.Lart_quick_resolve_string_marking: - ldr x3, [x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] - tbnz x3, #LOCK_WORD_MARK_BIT_SHIFT, .Lart_quick_resolve_string_no_rb - // Save LR so that we can return, also x1 for alignment purposes. - SAVE_TWO_REGS_INCREASE_FRAME x1, xLR, 16 // Save x1, LR. - bl artReadBarrierMark // Get the marked string back. - RESTORE_TWO_REGS_DECREASE_FRAME x1, xLR, 16 // Restore registers. -.Lart_quick_resolve_string_no_rb: - ret - + cbz w0, 1f // If result is null, deliver the OOME. 
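The arm64 (and x86-64) fast paths above read the whole dex-cache slot as one 64-bit value and then split it: the resolved String reference sits in the low 32 bits and the string index in the high 32 bits, hence the `lsr #32` compare and the `ubfx ..., #0, #32` extract. Equivalent unpacking in C++ for illustration, assuming a 32-bit compressed reference:

#include <cstdint>

struct UnpackedStringSlotSketch {
  uint32_t reference;     // low 32 bits: compressed String reference
  uint32_t string_index;  // high 32 bits: dex string index
};

inline UnpackedStringSlotSketch UnpackStringSlotSketch(uint64_t slot) {
  return { static_cast<uint32_t>(slot), static_cast<uint32_t>(slot >> 32) };
}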
+ .cfi_remember_state + RESTORE_SAVE_EVERYTHING_FRAME_KEEP_X0 + ret // return + .cfi_restore_state + .cfi_def_cfa_offset FRAME_SIZE_SAVE_EVERYTHING // workaround for clang bug: 31975598 +1: + DELIVER_PENDING_EXCEPTION_FRAME_READY END art_quick_resolve_string // Generate the allocation entrypoints for each allocator. @@ -2513,9 +2539,10 @@ ENTRY \name */ // Use wIP0 as temp and check the mark bit of the reference. wIP0 is not used by the compiler. ldr wIP0, [\xreg, #MIRROR_OBJECT_LOCK_WORD_OFFSET] - tbz wIP0, #LOCK_WORD_MARK_BIT_SHIFT, .Lslow_path_rb_\name + tbz wIP0, #LOCK_WORD_MARK_BIT_SHIFT, .Lslow_rb_\name +.Lret_rb_\name: ret -.Lslow_path_rb_\name: +.Lslow_rb_\name: // Save all potentially live caller-save core registers. SAVE_TWO_REGS_INCREASE_FRAME x0, x1, 368 SAVE_TWO_REGS x2, x3, 16 @@ -2580,7 +2607,6 @@ ENTRY \name // Restore return address and remove padding. RESTORE_REG xLR, 360 DECREASE_FRAME 368 -.Lret_rb_\name: ret END \name .endm diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S index 7bb59efdbf..f4f9a68e30 100644 --- a/runtime/arch/x86/quick_entrypoints_x86.S +++ b/runtime/arch/x86/quick_entrypoints_x86.S @@ -224,12 +224,11 @@ END_MACRO /* * Macro that sets up the callee save frame to conform with * Runtime::CreateCalleeSaveMethod(kSaveEverything) - * when EDI is already saved. + * when EDI and ESI are already saved. */ -MACRO2(SETUP_SAVE_EVERYTHING_FRAME_EDI_SAVED, got_reg, temp_reg) +MACRO2(SETUP_SAVE_EVERYTHING_FRAME_EDI_ESI_SAVED, got_reg, temp_reg) // Save core registers from highest to lowest to agree with core spills bitmap. - // EDI, or at least a placeholder for it, is already on the stack. - PUSH esi + // EDI and ESI, or at least placeholders for them, are already on the stack. PUSH ebp PUSH ebx PUSH edx @@ -268,13 +267,25 @@ END_MACRO /* * Macro that sets up the callee save frame to conform with * Runtime::CreateCalleeSaveMethod(kSaveEverything) + * when EDI is already saved. + */ +MACRO2(SETUP_SAVE_EVERYTHING_FRAME_EDI_SAVED, got_reg, temp_reg) + // Save core registers from highest to lowest to agree with core spills bitmap. + // EDI, or at least a placeholder for it, is already on the stack. + PUSH esi + SETUP_SAVE_EVERYTHING_FRAME_EDI_ESI_SAVED RAW_VAR(got_reg), RAW_VAR(temp_reg) +END_MACRO + + /* + * Macro that sets up the callee save frame to conform with + * Runtime::CreateCalleeSaveMethod(kSaveEverything) */ MACRO2(SETUP_SAVE_EVERYTHING_FRAME, got_reg, temp_reg) PUSH edi SETUP_SAVE_EVERYTHING_FRAME_EDI_SAVED RAW_VAR(got_reg), RAW_VAR(temp_reg) END_MACRO -MACRO0(RESTORE_SAVE_EVERYTHING_FRAME) +MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_FRPS) // Restore FPRs. Method and padding is still on the stack. movsd 16(%esp), %xmm0 movsd 24(%esp), %xmm1 @@ -284,13 +295,10 @@ MACRO0(RESTORE_SAVE_EVERYTHING_FRAME) movsd 56(%esp), %xmm5 movsd 64(%esp), %xmm6 movsd 72(%esp), %xmm7 +END_MACRO - // Remove save everything callee save method, stack alignment padding and FPRs. - addl MACRO_LITERAL(16 + 8 * 8), %esp - CFI_ADJUST_CFA_OFFSET(-(16 + 8 * 8)) - - // Restore core registers. - POP eax +MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_EAX) + // Restore core registers (except eax). POP ecx POP edx POP ebx @@ -299,12 +307,32 @@ MACRO0(RESTORE_SAVE_EVERYTHING_FRAME) POP edi END_MACRO +MACRO0(RESTORE_SAVE_EVERYTHING_FRAME) + RESTORE_SAVE_EVERYTHING_FRAME_FRPS + + // Remove save everything callee save method, stack alignment padding and FPRs. 
+ addl MACRO_LITERAL(16 + 8 * 8), %esp + CFI_ADJUST_CFA_OFFSET(-(16 + 8 * 8)) + + POP eax + RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_EAX +END_MACRO + +MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_KEEP_EAX) + RESTORE_SAVE_EVERYTHING_FRAME_FRPS + + // Remove save everything callee save method, stack alignment padding and FPRs, skip EAX. + addl MACRO_LITERAL(16 + 8 * 8 + 4), %esp + CFI_ADJUST_CFA_OFFSET(-(16 + 8 * 8 + 4)) + + RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_EAX +END_MACRO + /* - * Macro that set calls through to artDeliverPendingExceptionFromCode, where the pending - * exception is Thread::Current()->exception_. + * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending + * exception is Thread::Current()->exception_ when the runtime method frame is ready. */ -MACRO0(DELIVER_PENDING_EXCEPTION) - SETUP_SAVE_ALL_CALLEE_SAVES_FRAME ebx, ebx // save callee saves for throw +MACRO0(DELIVER_PENDING_EXCEPTION_FRAME_READY) // Outgoing argument set up subl MACRO_LITERAL(12), %esp // alignment padding CFI_ADJUST_CFA_OFFSET(12) @@ -314,6 +342,15 @@ MACRO0(DELIVER_PENDING_EXCEPTION) UNREACHABLE END_MACRO + /* + * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending + * exception is Thread::Current()->exception_. + */ +MACRO0(DELIVER_PENDING_EXCEPTION) + SETUP_SAVE_ALL_CALLEE_SAVES_FRAME ebx, ebx // save callee saves for throw + DELIVER_PENDING_EXCEPTION_FRAME_READY +END_MACRO + MACRO2(NO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name) DEFINE_FUNCTION VAR(c_name) SETUP_SAVE_ALL_CALLEE_SAVES_FRAME ebx, ebx // save all registers as basis for long jump context @@ -1114,26 +1151,42 @@ DEFINE_FUNCTION art_quick_alloc_object_region_tlab END_FUNCTION art_quick_alloc_object_region_tlab DEFINE_FUNCTION art_quick_resolve_string - movl 4(%esp), %ecx // get referrer - movl ART_METHOD_DECLARING_CLASS_OFFSET(%ecx), %ecx // get declaring class - movl DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET(%ecx), %ecx // get string dex cache - movl LITERAL(STRING_DEX_CACHE_SIZE_MINUS_ONE), %edx - andl %eax, %edx - movlps (%ecx, %edx, STRING_DEX_CACHE_ELEMENT_SIZE), %xmm0 // load string idx and ptr to xmm0 - movd %xmm0, %ecx // extract pointer + PUSH edi + PUSH esi + // Save xmm0 at an aligned address on the stack. + subl MACRO_LITERAL(12), %esp + CFI_ADJUST_CFA_OFFSET(12) + movsd %xmm0, 0(%esp) + movl 24(%esp), %edi // get referrer + movl ART_METHOD_DECLARING_CLASS_OFFSET(%edi), %edi // get declaring class + movl DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET(%edi), %edi // get string dex cache + movl LITERAL(STRING_DEX_CACHE_SIZE_MINUS_ONE), %esi + andl %eax, %esi + movlps (%edi, %esi, STRING_DEX_CACHE_ELEMENT_SIZE), %xmm0 // load string idx and ptr to xmm0 + movd %xmm0, %edi // extract pointer pshufd LITERAL(0x55), %xmm0, %xmm0 // shuffle index into lowest bits - movd %xmm0, %edx // extract index - cmp %edx, %eax + movd %xmm0, %esi // extract index + // Restore xmm0 and remove it together with padding from the stack. 
+ movsd 0(%esp), %xmm0 + addl MACRO_LITERAL(12), %esp + CFI_ADJUST_CFA_OFFSET(-12) + cmp %esi, %eax jne .Lart_quick_resolve_string_slow_path - movl %ecx, %eax + movl %edi, %eax + CFI_REMEMBER_STATE + POP esi + POP edi #ifdef USE_READ_BARRIER cmpl LITERAL(0), %fs:THREAD_IS_GC_MARKING_OFFSET - jne .Lart_quick_resolve_string_marking + jne .Lnot_null_art_quick_read_barrier_mark_reg00 #endif ret + CFI_RESTORE_STATE + CFI_DEF_CFA(esp, 24) // workaround for clang bug: 31975598 + .Lart_quick_resolve_string_slow_path: // Outgoing argument set up - SETUP_SAVE_REFS_ONLY_FRAME ebx, ebx + SETUP_SAVE_EVERYTHING_FRAME_EDI_ESI_SAVED ebx, ebx subl LITERAL(8), %esp // push padding CFI_ADJUST_CFA_OFFSET(8) pushl %fs:THREAD_SELF_OFFSET // pass Thread::Current() @@ -1142,21 +1195,15 @@ DEFINE_FUNCTION art_quick_resolve_string call SYMBOL(artResolveStringFromCode) addl LITERAL(16), %esp // pop arguments CFI_ADJUST_CFA_OFFSET(-16) - RESTORE_SAVE_REFS_ONLY_FRAME - RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER -.Lart_quick_resolve_string_marking: - SETUP_SAVE_REFS_ONLY_FRAME ebx, ebx - testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%eax) - jnz .Lart_quick_resolve_string_no_rb - subl LITERAL(12), %esp // alignment padding - CFI_ADJUST_CFA_OFFSET(12) - PUSH eax // Pass the string as the first param. - call SYMBOL(artReadBarrierMark) - addl LITERAL(16), %esp - CFI_ADJUST_CFA_OFFSET(-16) -.Lart_quick_resolve_string_no_rb: - RESTORE_SAVE_REFS_ONLY_FRAME + testl %eax, %eax // If result is null, deliver the OOME. + jz 1f + CFI_REMEMBER_STATE + RESTORE_SAVE_EVERYTHING_FRAME_KEEP_EAX ret + CFI_RESTORE_STATE + CFI_DEF_CFA(esp, FRAME_SIZE_SAVE_EVERYTHING) // workaround for clang bug: 31975598 +1: + DELIVER_PENDING_EXCEPTION_FRAME_READY END_FUNCTION art_quick_resolve_string ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER @@ -2102,6 +2149,7 @@ MACRO2(READ_BARRIER_MARK_REG, name, reg) // Null check so that we can load the lock word. test REG_VAR(reg), REG_VAR(reg) jz .Lret_rb_\name +.Lnot_null_\name: // Check the mark bit, if it is 1 return. testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(reg)) jz .Lslow_rb_\name diff --git a/runtime/arch/x86_64/asm_support_x86_64.S b/runtime/arch/x86_64/asm_support_x86_64.S index af4a6c4f99..28018c5f24 100644 --- a/runtime/arch/x86_64/asm_support_x86_64.S +++ b/runtime/arch/x86_64/asm_support_x86_64.S @@ -76,6 +76,8 @@ #define CFI_DEF_CFA_REGISTER(reg) .cfi_def_cfa_register reg #define CFI_RESTORE(reg) .cfi_restore reg #define CFI_REL_OFFSET(reg,size) .cfi_rel_offset reg,size + #define CFI_RESTORE_STATE .cfi_restore_state + #define CFI_REMEMBER_STATE .cfi_remember_state #else // Mac OS' doesn't like cfi_* directives. #define CFI_STARTPROC @@ -85,6 +87,8 @@ #define CFI_DEF_CFA_REGISTER(reg) #define CFI_RESTORE(reg) #define CFI_REL_OFFSET(reg,size) + #define CFI_RESTORE_STATE + #define CFI_REMEMBER_STATE #endif // Symbols. diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S index c3321e17b9..afa1c0ff03 100644 --- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S +++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S @@ -263,16 +263,15 @@ END_MACRO /* * Macro that sets up the callee save frame to conform with * Runtime::CreateCalleeSaveMethod(kSaveEverything) - * when R15 is already saved. + * when R14 and R15 are already saved. 
*/ -MACRO0(SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED) +MACRO0(SETUP_SAVE_EVERYTHING_FRAME_R14_R15_SAVED) #if defined(__APPLE__) int3 int3 #else // Save core registers from highest to lowest to agree with core spills bitmap. - // R15, or at least a placeholder for it, is already on the stack. - PUSH r14 + // R14 and R15, or at least placeholders for them, are already on the stack. PUSH r13 PUSH r12 PUSH r11 @@ -326,13 +325,23 @@ END_MACRO /* * Macro that sets up the callee save frame to conform with * Runtime::CreateCalleeSaveMethod(kSaveEverything) + * when R15 is already saved. + */ +MACRO0(SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED) + PUSH r14 + SETUP_SAVE_EVERYTHING_FRAME_R14_R15_SAVED +END_MACRO + + /* + * Macro that sets up the callee save frame to conform with + * Runtime::CreateCalleeSaveMethod(kSaveEverything) */ MACRO0(SETUP_SAVE_EVERYTHING_FRAME) PUSH r15 SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED END_MACRO -MACRO0(RESTORE_SAVE_EVERYTHING_FRAME) +MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_FRPS) // Restore FPRs. Method and padding is still on the stack. movq 16(%rsp), %xmm0 movq 24(%rsp), %xmm1 @@ -350,12 +359,10 @@ MACRO0(RESTORE_SAVE_EVERYTHING_FRAME) movq 120(%rsp), %xmm13 movq 128(%rsp), %xmm14 movq 136(%rsp), %xmm15 +END_MACRO - // Remove save everything callee save method, stack alignment padding and FPRs. - addq MACRO_LITERAL(16 + 16 * 8), %rsp - CFI_ADJUST_CFA_OFFSET(-(16 + 16 * 8)) - // Restore callee and GPR args, mixed together to agree with core spills bitmap. - POP rax +MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_RAX) + // Restore callee and GPR args (except RAX), mixed together to agree with core spills bitmap. POP rcx POP rdx POP rbx @@ -372,19 +379,47 @@ MACRO0(RESTORE_SAVE_EVERYTHING_FRAME) POP r15 END_MACRO +MACRO0(RESTORE_SAVE_EVERYTHING_FRAME) + RESTORE_SAVE_EVERYTHING_FRAME_FRPS + + // Remove save everything callee save method, stack alignment padding and FPRs. + addq MACRO_LITERAL(16 + 16 * 8), %rsp + CFI_ADJUST_CFA_OFFSET(-(16 + 16 * 8)) + + POP rax + RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_RAX +END_MACRO + +MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_KEEP_RAX) + RESTORE_SAVE_EVERYTHING_FRAME_FRPS + + // Remove save everything callee save method, stack alignment padding and FPRs, skip RAX. + addq MACRO_LITERAL(16 + 16 * 8 + 8), %rsp + CFI_ADJUST_CFA_OFFSET(-(16 + 16 * 8 + 8)) + + RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_RAX +END_MACRO /* - * Macro that set calls through to artDeliverPendingExceptionFromCode, where the pending - * exception is Thread::Current()->exception_. + * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending + * exception is Thread::Current()->exception_ when the runtime method frame is ready. */ -MACRO0(DELIVER_PENDING_EXCEPTION) - SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save callee saves for throw +MACRO0(DELIVER_PENDING_EXCEPTION_FRAME_READY) // (Thread*) setup movq %gs:THREAD_SELF_OFFSET, %rdi call SYMBOL(artDeliverPendingExceptionFromCode) // artDeliverPendingExceptionFromCode(Thread*) UNREACHABLE END_MACRO + /* + * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending + * exception is Thread::Current()->exception_. 
+ */ +MACRO0(DELIVER_PENDING_EXCEPTION) + SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save callee saves for throw + DELIVER_PENDING_EXCEPTION_FRAME_READY +END_MACRO + MACRO2(NO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name) DEFINE_FUNCTION VAR(c_name) SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context @@ -1295,45 +1330,48 @@ DEFINE_FUNCTION art_quick_alloc_object_initialized_region_tlab END_FUNCTION art_quick_alloc_object_initialized_region_tlab DEFINE_FUNCTION art_quick_resolve_string - movq 8(%rsp), %rcx // get referrer - movl ART_METHOD_DECLARING_CLASS_OFFSET(%rcx), %ecx // get declaring class - movq DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET(%ecx), %rcx // get string dex cache - movq LITERAL(STRING_DEX_CACHE_SIZE_MINUS_ONE), %rdx - andq %rdi, %rdx - movq (%rcx, %rdx, STRING_DEX_CACHE_ELEMENT_SIZE), %rdx - movl %edx, %eax - shrq LITERAL(32), %rdx - cmp %rdx, %rdi + // Custom calling convention: RAX serves as both input and output. + PUSH r15 + PUSH r14 + movq 24(%rsp), %r15 // get referrer + movl ART_METHOD_DECLARING_CLASS_OFFSET(%r15), %r15d // get declaring class + movq DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET(%r15d), %r15 // get string dex cache + movl LITERAL(STRING_DEX_CACHE_SIZE_MINUS_ONE), %r14d + andl %eax, %r14d + movq (%r15, %r14, STRING_DEX_CACHE_ELEMENT_SIZE), %r14 + movl %r14d, %r15d + shrq LITERAL(32), %r14 + cmpl %r14d, %eax jne .Lart_quick_resolve_string_slow_path + movl %r15d, %eax + CFI_REMEMBER_STATE + POP r14 + POP r15 #ifdef USE_READ_BARRIER cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET - jne .Lart_quick_resolve_string_marking + jne .Lnot_null_art_quick_read_barrier_mark_reg00 #endif ret -// Slow path, the index did not match + CFI_RESTORE_STATE + CFI_DEF_CFA(rsp, 24) // workaround for clang bug: 31975598 + +// Slow path, the index did not match. .Lart_quick_resolve_string_slow_path: - SETUP_SAVE_REFS_ONLY_FRAME - movq %rcx, %rax + SETUP_SAVE_EVERYTHING_FRAME_R14_R15_SAVED // Outgoing argument set up + movl %eax, %edi // pass string index movq %gs:THREAD_SELF_OFFSET, %rsi // pass Thread::Current() call SYMBOL(artResolveStringFromCode) // artResolveStringFromCode(arg0, referrer, Thread*) - RESTORE_SAVE_REFS_ONLY_FRAME // restore frame up to return address - RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER -// GC is marking case, need to check the mark bit. -.Lart_quick_resolve_string_marking: - testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%rax) - jnz .Lart_quick_resolve_string_no_rb - // Save LR so that we can return, also x1 for alignment purposes - PUSH rdi - PUSH rsi - subq LITERAL(8), %rsp // 16 byte alignment - movq %rax, %rdi - call SYMBOL(artReadBarrierMark) - addq LITERAL(8), %rsp - POP rsi - POP rdi -.Lart_quick_resolve_string_no_rb: + + testl %eax, %eax // If result is null, deliver the OOME. + jz 1f + CFI_REMEMBER_STATE + RESTORE_SAVE_EVERYTHING_FRAME_KEEP_RAX // restore frame up to return address ret + CFI_RESTORE_STATE + CFI_DEF_CFA(rsp, FRAME_SIZE_SAVE_EVERYTHING) // workaround for clang bug: 31975598 +1: + DELIVER_PENDING_EXCEPTION_FRAME_READY END_FUNCTION art_quick_resolve_string ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER @@ -2230,6 +2268,7 @@ MACRO2(READ_BARRIER_MARK_REG, name, reg) // Null check so that we can load the lock word. testq REG_VAR(reg), REG_VAR(reg) jz .Lret_rb_\name +.Lnot_null_\name: // Check the mark bit, if it is 1 return. 
testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(reg)) jz .Lslow_rb_\name diff --git a/runtime/asm_support.h b/runtime/asm_support.h index 567791e291..cd8815b25a 100644 --- a/runtime/asm_support.h +++ b/runtime/asm_support.h @@ -98,7 +98,7 @@ ADD_TEST_EQ(THREAD_LOCAL_POS_OFFSET, ADD_TEST_EQ(THREAD_LOCAL_END_OFFSET, art::Thread::ThreadLocalEndOffset<POINTER_SIZE>().Int32Value()) // Offset of field Thread::tlsPtr_.thread_local_objects. -#define THREAD_LOCAL_OBJECTS_OFFSET (THREAD_LOCAL_END_OFFSET + __SIZEOF_POINTER__) +#define THREAD_LOCAL_OBJECTS_OFFSET (THREAD_LOCAL_END_OFFSET + 2 * __SIZEOF_POINTER__) ADD_TEST_EQ(THREAD_LOCAL_OBJECTS_OFFSET, art::Thread::ThreadLocalObjectsOffset<POINTER_SIZE>().Int32Value()) // Offset of field Thread::tlsPtr_.mterp_current_ibase. diff --git a/runtime/dex_file_verifier.cc b/runtime/dex_file_verifier.cc index 5132efc03c..5d70076d1e 100644 --- a/runtime/dex_file_verifier.cc +++ b/runtime/dex_file_verifier.cc @@ -454,6 +454,27 @@ uint32_t DexFileVerifier::ReadUnsignedLittleEndian(uint32_t size) { return result; } + +#define DECODE_UNSIGNED_CHECKED_FROM_WITH_ERROR_VALUE(ptr, var, error_value) \ + uint32_t var; \ + if (!DecodeUnsignedLeb128Checked(&ptr, begin_ + size_, &var)) { \ + return error_value; \ + } + +#define DECODE_UNSIGNED_CHECKED_FROM(ptr, var) \ + uint32_t var; \ + if (!DecodeUnsignedLeb128Checked(&ptr, begin_ + size_, &var)) { \ + ErrorStringPrintf("Read out of bounds"); \ + return false; \ + } + +#define DECODE_SIGNED_CHECKED_FROM(ptr, var) \ + int32_t var; \ + if (!DecodeSignedLeb128Checked(&ptr, begin_ + size_, &var)) { \ + ErrorStringPrintf("Read out of bounds"); \ + return false; \ + } + bool DexFileVerifier::CheckAndGetHandlerOffsets(const DexFile::CodeItem* code_item, uint32_t* handler_offsets, uint32_t handlers_size) { const uint8_t* handlers_base = DexFile::GetCatchHandlerData(*code_item, 0); @@ -461,7 +482,7 @@ bool DexFileVerifier::CheckAndGetHandlerOffsets(const DexFile::CodeItem* code_it for (uint32_t i = 0; i < handlers_size; i++) { bool catch_all; size_t offset = ptr_ - handlers_base; - int32_t size = DecodeSignedLeb128(&ptr_); + DECODE_SIGNED_CHECKED_FROM(ptr_, size); if (UNLIKELY((size < -65536) || (size > 65536))) { ErrorStringPrintf("Invalid exception handler size: %d", size); @@ -478,12 +499,12 @@ bool DexFileVerifier::CheckAndGetHandlerOffsets(const DexFile::CodeItem* code_it handler_offsets[i] = static_cast<uint32_t>(offset); while (size-- > 0) { - uint32_t type_idx = DecodeUnsignedLeb128(&ptr_); + DECODE_UNSIGNED_CHECKED_FROM(ptr_, type_idx); if (!CheckIndex(type_idx, header_->type_ids_size_, "handler type_idx")) { return false; } - uint32_t addr = DecodeUnsignedLeb128(&ptr_); + DECODE_UNSIGNED_CHECKED_FROM(ptr_, addr); if (UNLIKELY(addr >= code_item->insns_size_in_code_units_)) { ErrorStringPrintf("Invalid handler addr: %x", addr); return false; @@ -491,7 +512,7 @@ bool DexFileVerifier::CheckAndGetHandlerOffsets(const DexFile::CodeItem* code_it } if (catch_all) { - uint32_t addr = DecodeUnsignedLeb128(&ptr_); + DECODE_UNSIGNED_CHECKED_FROM(ptr_, addr); if (UNLIKELY(addr >= code_item->insns_size_in_code_units_)) { ErrorStringPrintf("Invalid handler catch_all_addr: %x", addr); return false; @@ -726,7 +747,7 @@ bool DexFileVerifier::CheckEncodedValue() { } bool DexFileVerifier::CheckEncodedArray() { - uint32_t size = DecodeUnsignedLeb128(&ptr_); + DECODE_UNSIGNED_CHECKED_FROM(ptr_, size); while (size--) { if (!CheckEncodedValue()) { @@ -738,16 +759,16 @@ bool 
DexFileVerifier::CheckEncodedArray() { } bool DexFileVerifier::CheckEncodedAnnotation() { - uint32_t idx = DecodeUnsignedLeb128(&ptr_); - if (!CheckIndex(idx, header_->type_ids_size_, "encoded_annotation type_idx")) { + DECODE_UNSIGNED_CHECKED_FROM(ptr_, anno_idx); + if (!CheckIndex(anno_idx, header_->type_ids_size_, "encoded_annotation type_idx")) { return false; } - uint32_t size = DecodeUnsignedLeb128(&ptr_); + DECODE_UNSIGNED_CHECKED_FROM(ptr_, size); uint32_t last_idx = 0; for (uint32_t i = 0; i < size; i++) { - idx = DecodeUnsignedLeb128(&ptr_); + DECODE_UNSIGNED_CHECKED_FROM(ptr_, idx); if (!CheckIndex(idx, header_->string_ids_size_, "annotation_element name_idx")) { return false; } @@ -1002,7 +1023,7 @@ bool DexFileVerifier::CheckIntraCodeItem() { } ptr_ = DexFile::GetCatchHandlerData(*code_item, 0); - uint32_t handlers_size = DecodeUnsignedLeb128(&ptr_); + DECODE_UNSIGNED_CHECKED_FROM(ptr_, handlers_size); if (UNLIKELY((handlers_size == 0) || (handlers_size >= 65536))) { ErrorStringPrintf("Invalid handlers_size: %ud", handlers_size); @@ -1051,7 +1072,7 @@ bool DexFileVerifier::CheckIntraCodeItem() { } bool DexFileVerifier::CheckIntraStringDataItem() { - uint32_t size = DecodeUnsignedLeb128(&ptr_); + DECODE_UNSIGNED_CHECKED_FROM(ptr_, size); const uint8_t* file_end = begin_ + size_; for (uint32_t i = 0; i < size; i++) { @@ -1137,15 +1158,15 @@ bool DexFileVerifier::CheckIntraStringDataItem() { } bool DexFileVerifier::CheckIntraDebugInfoItem() { - DecodeUnsignedLeb128(&ptr_); - uint32_t parameters_size = DecodeUnsignedLeb128(&ptr_); + DECODE_UNSIGNED_CHECKED_FROM(ptr_, dummy); + DECODE_UNSIGNED_CHECKED_FROM(ptr_, parameters_size); if (UNLIKELY(parameters_size > 65536)) { ErrorStringPrintf("Invalid parameters_size: %x", parameters_size); return false; } for (uint32_t j = 0; j < parameters_size; j++) { - uint32_t parameter_name = DecodeUnsignedLeb128(&ptr_); + DECODE_UNSIGNED_CHECKED_FROM(ptr_, parameter_name); if (parameter_name != 0) { parameter_name--; if (!CheckIndex(parameter_name, header_->string_ids_size_, "debug_info_item parameter_name")) { @@ -1161,27 +1182,27 @@ bool DexFileVerifier::CheckIntraDebugInfoItem() { return true; } case DexFile::DBG_ADVANCE_PC: { - DecodeUnsignedLeb128(&ptr_); + DECODE_UNSIGNED_CHECKED_FROM(ptr_, advance_pc_dummy); break; } case DexFile::DBG_ADVANCE_LINE: { - DecodeSignedLeb128(&ptr_); + DECODE_SIGNED_CHECKED_FROM(ptr_, advance_line_dummy); break; } case DexFile::DBG_START_LOCAL: { - uint32_t reg_num = DecodeUnsignedLeb128(&ptr_); + DECODE_UNSIGNED_CHECKED_FROM(ptr_, reg_num); if (UNLIKELY(reg_num >= 65536)) { ErrorStringPrintf("Bad reg_num for opcode %x", opcode); return false; } - uint32_t name_idx = DecodeUnsignedLeb128(&ptr_); + DECODE_UNSIGNED_CHECKED_FROM(ptr_, name_idx); if (name_idx != 0) { name_idx--; if (!CheckIndex(name_idx, header_->string_ids_size_, "DBG_START_LOCAL name_idx")) { return false; } } - uint32_t type_idx = DecodeUnsignedLeb128(&ptr_); + DECODE_UNSIGNED_CHECKED_FROM(ptr_, type_idx); if (type_idx != 0) { type_idx--; if (!CheckIndex(type_idx, header_->type_ids_size_, "DBG_START_LOCAL type_idx")) { @@ -1192,7 +1213,7 @@ bool DexFileVerifier::CheckIntraDebugInfoItem() { } case DexFile::DBG_END_LOCAL: case DexFile::DBG_RESTART_LOCAL: { - uint32_t reg_num = DecodeUnsignedLeb128(&ptr_); + DECODE_UNSIGNED_CHECKED_FROM(ptr_, reg_num); if (UNLIKELY(reg_num >= 65536)) { ErrorStringPrintf("Bad reg_num for opcode %x", opcode); return false; @@ -1200,26 +1221,26 @@ bool DexFileVerifier::CheckIntraDebugInfoItem() { break; } case 
DexFile::DBG_START_LOCAL_EXTENDED: { - uint32_t reg_num = DecodeUnsignedLeb128(&ptr_); + DECODE_UNSIGNED_CHECKED_FROM(ptr_, reg_num); if (UNLIKELY(reg_num >= 65536)) { ErrorStringPrintf("Bad reg_num for opcode %x", opcode); return false; } - uint32_t name_idx = DecodeUnsignedLeb128(&ptr_); + DECODE_UNSIGNED_CHECKED_FROM(ptr_, name_idx); if (name_idx != 0) { name_idx--; if (!CheckIndex(name_idx, header_->string_ids_size_, "DBG_START_LOCAL_EXTENDED name_idx")) { return false; } } - uint32_t type_idx = DecodeUnsignedLeb128(&ptr_); + DECODE_UNSIGNED_CHECKED_FROM(ptr_, type_idx); if (type_idx != 0) { type_idx--; if (!CheckIndex(type_idx, header_->type_ids_size_, "DBG_START_LOCAL_EXTENDED type_idx")) { return false; } } - uint32_t sig_idx = DecodeUnsignedLeb128(&ptr_); + DECODE_UNSIGNED_CHECKED_FROM(ptr_, sig_idx); if (sig_idx != 0) { sig_idx--; if (!CheckIndex(sig_idx, header_->string_ids_size_, "DBG_START_LOCAL_EXTENDED sig_idx")) { @@ -1229,7 +1250,7 @@ bool DexFileVerifier::CheckIntraDebugInfoItem() { break; } case DexFile::DBG_SET_FILE: { - uint32_t name_idx = DecodeUnsignedLeb128(&ptr_); + DECODE_UNSIGNED_CHECKED_FROM(ptr_, name_idx); if (name_idx != 0) { name_idx--; if (!CheckIndex(name_idx, header_->string_ids_size_, "DBG_SET_FILE name_idx")) { @@ -2127,7 +2148,7 @@ bool DexFileVerifier::CheckInterAnnotationSetItem() { const DexFile::AnnotationItem* annotation = reinterpret_cast<const DexFile::AnnotationItem*>(begin_ + *offsets); const uint8_t* data = annotation->annotation_; - uint32_t idx = DecodeUnsignedLeb128(&data); + DECODE_UNSIGNED_CHECKED_FROM(data, idx); if (UNLIKELY(last_idx >= idx && i != 0)) { ErrorStringPrintf("Out-of-order entry types: %x then %x", last_idx, idx); @@ -2442,7 +2463,10 @@ static std::string GetStringOrError(const uint8_t* const begin, // Assume that the data is OK at this point. String data has been checked at this point. 
const uint8_t* ptr = begin + string_id->string_data_off_; - DecodeUnsignedLeb128(&ptr); + uint32_t dummy; + if (!DecodeUnsignedLeb128Checked(&ptr, begin + header->file_size_, &dummy)) { + return "(error)"; + } return reinterpret_cast<const char*>(ptr); } @@ -2604,7 +2628,11 @@ static bool FindMethodName(uint32_t method_index, return false; } const uint8_t* str_data_ptr = begin + string_off; - DecodeUnsignedLeb128(&str_data_ptr); + uint32_t dummy; + if (!DecodeUnsignedLeb128Checked(&str_data_ptr, begin + header->file_size_, &dummy)) { + *error_msg = "String size out of bounds for method flags verification"; + return false; + } *str = reinterpret_cast<const char*>(str_data_ptr); return true; } diff --git a/runtime/entrypoints/quick/quick_default_init_entrypoints.h b/runtime/entrypoints/quick/quick_default_init_entrypoints.h index 1ee1f818b6..df23f94a31 100644 --- a/runtime/entrypoints/quick/quick_default_init_entrypoints.h +++ b/runtime/entrypoints/quick/quick_default_init_entrypoints.h @@ -78,6 +78,7 @@ void DefaultInitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) qpoints->pJniMethodEnd = JniMethodEnd; qpoints->pJniMethodEndSynchronized = JniMethodEndSynchronized; qpoints->pJniMethodEndWithReference = JniMethodEndWithReference; + qpoints->pJniMethodFastEndWithReference = JniMethodFastEndWithReference; qpoints->pJniMethodEndWithReferenceSynchronized = JniMethodEndWithReferenceSynchronized; qpoints->pJniMethodFastEnd = JniMethodFastEnd; qpoints->pQuickGenericJniTrampoline = art_quick_generic_jni_trampoline; diff --git a/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc b/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc index 2a3a6bfa06..4d47b83185 100644 --- a/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc @@ -60,7 +60,11 @@ extern "C" mirror::Class* artInitializeTypeAndVerifyAccessFromCode(uint32_t type extern "C" mirror::String* artResolveStringFromCode(int32_t string_idx, Thread* self) REQUIRES_SHARED(Locks::mutator_lock_) { ScopedQuickEntrypointChecks sqec(self); - auto* caller = GetCalleeSaveMethodCaller(self, Runtime::kSaveRefsOnly); + auto* caller = GetCalleeSaveMethodCaller( + self, + // TODO: Change art_quick_resolve_string on MIPS and MIPS64 to kSaveEverything. + (kRuntimeISA == kMips || kRuntimeISA == kMips64) ? 
Runtime::kSaveRefsOnly + : Runtime::kSaveEverything); mirror::String* result = ResolveStringFromCode(caller, string_idx); if (LIKELY(result != nullptr)) { // For AOT code, we need a write barrier for the class loader that holds diff --git a/runtime/entrypoints/quick/quick_entrypoints.h b/runtime/entrypoints/quick/quick_entrypoints.h index 89712a3cc7..915f18ed71 100644 --- a/runtime/entrypoints/quick/quick_entrypoints.h +++ b/runtime/entrypoints/quick/quick_entrypoints.h @@ -65,6 +65,11 @@ extern void JniMethodEndSynchronized(uint32_t saved_local_ref_cookie, jobject lo extern mirror::Object* JniMethodEndWithReference(jobject result, uint32_t saved_local_ref_cookie, Thread* self) NO_THREAD_SAFETY_ANALYSIS HOT_ATTR; +extern mirror::Object* JniMethodFastEndWithReference(jobject result, + uint32_t saved_local_ref_cookie, + Thread* self) + NO_THREAD_SAFETY_ANALYSIS HOT_ATTR; + extern mirror::Object* JniMethodEndWithReferenceSynchronized(jobject result, uint32_t saved_local_ref_cookie, diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h index e4029191d6..3cfee45462 100644 --- a/runtime/entrypoints/quick/quick_entrypoints_list.h +++ b/runtime/entrypoints/quick/quick_entrypoints_list.h @@ -78,6 +78,7 @@ V(JniMethodFastEnd, void, uint32_t, Thread*) \ V(JniMethodEndSynchronized, void, uint32_t, jobject, Thread*) \ V(JniMethodEndWithReference, mirror::Object*, jobject, uint32_t, Thread*) \ + V(JniMethodFastEndWithReference, mirror::Object*, jobject, uint32_t, Thread*) \ V(JniMethodEndWithReferenceSynchronized, mirror::Object*, jobject, uint32_t, jobject, Thread*) \ V(QuickGenericJniTrampoline, void, ArtMethod*) \ \ diff --git a/runtime/entrypoints/quick/quick_jni_entrypoints.cc b/runtime/entrypoints/quick/quick_jni_entrypoints.cc index b25f447e4b..330c742354 100644 --- a/runtime/entrypoints/quick/quick_jni_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_jni_entrypoints.cc @@ -22,6 +22,9 @@ namespace art { +template <bool kDynamicFast> +static inline void GoToRunnableFast(Thread* self) NO_THREAD_SAFETY_ANALYSIS; + extern void ReadBarrierJni(mirror::CompressedReference<mirror::Object>* handle_on_stack, Thread* self ATTRIBUTE_UNUSED) { DCHECK(kUseReadBarrier); @@ -78,7 +81,28 @@ static void GoToRunnable(Thread* self) NO_THREAD_SAFETY_ANALYSIS { bool is_fast = native_method->IsFastNative(); if (!is_fast) { self->TransitionFromSuspendedToRunnable(); - } else if (UNLIKELY(self->TestAllFlags())) { + } else { + GoToRunnableFast</*kDynamicFast*/true>(self); + } +} + +// TODO: NO_THREAD_SAFETY_ANALYSIS due to different control paths depending on fast JNI. +template <bool kDynamicFast> +ALWAYS_INLINE static inline void GoToRunnableFast(Thread* self) NO_THREAD_SAFETY_ANALYSIS { + if (kIsDebugBuild) { + // Should only enter here if the method is !Fast JNI or @FastNative. + ArtMethod* native_method = *self->GetManagedStack()->GetTopQuickFrame(); + + if (kDynamicFast) { + CHECK(native_method->IsFastNative()) << native_method->PrettyMethod(); + } else { + CHECK(native_method->IsAnnotatedWithFastNative()) << native_method->PrettyMethod(); + } + } + + // When we are in "fast" JNI or @FastNative, we are already Runnable. + // Only do a suspend check on the way out of JNI. + if (UNLIKELY(self->TestAllFlags())) { // In fast JNI mode we never transitioned out of runnable. Perform a suspend check if there // is a flag raised. 
DCHECK(Locks::mutator_lock_->IsSharedHeld(self)); @@ -106,20 +130,7 @@ extern void JniMethodEnd(uint32_t saved_local_ref_cookie, Thread* self) { } extern void JniMethodFastEnd(uint32_t saved_local_ref_cookie, Thread* self) { - // inlined fast version of GoToRunnable(self); - - if (kIsDebugBuild) { - ArtMethod* native_method = *self->GetManagedStack()->GetTopQuickFrame(); - CHECK(native_method->IsAnnotatedWithFastNative()) << native_method->PrettyMethod(); - } - - if (UNLIKELY(self->TestAllFlags())) { - // In fast JNI mode we never transitioned out of runnable. Perform a suspend check if there - // is a flag raised. - DCHECK(Locks::mutator_lock_->IsSharedHeld(self)); - self->CheckSuspend(); - } - + GoToRunnableFast</*kDynamicFast*/false>(self); PopLocalReferences(saved_local_ref_cookie, self); } @@ -131,10 +142,6 @@ extern void JniMethodEndSynchronized(uint32_t saved_local_ref_cookie, PopLocalReferences(saved_local_ref_cookie, self); } -// TODO: JniMethodFastEndWithReference -// (Probably don't need to have a synchronized variant since -// it already has to do atomic operations) - // Common result handling for EndWithReference. static mirror::Object* JniMethodEndWithReferenceHandleResult(jobject result, uint32_t saved_local_ref_cookie, @@ -157,6 +164,13 @@ static mirror::Object* JniMethodEndWithReferenceHandleResult(jobject result, return o.Ptr(); } +extern mirror::Object* JniMethodFastEndWithReference(jobject result, + uint32_t saved_local_ref_cookie, + Thread* self) { + GoToRunnableFast</*kDynamicFast*/false>(self); + return JniMethodEndWithReferenceHandleResult(result, saved_local_ref_cookie, self); +} + extern mirror::Object* JniMethodEndWithReference(jobject result, uint32_t saved_local_ref_cookie, Thread* self) { diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc index 03254ab8d4..cdb1051e08 100644 --- a/runtime/entrypoints_order_test.cc +++ b/runtime/entrypoints_order_test.cc @@ -122,9 +122,9 @@ class EntrypointsOrderTest : public CommonRuntimeTest { // Skip across the entrypoints structures. 
- EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_start, thread_local_pos, sizeof(void*)); EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_pos, thread_local_end, sizeof(void*)); - EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_end, thread_local_objects, sizeof(void*)); + EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_end, thread_local_start, sizeof(void*)); + EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_start, thread_local_objects, sizeof(void*)); EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_objects, mterp_current_ibase, sizeof(size_t)); EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, mterp_current_ibase, mterp_default_ibase, sizeof(void*)); EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, mterp_default_ibase, mterp_alt_ibase, sizeof(void*)); @@ -223,6 +223,8 @@ class EntrypointsOrderTest : public CommonRuntimeTest { EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodEndSynchronized, pJniMethodEndWithReference, sizeof(void*)); EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodEndWithReference, + pJniMethodFastEndWithReference, sizeof(void*)); + EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodFastEndWithReference, pJniMethodEndWithReferenceSynchronized, sizeof(void*)); EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodEndWithReferenceSynchronized, pQuickGenericJniTrampoline, sizeof(void*)); diff --git a/runtime/gc/accounting/space_bitmap.cc b/runtime/gc/accounting/space_bitmap.cc index a968343318..e2f5a1d7fc 100644 --- a/runtime/gc/accounting/space_bitmap.cc +++ b/runtime/gc/accounting/space_bitmap.cc @@ -104,6 +104,48 @@ void SpaceBitmap<kAlignment>::Clear() { } template<size_t kAlignment> +void SpaceBitmap<kAlignment>::ClearRange(const mirror::Object* begin, const mirror::Object* end) { + uintptr_t begin_offset = reinterpret_cast<uintptr_t>(begin) - heap_begin_; + uintptr_t end_offset = reinterpret_cast<uintptr_t>(end) - heap_begin_; + // Align begin and end to word boundaries. + while (begin_offset < end_offset && OffsetBitIndex(begin_offset) != 0) { + Clear(reinterpret_cast<mirror::Object*>(heap_begin_ + begin_offset)); + begin_offset += kAlignment; + } + while (begin_offset < end_offset && OffsetBitIndex(end_offset) != 0) { + end_offset -= kAlignment; + Clear(reinterpret_cast<mirror::Object*>(heap_begin_ + end_offset)); + } + const uintptr_t start_index = OffsetToIndex(begin_offset); + const uintptr_t end_index = OffsetToIndex(end_offset); + Atomic<uintptr_t>* const mem_begin = &bitmap_begin_[start_index]; + Atomic<uintptr_t>* const mem_end = &bitmap_begin_[end_index]; + Atomic<uintptr_t>* const page_begin = AlignUp(mem_begin, kPageSize); + Atomic<uintptr_t>* const page_end = AlignDown(mem_end, kPageSize); + if (!kMadviseZeroes || page_begin >= page_end) { + // No possible area to madvise. + std::fill(reinterpret_cast<uint8_t*>(mem_begin), + reinterpret_cast<uint8_t*>(mem_end), + 0); + } else { + // Spans one or more pages. 
+ DCHECK_LE(mem_begin, page_begin); + DCHECK_LE(page_begin, page_end); + DCHECK_LE(page_end, mem_end); + std::fill(reinterpret_cast<uint8_t*>(mem_begin), + reinterpret_cast<uint8_t*>(page_begin), + 0); + CHECK_NE(madvise(page_begin, + reinterpret_cast<uint8_t*>(page_end) - reinterpret_cast<uint8_t*>(page_begin), + MADV_DONTNEED), + -1) << "madvise failed"; + std::fill(reinterpret_cast<uint8_t*>(page_end), + reinterpret_cast<uint8_t*>(mem_end), + 0); + } +} + +template<size_t kAlignment> void SpaceBitmap<kAlignment>::CopyFrom(SpaceBitmap* source_bitmap) { DCHECK_EQ(Size(), source_bitmap->Size()); const size_t count = source_bitmap->Size() / sizeof(intptr_t); diff --git a/runtime/gc/accounting/space_bitmap.h b/runtime/gc/accounting/space_bitmap.h index 296663a8be..b13648894d 100644 --- a/runtime/gc/accounting/space_bitmap.h +++ b/runtime/gc/accounting/space_bitmap.h @@ -68,9 +68,13 @@ class SpaceBitmap { return static_cast<T>(index * kAlignment * kBitsPerIntPtrT); } + ALWAYS_INLINE static constexpr uintptr_t OffsetBitIndex(uintptr_t offset) { + return (offset / kAlignment) % kBitsPerIntPtrT; + } + // Bits are packed in the obvious way. static constexpr uintptr_t OffsetToMask(uintptr_t offset) { - return (static_cast<size_t>(1)) << ((offset / kAlignment) % kBitsPerIntPtrT); + return static_cast<size_t>(1) << OffsetBitIndex(offset); } bool Set(const mirror::Object* obj) ALWAYS_INLINE { @@ -87,6 +91,9 @@ class SpaceBitmap { // Fill the bitmap with zeroes. Returns the bitmap's memory to the system as a side-effect. void Clear(); + // Clear a covered by the bitmap using madvise if possible. + void ClearRange(const mirror::Object* begin, const mirror::Object* end); + bool Test(const mirror::Object* obj) const; // Return true iff <obj> is within the range of pointers that this bitmap could potentially cover, diff --git a/runtime/gc/accounting/space_bitmap_test.cc b/runtime/gc/accounting/space_bitmap_test.cc index edb08ef3d9..8c06cfd640 100644 --- a/runtime/gc/accounting/space_bitmap_test.cc +++ b/runtime/gc/accounting/space_bitmap_test.cc @@ -62,7 +62,7 @@ TEST_F(SpaceBitmapTest, ScanRange) { std::unique_ptr<ContinuousSpaceBitmap> space_bitmap( ContinuousSpaceBitmap::Create("test bitmap", heap_begin, heap_capacity)); - EXPECT_TRUE(space_bitmap.get() != nullptr); + EXPECT_TRUE(space_bitmap != nullptr); // Set all the odd bits in the first BitsPerIntPtrT * 3 to one. for (size_t j = 0; j < kBitsPerIntPtrT * 3; ++j) { @@ -87,6 +87,48 @@ TEST_F(SpaceBitmapTest, ScanRange) { } } +TEST_F(SpaceBitmapTest, ClearRange) { + uint8_t* heap_begin = reinterpret_cast<uint8_t*>(0x10000000); + size_t heap_capacity = 16 * MB; + + std::unique_ptr<ContinuousSpaceBitmap> bitmap( + ContinuousSpaceBitmap::Create("test bitmap", heap_begin, heap_capacity)); + EXPECT_TRUE(bitmap != nullptr); + + // Set all of the bits in the bitmap. + for (size_t j = 0; j < heap_capacity; j += kObjectAlignment) { + const mirror::Object* obj = reinterpret_cast<mirror::Object*>(heap_begin + j); + bitmap->Set(obj); + } + + std::vector<std::pair<uintptr_t, uintptr_t>> ranges = { + {0, 10 * KB + kObjectAlignment}, + {kObjectAlignment, kObjectAlignment}, + {kObjectAlignment, 2 * kObjectAlignment}, + {kObjectAlignment, 5 * kObjectAlignment}, + {1 * KB + kObjectAlignment, 2 * KB + 5 * kObjectAlignment}, + }; + // Try clearing a few ranges. 
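(Sketch, not from the patch: before the loop that follows exercises these ranges, here is the new ClearRange contract shown in isolation. It assumes the ART accounting headers and the kObjectAlignment constant from globals.h; the function name and addresses are made up for illustration.)

    #include <cstdint>
    #include <memory>
    #include "gc/accounting/space_bitmap.h"

    // Minimal sketch: mark two adjacent objects, then clear a range that covers only
    // the first one. ClearRange(begin, end) clears [begin, end): unaligned edges are
    // cleared bit by bit, and any whole interior pages are released with madvise.
    void ClearRangeSketch() {
      using art::gc::accounting::ContinuousSpaceBitmap;
      uint8_t* heap_begin = reinterpret_cast<uint8_t*>(0x10000000);
      std::unique_ptr<ContinuousSpaceBitmap> bitmap(
          ContinuousSpaceBitmap::Create("sketch bitmap", heap_begin, 16 * 1024 * 1024));
      auto* first = reinterpret_cast<art::mirror::Object*>(heap_begin);
      auto* second = reinterpret_cast<art::mirror::Object*>(heap_begin + art::kObjectAlignment);
      bitmap->Set(first);
      bitmap->Set(second);
      bitmap->ClearRange(first, second);  // Clears only the bit for 'first'.
      // Expected afterwards: !bitmap->Test(first) && bitmap->Test(second).
    }

This is the property that ClearFromSpace() relies on later in the patch, where runs of fully live unevacuated regions are coalesced and their bitmap words are dropped a page at a time.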
+ for (const std::pair<uintptr_t, uintptr_t>& range : ranges) { + const mirror::Object* obj_begin = reinterpret_cast<mirror::Object*>(heap_begin + range.first); + const mirror::Object* obj_end = reinterpret_cast<mirror::Object*>(heap_begin + range.second); + bitmap->ClearRange(obj_begin, obj_end); + // Boundaries should still be marked. + for (uintptr_t i = 0; i < range.first; i += kObjectAlignment) { + EXPECT_TRUE(bitmap->Test(reinterpret_cast<mirror::Object*>(heap_begin + i))); + } + for (uintptr_t i = range.second; i < range.second + kPageSize; i += kObjectAlignment) { + EXPECT_TRUE(bitmap->Test(reinterpret_cast<mirror::Object*>(heap_begin + i))); + } + // Everything inside should be cleared. + for (uintptr_t i = range.first; i < range.second; i += kObjectAlignment) { + EXPECT_FALSE(bitmap->Test(reinterpret_cast<mirror::Object*>(heap_begin + i))); + bitmap->Set(reinterpret_cast<mirror::Object*>(heap_begin + i)); + } + } +} + + class SimpleCounter { public: explicit SimpleCounter(size_t* counter) : count_(counter) {} diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc index 1931caf65e..13af67eb3e 100644 --- a/runtime/gc/collector/concurrent_copying.cc +++ b/runtime/gc/collector/concurrent_copying.cc @@ -170,10 +170,10 @@ void ConcurrentCopying::BindBitmaps() { CHECK(space->IsZygoteSpace() || space->IsImageSpace()); immune_spaces_.AddSpace(space); } else if (space == region_space_) { - accounting::ContinuousSpaceBitmap* bitmap = - accounting::ContinuousSpaceBitmap::Create("cc region space bitmap", - space->Begin(), space->Capacity()); - region_space_bitmap_ = bitmap; + // It is OK to clear the bitmap with mutators running since the only place it is read is + // VisitObjects which has exclusion with CC. + region_space_bitmap_ = region_space_->GetMarkBitmap(); + region_space_bitmap_->Clear(); } } } @@ -1601,9 +1601,8 @@ void ConcurrentCopying::ReclaimPhase() { SwapBitmaps(); heap_->UnBindBitmaps(); - // Delete the region bitmap. + // The bitmap was cleared at the start of the GC, there is nothing we need to do here. DCHECK(region_space_bitmap_ != nullptr); - delete region_space_bitmap_; region_space_bitmap_ = nullptr; } diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc index ba18699168..918b8db301 100644 --- a/runtime/gc/heap.cc +++ b/runtime/gc/heap.cc @@ -2490,6 +2490,8 @@ void Heap::PreZygoteFork() { } else { if (collector_type_ == kCollectorTypeCC) { region_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE); + // Evacuated everything out of the region space, clear the mark bitmap. + region_space_->GetMarkBitmap()->Clear(); } else { bump_pointer_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE); } diff --git a/runtime/gc/space/region_space-inl.h b/runtime/gc/space/region_space-inl.h index 66fd62cee1..bbc634dbf3 100644 --- a/runtime/gc/space/region_space-inl.h +++ b/runtime/gc/space/region_space-inl.h @@ -241,15 +241,28 @@ void RegionSpace::WalkInternal(ObjectCallback* callback, void* arg) { } else if (r->IsLargeTail()) { // Do nothing. } else { + // For newly allocated and evacuated regions, live bytes will be -1. 
uint8_t* pos = r->Begin(); uint8_t* top = r->Top(); - while (pos < top) { - mirror::Object* obj = reinterpret_cast<mirror::Object*>(pos); - if (obj->GetClass<kDefaultVerifyFlags, kWithoutReadBarrier>() != nullptr) { + const bool need_bitmap = + r->LiveBytes() != static_cast<size_t>(-1) && + r->LiveBytes() != static_cast<size_t>(top - pos); + if (need_bitmap) { + GetLiveBitmap()->VisitMarkedRange( + reinterpret_cast<uintptr_t>(pos), + reinterpret_cast<uintptr_t>(top), + [callback, arg](mirror::Object* obj) { callback(obj, arg); - pos = reinterpret_cast<uint8_t*>(GetNextObject(obj)); - } else { - break; + }); + } else { + while (pos < top) { + mirror::Object* obj = reinterpret_cast<mirror::Object*>(pos); + if (obj->GetClass<kDefaultVerifyFlags, kWithoutReadBarrier>() != nullptr) { + callback(obj, arg); + pos = reinterpret_cast<uint8_t*>(GetNextObject(obj)); + } else { + break; + } } } } diff --git a/runtime/gc/space/region_space.cc b/runtime/gc/space/region_space.cc index 23cae7c821..35bc36988a 100644 --- a/runtime/gc/space/region_space.cc +++ b/runtime/gc/space/region_space.cc @@ -59,6 +59,8 @@ RegionSpace::RegionSpace(const std::string& name, MemMap* mem_map) for (size_t i = 0; i < num_regions_; ++i, region_addr += kRegionSize) { regions_[i] = Region(i, region_addr, region_addr + kRegionSize); } + mark_bitmap_.reset( + accounting::ContinuousSpaceBitmap::Create("region space live bitmap", Begin(), Capacity())); if (kIsDebugBuild) { CHECK_EQ(regions_[0].Begin(), Begin()); for (size_t i = 0; i < num_regions_; ++i) { @@ -215,7 +217,28 @@ void RegionSpace::ClearFromSpace() { r->Clear(); --num_non_free_regions_; } else if (r->IsInUnevacFromSpace()) { + size_t full_count = 0; + while (r->IsInUnevacFromSpace()) { + Region* const cur = ®ions_[i + full_count]; + if (i + full_count >= num_regions_ || + cur->LiveBytes() != static_cast<size_t>(cur->Top() - cur->Begin())) { + break; + } + if (full_count != 0) { + cur->SetUnevacFromSpaceAsToSpace(); + } + ++full_count; + } + // Note that r is the full_count == 0 iteration since it is not handled by the loop. r->SetUnevacFromSpaceAsToSpace(); + if (full_count >= 1) { + GetLiveBitmap()->ClearRange( + reinterpret_cast<mirror::Object*>(r->Begin()), + reinterpret_cast<mirror::Object*>(r->Begin() + full_count * kRegionSize)); + // Skip over extra regions we cleared. + // Subtract one for the for loop. + i += full_count - 1; + } } } evac_region_ = nullptr; diff --git a/runtime/gc/space/region_space.h b/runtime/gc/space/region_space.h index 4e57a856c5..381ccfac5d 100644 --- a/runtime/gc/space/region_space.h +++ b/runtime/gc/space/region_space.h @@ -77,12 +77,10 @@ class RegionSpace FINAL : public ContinuousMemMapAllocSpace { return 0; } accounting::ContinuousSpaceBitmap* GetLiveBitmap() const OVERRIDE { - // No live bitmap. - return nullptr; + return mark_bitmap_.get(); } accounting::ContinuousSpaceBitmap* GetMarkBitmap() const OVERRIDE { - // No mark bitmap. - return nullptr; + return mark_bitmap_.get(); } void Clear() OVERRIDE REQUIRES(!region_lock_); @@ -516,6 +514,9 @@ class RegionSpace FINAL : public ContinuousMemMapAllocSpace { Region* evac_region_; // The region that's being evacuated to currently. Region full_region_; // The dummy/sentinel region that looks full. + // Mark bitmap used by the GC. 
+ std::unique_ptr<accounting::ContinuousSpaceBitmap> mark_bitmap_; + DISALLOW_COPY_AND_ASSIGN(RegionSpace); }; diff --git a/runtime/indirect_reference_table.cc b/runtime/indirect_reference_table.cc index 130c10d322..7389c73096 100644 --- a/runtime/indirect_reference_table.cc +++ b/runtime/indirect_reference_table.cc @@ -60,27 +60,24 @@ void IndirectReferenceTable::AbortIfNoCheckJNI(const std::string& msg) { IndirectReferenceTable::IndirectReferenceTable(size_t max_count, IndirectRefKind desired_kind, - bool abort_on_error) + std::string* error_msg) : kind_(desired_kind), max_entries_(max_count) { + CHECK(error_msg != nullptr); CHECK_NE(desired_kind, kHandleScopeOrInvalid); - std::string error_str; const size_t table_bytes = max_count * sizeof(IrtEntry); table_mem_map_.reset(MemMap::MapAnonymous("indirect ref table", nullptr, table_bytes, - PROT_READ | PROT_WRITE, false, false, &error_str)); - if (abort_on_error) { - CHECK(table_mem_map_.get() != nullptr) << error_str; - CHECK_EQ(table_mem_map_->Size(), table_bytes); - CHECK(table_mem_map_->Begin() != nullptr); - } else if (table_mem_map_.get() == nullptr || - table_mem_map_->Size() != table_bytes || - table_mem_map_->Begin() == nullptr) { - table_mem_map_.reset(); - LOG(ERROR) << error_str; - return; + PROT_READ | PROT_WRITE, false, false, error_msg)); + if (table_mem_map_.get() == nullptr && error_msg->empty()) { + *error_msg = "Unable to map memory for indirect ref table"; + } + + if (table_mem_map_.get() != nullptr) { + table_ = reinterpret_cast<IrtEntry*>(table_mem_map_->Begin()); + } else { + table_ = nullptr; } - table_ = reinterpret_cast<IrtEntry*>(table_mem_map_->Begin()); segment_state_.all = IRT_FIRST_SEGMENT; } diff --git a/runtime/indirect_reference_table.h b/runtime/indirect_reference_table.h index 1762b10350..363280a87c 100644 --- a/runtime/indirect_reference_table.h +++ b/runtime/indirect_reference_table.h @@ -257,12 +257,24 @@ bool inline operator!=(const IrtIterator& lhs, const IrtIterator& rhs) { class IndirectReferenceTable { public: - // WARNING: When using with abort_on_error = false, the object may be in a partially - // initialized state. Use IsValid() to check. - IndirectReferenceTable(size_t max_count, IndirectRefKind kind, bool abort_on_error = true); + /* + * WARNING: Construction of the IndirectReferenceTable may fail. + * error_msg must not be null. If error_msg is set by the constructor, then + * construction has failed and the IndirectReferenceTable will be in an + * invalid state. Use IsValid to check whether the object is in an invalid + * state. + */ + IndirectReferenceTable(size_t max_count, IndirectRefKind kind, std::string* error_msg); ~IndirectReferenceTable(); + /* + * Checks whether construction of the IndirectReferenceTable succeeded. + * + * This object must only be used if IsValid() returns true. It is safe to + * call IsValid from multiple threads without locking or other explicit + * synchronization. 
+ */ bool IsValid() const; /* diff --git a/runtime/indirect_reference_table_test.cc b/runtime/indirect_reference_table_test.cc index 7b28f0bda8..d7026de559 100644 --- a/runtime/indirect_reference_table_test.cc +++ b/runtime/indirect_reference_table_test.cc @@ -49,7 +49,9 @@ TEST_F(IndirectReferenceTableTest, BasicTest) { ScopedObjectAccess soa(Thread::Current()); static const size_t kTableMax = 20; - IndirectReferenceTable irt(kTableMax, kGlobal); + std::string error_msg; + IndirectReferenceTable irt(kTableMax, kGlobal, &error_msg); + ASSERT_TRUE(irt.IsValid()) << error_msg; mirror::Class* c = class_linker_->FindSystemClass(soa.Self(), "Ljava/lang/Object;"); StackHandleScope<4> hs(soa.Self()); diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc index 09d11678f2..b71236b511 100644 --- a/runtime/interpreter/interpreter_common.cc +++ b/runtime/interpreter/interpreter_common.cc @@ -667,11 +667,39 @@ inline bool DoInvokePolymorphic(Thread* self, ShadowFrame& shadow_frame, return false; } } else if (handle_kind == kInvokeDirect) { - // TODO(narayan) : We need to handle the case where the target method is a - // constructor here. Also the case where we don't want to dynamically - // dispatch based on the type of the receiver. - UNIMPLEMENTED(FATAL) << "Direct invokes are not implemented yet."; - return false; + if (called_method->IsConstructor()) { + // TODO(narayan) : We need to handle the case where the target method is a + // constructor here. + UNIMPLEMENTED(FATAL) << "Direct invokes for constructors are not implemented yet."; + return false; + } + + // Nothing special to do in the case where we're not dealing with a + // constructor. It's a private method, and we've already access checked at + // the point of creating the handle. + } else if (handle_kind == kInvokeSuper) { + mirror::Class* declaring_class = called_method->GetDeclaringClass(); + + // Note that we're not dynamically dispatching on the type of the receiver + // here. We use the static type of the "receiver" object that we've + // recorded in the method handle's type, which will be the same as the + // special caller that was specified at the point of lookup. + mirror::Class* referrer_class = handle_type->GetPTypes()->Get(0); + if (!declaring_class->IsInterface()) { + mirror::Class* super_class = referrer_class->GetSuperClass(); + uint16_t vtable_index = called_method->GetMethodIndex(); + DCHECK(super_class != nullptr); + DCHECK(super_class->HasVTable()); + // Note that super_class is a super of referrer_class and called_method + // will always be declared by super_class (or one of its super classes). + DCHECK_LT(vtable_index, super_class->GetVTableLength()); + called_method = super_class->GetVTableEntry(vtable_index, kRuntimePointerSize); + } else { + called_method = referrer_class->FindVirtualMethodForInterfaceSuper( + called_method, kRuntimePointerSize); + } + + CHECK(called_method != nullptr); } // NOTE: handle_kind == kInvokeStatic needs no special treatment here. 
We diff --git a/runtime/java_vm_ext.cc b/runtime/java_vm_ext.cc index 7285b9a965..9b4327f137 100644 --- a/runtime/java_vm_ext.cc +++ b/runtime/java_vm_ext.cc @@ -411,7 +411,9 @@ const JNIInvokeInterface gJniInvokeInterface = { JII::AttachCurrentThreadAsDaemon }; -JavaVMExt::JavaVMExt(Runtime* runtime, const RuntimeArgumentMap& runtime_options) +JavaVMExt::JavaVMExt(Runtime* runtime, + const RuntimeArgumentMap& runtime_options, + std::string* error_msg) : runtime_(runtime), check_jni_abort_hook_(nullptr), check_jni_abort_hook_data_(nullptr), @@ -420,10 +422,10 @@ JavaVMExt::JavaVMExt(Runtime* runtime, const RuntimeArgumentMap& runtime_options tracing_enabled_(runtime_options.Exists(RuntimeArgumentMap::JniTrace) || VLOG_IS_ON(third_party_jni)), trace_(runtime_options.GetOrDefault(RuntimeArgumentMap::JniTrace)), - globals_(kGlobalsMax, kGlobal), + globals_(kGlobalsMax, kGlobal, error_msg), libraries_(new Libraries), unchecked_functions_(&gJniInvokeInterface), - weak_globals_(kWeakGlobalsMax, kWeakGlobal), + weak_globals_(kWeakGlobalsMax, kWeakGlobal, error_msg), allow_accessing_weak_globals_(true), weak_globals_add_condition_("weak globals add condition", (CHECK(Locks::jni_weak_globals_lock_ != nullptr), @@ -436,6 +438,19 @@ JavaVMExt::JavaVMExt(Runtime* runtime, const RuntimeArgumentMap& runtime_options JavaVMExt::~JavaVMExt() { } +// Checking "globals" and "weak_globals" usually requires locks, but we +// don't need the locks to check for validity when constructing the +// object. Use NO_THREAD_SAFETY_ANALYSIS for this. +std::unique_ptr<JavaVMExt> JavaVMExt::Create(Runtime* runtime, + const RuntimeArgumentMap& runtime_options, + std::string* error_msg) NO_THREAD_SAFETY_ANALYSIS { + std::unique_ptr<JavaVMExt> java_vm(new JavaVMExt(runtime, runtime_options, error_msg)); + if (java_vm && java_vm->globals_.IsValid() && java_vm->weak_globals_.IsValid()) { + return java_vm; + } + return nullptr; +} + jint JavaVMExt::HandleGetEnv(/*out*/void** env, jint version) { for (GetEnvHook hook : env_hooks_) { jint res = hook(this, env, version); diff --git a/runtime/java_vm_ext.h b/runtime/java_vm_ext.h index 05717f41e7..9e37f1178c 100644 --- a/runtime/java_vm_ext.h +++ b/runtime/java_vm_ext.h @@ -43,7 +43,14 @@ using GetEnvHook = jint (*)(JavaVMExt* vm, /*out*/void** new_env, jint version); class JavaVMExt : public JavaVM { public: - JavaVMExt(Runtime* runtime, const RuntimeArgumentMap& runtime_options); + // Creates a new JavaVMExt object. + // Returns nullptr on error, in which case error_msg is set to a message + // describing the error. + static std::unique_ptr<JavaVMExt> Create(Runtime* runtime, + const RuntimeArgumentMap& runtime_options, + std::string* error_msg); + + ~JavaVMExt(); bool ForceCopy() const { @@ -192,6 +199,10 @@ class JavaVMExt : public JavaVM { static bool IsBadJniVersion(int version); private: + // The constructor should not be called directly. It may leave the object in + // an erroneous state, and the result needs to be checked. + JavaVMExt(Runtime* runtime, const RuntimeArgumentMap& runtime_options, std::string* error_msg); + // Return true if self can currently access weak globals. 
bool MayAccessWeakGlobalsUnlocked(Thread* self) const REQUIRES_SHARED(Locks::mutator_lock_); bool MayAccessWeakGlobals(Thread* self) const diff --git a/runtime/jni_env_ext.cc b/runtime/jni_env_ext.cc index 1ca2cb4146..8eca8fcba9 100644 --- a/runtime/jni_env_ext.cc +++ b/runtime/jni_env_ext.cc @@ -57,19 +57,19 @@ jint JNIEnvExt::GetEnvHandler(JavaVMExt* vm, /*out*/void** env, jint version) { return JNI_OK; } -JNIEnvExt* JNIEnvExt::Create(Thread* self_in, JavaVMExt* vm_in) { - std::unique_ptr<JNIEnvExt> ret(new JNIEnvExt(self_in, vm_in)); +JNIEnvExt* JNIEnvExt::Create(Thread* self_in, JavaVMExt* vm_in, std::string* error_msg) { + std::unique_ptr<JNIEnvExt> ret(new JNIEnvExt(self_in, vm_in, error_msg)); if (CheckLocalsValid(ret.get())) { return ret.release(); } return nullptr; } -JNIEnvExt::JNIEnvExt(Thread* self_in, JavaVMExt* vm_in) +JNIEnvExt::JNIEnvExt(Thread* self_in, JavaVMExt* vm_in, std::string* error_msg) : self(self_in), vm(vm_in), local_ref_cookie(IRT_FIRST_SEGMENT), - locals(kLocalsInitial, kLocal, false), + locals(kLocalsInitial, kLocal, error_msg), check_jni(false), runtime_deleted(false), critical(0), diff --git a/runtime/jni_env_ext.h b/runtime/jni_env_ext.h index 549f8c56a0..e89debbf90 100644 --- a/runtime/jni_env_ext.h +++ b/runtime/jni_env_ext.h @@ -34,7 +34,9 @@ class JavaVMExt; static constexpr size_t kLocalsInitial = 512; struct JNIEnvExt : public JNIEnv { - static JNIEnvExt* Create(Thread* self, JavaVMExt* vm); + // Creates a new JNIEnvExt. Returns null on error, in which case error_msg + // will contain a description of the error. + static JNIEnvExt* Create(Thread* self, JavaVMExt* vm, std::string* error_msg); ~JNIEnvExt(); @@ -103,9 +105,9 @@ struct JNIEnvExt : public JNIEnv { void SetFunctionsToRuntimeShutdownFunctions(); private: - // The constructor should not be called directly. It may leave the object in an erronuous state, + // The constructor should not be called directly. It may leave the object in an erroneous state, // and the result needs to be checked. - JNIEnvExt(Thread* self, JavaVMExt* vm); + JNIEnvExt(Thread* self, JavaVMExt* vm, std::string* error_msg); // All locked objects, with the (Java caller) stack frame that locked them. Used in CheckJNI // to ensure that only monitors locked in this native frame are being unlocked, and that at diff --git a/runtime/jni_internal_test.cc b/runtime/jni_internal_test.cc index c6d5c9ea61..9479a181c6 100644 --- a/runtime/jni_internal_test.cc +++ b/runtime/jni_internal_test.cc @@ -2307,7 +2307,9 @@ TEST_F(JniInternalTest, IndirectReferenceTableOffsets) { // The segment_state_ field is private, and we want to avoid friend declaration. So we'll check // by modifying memory. // The parameters don't really matter here. - IndirectReferenceTable irt(5, IndirectRefKind::kGlobal, true); + std::string error_msg; + IndirectReferenceTable irt(5, IndirectRefKind::kGlobal, &error_msg); + ASSERT_TRUE(irt.IsValid()) << error_msg; uint32_t old_state = irt.GetSegmentState(); // Write some new state directly. We invert parts of old_state to ensure a new value. 
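(Sketch, not from the patch: with abort_on_error gone, every construction site of IndirectReferenceTable now follows the construct-then-check pattern the test above uses. The include paths, table size and logging below are illustrative.)

    #include <string>
    #include "base/logging.h"
    #include "indirect_reference_table.h"

    // Construct-then-check: the constructor reports a failed memory mapping through
    // error_msg instead of CHECK-failing, and the table must only be used if
    // IsValid() returns true.
    bool CreateGlobalRefTable() {
      std::string error_msg;
      art::IndirectReferenceTable table(/* max_count= */ 512, art::kGlobal, &error_msg);
      if (!table.IsValid()) {
        LOG(ERROR) << "Could not allocate indirect reference table: " << error_msg;
        return false;
      }
      // Safe to use 'table' from here on.
      return true;
    }

The JavaVMExt and JNIEnvExt changes above thread the same error_msg through to the tables they own, which is what lets the new JavaVMExt::Create factory report an allocation failure instead of aborting.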
diff --git a/runtime/leb128.h b/runtime/leb128.h index 74934aebf0..31459af3a0 100644 --- a/runtime/leb128.h +++ b/runtime/leb128.h @@ -53,6 +53,49 @@ static inline uint32_t DecodeUnsignedLeb128(const uint8_t** data) { return static_cast<uint32_t>(result); } +static inline bool DecodeUnsignedLeb128Checked(const uint8_t** data, + const void* end, + uint32_t* out) { + const uint8_t* ptr = *data; + if (ptr >= end) { + return false; + } + int result = *(ptr++); + if (UNLIKELY(result > 0x7f)) { + if (ptr >= end) { + return false; + } + int cur = *(ptr++); + result = (result & 0x7f) | ((cur & 0x7f) << 7); + if (cur > 0x7f) { + if (ptr >= end) { + return false; + } + cur = *(ptr++); + result |= (cur & 0x7f) << 14; + if (cur > 0x7f) { + if (ptr >= end) { + return false; + } + cur = *(ptr++); + result |= (cur & 0x7f) << 21; + if (cur > 0x7f) { + if (ptr >= end) { + return false; + } + // Note: We don't check to see if cur is out of range here, + // meaning we tolerate garbage in the four high-order bits. + cur = *(ptr++); + result |= cur << 28; + } + } + } + } + *data = ptr; + *out = static_cast<uint32_t>(result); + return true; +} + // Reads an unsigned LEB128 + 1 value. updating the given pointer to point // just past the end of the read value. This function tolerates // non-zero high-order bits in the fifth encoded byte. @@ -97,6 +140,57 @@ static inline int32_t DecodeSignedLeb128(const uint8_t** data) { return result; } +static inline bool DecodeSignedLeb128Checked(const uint8_t** data, + const void* end, + int32_t* out) { + const uint8_t* ptr = *data; + if (ptr >= end) { + return false; + } + int32_t result = *(ptr++); + if (result <= 0x7f) { + result = (result << 25) >> 25; + } else { + if (ptr >= end) { + return false; + } + int cur = *(ptr++); + result = (result & 0x7f) | ((cur & 0x7f) << 7); + if (cur <= 0x7f) { + result = (result << 18) >> 18; + } else { + if (ptr >= end) { + return false; + } + cur = *(ptr++); + result |= (cur & 0x7f) << 14; + if (cur <= 0x7f) { + result = (result << 11) >> 11; + } else { + if (ptr >= end) { + return false; + } + cur = *(ptr++); + result |= (cur & 0x7f) << 21; + if (cur <= 0x7f) { + result = (result << 4) >> 4; + } else { + if (ptr >= end) { + return false; + } + // Note: We don't check to see if cur is out of range here, + // meaning we tolerate garbage in the four high-order bits. + cur = *(ptr++); + result |= cur << 28; + } + } + } + } + *data = ptr; + *out = static_cast<uint32_t>(result); + return true; +} + // Returns the number of bytes needed to encode the value in unsigned LEB128. static inline uint32_t UnsignedLeb128Size(uint32_t data) { // bits_to_encode = (data != 0) ? 
32 - CLZ(x) : 1 // 32 - CLZ(data | 1) diff --git a/runtime/mirror/dex_cache-inl.h b/runtime/mirror/dex_cache-inl.h index be849a330c..df3865b1fe 100644 --- a/runtime/mirror/dex_cache-inl.h +++ b/runtime/mirror/dex_cache-inl.h @@ -24,6 +24,7 @@ #include "base/casts.h" #include "base/enums.h" #include "base/logging.h" +#include "gc_root.h" #include "mirror/class.h" #include "mirror/method_type.h" #include "runtime.h" @@ -159,6 +160,33 @@ inline void DexCache::SetElementPtrSize(PtrType* ptr_array, } } +template <typename T, + ReadBarrierOption kReadBarrierOption, + typename Visitor> +inline void VisitDexCachePairs(std::atomic<DexCachePair<T>>* pairs, + size_t num_pairs, + const Visitor& visitor) + REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_) { + for (size_t i = 0; i < num_pairs; ++i) { + DexCachePair<T> source = pairs[i].load(std::memory_order_relaxed); + // NOTE: We need the "template" keyword here to avoid a compilation + // failure. GcRoot<T> is a template argument-dependent type and we need to + // tell the compiler to treat "Read" as a template rather than a field or + // function. Otherwise, on encountering the "<" token, the compiler would + // treat "Read" as a field. + T* before = source.object.template Read<kReadBarrierOption>(); + // TODO(narayan): This additional GC root construction and assignment + // is unnecessary. We're already operating on a copy of the DexCachePair + // that's in the cache. + GcRoot<T> root(before); + visitor.VisitRootIfNonNull(root.AddressWithoutBarrier()); + if (root.Read() != before) { + source.object = GcRoot<T>(root.Read()); + pairs[i].store(source, std::memory_order_relaxed); + } + } +} + template <bool kVisitNativeRoots, VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption, @@ -168,21 +196,16 @@ inline void DexCache::VisitReferences(ObjPtr<Class> klass, const Visitor& visito VisitInstanceFieldsReferences<kVerifyFlags, kReadBarrierOption>(klass, visitor); // Visit arrays after. 
if (kVisitNativeRoots) { - mirror::StringDexCacheType* strings = GetStrings(); - for (size_t i = 0, num_strings = NumStrings(); i != num_strings; ++i) { - StringDexCachePair source = strings[i].load(std::memory_order_relaxed); - mirror::String* before = source.object.Read<kReadBarrierOption>(); - GcRoot<mirror::String> root(before); - visitor.VisitRootIfNonNull(root.AddressWithoutBarrier()); - if (root.Read() != before) { - source.object = GcRoot<String>(root.Read()); - strings[i].store(source, std::memory_order_relaxed); - } - } + VisitDexCachePairs<mirror::String, kReadBarrierOption, Visitor>( + GetStrings(), NumStrings(), visitor); + GcRoot<mirror::Class>* resolved_types = GetResolvedTypes(); for (size_t i = 0, num_types = NumResolvedTypes(); i != num_types; ++i) { visitor.VisitRootIfNonNull(resolved_types[i].AddressWithoutBarrier()); } + + VisitDexCachePairs<mirror::MethodType, kReadBarrierOption, Visitor>( + GetResolvedMethodTypes(), NumResolvedMethodTypes(), visitor); } } diff --git a/runtime/oat.h b/runtime/oat.h index 4d8687cebf..814a4934e7 100644 --- a/runtime/oat.h +++ b/runtime/oat.h @@ -32,7 +32,7 @@ class InstructionSetFeatures; class PACKED(4) OatHeader { public: static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' }; - static constexpr uint8_t kOatVersion[] = { '0', '8', '9', '\0' }; + static constexpr uint8_t kOatVersion[] = { '0', '9', '0', '\0' }; static constexpr const char* kImageLocationKey = "image-location"; static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline"; diff --git a/runtime/runtime.cc b/runtime/runtime.cc index 6e15c38a53..bde41858cf 100644 --- a/runtime/runtime.cc +++ b/runtime/runtime.cc @@ -347,13 +347,13 @@ Runtime::~Runtime() { delete class_linker_; delete heap_; delete intern_table_; - delete java_vm_; delete oat_file_manager_; Thread::Shutdown(); QuasiAtomic::Shutdown(); verifier::MethodVerifier::Shutdown(); // Destroy allocators before shutting down the MemMap because they may use it. + java_vm_.reset(); linear_alloc_.reset(); low_4gb_arena_pool_.reset(); arena_pool_.reset(); @@ -1120,7 +1120,12 @@ bool Runtime::Init(RuntimeArgumentMap&& runtime_options_in) { } } - java_vm_ = new JavaVMExt(this, runtime_options); + std::string error_msg; + java_vm_ = JavaVMExt::Create(this, runtime_options, &error_msg); + if (java_vm_.get() == nullptr) { + LOG(ERROR) << "Could not initialize JavaVMExt: " << error_msg; + return false; + } // Add the JniEnv handler. // TODO Refactor this stuff. 
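(Sketch, not from the patch: the new factory shown in isolation. Runtime, RuntimeArgumentMap and the include paths are taken from the surrounding code; the helper name is made up.)

    #include <memory>
    #include <string>
    #include "base/logging.h"
    #include "java_vm_ext.h"

    // JavaVMExt is no longer constructed directly: Create() returns null when either
    // of the VM's reference tables (globals_ or weak_globals_) could not be mapped,
    // and reports the reason through error_msg.
    std::unique_ptr<art::JavaVMExt> TryCreateJavaVm(art::Runtime* runtime,
                                                    const art::RuntimeArgumentMap& options) {
      std::string error_msg;
      std::unique_ptr<art::JavaVMExt> vm = art::JavaVMExt::Create(runtime, options, &error_msg);
      if (vm == nullptr) {
        LOG(ERROR) << "Could not initialize JavaVMExt: " << error_msg;
      }
      return vm;
    }

The matching ownership change appears in the runtime.h hunk below: java_vm_ becomes a std::unique_ptr, which is why the destructor above drops the explicit delete and instead resets the VM before the MemMap teardown.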
@@ -1144,7 +1149,6 @@ bool Runtime::Init(RuntimeArgumentMap&& runtime_options_in) { CHECK_GE(GetHeap()->GetContinuousSpaces().size(), 1U); class_linker_ = new ClassLinker(intern_table_); if (GetHeap()->HasBootImageSpace()) { - std::string error_msg; bool result = class_linker_->InitFromBootImage(&error_msg); if (!result) { LOG(ERROR) << "Could not initialize from image: " << error_msg; @@ -1191,7 +1195,6 @@ bool Runtime::Init(RuntimeArgumentMap&& runtime_options_in) { &boot_class_path); } instruction_set_ = runtime_options.GetOrDefault(Opt::ImageInstructionSet); - std::string error_msg; if (!class_linker_->InitWithoutImage(std::move(boot_class_path), &error_msg)) { LOG(ERROR) << "Could not initialize without image: " << error_msg; return false; diff --git a/runtime/runtime.h b/runtime/runtime.h index e2ba2626e9..7cb87abe30 100644 --- a/runtime/runtime.h +++ b/runtime/runtime.h @@ -277,7 +277,7 @@ class Runtime { } JavaVMExt* GetJavaVM() const { - return java_vm_; + return java_vm_.get(); } size_t GetMaxSpinsBeforeThinkLockInflation() const { @@ -757,7 +757,7 @@ class Runtime { SignalCatcher* signal_catcher_; std::string stack_trace_file_; - JavaVMExt* java_vm_; + std::unique_ptr<JavaVMExt> java_vm_; std::unique_ptr<jit::Jit> jit_; std::unique_ptr<jit::JitOptions> jit_options_; diff --git a/runtime/thread.cc b/runtime/thread.cc index 39fe8d09c1..e47ccc062b 100644 --- a/runtime/thread.cc +++ b/runtime/thread.cc @@ -606,8 +606,9 @@ void Thread::CreateNativeThread(JNIEnv* env, jobject java_peer, size_t stack_siz // Try to allocate a JNIEnvExt for the thread. We do this here as we might be out of memory and // do not have a good way to report this on the child's side. + std::string error_msg; std::unique_ptr<JNIEnvExt> child_jni_env_ext( - JNIEnvExt::Create(child_thread, Runtime::Current()->GetJavaVM())); + JNIEnvExt::Create(child_thread, Runtime::Current()->GetJavaVM(), &error_msg)); int pthread_create_result = 0; if (child_jni_env_ext.get() != nullptr) { @@ -648,7 +649,7 @@ void Thread::CreateNativeThread(JNIEnv* env, jobject java_peer, size_t stack_siz env->SetLongField(java_peer, WellKnownClasses::java_lang_Thread_nativePeer, 0); { std::string msg(child_jni_env_ext.get() == nullptr ? 
- "Could not allocate JNI Env" : + StringPrintf("Could not allocate JNI Env: %s", error_msg.c_str()) : StringPrintf("pthread_create (%s stack) failed: %s", PrettySize(stack_size).c_str(), strerror(pthread_create_result))); ScopedObjectAccess soa(env); @@ -693,8 +694,10 @@ bool Thread::Init(ThreadList* thread_list, JavaVMExt* java_vm, JNIEnvExt* jni_en DCHECK_EQ(jni_env_ext->self, this); tlsPtr_.jni_env = jni_env_ext; } else { - tlsPtr_.jni_env = JNIEnvExt::Create(this, java_vm); + std::string error_msg; + tlsPtr_.jni_env = JNIEnvExt::Create(this, java_vm, &error_msg); if (tlsPtr_.jni_env == nullptr) { + LOG(ERROR) << "Failed to create JNIEnvExt: " << error_msg; return false; } } diff --git a/runtime/thread.h b/runtime/thread.h index 6f5913e6b3..24038f5475 100644 --- a/runtime/thread.h +++ b/runtime/thread.h @@ -1392,7 +1392,7 @@ class Thread { stacked_shadow_frame_record(nullptr), deoptimization_context_stack(nullptr), frame_id_to_shadow_frame(nullptr), name(nullptr), pthread_self(0), last_no_thread_suspension_cause(nullptr), checkpoint_function(nullptr), - thread_local_start(nullptr), thread_local_pos(nullptr), thread_local_end(nullptr), + thread_local_pos(nullptr), thread_local_end(nullptr), thread_local_start(nullptr), thread_local_objects(0), mterp_current_ibase(nullptr), mterp_default_ibase(nullptr), mterp_alt_ibase(nullptr), thread_local_alloc_stack_top(nullptr), thread_local_alloc_stack_end(nullptr), nested_signal_state(nullptr), @@ -1506,12 +1506,13 @@ class Thread { JniEntryPoints jni_entrypoints; QuickEntryPoints quick_entrypoints; - // Thread-local allocation pointer. - uint8_t* thread_local_start; // thread_local_pos and thread_local_end must be consecutive for ldrd and are 8 byte aligned for // potentially better performance. uint8_t* thread_local_pos; uint8_t* thread_local_end; + // Thread-local allocation pointer. + uint8_t* thread_local_start; + size_t thread_local_objects; // Mterp jump table bases. diff --git a/test/151-OpenFileLimit/expected.txt b/test/151-OpenFileLimit/expected.txt new file mode 100644 index 0000000000..971e472bff --- /dev/null +++ b/test/151-OpenFileLimit/expected.txt @@ -0,0 +1,3 @@ +Message includes "Too many open files" +Message includes "Too many open files" +done. diff --git a/test/151-OpenFileLimit/info.txt b/test/151-OpenFileLimit/info.txt new file mode 100644 index 0000000000..56ed3963f4 --- /dev/null +++ b/test/151-OpenFileLimit/info.txt @@ -0,0 +1,3 @@ +This test verifies the exception message is informative for failure to launch +a thread due to the number of available file descriptors in the process being +exceeded. diff --git a/test/151-OpenFileLimit/src/Main.java b/test/151-OpenFileLimit/src/Main.java new file mode 100644 index 0000000000..01a9a4ed34 --- /dev/null +++ b/test/151-OpenFileLimit/src/Main.java @@ -0,0 +1,82 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import static java.nio.file.StandardOpenOption.*; +import java.nio.file.*; +import java.io.*; +import java.util.*; + +public class Main { + private static final String TEMP_FILE_NAME_PREFIX = "oflimit"; + private static final String TEMP_FILE_NAME_SUFFIX = ".txt"; + + public static void main(String[] args) throws IOException { + + // Exhaust the number of open file descriptors. + List<File> files = new ArrayList<File>(); + List<OutputStream> streams = new ArrayList<OutputStream>(); + try { + for (int i = 0; ; i++) { + File file = createTempFile(); + files.add(file); + streams.add(Files.newOutputStream(file.toPath(), CREATE, APPEND)); + } + } catch (Throwable e) { + if (e.getMessage().contains("Too many open files")) { + System.out.println("Message includes \"Too many open files\""); + } else { + System.out.println(e.getMessage()); + } + } + + // Now try to create a new thread. + try { + Thread thread = new Thread() { + public void run() { + System.out.println("thread run."); + } + }; + thread.start(); + thread.join(); + } catch (Throwable e) { + if (e.getMessage().contains("Too many open files")) { + System.out.println("Message includes \"Too many open files\""); + } else { + System.out.println(e.getMessage()); + } + } + + for (int i = 0; i < files.size(); i++) { + streams.get(i).close(); + files.get(i).delete(); + } + System.out.println("done."); + } + + private static File createTempFile() throws Exception { + try { + return File.createTempFile(TEMP_FILE_NAME_PREFIX, TEMP_FILE_NAME_SUFFIX); + } catch (IOException e) { + System.setProperty("java.io.tmpdir", "/data/local/tmp"); + try { + return File.createTempFile(TEMP_FILE_NAME_PREFIX, TEMP_FILE_NAME_SUFFIX); + } catch (IOException e2) { + System.setProperty("java.io.tmpdir", "/sdcard"); + return File.createTempFile(TEMP_FILE_NAME_PREFIX, TEMP_FILE_NAME_SUFFIX); + } + } + } +} diff --git a/test/956-methodhandles/build b/test/956-methodhandles/build new file mode 100755 index 0000000000..613e97c711 --- /dev/null +++ b/test/956-methodhandles/build @@ -0,0 +1,28 @@ +#!/bin/bash +# +# Copyright 2016 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# make us exit on a failure +set -e + +if [[ $@ != *"--jvm"* ]]; then + # Don't do anything with jvm. + export USE_JACK=true + export JACK_SERVER=false + export JACK_REPOSITORY="${ANDROID_BUILD_TOP}/prebuilts/sdk/tools/jacks" + export JACK_VERSION=4.11.BETA +fi + +./default-build "$@" --experimental method-handles diff --git a/test/956-methodhandles/expected.txt b/test/956-methodhandles/expected.txt new file mode 100644 index 0000000000..ddc1cb013e --- /dev/null +++ b/test/956-methodhandles/expected.txt @@ -0,0 +1,5 @@ +foo_A +foo_A +foo_A +foo_B +privateRyan_D diff --git a/test/956-methodhandles/info.txt b/test/956-methodhandles/info.txt new file mode 100644 index 0000000000..f1dbb61640 --- /dev/null +++ b/test/956-methodhandles/info.txt @@ -0,0 +1,3 @@ +Tests for method handle invocations. 
+ +NOTE: needs to run under ART or a Java 8 Language runtime and compiler. diff --git a/test/956-methodhandles/run b/test/956-methodhandles/run new file mode 100755 index 0000000000..a9f182288c --- /dev/null +++ b/test/956-methodhandles/run @@ -0,0 +1,20 @@ +#!/bin/bash +# +# Copyright 2016 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# make us exit on a failure +set -e + +./default-run "$@" --experimental method-handles diff --git a/test/956-methodhandles/src/Main.java b/test/956-methodhandles/src/Main.java new file mode 100644 index 0000000000..2802dfa4cc --- /dev/null +++ b/test/956-methodhandles/src/Main.java @@ -0,0 +1,136 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.lang.invoke.MethodHandle; +import java.lang.invoke.MethodHandles; +import java.lang.invoke.MethodHandles.Lookup; +import java.lang.invoke.MethodType; +import java.lang.invoke.WrongMethodTypeException; + +public class Main { + + public static class A { + public void foo() { + System.out.println("foo_A"); + } + + public static final Lookup lookup = MethodHandles.lookup(); + } + + public static class B extends A { + public void foo() { + System.out.println("foo_B"); + } + + public static final Lookup lookup = MethodHandles.lookup(); + } + + public static class C extends B { + public static final Lookup lookup = MethodHandles.lookup(); + } + + public static class D { + private final void privateRyan() { + System.out.println("privateRyan_D"); + } + + public static final Lookup lookup = MethodHandles.lookup(); + } + + public static class E extends D { + public static final Lookup lookup = MethodHandles.lookup(); + } + + public static void main(String[] args) throws Throwable { + testfindSpecial_invokeSuperBehaviour(); + testfindSpecial_invokeDirectBehaviour(); + } + + public static void testfindSpecial_invokeSuperBehaviour() throws Throwable { + // This is equivalent to an invoke-super instruction where the referrer + // is B.class. + MethodHandle mh1 = B.lookup.findSpecial(A.class /* refC */, "foo", + MethodType.methodType(void.class), B.class /* specialCaller */); + + A aInstance = new A(); + B bInstance = new B(); + C cInstance = new C(); + + // This should be as if an invoke-super was called from one of B's methods. + mh1.invokeExact(bInstance); + mh1.invoke(bInstance); + + // This should not work. The receiver type in the handle will be suitably + // restricted to B and subclasses. 
+ try { + mh1.invoke(aInstance); + System.out.println("mh1.invoke(aInstance) should not succeed"); + } catch (ClassCastException expected) { + } + + try { + mh1.invokeExact(aInstance); + System.out.println("mh1.invokeExact(aInstance) should not succeed"); + } catch (WrongMethodTypeException expected) { + } catch (ClassCastException workaround) { + // TODO(narayan): ART treats all invokes as if they were non-exact. We + // should throw a WMTE if we execute an invoke-polymorphic instruction whose + // target method is MethodHandle.invokeExact. + } + + // This should *still* be as if an invoke-super was called from one of B's + // methods, despite the fact that we're operating on a C. + mh1.invoke(cInstance); + + // Now that C is the special caller, the next invoke will call B.foo. + MethodHandle mh2 = C.lookup.findSpecial(A.class /* refC */, "foo", + MethodType.methodType(void.class), C.class /* specialCaller */); + mh2.invokeExact(cInstance); + + // Shouldn't allow invoke-super semantics from an unrelated special caller. + try { + C.lookup.findSpecial(A.class, "foo", + MethodType.methodType(void.class), D.class /* specialCaller */); + System.out.println("findSpecial(A.class, foo, .. D.class) unexpectedly succeeded."); + } catch (IllegalAccessException expected) { + } + } + + public static void testfindSpecial_invokeDirectBehaviour() throws Throwable { + D dInstance = new D(); + + MethodHandle mh3 = D.lookup.findSpecial(D.class, "privateRyan", + MethodType.methodType(void.class), D.class /* specialCaller */); + mh3.invoke(dInstance); + + // The private method shouldn't be accessible from any special caller except + // itself... + try { + D.lookup.findSpecial(D.class, "privateRyan", MethodType.methodType(void.class), C.class); + System.out.println("findSpecial(privateRyan, C.class) unexpectedly succeeded"); + } catch (IllegalAccessException expected) { + } + + // ... or from any lookup context except its own. + try { + E.lookup.findSpecial(D.class, "privateRyan", MethodType.methodType(void.class), E.class); + System.out.println("findSpecial(privateRyan, E.class) unexpectedly succeeded"); + } catch (IllegalAccessException expected) { + } + } +} + + diff --git a/test/Android.arm_vixl.mk b/test/Android.arm_vixl.mk new file mode 100644 index 0000000000..0bbcb64327 --- /dev/null +++ b/test/Android.arm_vixl.mk @@ -0,0 +1,397 @@ +# +# Copyright (C) 2016 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Known broken tests for the ARM VIXL backend.
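+# Note: this list is consumed by test/Android.run-test.mk (see the Android.run-test.mk hunk later in this +# patch), which includes Android.arm_vixl.mk and, when ART_USE_VIXL_ARM_BACKEND is defined and arm is among +# the target architectures, adds these tests to ART_TEST_KNOWN_BROKEN for the 32-bit optimizing target +# configurations.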
+TEST_ART_BROKEN_OPTIMIZING_ARM_VIXL_RUN_TESTS := \ + 002-sleep \ + 003-omnibus-opcodes \ + 004-InterfaceTest \ + 004-JniTest \ + 004-NativeAllocations \ + 004-ReferenceMap \ + 004-SignalTest \ + 004-StackWalk \ + 004-ThreadStress \ + 004-UnsafeTest \ + 004-checker-UnsafeTest18 \ + 005-annotations \ + 006-args \ + 008-exceptions \ + 009-instanceof \ + 011-array-copy \ + 012-math \ + 015-switch \ + 017-float \ + 018-stack-overflow \ + 019-wrong-array-type \ + 020-string \ + 021-string2 \ + 022-interface \ + 023-many-interfaces \ + 024-illegal-access \ + 025-access-controller \ + 027-arithmetic \ + 028-array-write \ + 031-class-attributes \ + 032-concrete-sub \ + 035-enum \ + 036-finalizer \ + 037-inherit \ + 041-narrowing \ + 042-new-instance \ + 043-privates \ + 044-proxy \ + 045-reflect-array \ + 046-reflect \ + 047-returns \ + 048-reflect-v8 \ + 049-show-object \ + 050-sync-test \ + 051-thread \ + 052-verifier-fun \ + 053-wait-some \ + 054-uncaught \ + 058-enum-order \ + 059-finalizer-throw \ + 061-out-of-memory \ + 062-character-encodings \ + 063-process-manager \ + 064-field-access \ + 065-mismatched-implements \ + 066-mismatched-super \ + 067-preemptive-unpark \ + 068-classloader \ + 069-field-type \ + 070-nio-buffer \ + 071-dexfile \ + 072-precise-gc \ + 074-gc-thrash \ + 075-verification-error \ + 076-boolean-put \ + 079-phantom \ + 080-oom-throw \ + 080-oom-throw-with-finalizer \ + 081-hot-exceptions \ + 082-inline-execute \ + 083-compiler-regressions \ + 086-null-super \ + 087-gc-after-link \ + 088-monitor-verification \ + 090-loop-formation \ + 091-override-package-private-method \ + 093-serialization \ + 094-pattern \ + 096-array-copy-concurrent-gc \ + 098-ddmc \ + 099-vmdebug \ + 100-reflect2 \ + 101-fibonacci \ + 102-concurrent-gc \ + 103-string-append \ + 104-growth-limit \ + 106-exceptions2 \ + 107-int-math2 \ + 108-check-cast \ + 109-suspend-check \ + 112-double-math \ + 113-multidex \ + 114-ParallelGC \ + 117-nopatchoat \ + 119-noimage-patchoat \ + 120-hashcode \ + 121-modifiers \ + 122-npe \ + 123-compiler-regressions-mt \ + 123-inline-execute2 \ + 127-checker-secondarydex \ + 129-ThreadGetId \ + 131-structural-change \ + 132-daemon-locks-shutdown \ + 133-static-invoke-super \ + 134-reg-promotion \ + 135-MirandaDispatch \ + 136-daemon-jni-shutdown \ + 137-cfi \ + 138-duplicate-classes-check2 \ + 139-register-natives \ + 140-field-packing \ + 141-class-unload \ + 142-classloader2 \ + 144-static-field-sigquit \ + 145-alloc-tracking-stress \ + 146-bad-interface \ + 150-loadlibrary \ + 201-built-in-except-detail-messages \ + 302-float-conversion \ + 304-method-tracing \ + 403-optimizing-long \ + 404-optimizing-allocator \ + 405-optimizing-long-allocator \ + 406-fields \ + 407-arrays \ + 410-floats \ + 411-optimizing-arith-mul \ + 412-new-array \ + 413-regalloc-regression \ + 414-optimizing-arith-sub \ + 414-static-fields \ + 415-optimizing-arith-neg \ + 416-optimizing-arith-not \ + 417-optimizing-arith-div \ + 419-long-parameter \ + 421-exceptions \ + 421-large-frame \ + 422-instanceof \ + 422-type-conversion \ + 423-invoke-interface \ + 424-checkcast \ + 425-invoke-super \ + 426-monitor \ + 427-bitwise \ + 427-bounds \ + 428-optimizing-arith-rem \ + 429-ssa-builder \ + 430-live-register-slow-path \ + 431-optimizing-arith-shifts \ + 431-type-propagation \ + 432-optimizing-cmp \ + 434-invoke-direct \ + 436-rem-float \ + 436-shift-constant \ + 437-inline \ + 438-volatile \ + 439-npe \ + 439-swap-double \ + 440-stmp \ + 441-checker-inliner \ + 442-checker-constant-folding \ + 
444-checker-nce \ + 445-checker-licm \ + 446-checker-inliner2 \ + 447-checker-inliner3 \ + 448-multiple-returns \ + 449-checker-bce \ + 450-checker-types \ + 451-regression-add-float \ + 451-spill-splot \ + 452-multiple-returns2 \ + 453-not-byte \ + 454-get-vreg \ + 456-baseline-array-set \ + 457-regs \ + 458-checker-instruct-simplification \ + 458-long-to-fpu \ + 459-dead-phi \ + 460-multiple-returns3 \ + 461-get-reference-vreg \ + 462-checker-inlining-dex-files \ + 463-checker-boolean-simplifier \ + 464-checker-inline-sharpen-calls \ + 466-get-live-vreg \ + 467-regalloc-pair \ + 468-checker-bool-simplif-regression \ + 469-condition-materialization \ + 470-huge-method \ + 471-deopt-environment \ + 472-type-propagation \ + 473-checker-inliner-constants \ + 474-checker-boolean-input \ + 474-fp-sub-neg \ + 475-regression-inliner-ids \ + 476-checker-ctor-memory-barrier \ + 477-checker-bound-type \ + 477-long-2-float-convers-precision \ + 478-checker-clinit-check-pruning \ + 478-checker-inliner-nested-loop \ + 480-checker-dead-blocks \ + 482-checker-loop-back-edge-use \ + 483-dce-block \ + 484-checker-register-hints \ + 485-checker-dce-loop-update \ + 485-checker-dce-switch \ + 486-checker-must-do-null-check \ + 487-checker-inline-calls \ + 488-checker-inline-recursive-calls \ + 490-checker-inline \ + 491-current-method \ + 492-checker-inline-invoke-interface \ + 493-checker-inline-invoke-interface \ + 494-checker-instanceof-tests \ + 495-checker-checkcast-tests \ + 496-checker-inlining-class-loader \ + 497-inlining-and-class-loader \ + 498-type-propagation \ + 499-bce-phi-array-length \ + 500-instanceof \ + 501-null-constant-dce \ + 501-regression-packed-switch \ + 503-dead-instructions \ + 504-regression-baseline-entry \ + 508-checker-disassembly \ + 510-checker-try-catch \ + 513-array-deopt \ + 514-shifts \ + 515-dce-dominator \ + 517-checker-builder-fallthrough \ + 518-null-array-get \ + 519-bound-load-class \ + 520-equivalent-phi \ + 521-checker-array-set-null \ + 521-regression-integer-field-set \ + 522-checker-regression-monitor-exit \ + 523-checker-can-throw-regression \ + 525-checker-arrays-fields1 \ + 525-checker-arrays-fields2 \ + 526-checker-caller-callee-regs \ + 526-long-regalloc \ + 527-checker-array-access-split \ + 528-long-hint \ + 529-checker-unresolved \ + 529-long-split \ + 530-checker-loops1 \ + 530-checker-loops2 \ + 530-checker-loops3 \ + 530-checker-lse \ + 530-checker-regression-reftyp-final \ + 530-instanceof-checkcast \ + 532-checker-nonnull-arrayset \ + 534-checker-bce-deoptimization \ + 535-deopt-and-inlining \ + 535-regression-const-val \ + 536-checker-intrinsic-optimization \ + 536-checker-needs-access-check \ + 537-checker-arraycopy \ + 537-checker-inline-and-unverified \ + 537-checker-jump-over-jump \ + 538-checker-embed-constants \ + 540-checker-rtp-bug \ + 541-regression-inlined-deopt \ + 542-bitfield-rotates \ + 542-unresolved-access-check \ + 543-checker-dce-trycatch \ + 543-env-long-ref \ + 545-tracing-and-jit \ + 546-regression-simplify-catch \ + 548-checker-inlining-and-dce \ + 550-checker-multiply-accumulate \ + 550-checker-regression-wide-store \ + 551-checker-clinit \ + 551-checker-shifter-operand \ + 551-implicit-null-checks \ + 551-invoke-super \ + 552-checker-primitive-typeprop \ + 552-checker-sharpening \ + 552-invoke-non-existent-super \ + 553-invoke-super \ + 554-checker-rtp-checkcast \ + 555-UnsafeGetLong-regression \ + 556-invoke-super \ + 557-checker-instruct-simplifier-ror \ + 558-switch \ + 559-bce-ssa \ + 559-checker-irreducible-loop \ + 
559-checker-rtp-ifnotnull \ + 560-packed-switch \ + 561-divrem \ + 561-shared-slowpaths \ + 562-bce-preheader \ + 562-no-intermediate \ + 563-checker-fakestring \ + 564-checker-bitcount \ + 564-checker-inline-loop \ + 564-checker-irreducible-loop \ + 564-checker-negbitwise \ + 565-checker-condition-liveness \ + 565-checker-doublenegbitwise \ + 565-checker-irreducible-loop \ + 565-checker-rotate \ + 566-checker-codegen-select \ + 566-checker-signum \ + 566-polymorphic-inlining \ + 567-checker-compare \ + 568-checker-onebit \ + 570-checker-osr \ + 570-checker-select \ + 571-irreducible-loop \ + 572-checker-array-get-regression \ + 573-checker-checkcast-regression \ + 574-irreducible-and-constant-area \ + 575-checker-isnan \ + 575-checker-string-init-alias \ + 577-checker-fp2int \ + 578-bce-visit \ + 580-checker-round \ + 580-checker-string-fact-intrinsics \ + 581-rtp \ + 582-checker-bce-length \ + 583-checker-zero \ + 584-checker-div-bool \ + 586-checker-null-array-get \ + 587-inline-class-error \ + 588-checker-irreducib-lifetime-hole \ + 589-super-imt \ + 590-checker-arr-set-null-regression \ + 591-new-instance-string \ + 592-checker-regression-bool-input \ + 593-checker-boolean-2-integral-conv \ + 593-checker-long-2-float-regression \ + 593-checker-shift-and-simplifier \ + 594-checker-array-alias \ + 594-invoke-super \ + 594-load-string-regression \ + 595-error-class \ + 596-checker-dead-phi \ + 597-deopt-new-string \ + 599-checker-irreducible-loop \ + 600-verifier-fails \ + 601-method-access \ + 602-deoptimizeable \ + 603-checker-instanceof \ + 604-hot-static-interface \ + 605-new-string-from-bytes \ + 608-checker-unresolved-lse \ + 609-checker-inline-interface \ + 609-checker-x86-bounds-check \ + 610-arraycopy \ + 611-checker-simplify-if \ + 612-jit-dex-cache \ + 613-inlining-dex-cache \ + 614-checker-dump-constant-location \ + 615-checker-arm64-store-zero \ + 617-clinit-oome \ + 618-checker-induction \ + 700-LoadArgRegs \ + 701-easy-div-rem \ + 702-LargeBranchOffset \ + 703-floating-point-div \ + 704-multiply-accumulate \ + 705-register-conflict \ + 800-smali \ + 802-deoptimization \ + 960-default-smali \ + 961-default-iface-resolution-gen \ + 963-default-range-smali \ + 965-default-verify \ + 966-default-conflict \ + 967-default-ame \ + 968-default-partial-compile-gen \ + 969-iface-super \ + 971-iface-super \ + 972-default-imt-collision \ + 972-iface-super-multidex \ + 973-default-multidex \ + 974-verify-interface-super \ + 975-iface-private diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk index 3bf7e4b626..c99510a537 100644 --- a/test/Android.run-test.mk +++ b/test/Android.run-test.mk @@ -232,9 +232,11 @@ ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), # Disable 149-suspend-all-stress, its output is flaky (b/28988206). +# Disable 151-OpenFileLimit (b/32302133) # Disable 577-profile-foreign-dex (b/27454772). TEST_ART_BROKEN_ALL_TARGET_TESTS := \ 149-suspend-all-stress \ + 151-OpenFileLimit \ 577-profile-foreign-dex \ ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \ @@ -516,6 +518,26 @@ ifneq (,$(filter regalloc_gc,$(COMPILER_TYPES))) $(TEST_ART_BROKEN_OPTIMIZING_GRAPH_COLOR),$(ALL_ADDRESS_SIZES)) endif +# Known broken tests for the ARM VIXL backend. +# Android.arm_vixl.mk defines TEST_ART_BROKEN_OPTIMIZING_ARM_VIXL_RUN_TESTS. 
+include $(LOCAL_PATH)/Android.arm_vixl.mk + +ifdef ART_USE_VIXL_ARM_BACKEND + ifeq (arm,$(filter arm,$(TARGET_ARCH) $(TARGET_2ND_ARCH))) + ifneq (,$(filter $(OPTIMIZING_COMPILER_TYPES),$(COMPILER_TYPES))) + ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES), \ + $(OPTIMIZING_COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \ + $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \ + $(TEST_ART_BROKEN_OPTIMIZING_ARM_VIXL_RUN_TESTS),32) + endif + endif + # TODO(VIXL): These two tests currently fail, but adding them to `ART_TEST_KNOWN_BROKEN` breaks + # `export ART_USE_VIXL_ARM_BACKEND=true && mma -j6 test-art-target-gtest dist` + #ART_TEST_KNOWN_BROKEN += test-art-target-gtest-dex2oat_test32 + #ART_TEST_KNOWN_BROKEN += test-art-target-gtest-image_test32 +endif + + # Known broken tests for the mips32 optimizing compiler backend. TEST_ART_BROKEN_OPTIMIZING_MIPS_RUN_TESTS := \ |