87 files changed, 3006 insertions(+), 770 deletions(-)
diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk index bac0ff36fe..0cd90c97a6 100644 --- a/build/Android.common_build.mk +++ b/build/Android.common_build.mk @@ -252,6 +252,7 @@ art_non_debug_cflags := \ art_debug_cflags := \ $(ART_DEBUG_OPT_FLAG) \ -DDYNAMIC_ANNOTATIONS_ENABLED=1 \ + -DVIXL_DEBUG \ -UNDEBUG # Assembler flags for non-debug ART and ART tools. @@ -263,20 +264,46 @@ art_debug_asflags := -UNDEBUG art_host_non_debug_cflags := $(art_non_debug_cflags) art_target_non_debug_cflags := $(art_non_debug_cflags) +### +# Frame size +### + +# Size of the stack-overflow gap. +ART_STACK_OVERFLOW_GAP_arm := 8192 +ART_STACK_OVERFLOW_GAP_arm64 := 8192 +ART_STACK_OVERFLOW_GAP_mips := 16384 +ART_STACK_OVERFLOW_GAP_mips64 := 16384 +ART_STACK_OVERFLOW_GAP_x86 := 8192 +ART_STACK_OVERFLOW_GAP_x86_64 := 8192 +ART_COMMON_STACK_OVERFLOW_DEFINES := \ + -DART_STACK_OVERFLOW_GAP_arm=$(ART_STACK_OVERFLOW_GAP_arm) \ + -DART_STACK_OVERFLOW_GAP_arm64=$(ART_STACK_OVERFLOW_GAP_arm64) \ + -DART_STACK_OVERFLOW_GAP_mips=$(ART_STACK_OVERFLOW_GAP_mips) \ + -DART_STACK_OVERFLOW_GAP_mips64=$(ART_STACK_OVERFLOW_GAP_mips64) \ + -DART_STACK_OVERFLOW_GAP_x86=$(ART_STACK_OVERFLOW_GAP_x86) \ + -DART_STACK_OVERFLOW_GAP_x86_64=$(ART_STACK_OVERFLOW_GAP_x86_64) \ + +# Keep these as small as possible. We have separate values as we have some host vs target +# specific code (and previously GCC vs Clang). +ART_HOST_FRAME_SIZE_LIMIT := 1736 +ART_TARGET_FRAME_SIZE_LIMIT := 1736 + +# Frame size adaptations for instrumented builds. +ifdef SANITIZE_TARGET + ART_TARGET_FRAME_SIZE_LIMIT := 6400 +endif + +# Add frame-size checks for non-debug builds. ifeq ($(HOST_OS),linux) - # Larger frame-size for host clang builds today ifneq ($(ART_COVERAGE),true) ifneq ($(NATIVE_COVERAGE),true) - art_host_non_debug_cflags += -Wframe-larger-than=2700 - ifdef SANITIZE_TARGET - art_target_non_debug_cflags += -Wframe-larger-than=6400 - else - art_target_non_debug_cflags += -Wframe-larger-than=1736 - endif + art_host_non_debug_cflags += -Wframe-larger-than=$(ART_HOST_FRAME_SIZE_LIMIT) + art_target_non_debug_cflags += -Wframe-larger-than=$(ART_TARGET_FRAME_SIZE_LIMIT) endif endif endif + ART_HOST_CFLAGS := $(art_cflags) ART_TARGET_CFLAGS := $(art_cflags) @@ -293,6 +320,10 @@ endif ART_HOST_CFLAGS += -DART_BASE_ADDRESS=$(LIBART_IMG_HOST_BASE_ADDRESS) ART_HOST_CFLAGS += -DART_DEFAULT_INSTRUCTION_SET_FEATURES=default $(art_host_cflags) +ART_HOST_CFLAGS += -DART_FRAME_SIZE_LIMIT=$(ART_HOST_FRAME_SIZE_LIMIT) \ + $(ART_COMMON_STACK_OVERFLOW_DEFINES) + + ifndef LIBART_IMG_TARGET_BASE_ADDRESS $(error LIBART_IMG_TARGET_BASE_ADDRESS unset) endif @@ -300,6 +331,9 @@ endif ART_TARGET_CFLAGS += -DART_TARGET \ -DART_BASE_ADDRESS=$(LIBART_IMG_TARGET_BASE_ADDRESS) \ +ART_TARGET_CFLAGS += -DART_FRAME_SIZE_LIMIT=$(ART_TARGET_FRAME_SIZE_LIMIT) \ + $(ART_COMMON_STACK_OVERFLOW_DEFINES) + ifeq ($(ART_TARGET_LINUX),true) # Setting ART_TARGET_LINUX to true compiles art/ assuming that the target device # will be running linux rather than android. 
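Not part of the commit: the hunk above only plumbs the per-architecture stack-overflow gaps and the frame-size limit through to the compiler as -D preprocessor defines (plus the matching -Wframe-larger-than checks). As a rough illustration of how such defines could be consumed on the C++ side, here is a minimal sketch; the #if chain, kStackOverflowGap, and the final static_assert are assumptions for this sketch, not ART's actual headers, and it only compiles when built with the flags added above.

// Illustrative sketch only -- assumes the ART_STACK_OVERFLOW_GAP_* and
// ART_FRAME_SIZE_LIMIT macros from the build flags above are defined.
#include <cstddef>

// Pick the gap matching the architecture this translation unit targets.
#if defined(__arm__)
static constexpr size_t kStackOverflowGap = ART_STACK_OVERFLOW_GAP_arm;
#elif defined(__aarch64__)
static constexpr size_t kStackOverflowGap = ART_STACK_OVERFLOW_GAP_arm64;
#elif defined(__mips__) && !defined(__LP64__)
static constexpr size_t kStackOverflowGap = ART_STACK_OVERFLOW_GAP_mips;
#elif defined(__mips__) && defined(__LP64__)
static constexpr size_t kStackOverflowGap = ART_STACK_OVERFLOW_GAP_mips64;
#elif defined(__i386__)
static constexpr size_t kStackOverflowGap = ART_STACK_OVERFLOW_GAP_x86;
#elif defined(__x86_64__)
static constexpr size_t kStackOverflowGap = ART_STACK_OVERFLOW_GAP_x86_64;
#else
#error "Unsupported architecture"
#endif

// One plausible consistency check: a single frame (capped by
// -Wframe-larger-than=ART_*_FRAME_SIZE_LIMIT) should fit well inside the
// reserved stack-overflow gap, so one oversized frame cannot skip the gap.
static_assert(static_cast<size_t>(ART_FRAME_SIZE_LIMIT) < kStackOverflowGap,
              "Frame size limit must be smaller than the stack-overflow gap");

The real consumers in the tree may look different; the point is only that both limits are now visible to the preprocessor for host and target builds.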
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk index c79205fca6..3d07fc0ca8 100644 --- a/build/Android.gtest.mk +++ b/build/Android.gtest.mk @@ -634,7 +634,7 @@ define define-art-gtest ifeq ($$(art_target_or_host),target) $$(eval LOCAL_CLANG := $$(ART_TARGET_CLANG)) $$(eval $$(call set-target-local-cflags-vars,debug)) - LOCAL_SHARED_LIBRARIES += libdl libicuuc libicui18n libnativehelper libz libcutils libvixl-arm64 + LOCAL_SHARED_LIBRARIES += libdl libicuuc libicui18n libnativehelper libz libcutils libvixld-arm64 LOCAL_MODULE_PATH_32 := $$(ART_TARGET_NATIVETEST_OUT)/$$(ART_TARGET_ARCH_32) LOCAL_MODULE_PATH_64 := $$(ART_TARGET_NATIVETEST_OUT)/$$(ART_TARGET_ARCH_64) LOCAL_MULTILIB := both @@ -678,7 +678,7 @@ valgrind-test-art-target-gtest-$$(art_gtest_name): $$(ART_TEST_TARGET_VALGRIND_G LOCAL_CLANG := $$(ART_HOST_CLANG) LOCAL_CFLAGS += $$(ART_HOST_CFLAGS) $$(ART_HOST_DEBUG_CFLAGS) LOCAL_ASFLAGS += $$(ART_HOST_ASFLAGS) $$(ART_HOST_DEBUG_ASFLAGS) - LOCAL_SHARED_LIBRARIES += libicuuc-host libicui18n-host libnativehelper libziparchive-host libz-host libvixl-arm64 + LOCAL_SHARED_LIBRARIES += libicuuc-host libicui18n-host libnativehelper libziparchive-host libz-host libvixld-arm64 LOCAL_LDLIBS := -lpthread -ldl LOCAL_IS_HOST_MODULE := true LOCAL_MULTILIB := both diff --git a/cmdline/cmdline_types.h b/cmdline/cmdline_types.h index 1146f958ca..b57383b963 100644 --- a/cmdline/cmdline_types.h +++ b/cmdline/cmdline_types.h @@ -24,14 +24,16 @@ // Includes for the types that are being specialized #include <string> -#include "unit.h" -#include "jdwp/jdwp.h" #include "base/logging.h" #include "base/time_utils.h" #include "experimental_flags.h" #include "gc/collector_type.h" #include "gc/space/large_object_space.h" +#include "jdwp/jdwp.h" #include "jit/profile_saver_options.h" +#include "plugin.h" +#include "ti/agent.h" +#include "unit.h" namespace art { @@ -381,6 +383,38 @@ struct CmdlineType<std::string> : CmdlineTypeParser<std::string> { }; template <> +struct CmdlineType<std::vector<Plugin>> : CmdlineTypeParser<std::vector<Plugin>> { + Result Parse(const std::string& args) { + assert(false && "Use AppendValues() for a Plugin vector type"); + return Result::Failure("Unconditional failure: Plugin vector must be appended: " + args); + } + + Result ParseAndAppend(const std::string& args, + std::vector<Plugin>& existing_value) { + existing_value.push_back(Plugin::Create(args)); + return Result::SuccessNoValue(); + } + + static const char* Name() { return "std::vector<Plugin>"; } +}; + +template <> +struct CmdlineType<std::vector<ti::Agent>> : CmdlineTypeParser<std::vector<ti::Agent>> { + Result Parse(const std::string& args) { + assert(false && "Use AppendValues() for an Agent vector type"); + return Result::Failure("Unconditional failure: Agent vector must be appended: " + args); + } + + Result ParseAndAppend(const std::string& args, + std::vector<ti::Agent>& existing_value) { + existing_value.push_back(ti::Agent::Create(args)); + return Result::SuccessNoValue(); + } + + static const char* Name() { return "std::vector<ti::Agent>"; } +}; + +template <> struct CmdlineType<std::vector<std::string>> : CmdlineTypeParser<std::vector<std::string>> { Result Parse(const std::string& args) { assert(false && "Use AppendValues() for a string vector type"); @@ -625,6 +659,8 @@ struct CmdlineType<LogVerbosity> : CmdlineTypeParser<LogVerbosity> { log_verbosity.image = true; } else if (verbose_options[j] == "systrace-locks") { log_verbosity.systrace_lock_logging = true; + } else if (verbose_options[j] == 
"agents") { + log_verbosity.agents = true; } else { return Result::Usage(std::string("Unknown -verbose option ") + verbose_options[j]); } @@ -735,6 +771,10 @@ struct CmdlineType<ExperimentalFlags> : CmdlineTypeParser<ExperimentalFlags> { Result ParseAndAppend(const std::string& option, ExperimentalFlags& existing) { if (option == "none") { existing = ExperimentalFlags::kNone; + } else if (option == "agents") { + existing = existing | ExperimentalFlags::kAgents; + } else if (option == "runtime-plugins") { + existing = existing | ExperimentalFlags::kRuntimePlugins; } else { return Result::Failure(std::string("Unknown option '") + option + "'"); } diff --git a/cmdline/detail/cmdline_parse_argument_detail.h b/cmdline/detail/cmdline_parse_argument_detail.h index 4b56804ea6..84beff59c7 100644 --- a/cmdline/detail/cmdline_parse_argument_detail.h +++ b/cmdline/detail/cmdline_parse_argument_detail.h @@ -497,7 +497,7 @@ namespace art { std::function<void(TArg&)> save_argument_; std::function<TArg&(void)> load_argument_; }; - } // namespace detail // NOLINT [readability/namespace] [5] [whitespace/comments] [2] + } // namespace detail // NOLINT [readability/namespace] [5] } // namespace art #endif // ART_CMDLINE_DETAIL_CMDLINE_PARSE_ARGUMENT_DETAIL_H_ diff --git a/cmdline/detail/cmdline_parser_detail.h b/cmdline/detail/cmdline_parser_detail.h index 9b43bb0f5d..24dbca2642 100644 --- a/cmdline/detail/cmdline_parser_detail.h +++ b/cmdline/detail/cmdline_parser_detail.h @@ -35,7 +35,7 @@ namespace art { private: template <typename TStream, typename T> static std::true_type InsertionOperatorTest(TStream& os, const T& value, - std::remove_reference<decltype(os << value)>* = 0); // NOLINT [whitespace/operators] [3] + std::remove_reference<decltype(os << value)>* = 0); // NOLINT [whitespace/operators] [3] template <typename TStream, typename ... T> static std::false_type InsertionOperatorTest(TStream& os, const T& ... args); @@ -53,7 +53,7 @@ namespace art { private: template <typename TL, typename TR> static std::true_type EqualityOperatorTest(const TL& left, const TR& right, - std::remove_reference<decltype(left == right)>* = 0); // NOLINT [whitespace/operators] [3] + std::remove_reference<decltype(left == right)>* = 0); // NOLINT [whitespace/operators] [3] template <typename TL, typename ... T> static std::false_type EqualityOperatorTest(const TL& left, const T& ... args); diff --git a/compiler/Android.mk b/compiler/Android.mk index 0ede30d03a..6c6d99f616 100644 --- a/compiler/Android.mk +++ b/compiler/Android.mk @@ -284,12 +284,12 @@ $$(ENUM_OPERATOR_OUT_GEN): $$(GENERATED_SRC_DIR)/%_operator_out.cc : $(LOCAL_PAT endif LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common_build.mk LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.mk - # Vixl assembly support for ARM64 targets. + # VIXL assembly support for ARM64 targets. 
ifeq ($$(art_ndebug_or_debug),debug) ifeq ($$(art_static_or_shared), static) - LOCAL_WHOLESTATIC_LIBRARIES += libvixl-arm64 + LOCAL_WHOLESTATIC_LIBRARIES += libvixld-arm64 else - LOCAL_SHARED_LIBRARIES += libvixl-arm64 + LOCAL_SHARED_LIBRARIES += libvixld-arm64 endif else ifeq ($$(art_static_or_shared), static) diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index 828603398b..d0a8335a99 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -77,10 +77,6 @@ namespace art { static constexpr bool kTimeCompileMethod = !kIsDebugBuild; -// Whether classes-to-compile and methods-to-compile are only applied to the boot image, or, when -// given, too all compilations. -static constexpr bool kRestrictCompilationFiltersToImage = true; - // Print additional info during profile guided compilation. static constexpr bool kDebugProfileGuidedCompilation = false; @@ -946,10 +942,6 @@ bool CompilerDriver::IsImageClass(const char* descriptor) const { } bool CompilerDriver::IsClassToCompile(const char* descriptor) const { - if (kRestrictCompilationFiltersToImage && !IsBootImage()) { - return true; - } - if (classes_to_compile_ == nullptr) { return true; } @@ -957,10 +949,6 @@ bool CompilerDriver::IsClassToCompile(const char* descriptor) const { } bool CompilerDriver::IsMethodToCompile(const MethodReference& method_ref) const { - if (kRestrictCompilationFiltersToImage && !IsBootImage()) { - return true; - } - if (methods_to_compile_ == nullptr) { return true; } diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index ab85c12a1d..4c4128c5f8 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -59,8 +59,8 @@ static constexpr DRegister DTMP = D31; static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7; -// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy. -#define __ down_cast<ArmAssembler*>(codegen->GetAssembler())-> // NOLINT +// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. +#define __ down_cast<ArmAssembler*>(codegen->GetAssembler())-> // NOLINT #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, x).Int32Value() class NullCheckSlowPathARM : public SlowPathCode { @@ -432,11 +432,6 @@ class ReadBarrierMarkSlowPathARM : public SlowPathCode { (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified()) << "Unexpected instruction in read barrier marking slow path: " << instruction_->DebugName(); - // The read barrier instrumentation of object ArrayGet - // instructions does not support the HIntermediateAddress - // instruction. - DCHECK(!(instruction_->IsArrayGet() && - instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress())); __ Bind(GetEntryLabel()); // No need to save live registers; it's taken care of by the @@ -517,11 +512,6 @@ class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode { (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified()) << "Unexpected instruction in read barrier for heap reference slow path: " << instruction_->DebugName(); - // The read barrier instrumentation of object ArrayGet - // instructions does not support the HIntermediateAddress - // instruction. 
- DCHECK(!(instruction_->IsArrayGet() && - instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress())); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); @@ -706,8 +696,8 @@ class ReadBarrierForRootSlowPathARM : public SlowPathCode { }; #undef __ -// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy. -#define __ down_cast<ArmAssembler*>(GetAssembler())-> // NOLINT +// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. +#define __ down_cast<ArmAssembler*>(GetAssembler())-> // NOLINT inline Condition ARMCondition(IfCondition cond) { switch (cond) { @@ -4507,6 +4497,8 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { Primitive::Type type = instruction->GetType(); HInstruction* array_instr = instruction->GetArray(); bool has_intermediate_address = array_instr->IsIntermediateAddress(); + // The read barrier instrumentation does not support the HIntermediateAddress instruction yet. + DCHECK(!(has_intermediate_address && kEmitCompilerReadBarrier)); switch (type) { case Primitive::kPrimBoolean: @@ -4541,11 +4533,6 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { } case Primitive::kPrimNot: { - // The read barrier instrumentation of object ArrayGet - // instructions does not support the HIntermediateAddress - // instruction. - DCHECK(!(has_intermediate_address && kEmitCompilerReadBarrier)); - static_assert( sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); @@ -4688,6 +4675,8 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { Location value_loc = locations->InAt(2); HInstruction* array_instr = instruction->GetArray(); bool has_intermediate_address = array_instr->IsIntermediateAddress(); + // The read barrier instrumentation does not support the HIntermediateAddress instruction yet. + DCHECK(!(has_intermediate_address && kEmitCompilerReadBarrier)); switch (value_type) { case Primitive::kPrimBoolean: @@ -4952,6 +4941,8 @@ void InstructionCodeGeneratorARM::VisitArrayLength(HArrayLength* instruction) { } void LocationsBuilderARM::VisitIntermediateAddress(HIntermediateAddress* instruction) { + // The read barrier instrumentation does not support the HIntermediateAddress instruction yet. + DCHECK(!kEmitCompilerReadBarrier); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); @@ -4966,6 +4957,9 @@ void InstructionCodeGeneratorARM::VisitIntermediateAddress(HIntermediateAddress* Location first = locations->InAt(0); Location second = locations->InAt(1); + // The read barrier instrumentation does not support the HIntermediateAddress instruction yet. + DCHECK(!kEmitCompilerReadBarrier); + if (second.IsRegister()) { __ add(out.AsRegister<Register>(), first.AsRegister<Register>(), diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 9ceb3109cd..d95e7df6b4 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -131,8 +131,8 @@ Location InvokeRuntimeCallingConvention::GetReturnLocation(Primitive::Type retur return ARM64ReturnLocation(return_type); } -// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy. 
-#define __ down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler()-> // NOLINT +// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. +#define __ down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler()-> // NOLINT #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, x).Int32Value() // Calculate memory accessing operand for save/restore live registers. @@ -598,11 +598,6 @@ class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 { (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified()) << "Unexpected instruction in read barrier marking slow path: " << instruction_->DebugName(); - // The read barrier instrumentation of object ArrayGet - // instructions does not support the HIntermediateAddress - // instruction. - DCHECK(!(instruction_->IsArrayGet() && - instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress())); __ Bind(GetEntryLabel()); // No need to save live registers; it's taken care of by the @@ -685,9 +680,7 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified()) << "Unexpected instruction in read barrier for heap reference slow path: " << instruction_->DebugName(); - // The read barrier instrumentation of object ArrayGet - // instructions does not support the HIntermediateAddress - // instruction. + // The read barrier instrumentation does not support the HIntermediateAddress instruction yet. DCHECK(!(instruction_->IsArrayGet() && instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress())); @@ -1990,6 +1983,8 @@ void InstructionCodeGeneratorARM64::VisitArm64DataProcWithShifterOp( } void LocationsBuilderARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) { + // The read barrier instrumentation does not support the HIntermediateAddress instruction yet. + DCHECK(!kEmitCompilerReadBarrier); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); @@ -1997,7 +1992,10 @@ void LocationsBuilderARM64::VisitIntermediateAddress(HIntermediateAddress* instr locations->SetOut(Location::RequiresRegister()); } -void InstructionCodeGeneratorARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) { +void InstructionCodeGeneratorARM64::VisitIntermediateAddress( + HIntermediateAddress* instruction) { + // The read barrier instrumentation does not support the HIntermediateAddress instruction yet. + DCHECK(!kEmitCompilerReadBarrier); __ Add(OutputRegister(instruction), InputRegisterAt(instruction, 0), Operand(InputOperandAt(instruction, 1))); @@ -2093,15 +2091,11 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { // Block pools between `Load` and `MaybeRecordImplicitNullCheck`. BlockPoolsScope block_pools(masm); - // The read barrier instrumentation of object ArrayGet instructions - // does not support the HIntermediateAddress instruction. - DCHECK(!((type == Primitive::kPrimNot) && - instruction->GetArray()->IsIntermediateAddress() && - kEmitCompilerReadBarrier)); - if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // Object ArrayGet with Baker's read barrier case. Register temp = temps.AcquireW(); + // The read barrier instrumentation does not support the HIntermediateAddress instruction yet. 
+ DCHECK(!instruction->GetArray()->IsIntermediateAddress()); // Note that a potential implicit null check is handled in the // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call. codegen_->GenerateArrayLoadWithBakerReadBarrier( @@ -2115,6 +2109,9 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { } else { Register temp = temps.AcquireSameSizeAs(obj); if (instruction->GetArray()->IsIntermediateAddress()) { + // The read barrier instrumentation does not support the + // HIntermediateAddress instruction yet. + DCHECK(!kEmitCompilerReadBarrier); // We do not need to compute the intermediate address from the array: the // input instruction has done it already. See the comment in // `TryExtractArrayAccessAddress()`. @@ -2204,6 +2201,9 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { UseScratchRegisterScope temps(masm); Register temp = temps.AcquireSameSizeAs(array); if (instruction->GetArray()->IsIntermediateAddress()) { + // The read barrier instrumentation does not support the + // HIntermediateAddress instruction yet. + DCHECK(!kEmitCompilerReadBarrier); // We do not need to compute the intermediate address from the array: the // input instruction has done it already. See the comment in // `TryExtractArrayAccessAddress()`. @@ -2223,6 +2223,7 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { codegen_->Store(value_type, value, destination); codegen_->MaybeRecordImplicitNullCheck(instruction); } else { + DCHECK(needs_write_barrier); DCHECK(!instruction->GetArray()->IsIntermediateAddress()); vixl::aarch64::Label done; SlowPathCodeARM64* slow_path = nullptr; diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 1b5fa857e7..921ce10aaa 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -27,11 +27,11 @@ #include "utils/arm64/assembler_arm64.h" #include "utils/type_reference.h" -// TODO: make vixl clean wrt -Wshadow. +// TODO(VIXL): Make VIXL compile with -Wshadow. #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wshadow" -#include "a64/disasm-a64.h" -#include "a64/macro-assembler-a64.h" +#include "aarch64/disasm-aarch64.h" +#include "aarch64/macro-assembler-aarch64.h" #pragma GCC diagnostic pop namespace art { diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 59e103a3bd..58879bc2f1 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -145,8 +145,8 @@ Location InvokeRuntimeCallingConvention::GetReturnLocation(Primitive::Type type) return MipsReturnLocation(type); } -// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy. -#define __ down_cast<CodeGeneratorMIPS*>(codegen)->GetAssembler()-> // NOLINT +// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. +#define __ down_cast<CodeGeneratorMIPS*>(codegen)->GetAssembler()-> // NOLINT #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, x).Int32Value() class BoundsCheckSlowPathMIPS : public SlowPathCodeMIPS { @@ -501,8 +501,8 @@ CodeGeneratorMIPS::CodeGeneratorMIPS(HGraph* graph, } #undef __ -// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy. -#define __ down_cast<MipsAssembler*>(GetAssembler())-> // NOLINT +// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. 
+#define __ down_cast<MipsAssembler*>(GetAssembler())-> // NOLINT #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, x).Int32Value() void CodeGeneratorMIPS::Finalize(CodeAllocator* allocator) { diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index fe1fddc7bf..4e7a2728b1 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -102,8 +102,8 @@ Location InvokeRuntimeCallingConvention::GetReturnLocation(Primitive::Type type) return Mips64ReturnLocation(type); } -// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy. -#define __ down_cast<CodeGeneratorMIPS64*>(codegen)->GetAssembler()-> // NOLINT +// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. +#define __ down_cast<CodeGeneratorMIPS64*>(codegen)->GetAssembler()-> // NOLINT #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMips64PointerSize, x).Int32Value() class BoundsCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { @@ -427,8 +427,8 @@ CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph, } #undef __ -// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy. -#define __ down_cast<Mips64Assembler*>(GetAssembler())-> // NOLINT +// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. +#define __ down_cast<Mips64Assembler*>(GetAssembler())-> // NOLINT #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMips64PointerSize, x).Int32Value() void CodeGeneratorMIPS64::Finalize(CodeAllocator* allocator) { diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index ade21174f4..7a561bb4ad 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -47,8 +47,8 @@ static constexpr int kC2ConditionMask = 0x400; static constexpr int kFakeReturnRegister = Register(8); -// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy. -#define __ down_cast<X86Assembler*>(codegen->GetAssembler())-> // NOLINT +// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. +#define __ down_cast<X86Assembler*>(codegen->GetAssembler())-> // NOLINT #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, x).Int32Value() class NullCheckSlowPathX86 : public SlowPathCode { @@ -729,8 +729,8 @@ class ReadBarrierForRootSlowPathX86 : public SlowPathCode { }; #undef __ -// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy. -#define __ down_cast<X86Assembler*>(GetAssembler())-> /* NOLINT */ +// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. +#define __ down_cast<X86Assembler*>(GetAssembler())-> // NOLINT inline Condition X86Condition(IfCondition cond) { switch (cond) { @@ -7099,12 +7099,6 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i // /* LockWord */ lock_word = LockWord(monitor) static_assert(sizeof(LockWord) == sizeof(int32_t), "art::LockWord and int32_t have different sizes."); - // /* uint32_t */ rb_state = lock_word.ReadBarrierState() - __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift)); - __ andl(temp_reg, Immediate(LockWord::kReadBarrierStateMask)); - static_assert( - LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_, - "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_."); // Load fence to prevent load-load reordering. 
// Note that this is a no-op, thanks to the x86 memory model. @@ -7124,8 +7118,13 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i // if (rb_state == ReadBarrier::gray_ptr_) // ref = ReadBarrier::Mark(ref); - __ cmpl(temp_reg, Immediate(ReadBarrier::gray_ptr_)); - __ j(kEqual, slow_path->GetEntryLabel()); + // Given the numeric representation, it's enough to check the low bit of the + // rb_state. We do that by shifting the bit out of the lock word with SHR. + static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1"); + static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2"); + __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift + 1)); + __ j(kCarrySet, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index eadb431440..cf01a791ee 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -51,8 +51,8 @@ static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 static constexpr int kC2ConditionMask = 0x400; -// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy. -#define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())-> // NOLINT +// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. +#define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())-> // NOLINT #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize, x).Int32Value() class NullCheckSlowPathX86_64 : public SlowPathCode { @@ -748,8 +748,8 @@ class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode { }; #undef __ -// NOLINT on __ macro to suppress wrong warning/fix from clang-tidy. -#define __ down_cast<X86_64Assembler*>(GetAssembler())-> // NOLINT +// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. +#define __ down_cast<X86_64Assembler*>(GetAssembler())-> // NOLINT inline Condition X86_64IntegerCondition(IfCondition cond) { switch (cond) { @@ -6551,12 +6551,6 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction // /* LockWord */ lock_word = LockWord(monitor) static_assert(sizeof(LockWord) == sizeof(int32_t), "art::LockWord and int32_t have different sizes."); - // /* uint32_t */ rb_state = lock_word.ReadBarrierState() - __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift)); - __ andl(temp_reg, Immediate(LockWord::kReadBarrierStateMask)); - static_assert( - LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_, - "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_."); // Load fence to prevent load-load reordering. // Note that this is a no-op, thanks to the x86-64 memory model. @@ -6576,8 +6570,13 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction // if (rb_state == ReadBarrier::gray_ptr_) // ref = ReadBarrier::Mark(ref); - __ cmpl(temp_reg, Immediate(ReadBarrier::gray_ptr_)); - __ j(kEqual, slow_path->GetEntryLabel()); + // Given the numeric representation, it's enough to check the low bit of the + // rb_state. We do that by shifting the bit out of the lock word with SHR. 
+ static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1"); + static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2"); + __ shrl(temp_reg, Immediate(LockWord::kReadBarrierStateShift + 1)); + __ j(kCarrySet, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc index fe9a7af250..18db507c48 100644 --- a/compiler/optimizing/codegen_test.cc +++ b/compiler/optimizing/codegen_test.cc @@ -247,7 +247,7 @@ static void RunCode(InstructionSet target_isa, } else if (target_isa == kX86) { std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), compiler_options); + TestCodeGeneratorX86 codegenX86(graph, *features_x86.get(), compiler_options); RunCode(&codegenX86, graph, hook_before_codegen, has_result, expected); } else if (target_isa == kX86_64) { std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64( diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h index af0ee4e197..cc949c5275 100644 --- a/compiler/optimizing/common_arm64.h +++ b/compiler/optimizing/common_arm64.h @@ -22,8 +22,13 @@ #include "nodes.h" #include "utils/arm64/assembler_arm64.h" -#include "a64/disasm-a64.h" -#include "a64/macro-assembler-a64.h" +// TODO(VIXL): Make VIXL compile with -Wshadow. +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wshadow" +#include "aarch64/disasm-aarch64.h" +#include "aarch64/macro-assembler-aarch64.h" +#include "aarch64/simulator-aarch64.h" +#pragma GCC diagnostic pop namespace art { namespace arm64 { diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc index 6632cd9969..8f7778fe68 100644 --- a/compiler/optimizing/instruction_simplifier_shared.cc +++ b/compiler/optimizing/instruction_simplifier_shared.cc @@ -231,6 +231,15 @@ bool TryExtractArrayAccessAddress(HInstruction* access, HInstruction* array, HInstruction* index, size_t data_offset) { + if (kEmitCompilerReadBarrier) { + // The read barrier instrumentation does not support the + // HIntermediateAddress instruction yet. + // + // TODO: Handle this case properly in the ARM64 and ARM code generator and + // re-enable this optimization; otherwise, remove this TODO. + // b/26601270 + return false; + } if (index->IsConstant() || (index->IsBoundsCheck() && index->AsBoundsCheck()->GetIndex()->IsConstant())) { // When the index is a constant all the addressing can be fitted in the @@ -242,13 +251,6 @@ bool TryExtractArrayAccessAddress(HInstruction* access, // The access may require a runtime call or the original array pointer. return false; } - if (kEmitCompilerReadBarrier && - access->IsArrayGet() && - access->AsArrayGet()->GetType() == Primitive::kPrimNot) { - // For object arrays, the read barrier instrumentation requires - // the original array pointer. - return false; - } // Proceed to extract the base address computation. 
HGraph* graph = access->GetBlock()->GetGraph(); diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h index 3429a8fdbb..1a8eb58857 100644 --- a/compiler/optimizing/intrinsics.h +++ b/compiler/optimizing/intrinsics.h @@ -27,9 +27,6 @@ namespace art { class CompilerDriver; class DexFile; -// Temporary measure until we have caught up with the Java 7 definition of Math.round. b/26327751 -static constexpr bool kRoundIsPlusPointFive = false; - // Positive floating-point infinities. static constexpr uint32_t kPositiveInfinityFloat = 0x7f800000U; static constexpr uint64_t kPositiveInfinityDouble = UINT64_C(0x7ff0000000000000); diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index e7c40e6600..9cfe3ce569 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -29,11 +29,11 @@ using namespace vixl::aarch64; // NOLINT(build/namespaces) -// TODO: make vixl clean wrt -Wshadow. +// TODO(VIXL): Make VIXL compile with -Wshadow. #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wshadow" -#include "a64/disasm-a64.h" -#include "a64/macro-assembler-a64.h" +#include "aarch64/disasm-aarch64.h" +#include "aarch64/macro-assembler-aarch64.h" #pragma GCC diagnostic pop namespace art { @@ -1160,8 +1160,10 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) { MacroAssembler* masm = GetVIXLAssembler(); LocationSummary* locations = invoke->GetLocations(); - Register str = XRegisterFrom(locations->InAt(0)); - Register arg = XRegisterFrom(locations->InAt(1)); + Register str = InputRegisterAt(invoke, 0); + Register arg = InputRegisterAt(invoke, 1); + DCHECK(str.IsW()); + DCHECK(arg.IsW()); Register out = OutputRegister(invoke); Register temp0 = WRegisterFrom(locations->GetTemp(0)); @@ -1192,8 +1194,8 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) { __ Subs(out, str, arg); __ B(&end, eq); // Load lengths of this and argument strings. - __ Ldr(temp0, MemOperand(str.X(), count_offset)); - __ Ldr(temp1, MemOperand(arg.X(), count_offset)); + __ Ldr(temp0, HeapOperand(str, count_offset)); + __ Ldr(temp1, HeapOperand(arg, count_offset)); // Return zero if both strings are empty. __ Orr(out, temp0, temp1); __ Cbz(out, &end); @@ -1222,8 +1224,8 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) { // Loop to compare 4x16-bit characters at a time (ok because of string data alignment). __ Bind(&loop); - __ Ldr(temp4, MemOperand(str.X(), temp1)); - __ Ldr(temp0, MemOperand(arg.X(), temp1)); + __ Ldr(temp4, MemOperand(str.X(), temp1.X())); + __ Ldr(temp0, MemOperand(arg.X(), temp1.X())); __ Cmp(temp4, temp0); __ B(ne, &find_char_diff); __ Add(temp1, temp1, char_size * 4); @@ -1242,14 +1244,14 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) { __ Clz(temp1, temp1); // If the number of 16-bit chars remaining <= the index where the difference occurs (0-3), then // the difference occurs outside the remaining string data, so just return length diff (out). - __ Cmp(temp2, Operand(temp1, LSR, 4)); + __ Cmp(temp2, Operand(temp1.W(), LSR, 4)); __ B(le, &end); // Extract the characters and calculate the difference. 
__ Bic(temp1, temp1, 0xf); __ Lsr(temp0, temp0, temp1); __ Lsr(temp4, temp4, temp1); __ And(temp4, temp4, 0xffff); - __ Sub(out, temp4, Operand(temp0, UXTH)); + __ Sub(out, temp4.W(), Operand(temp0.W(), UXTH)); __ Bind(&end); diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index dc409c92d6..22f4181b92 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -753,11 +753,6 @@ void IntrinsicCodeGeneratorX86::VisitMathRint(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) { - // See intrinsics.h. - if (!kRoundIsPlusPointFive) { - return; - } - // Do we have instruction support? if (codegen_->GetInstructionSetFeatures().HasSSE4_1()) { HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect(); @@ -795,7 +790,6 @@ void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) { } XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>(); - Register constant_area = locations->InAt(1).AsRegister<Register>(); XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); Register out = locations->Out().AsRegister<Register>(); @@ -810,10 +804,23 @@ void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) { __ movss(t2, in); __ roundss(t1, in, Immediate(1)); __ subss(t2, t1); - __ comiss(t2, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(0.5f), constant_area)); - __ j(kBelow, &skip_incr); - __ addss(t1, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(1.0f), constant_area)); - __ Bind(&skip_incr); + if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) { + // Direct constant area available. + Register constant_area = locations->InAt(1).AsRegister<Register>(); + __ comiss(t2, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(0.5f), constant_area)); + __ j(kBelow, &skip_incr); + __ addss(t1, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(1.0f), constant_area)); + __ Bind(&skip_incr); + } else { + // No constant area: go through stack. + __ pushl(Immediate(bit_cast<int32_t, float>(0.5f))); + __ pushl(Immediate(bit_cast<int32_t, float>(1.0f))); + __ comiss(t2, Address(ESP, 4)); + __ j(kBelow, &skip_incr); + __ addss(t1, Address(ESP, 0)); + __ Bind(&skip_incr); + __ addl(ESP, Immediate(8)); + } // Final conversion to an integer. Unfortunately this also does not have a // direct x86 instruction, since NaN should map to 0 and large positive diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 7dfbfb09be..ab8b05c3d4 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -598,10 +598,6 @@ static void CreateSSE41FPToIntLocations(ArenaAllocator* arena, } void IntrinsicLocationsBuilderX86_64::VisitMathRoundFloat(HInvoke* invoke) { - // See intrinsics.h. - if (!kRoundIsPlusPointFive) { - return; - } CreateSSE41FPToIntLocations(arena_, invoke, codegen_); } @@ -646,10 +642,6 @@ void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitMathRoundDouble(HInvoke* invoke) { - // See intrinsics.h. 
- if (!kRoundIsPlusPointFive) { - return; - } CreateSSE41FPToIntLocations(arena_, invoke, codegen_); } diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc index 79ca5a0d86..cfdb41ab62 100644 --- a/compiler/optimizing/register_allocator_graph_color.cc +++ b/compiler/optimizing/register_allocator_graph_color.cc @@ -37,6 +37,165 @@ static constexpr size_t kMaxNumRegs = 32; // intervals are split when coloring fails. static constexpr size_t kMaxGraphColoringAttemptsDebug = 100; +// We always want to avoid spilling inside loops. +static constexpr size_t kLoopSpillWeightMultiplier = 10; + +// If we avoid moves in single jump blocks, we can avoid jumps to jumps. +static constexpr size_t kSingleJumpBlockWeightMultiplier = 2; + +// We avoid moves in blocks that dominate the exit block, since these blocks will +// be executed on every path through the method. +static constexpr size_t kDominatesExitBlockWeightMultiplier = 2; + +enum class CoalesceKind { + kAdjacentSibling, // Prevents moves at interval split points. + kFixedOutputSibling, // Prevents moves from a fixed output location. + kFixedInput, // Prevents moves into a fixed input location. + kNonlinearControlFlow, // Prevents moves between blocks. + kPhi, // Prevents phi resolution moves. + kFirstInput, // Prevents a single input move. + kAnyInput, // May lead to better instruction selection / smaller encodings. +}; + +std::ostream& operator<<(std::ostream& os, const CoalesceKind& kind) { + return os << static_cast<typename std::underlying_type<CoalesceKind>::type>(kind); +} + +static size_t LoopDepthAt(HBasicBlock* block) { + HLoopInformation* loop_info = block->GetLoopInformation(); + size_t depth = 0; + while (loop_info != nullptr) { + ++depth; + loop_info = loop_info->GetPreHeader()->GetLoopInformation(); + } + return depth; +} + +// Return the runtime cost of inserting a move instruction at the specified location. +static size_t CostForMoveAt(size_t position, const SsaLivenessAnalysis& liveness) { + HBasicBlock* block = liveness.GetBlockFromPosition(position / 2); + DCHECK(block != nullptr); + size_t cost = 1; + if (block->IsSingleJump()) { + cost *= kSingleJumpBlockWeightMultiplier; + } + if (block->Dominates(block->GetGraph()->GetExitBlock())) { + cost *= kDominatesExitBlockWeightMultiplier; + } + for (size_t loop_depth = LoopDepthAt(block); loop_depth > 0; --loop_depth) { + cost *= kLoopSpillWeightMultiplier; + } + return cost; +} + +// In general, we estimate coalesce priority by whether it will definitely avoid a move, +// and by how likely it is to create an interference graph that's harder to color. +static size_t ComputeCoalescePriority(CoalesceKind kind, + size_t position, + const SsaLivenessAnalysis& liveness) { + if (kind == CoalesceKind::kAnyInput) { + // This type of coalescing can affect instruction selection, but not moves, so we + // give it the lowest priority. + return 0; + } else { + return CostForMoveAt(position, liveness); + } +} + +enum class CoalesceStage { + kWorklist, // Currently in the iterative coalescing worklist. + kActive, // Not in a worklist, but could be considered again during iterative coalescing. + kInactive, // No longer considered until last-chance coalescing. + kDefunct, // Either the two nodes interfere, or have already been coalesced. 
+}; + +std::ostream& operator<<(std::ostream& os, const CoalesceStage& stage) { + return os << static_cast<typename std::underlying_type<CoalesceStage>::type>(stage); +} + +// Represents a coalesce opportunity between two nodes. +struct CoalesceOpportunity : public ArenaObject<kArenaAllocRegisterAllocator> { + CoalesceOpportunity(InterferenceNode* a, + InterferenceNode* b, + CoalesceKind kind, + size_t position, + const SsaLivenessAnalysis& liveness) + : node_a(a), + node_b(b), + stage(CoalesceStage::kWorklist), + priority(ComputeCoalescePriority(kind, position, liveness)) {} + + // Compare two coalesce opportunities based on their priority. + // Return true if lhs has a lower priority than that of rhs. + static bool CmpPriority(const CoalesceOpportunity* lhs, + const CoalesceOpportunity* rhs) { + return lhs->priority < rhs->priority; + } + + InterferenceNode* const node_a; + InterferenceNode* const node_b; + + // The current stage of this coalesce opportunity, indicating whether it is in a worklist, + // and whether it should still be considered. + CoalesceStage stage; + + // The priority of this coalesce opportunity, based on heuristics. + const size_t priority; +}; + +enum class NodeStage { + kInitial, // Uninitialized. + kPrecolored, // Marks fixed nodes. + kSafepoint, // Marks safepoint nodes. + kPrunable, // Marks uncolored nodes in the interference graph. + kSimplifyWorklist, // Marks non-move-related nodes with degree less than the number of registers. + kFreezeWorklist, // Marks move-related nodes with degree less than the number of registers. + kSpillWorklist, // Marks nodes with degree greater or equal to the number of registers. + kPruned // Marks nodes already pruned from the interference graph. +}; + +std::ostream& operator<<(std::ostream& os, const NodeStage& stage) { + return os << static_cast<typename std::underlying_type<NodeStage>::type>(stage); +} + +// Returns the estimated cost of spilling a particular live interval. +static float ComputeSpillWeight(LiveInterval* interval, const SsaLivenessAnalysis& liveness) { + if (interval->HasRegister()) { + // Intervals with a fixed register cannot be spilled. + return std::numeric_limits<float>::min(); + } + + size_t length = interval->GetLength(); + if (length == 1) { + // Tiny intervals should have maximum priority, since they cannot be split any further. + return std::numeric_limits<float>::max(); + } + + size_t use_weight = 0; + if (interval->GetDefinedBy() != nullptr && interval->DefinitionRequiresRegister()) { + // Cost for spilling at a register definition point. + use_weight += CostForMoveAt(interval->GetStart() + 1, liveness); + } + + UsePosition* use = interval->GetFirstUse(); + while (use != nullptr && use->GetPosition() <= interval->GetStart()) { + // Skip uses before the start of this live interval. + use = use->GetNext(); + } + + while (use != nullptr && use->GetPosition() <= interval->GetEnd()) { + if (use->GetUser() != nullptr && use->RequiresRegister()) { + // Cost for spilling at a register use point. + use_weight += CostForMoveAt(use->GetUser()->GetLifetimePosition() - 1, liveness); + } + use = use->GetNext(); + } + + // We divide by the length of the interval because we want to prioritize + // short intervals; we do not benefit much if we split them further. + return static_cast<float>(use_weight) / static_cast<float>(length); +} + // Interference nodes make up the interference graph, which is the primary data structure in // graph coloring register allocation. 
Each node represents a single live interval, and contains // a set of adjacent nodes corresponding to intervals overlapping with its own. To save memory, @@ -58,84 +217,320 @@ static constexpr size_t kMaxGraphColoringAttemptsDebug = 100; // and thus whether it is safe to prune it from the interference graph early on. class InterferenceNode : public ArenaObject<kArenaAllocRegisterAllocator> { public: - InterferenceNode(ArenaAllocator* allocator, LiveInterval* interval, size_t id) - : interval_(interval), - adjacent_nodes_(CmpPtr, allocator->Adapter(kArenaAllocRegisterAllocator)), - out_degree_(0), - id_(id) {} - - // Used to maintain determinism when storing InterferenceNode pointers in sets. - static bool CmpPtr(const InterferenceNode* lhs, const InterferenceNode* rhs) { - return lhs->id_ < rhs->id_; + InterferenceNode(ArenaAllocator* allocator, + LiveInterval* interval, + const SsaLivenessAnalysis& liveness) + : stage(NodeStage::kInitial), + interval_(interval), + adjacent_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)), + coalesce_opportunities_(allocator->Adapter(kArenaAllocRegisterAllocator)), + out_degree_(interval->HasRegister() ? std::numeric_limits<size_t>::max() : 0), + alias_(this), + spill_weight_(ComputeSpillWeight(interval, liveness)), + requires_color_(interval->RequiresRegister()) { + DCHECK(!interval->IsHighInterval()) << "Pair nodes should be represented by the low interval"; } - void AddInterference(InterferenceNode* other) { - if (adjacent_nodes_.insert(other).second) { + void AddInterference(InterferenceNode* other, bool guaranteed_not_interfering_yet) { + DCHECK(!IsPrecolored()) << "To save memory, fixed nodes should not have outgoing interferences"; + DCHECK_NE(this, other) << "Should not create self loops in the interference graph"; + DCHECK_EQ(this, alias_) << "Should not add interferences to a node that aliases another"; + DCHECK_NE(stage, NodeStage::kPruned); + DCHECK_NE(other->stage, NodeStage::kPruned); + if (guaranteed_not_interfering_yet) { + DCHECK(std::find(adjacent_nodes_.begin(), adjacent_nodes_.end(), other) + == adjacent_nodes_.end()); + adjacent_nodes_.push_back(other); out_degree_ += EdgeWeightWith(other); + } else { + auto it = std::find(adjacent_nodes_.begin(), adjacent_nodes_.end(), other); + if (it == adjacent_nodes_.end()) { + adjacent_nodes_.push_back(other); + out_degree_ += EdgeWeightWith(other); + } } } void RemoveInterference(InterferenceNode* other) { - if (adjacent_nodes_.erase(other) > 0) { + DCHECK_EQ(this, alias_) << "Should not remove interferences from a coalesced node"; + DCHECK_EQ(other->stage, NodeStage::kPruned) << "Should only remove interferences when pruning"; + auto it = std::find(adjacent_nodes_.begin(), adjacent_nodes_.end(), other); + if (it != adjacent_nodes_.end()) { + adjacent_nodes_.erase(it); out_degree_ -= EdgeWeightWith(other); } } bool ContainsInterference(InterferenceNode* other) const { - return adjacent_nodes_.count(other) > 0; + DCHECK(!IsPrecolored()) << "Should not query fixed nodes for interferences"; + DCHECK_EQ(this, alias_) << "Should not query a coalesced node for interferences"; + auto it = std::find(adjacent_nodes_.begin(), adjacent_nodes_.end(), other); + return it != adjacent_nodes_.end(); } LiveInterval* GetInterval() const { return interval_; } - const ArenaSet<InterferenceNode*, decltype(&CmpPtr)>& GetAdjacentNodes() const { + const ArenaVector<InterferenceNode*>& GetAdjacentNodes() const { return adjacent_nodes_; } size_t GetOutDegree() const { + // Pre-colored nodes have infinite degree. 
+ DCHECK(!IsPrecolored() || out_degree_ == std::numeric_limits<size_t>::max()); return out_degree_; } - size_t GetId() const { - return id_; + void AddCoalesceOpportunity(CoalesceOpportunity* opportunity) { + coalesce_opportunities_.push_back(opportunity); + } + + void ClearCoalesceOpportunities() { + coalesce_opportunities_.clear(); + } + + bool IsMoveRelated() const { + for (CoalesceOpportunity* opportunity : coalesce_opportunities_) { + if (opportunity->stage == CoalesceStage::kWorklist || + opportunity->stage == CoalesceStage::kActive) { + return true; + } + } + return false; + } + + // Return whether this node already has a color. + // Used to find fixed nodes in the interference graph before coloring. + bool IsPrecolored() const { + return interval_->HasRegister(); + } + + bool IsPair() const { + return interval_->HasHighInterval(); + } + + void SetAlias(InterferenceNode* rep) { + DCHECK_NE(rep->stage, NodeStage::kPruned); + DCHECK_EQ(this, alias_) << "Should only set a node's alias once"; + alias_ = rep; + } + + InterferenceNode* GetAlias() { + if (alias_ != this) { + // Recurse in order to flatten tree of alias pointers. + alias_ = alias_->GetAlias(); + } + return alias_; + } + + const ArenaVector<CoalesceOpportunity*>& GetCoalesceOpportunities() const { + return coalesce_opportunities_; + } + + float GetSpillWeight() const { + return spill_weight_; + } + + bool RequiresColor() const { + return requires_color_; } - private: // We give extra weight to edges adjacent to pair nodes. See the general comment on the // interference graph above. - size_t EdgeWeightWith(InterferenceNode* other) const { - return (interval_->HasHighInterval() || other->interval_->HasHighInterval()) ? 2 : 1; + size_t EdgeWeightWith(const InterferenceNode* other) const { + return (IsPair() || other->IsPair()) ? 2 : 1; } + // The current stage of this node, indicating which worklist it belongs to. + NodeStage stage; + + private: // The live interval that this node represents. LiveInterval* const interval_; // All nodes interfering with this one. - // TODO: There is potential to use a cheaper data structure here, especially since - // adjacency sets will usually be small. - ArenaSet<InterferenceNode*, decltype(&CmpPtr)> adjacent_nodes_; + // We use an unsorted vector as a set, since a tree or hash set is too heavy for the + // set sizes that we encounter. Using a vector leads to much better performance. + ArenaVector<InterferenceNode*> adjacent_nodes_; + + // Interference nodes that this node should be coalesced with to reduce moves. + ArenaVector<CoalesceOpportunity*> coalesce_opportunities_; // The maximum number of colors with which this node could interfere. This could be more than // the number of adjacent nodes if this is a pair node, or if some adjacent nodes are pair nodes. // We use "out" degree because incoming edges come from nodes already pruned from the graph, // and do not affect the coloring of this node. + // Pre-colored nodes are treated as having infinite degree. size_t out_degree_; - // A unique identifier for this node, used to maintain determinism when storing - // interference nodes in sets. - const size_t id_; + // The node representing this node in the interference graph. + // Initially set to `this`, and only changed if this node is coalesced into another. + InterferenceNode* alias_; - // TODO: We could cache the result of interval_->RequiresRegister(), since it - // will not change for the lifetime of this node. 
(Currently, RequiresRegister() requires - // iterating through all uses of a live interval.) + // The cost of splitting and spilling this interval to the stack. + // Nodes with a higher spill weight should be prioritized when assigning registers. + // This is essentially based on use density and location; short intervals with many uses inside + // deeply nested loops have a high spill weight. + const float spill_weight_; + + const bool requires_color_; DISALLOW_COPY_AND_ASSIGN(InterferenceNode); }; +// The order in which we color nodes is important. To guarantee forward progress, +// we prioritize intervals that require registers, and after that we prioritize +// short intervals. That way, if we fail to color a node, it either won't require a +// register, or it will be a long interval that can be split in order to make the +// interference graph sparser. +// To improve code quality, we prioritize intervals used frequently in deeply nested loops. +// (This metric is secondary to the forward progress requirements above.) +// TODO: May also want to consider: +// - Constants (since they can be rematerialized) +// - Allocated spill slots +static bool HasGreaterNodePriority(const InterferenceNode* lhs, + const InterferenceNode* rhs) { + // (1) Prioritize the node that requires a color. + if (lhs->RequiresColor() != rhs->RequiresColor()) { + return lhs->RequiresColor(); + } + + // (2) Prioritize the interval that has a higher spill weight. + return lhs->GetSpillWeight() > rhs->GetSpillWeight(); +} + +// A ColoringIteration holds the many data structures needed for a single graph coloring attempt, +// and provides methods for each phase of the attempt. +class ColoringIteration { + public: + ColoringIteration(RegisterAllocatorGraphColor* register_allocator, + ArenaAllocator* allocator, + bool processing_core_regs, + size_t num_regs) + : register_allocator_(register_allocator), + allocator_(allocator), + processing_core_regs_(processing_core_regs), + num_regs_(num_regs), + interval_node_map_(allocator->Adapter(kArenaAllocRegisterAllocator)), + prunable_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)), + pruned_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)), + simplify_worklist_(allocator->Adapter(kArenaAllocRegisterAllocator)), + freeze_worklist_(allocator->Adapter(kArenaAllocRegisterAllocator)), + spill_worklist_(HasGreaterNodePriority, allocator->Adapter(kArenaAllocRegisterAllocator)), + coalesce_worklist_(CoalesceOpportunity::CmpPriority, + allocator->Adapter(kArenaAllocRegisterAllocator)) {} + + // Use the intervals collected from instructions to construct an + // interference graph mapping intervals to adjacency lists. + // Also, collect synthesized safepoint nodes, used to keep + // track of live intervals across safepoints. + // TODO: Should build safepoints elsewhere. + void BuildInterferenceGraph(const ArenaVector<LiveInterval*>& intervals, + const ArenaVector<InterferenceNode*>& physical_nodes, + ArenaVector<InterferenceNode*>* safepoints); + + // Add coalesce opportunities to interference nodes. + void FindCoalesceOpportunities(); + + // Prune nodes from the interference graph to be colored later. Build + // a stack (pruned_nodes) containing these intervals in an order determined + // by various heuristics. + void PruneInterferenceGraph(); + + // Process pruned_intervals_ to color the interference graph, spilling when + // necessary. Returns true if successful. Else, some intervals have been + // split, and the interference graph should be rebuilt for another attempt. 
+  bool ColorInterferenceGraph();
+
+  // Return prunable nodes.
+  // The register allocator will need to access prunable nodes after coloring
+  // in order to tell the code generator which registers have been assigned.
+  const ArenaVector<InterferenceNode*>& GetPrunableNodes() const {
+    return prunable_nodes_;
+  }
+
+ private:
+  // Create a coalesce opportunity between two nodes.
+  void CreateCoalesceOpportunity(InterferenceNode* a,
+                                 InterferenceNode* b,
+                                 CoalesceKind kind,
+                                 size_t position);
+
+  // Add an edge in the interference graph, if valid.
+  // Note that `guaranteed_not_interfering_yet` is used to optimize adjacency set insertion
+  // when possible.
+  void AddPotentialInterference(InterferenceNode* from,
+                                InterferenceNode* to,
+                                bool guaranteed_not_interfering_yet,
+                                bool both_directions = true);
+
+  // Invalidate all coalesce opportunities this node has, so that it (and possibly its neighbors)
+  // may be pruned from the interference graph.
+  void FreezeMoves(InterferenceNode* node);
+
+  // Prune a node from the interference graph, updating worklists if necessary.
+  void PruneNode(InterferenceNode* node);
+
+  // Add coalesce opportunities associated with this node to the coalesce worklist.
+  void EnableCoalesceOpportunities(InterferenceNode* node);
+
+  // If needed, move `node` from the freeze worklist to the simplify worklist.
+  void CheckTransitionFromFreezeWorklist(InterferenceNode* node);
+
+  // Return true if `into` is colored, and `from` can be coalesced with `into` conservatively.
+  bool PrecoloredHeuristic(InterferenceNode* from, InterferenceNode* into);
+
+  // Return true if `from` and `into` are uncolored, and can be coalesced conservatively.
+  bool UncoloredHeuristic(InterferenceNode* from, InterferenceNode* into);
+
+  void Coalesce(CoalesceOpportunity* opportunity);
+
+  // Merge `from` into `into` in the interference graph.
+  void Combine(InterferenceNode* from, InterferenceNode* into);
+
+  // A reference to the register allocator instance,
+  // needed to split intervals and assign spill slots.
+  RegisterAllocatorGraphColor* register_allocator_;
+
+  // An arena allocator used for a single graph coloring attempt.
+  ArenaAllocator* allocator_;
+
+  const bool processing_core_regs_;
+
+  const size_t num_regs_;
+
+  // A map from live intervals to interference nodes.
+  ArenaHashMap<LiveInterval*, InterferenceNode*> interval_node_map_;
+
+  // Uncolored nodes that should be pruned from the interference graph.
+  ArenaVector<InterferenceNode*> prunable_nodes_;
+
+  // A stack of nodes pruned from the interference graph, waiting to be colored.
+  ArenaStdStack<InterferenceNode*> pruned_nodes_;
+
+  // A queue containing low degree, non-move-related nodes that can be pruned immediately.
+  ArenaDeque<InterferenceNode*> simplify_worklist_;
+
+  // A queue containing low degree, move-related nodes.
+  ArenaDeque<InterferenceNode*> freeze_worklist_;
+
+  // A queue containing high degree nodes.
+  // If we have to prune from the spill worklist, we cannot guarantee
+  // the pruned node a color, so we order the worklist by priority.
+  ArenaPriorityQueue<InterferenceNode*, decltype(&HasGreaterNodePriority)> spill_worklist_;
+
+  // A queue containing coalesce opportunities.
+  // We order the coalesce worklist by priority, since some coalesce opportunities (e.g., those
+  // inside of loops) are more important than others.
+ ArenaPriorityQueue<CoalesceOpportunity*, + decltype(&CoalesceOpportunity::CmpPriority)> coalesce_worklist_; + + DISALLOW_COPY_AND_ASSIGN(ColoringIteration); +}; + static bool IsCoreInterval(LiveInterval* interval) { - return interval->GetType() != Primitive::kPrimFloat - && interval->GetType() != Primitive::kPrimDouble; + return !Primitive::IsFloatingPointType(interval->GetType()); } static size_t ComputeReservedArtMethodSlots(const CodeGenerator& codegen) { @@ -144,14 +539,16 @@ static size_t ComputeReservedArtMethodSlots(const CodeGenerator& codegen) { RegisterAllocatorGraphColor::RegisterAllocatorGraphColor(ArenaAllocator* allocator, CodeGenerator* codegen, - const SsaLivenessAnalysis& liveness) + const SsaLivenessAnalysis& liveness, + bool iterative_move_coalescing) : RegisterAllocator(allocator, codegen, liveness), + iterative_move_coalescing_(iterative_move_coalescing), core_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)), fp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)), temp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)), safepoints_(allocator->Adapter(kArenaAllocRegisterAllocator)), - physical_core_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)), - physical_fp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)), + physical_core_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)), + physical_fp_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)), int_spill_slot_counter_(0), double_spill_slot_counter_(0), float_spill_slot_counter_(0), @@ -162,17 +559,18 @@ RegisterAllocatorGraphColor::RegisterAllocatorGraphColor(ArenaAllocator* allocat number_of_globally_blocked_core_regs_(0), number_of_globally_blocked_fp_regs_(0), max_safepoint_live_core_regs_(0), - max_safepoint_live_fp_regs_(0), - coloring_attempt_allocator_(nullptr) { + max_safepoint_live_fp_regs_(0) { // Before we ask for blocked registers, set them up in the code generator. codegen->SetupBlockedRegisters(); // Initialize physical core register live intervals and blocked registers. // This includes globally blocked registers, such as the stack pointer. - physical_core_intervals_.resize(codegen->GetNumberOfCoreRegisters(), nullptr); - for (size_t i = 0; i < codegen->GetNumberOfCoreRegisters(); ++i) { + physical_core_nodes_.resize(codegen_->GetNumberOfCoreRegisters(), nullptr); + for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) { LiveInterval* interval = LiveInterval::MakeFixedInterval(allocator_, i, Primitive::kPrimInt); - physical_core_intervals_[i] = interval; + physical_core_nodes_[i] = + new (allocator_) InterferenceNode(allocator_, interval, liveness); + physical_core_nodes_[i]->stage = NodeStage::kPrecolored; core_intervals_.push_back(interval); if (codegen_->IsBlockedCoreRegister(i)) { ++number_of_globally_blocked_core_regs_; @@ -180,10 +578,12 @@ RegisterAllocatorGraphColor::RegisterAllocatorGraphColor(ArenaAllocator* allocat } } // Initialize physical floating point register live intervals and blocked registers. 
- physical_fp_intervals_.resize(codegen->GetNumberOfFloatingPointRegisters(), nullptr); - for (size_t i = 0; i < codegen->GetNumberOfFloatingPointRegisters(); ++i) { + physical_fp_nodes_.resize(codegen_->GetNumberOfFloatingPointRegisters(), nullptr); + for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) { LiveInterval* interval = LiveInterval::MakeFixedInterval(allocator_, i, Primitive::kPrimFloat); - physical_fp_intervals_[i] = interval; + physical_fp_nodes_[i] = + new (allocator_) InterferenceNode(allocator_, interval, liveness); + physical_fp_nodes_[i]->stage = NodeStage::kPrecolored; fp_intervals_.push_back(interval); if (codegen_->IsBlockedFloatingPointRegister(i)) { ++number_of_globally_blocked_fp_regs_; @@ -213,24 +613,44 @@ void RegisterAllocatorGraphColor::AllocateRegisters() { << "which could be caused by prioritizing the wrong live intervals. (Short intervals " << "should be prioritized over long ones, because they cannot be split further.)"; - // Reset the allocator for the next coloring attempt. + // Many data structures are cleared between graph coloring attempts, so we reduce + // total memory usage by using a new arena allocator for each attempt. ArenaAllocator coloring_attempt_allocator(allocator_->GetArenaPool()); - coloring_attempt_allocator_ = &coloring_attempt_allocator; + ColoringIteration iteration(this, + &coloring_attempt_allocator, + processing_core_regs, + num_registers); - // (2) Build the interference graph. - ArenaVector<InterferenceNode*> prunable_nodes( - coloring_attempt_allocator_->Adapter(kArenaAllocRegisterAllocator)); + // (2) Build the interference graph. Also gather safepoints. ArenaVector<InterferenceNode*> safepoints( - coloring_attempt_allocator_->Adapter(kArenaAllocRegisterAllocator)); - BuildInterferenceGraph(intervals, &prunable_nodes, &safepoints); + coloring_attempt_allocator.Adapter(kArenaAllocRegisterAllocator)); + ArenaVector<InterferenceNode*>& physical_nodes = processing_core_regs + ? physical_core_nodes_ + : physical_fp_nodes_; + iteration.BuildInterferenceGraph(intervals, physical_nodes, &safepoints); + + // (3) Add coalesce opportunities. + // If we have tried coloring the graph a suspiciously high number of times, give + // up on move coalescing, just in case the coalescing heuristics are not conservative. + // (This situation will be caught if DCHECKs are turned on.) + if (iterative_move_coalescing_ && attempt <= kMaxGraphColoringAttemptsDebug) { + iteration.FindCoalesceOpportunities(); + } - // (3) Prune all uncolored nodes from interference graph. - ArenaStdStack<InterferenceNode*> pruned_nodes( - coloring_attempt_allocator_->Adapter(kArenaAllocRegisterAllocator)); - PruneInterferenceGraph(prunable_nodes, num_registers, &pruned_nodes); + // (4) Prune all uncolored nodes from interference graph. + iteration.PruneInterferenceGraph(); - // (4) Color pruned nodes based on interferences. - bool successful = ColorInterferenceGraph(&pruned_nodes, num_registers); + // (5) Color pruned nodes based on interferences. + bool successful = iteration.ColorInterferenceGraph(); + + // We manually clear coalesce opportunities for physical nodes, + // since they persist across coloring attempts. + for (InterferenceNode* node : physical_core_nodes_) { + node->ClearCoalesceOpportunities(); + } + for (InterferenceNode* node : physical_fp_nodes_) { + node->ClearCoalesceOpportunities(); + } if (successful) { // Compute the maximum number of live registers across safepoints. 
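The allocation loop above repeats steps (2) through (5) until coloring succeeds, rebuilding the interference graph from scratch on each attempt. The following is an editorial sketch of that control flow, not code from the patch; `Iteration`, `AllocateOneKind`, `enable_coalescing`, and `max_attempts` are placeholder names standing in for the real ART types and fields.

#include <cstddef>

// Stand-in for ColoringIteration; only the control flow mirrors the patch.
struct Iteration {
  void BuildInterferenceGraph() {}        // also gathers safepoint nodes
  void FindCoalesceOpportunities() {}
  void PruneInterferenceGraph() {}
  bool ColorInterferenceGraph() { return true; }  // false => some interval was split or spilled
};

void AllocateOneKind(bool enable_coalescing, size_t max_attempts) {
  for (size_t attempt = 0; attempt < max_attempts; ++attempt) {
    Iteration iteration;                  // fresh arena and worklists for each attempt
    iteration.BuildInterferenceGraph();
    if (enable_coalescing) {
      iteration.FindCoalesceOpportunities();
    }
    iteration.PruneInterferenceGraph();
    if (iteration.ColorInterferenceGraph()) {
      return;  // success: registers assigned, caller resolves locations next
    }
    // Otherwise the graph was too dense; intervals were split, so rebuild and retry.
  }
}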
@@ -250,7 +670,7 @@ void RegisterAllocatorGraphColor::AllocateRegisters() { // We only look at prunable_nodes because we already told the code generator about // fixed intervals while processing instructions. We also ignore the fixed intervals // placed at the top of catch blocks. - for (InterferenceNode* node : prunable_nodes) { + for (InterferenceNode* node : iteration.GetPrunableNodes()) { LiveInterval* interval = node->GetInterval(); if (interval->HasRegister()) { Location low_reg = processing_core_regs @@ -275,7 +695,7 @@ void RegisterAllocatorGraphColor::AllocateRegisters() { } // while unsuccessful } // for processing_core_instructions - // (5) Resolve locations and deconstruct SSA form. + // (6) Resolve locations and deconstruct SSA form. RegisterAllocationResolver(allocator_, codegen_, liveness_) .Resolve(max_safepoint_live_core_regs_, max_safepoint_live_fp_regs_, @@ -304,11 +724,12 @@ bool RegisterAllocatorGraphColor::Validate(bool log_fatal_on_failure) { } } - ArenaVector<LiveInterval*>& physical_intervals = processing_core_regs - ? physical_core_intervals_ - : physical_fp_intervals_; - for (LiveInterval* fixed : physical_intervals) { - if (fixed->GetFirstRange() != nullptr) { + ArenaVector<InterferenceNode*>& physical_nodes = processing_core_regs + ? physical_core_nodes_ + : physical_fp_nodes_; + for (InterferenceNode* fixed : physical_nodes) { + LiveInterval* interval = fixed->GetInterval(); + if (interval->GetFirstRange() != nullptr) { // Ideally we would check fixed ranges as well, but currently there are times when // two fixed intervals for the same register will overlap. For example, a fixed input // and a fixed output may sometimes share the same register, in which there will be two @@ -358,7 +779,8 @@ void RegisterAllocatorGraphColor::ProcessInstructions() { ProcessInstruction(phi_it.Current()); } - if (block->IsCatchBlock() || (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible())) { + if (block->IsCatchBlock() + || (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible())) { // By blocking all registers at the top of each catch block or irreducible loop, we force // intervals belonging to the live-in set of the catch/header block to be spilled. // TODO(ngeoffray): Phis in this block could be allocated in register. @@ -435,7 +857,9 @@ void RegisterAllocatorGraphColor::CheckForFixedInputs(HInstruction* instruction) // TODO: Ideally we would coalesce the physical register with the register // allocated to the input value, but this can be tricky if, e.g., there // could be multiple physical register uses of the same value at the - // same instruction. Need to think about it more. + // same instruction. Furthermore, there's currently no distinction between + // fixed inputs to a call (which will be clobbered) and other fixed inputs (which + // may not be clobbered). LocationSummary* locations = instruction->GetLocations(); size_t position = instruction->GetLifetimePosition(); for (size_t i = 0; i < locations->GetInputCount(); ++i) { @@ -639,8 +1063,8 @@ void RegisterAllocatorGraphColor::BlockRegister(Location location, DCHECK(location.IsRegister() || location.IsFpuRegister()); int reg = location.reg(); LiveInterval* interval = location.IsRegister() - ? physical_core_intervals_[reg] - : physical_fp_intervals_[reg]; + ? physical_core_nodes_[reg]->GetInterval() + : physical_fp_nodes_[reg]->GetInterval(); DCHECK(interval->GetRegister() == reg); bool blocked_by_codegen = location.IsRegister() ? 
codegen_->IsBlockedCoreRegister(reg) @@ -666,28 +1090,105 @@ void RegisterAllocatorGraphColor::BlockRegisters(size_t start, size_t end, bool } } -// Add an interference edge, but only if necessary. -static void AddPotentialInterference(InterferenceNode* from, InterferenceNode* to) { - if (from->GetInterval()->HasRegister()) { +void ColoringIteration::AddPotentialInterference(InterferenceNode* from, + InterferenceNode* to, + bool guaranteed_not_interfering_yet, + bool both_directions) { + if (from->IsPrecolored()) { // We save space by ignoring outgoing edges from fixed nodes. } else if (to->GetInterval()->IsSlowPathSafepoint()) { // Safepoint intervals are only there to count max live registers, // so no need to give them incoming interference edges. // This is also necessary for correctness, because we don't want nodes // to remove themselves from safepoint adjacency sets when they're pruned. + } else if (to->IsPrecolored()) { + // It is important that only a single node represents a given fixed register in the + // interference graph. We retrieve that node here. + const ArenaVector<InterferenceNode*>& physical_nodes = to->GetInterval()->IsFloatingPoint() + ? register_allocator_->physical_fp_nodes_ + : register_allocator_->physical_core_nodes_; + InterferenceNode* physical_node = physical_nodes[to->GetInterval()->GetRegister()]; + from->AddInterference(physical_node, /*guaranteed_not_interfering_yet*/ false); + DCHECK_EQ(to->GetInterval()->GetRegister(), physical_node->GetInterval()->GetRegister()); + DCHECK_EQ(to->GetAlias(), physical_node) << "Fixed nodes should alias the canonical fixed node"; + + // If a node interferes with a fixed pair node, the weight of the edge may + // be inaccurate after using the alias of the pair node, because the alias of the pair node + // is a singular node. + // We could make special pair fixed nodes, but that ends up being too conservative because + // a node could then interfere with both {r1} and {r1,r2}, leading to a degree of + // three rather than two. + // Instead, we explicitly add an interference with the high node of the fixed pair node. + // TODO: This is too conservative at time for pair nodes, but the fact that fixed pair intervals + // can be unaligned on x86 complicates things. + if (to->IsPair()) { + InterferenceNode* high_node = + physical_nodes[to->GetInterval()->GetHighInterval()->GetRegister()]; + DCHECK_EQ(to->GetInterval()->GetHighInterval()->GetRegister(), + high_node->GetInterval()->GetRegister()); + from->AddInterference(high_node, /*guaranteed_not_interfering_yet*/ false); + } } else { - from->AddInterference(to); + // Standard interference between two uncolored nodes. + from->AddInterference(to, guaranteed_not_interfering_yet); + } + + if (both_directions) { + AddPotentialInterference(to, from, guaranteed_not_interfering_yet, /*both_directions*/ false); } } -// TODO: See locations->OutputCanOverlapWithInputs(); we may want to consider -// this when building the interference graph. -void RegisterAllocatorGraphColor::BuildInterferenceGraph( +// Returns true if `in_node` represents an input interval of `out_node`, and the output interval +// is allowed to have the same register as the input interval. +// TODO: Ideally we should just produce correct intervals in liveness analysis. +// We would need to refactor the current live interval layout to do so, which is +// no small task. 
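// Editorial illustration (not part of the patch): a typical case this catches is an arithmetic
// instruction whose input has its last use at the instruction itself. If that input interval
// dies at the definition point and has no sibling starting immediately afterwards, the output
// may safely be assigned the same register, so no interference edge needs to be added.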
+static bool CheckInputOutputCanOverlap(InterferenceNode* in_node, InterferenceNode* out_node) { + LiveInterval* output_interval = out_node->GetInterval(); + HInstruction* defined_by = output_interval->GetDefinedBy(); + if (defined_by == nullptr) { + // This must not be a definition point. + return false; + } + + LocationSummary* locations = defined_by->GetLocations(); + if (locations->OutputCanOverlapWithInputs()) { + // This instruction does not allow the output to reuse a register from an input. + return false; + } + + LiveInterval* input_interval = in_node->GetInterval(); + LiveInterval* next_sibling = input_interval->GetNextSibling(); + size_t def_position = defined_by->GetLifetimePosition(); + size_t use_position = def_position + 1; + if (next_sibling != nullptr && next_sibling->GetStart() == use_position) { + // The next sibling starts at the use position, so reusing the input register in the output + // would clobber the input before it's moved into the sibling interval location. + return false; + } + + if (!input_interval->IsDeadAt(use_position) && input_interval->CoversSlow(use_position)) { + // The input interval is live after the use position. + return false; + } + + HInputsRef inputs = defined_by->GetInputs(); + for (size_t i = 0; i < inputs.size(); ++i) { + if (inputs[i]->GetLiveInterval()->GetSiblingAt(def_position) == input_interval) { + DCHECK(input_interval->SameRegisterKind(*output_interval)); + return true; + } + } + + // The input interval was not an input for this instruction. + return false; +} + +void ColoringIteration::BuildInterferenceGraph( const ArenaVector<LiveInterval*>& intervals, - ArenaVector<InterferenceNode*>* prunable_nodes, + const ArenaVector<InterferenceNode*>& physical_nodes, ArenaVector<InterferenceNode*>* safepoints) { - size_t interval_id_counter = 0; - + DCHECK(interval_node_map_.Empty() && prunable_nodes_.empty()); // Build the interference graph efficiently by ordering range endpoints // by position and doing a linear sweep to find interferences. (That is, we // jump from endpoint to endpoint, maintaining a set of intervals live at each @@ -701,21 +1202,34 @@ void RegisterAllocatorGraphColor::BuildInterferenceGraph( // For simplicity, we create a tuple for each endpoint, and then sort the tuples. // Tuple contents: (position, is_range_beginning, node). ArenaVector<std::tuple<size_t, bool, InterferenceNode*>> range_endpoints( - coloring_attempt_allocator_->Adapter(kArenaAllocRegisterAllocator)); + allocator_->Adapter(kArenaAllocRegisterAllocator)); + + // We reserve plenty of space to avoid excessive copying. + range_endpoints.reserve(4 * prunable_nodes_.size()); + for (LiveInterval* parent : intervals) { for (LiveInterval* sibling = parent; sibling != nullptr; sibling = sibling->GetNextSibling()) { LiveRange* range = sibling->GetFirstRange(); if (range != nullptr) { - InterferenceNode* node = new (coloring_attempt_allocator_) InterferenceNode( - coloring_attempt_allocator_, sibling, interval_id_counter++); + InterferenceNode* node = new (allocator_) InterferenceNode( + allocator_, sibling, register_allocator_->liveness_); + interval_node_map_.Insert(std::make_pair(sibling, node)); + if (sibling->HasRegister()) { - // Fixed nodes will never be pruned, so no need to keep track of them. + // Fixed nodes should alias the canonical node for the corresponding register. 
+ node->stage = NodeStage::kPrecolored; + InterferenceNode* physical_node = physical_nodes[sibling->GetRegister()]; + node->SetAlias(physical_node); + DCHECK_EQ(node->GetInterval()->GetRegister(), + physical_node->GetInterval()->GetRegister()); } else if (sibling->IsSlowPathSafepoint()) { // Safepoint intervals are synthesized to count max live registers. // They will be processed separately after coloring. + node->stage = NodeStage::kSafepoint; safepoints->push_back(node); } else { - prunable_nodes->push_back(node); + node->stage = NodeStage::kPrunable; + prunable_nodes_.push_back(node); } while (range != nullptr) { @@ -728,11 +1242,18 @@ void RegisterAllocatorGraphColor::BuildInterferenceGraph( } // Sort the endpoints. - std::sort(range_endpoints.begin(), range_endpoints.end()); + // We explicitly ignore the third entry of each tuple (the node pointer) in order + // to maintain determinism. + std::sort(range_endpoints.begin(), range_endpoints.end(), + [] (const std::tuple<size_t, bool, InterferenceNode*>& lhs, + const std::tuple<size_t, bool, InterferenceNode*>& rhs) { + return std::tie(std::get<0>(lhs), std::get<1>(lhs)) + < std::tie(std::get<0>(rhs), std::get<1>(rhs)); + }); // Nodes live at the current position in the linear sweep. - ArenaSet<InterferenceNode*, decltype(&InterferenceNode::CmpPtr)> live( - InterferenceNode::CmpPtr, coloring_attempt_allocator_->Adapter(kArenaAllocRegisterAllocator)); + ArenaVector<InterferenceNode*> live( + allocator_->Adapter(kArenaAllocRegisterAllocator)); // Linear sweep. When we encounter the beginning of a range, we add the corresponding node to the // live set. When we encounter the end of a range, we remove the corresponding node @@ -740,131 +1261,505 @@ void RegisterAllocatorGraphColor::BuildInterferenceGraph( for (auto it = range_endpoints.begin(); it != range_endpoints.end(); ++it) { bool is_range_beginning; InterferenceNode* node; + size_t position; // Extract information from the tuple, including the node this tuple represents. - std::tie(std::ignore, is_range_beginning, node) = *it; + std::tie(position, is_range_beginning, node) = *it; if (is_range_beginning) { + bool guaranteed_not_interfering_yet = position == node->GetInterval()->GetStart(); for (InterferenceNode* conflicting : live) { DCHECK_NE(node, conflicting); - AddPotentialInterference(node, conflicting); - AddPotentialInterference(conflicting, node); + if (CheckInputOutputCanOverlap(conflicting, node)) { + // We do not add an interference, because the instruction represented by `node` allows + // its output to share a register with an input, represented here by `conflicting`. + } else { + AddPotentialInterference(node, conflicting, guaranteed_not_interfering_yet); + } } - DCHECK_EQ(live.count(node), 0u); - live.insert(node); + DCHECK(std::find(live.begin(), live.end(), node) == live.end()); + live.push_back(node); } else { // End of range. - DCHECK_EQ(live.count(node), 1u); - live.erase(node); + auto live_it = std::find(live.begin(), live.end(), node); + DCHECK(live_it != live.end()); + live.erase(live_it); } } DCHECK(live.empty()); } -// The order in which we color nodes is vital to both correctness (forward -// progress) and code quality. Specifically, we must prioritize intervals -// that require registers, and after that we must prioritize short intervals. -// That way, if we fail to color a node, it either won't require a register, -// or it will be a long interval that can be split in order to make the -// interference graph sparser. 
-// TODO: May also want to consider: -// - Loop depth -// - Constants (since they can be rematerialized) -// - Allocated spill slots -static bool GreaterNodePriority(const InterferenceNode* lhs, - const InterferenceNode* rhs) { - LiveInterval* lhs_interval = lhs->GetInterval(); - LiveInterval* rhs_interval = rhs->GetInterval(); +void ColoringIteration::CreateCoalesceOpportunity(InterferenceNode* a, + InterferenceNode* b, + CoalesceKind kind, + size_t position) { + DCHECK_EQ(a->IsPair(), b->IsPair()) + << "Nodes of different memory widths should never be coalesced"; + CoalesceOpportunity* opportunity = + new (allocator_) CoalesceOpportunity(a, b, kind, position, register_allocator_->liveness_); + a->AddCoalesceOpportunity(opportunity); + b->AddCoalesceOpportunity(opportunity); + coalesce_worklist_.push(opportunity); +} - // (1) Choose the interval that requires a register. - if (lhs_interval->RequiresRegister() != rhs_interval->RequiresRegister()) { - return lhs_interval->RequiresRegister(); - } +// When looking for coalesce opportunities, we use the interval_node_map_ to find the node +// corresponding to an interval. Note that not all intervals are in this map, notably the parents +// of constants and stack arguments. (However, these interval should not be involved in coalesce +// opportunities anyway, because they're not going to be in registers.) +void ColoringIteration::FindCoalesceOpportunities() { + DCHECK(coalesce_worklist_.empty()); - // (2) Choose the interval that has a shorter life span. - if (lhs_interval->GetLength() != rhs_interval->GetLength()) { - return lhs_interval->GetLength() < rhs_interval->GetLength(); - } + for (InterferenceNode* node : prunable_nodes_) { + LiveInterval* interval = node->GetInterval(); + + // Coalesce siblings. + LiveInterval* next_sibling = interval->GetNextSibling(); + if (next_sibling != nullptr && interval->GetEnd() == next_sibling->GetStart()) { + auto it = interval_node_map_.Find(next_sibling); + if (it != interval_node_map_.end()) { + InterferenceNode* sibling_node = it->second; + CreateCoalesceOpportunity(node, + sibling_node, + CoalesceKind::kAdjacentSibling, + interval->GetEnd()); + } + } + + // Coalesce fixed outputs with this interval if this interval is an adjacent sibling. + LiveInterval* parent = interval->GetParent(); + if (parent->HasRegister() + && parent->GetNextSibling() == interval + && parent->GetEnd() == interval->GetStart()) { + auto it = interval_node_map_.Find(parent); + if (it != interval_node_map_.end()) { + InterferenceNode* parent_node = it->second; + CreateCoalesceOpportunity(node, + parent_node, + CoalesceKind::kFixedOutputSibling, + parent->GetEnd()); + } + } + + // Try to prevent moves across blocks. + // Note that this does not lead to many succeeding coalesce attempts, so could be removed + // if found to add to compile time. + const SsaLivenessAnalysis& liveness = register_allocator_->liveness_; + if (interval->IsSplit() && liveness.IsAtBlockBoundary(interval->GetStart() / 2)) { + // If the start of this interval is at a block boundary, we look at the + // location of the interval in blocks preceding the block this interval + // starts at. This can avoid a move between the two blocks. 
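// Editorial illustration (not part of the patch): if a split sibling begins exactly at the
// start of block B, and the parent interval already has a sibling live at the end of a
// predecessor of B, coalescing the two lets them share a register and removes the move that
// would otherwise be inserted on that control-flow edge.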
+ HBasicBlock* block = liveness.GetBlockFromPosition(interval->GetStart() / 2); + for (HBasicBlock* predecessor : block->GetPredecessors()) { + size_t position = predecessor->GetLifetimeEnd() - 1; + LiveInterval* existing = interval->GetParent()->GetSiblingAt(position); + if (existing != nullptr) { + auto it = interval_node_map_.Find(existing); + if (it != interval_node_map_.end()) { + InterferenceNode* existing_node = it->second; + CreateCoalesceOpportunity(node, + existing_node, + CoalesceKind::kNonlinearControlFlow, + position); + } + } + } + } + + // Coalesce phi inputs with the corresponding output. + HInstruction* defined_by = interval->GetDefinedBy(); + if (defined_by != nullptr && defined_by->IsPhi()) { + const ArenaVector<HBasicBlock*>& predecessors = defined_by->GetBlock()->GetPredecessors(); + HInputsRef inputs = defined_by->GetInputs(); + + for (size_t i = 0, e = inputs.size(); i < e; ++i) { + // We want the sibling at the end of the appropriate predecessor block. + size_t position = predecessors[i]->GetLifetimeEnd() - 1; + LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(position); + + auto it = interval_node_map_.Find(input_interval); + if (it != interval_node_map_.end()) { + InterferenceNode* input_node = it->second; + CreateCoalesceOpportunity(node, input_node, CoalesceKind::kPhi, position); + } + } + } + + // Coalesce output with first input when policy is kSameAsFirstInput. + if (defined_by != nullptr) { + Location out = defined_by->GetLocations()->Out(); + if (out.IsUnallocated() && out.GetPolicy() == Location::kSameAsFirstInput) { + LiveInterval* input_interval + = defined_by->InputAt(0)->GetLiveInterval()->GetSiblingAt(interval->GetStart() - 1); + // TODO: Could we consider lifetime holes here? + if (input_interval->GetEnd() == interval->GetStart()) { + auto it = interval_node_map_.Find(input_interval); + if (it != interval_node_map_.end()) { + InterferenceNode* input_node = it->second; + CreateCoalesceOpportunity(node, + input_node, + CoalesceKind::kFirstInput, + interval->GetStart()); + } + } + } + } + + // An interval that starts an instruction (that is, it is not split), may + // re-use the registers used by the inputs of that instruction, based on the + // location summary. + if (defined_by != nullptr) { + DCHECK(!interval->IsSplit()); + LocationSummary* locations = defined_by->GetLocations(); + if (!locations->OutputCanOverlapWithInputs()) { + HInputsRef inputs = defined_by->GetInputs(); + for (size_t i = 0; i < inputs.size(); ++i) { + size_t def_point = defined_by->GetLifetimePosition(); + // TODO: Getting the sibling at the def_point might not be quite what we want + // for fixed inputs, since the use will be *at* the def_point rather than after. + LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(def_point); + if (input_interval != nullptr && + input_interval->HasHighInterval() == interval->HasHighInterval()) { + auto it = interval_node_map_.Find(input_interval); + if (it != interval_node_map_.end()) { + InterferenceNode* input_node = it->second; + CreateCoalesceOpportunity(node, + input_node, + CoalesceKind::kAnyInput, + interval->GetStart()); + } + } + } + } + } + + // Try to prevent moves into fixed input locations. + UsePosition* use = interval->GetFirstUse(); + for (; use != nullptr && use->GetPosition() <= interval->GetStart(); use = use->GetNext()) { + // Skip past uses before the start of this interval. 
+ } + for (; use != nullptr && use->GetPosition() <= interval->GetEnd(); use = use->GetNext()) { + HInstruction* user = use->GetUser(); + if (user == nullptr) { + // User may be null for certain intervals, such as temp intervals. + continue; + } + LocationSummary* locations = user->GetLocations(); + Location input = locations->InAt(use->GetInputIndex()); + if (input.IsRegister() || input.IsFpuRegister()) { + // TODO: Could try to handle pair interval too, but coalescing with fixed pair nodes + // is currently not supported. + InterferenceNode* fixed_node = input.IsRegister() + ? register_allocator_->physical_core_nodes_[input.reg()] + : register_allocator_->physical_fp_nodes_[input.reg()]; + CreateCoalesceOpportunity(node, + fixed_node, + CoalesceKind::kFixedInput, + user->GetLifetimePosition()); + } + } + } // for node in prunable_nodes +} - // (3) Just choose the interval based on a deterministic ordering. - return InterferenceNode::CmpPtr(lhs, rhs); +static bool IsLowDegreeNode(InterferenceNode* node, size_t num_regs) { + return node->GetOutDegree() < num_regs; } -void RegisterAllocatorGraphColor::PruneInterferenceGraph( - const ArenaVector<InterferenceNode*>& prunable_nodes, - size_t num_regs, - ArenaStdStack<InterferenceNode*>* pruned_nodes) { +static bool IsHighDegreeNode(InterferenceNode* node, size_t num_regs) { + return !IsLowDegreeNode(node, num_regs); +} + +void ColoringIteration::PruneInterferenceGraph() { + DCHECK(pruned_nodes_.empty() + && simplify_worklist_.empty() + && freeze_worklist_.empty() + && spill_worklist_.empty()); // When pruning the graph, we refer to nodes with degree less than num_regs as low degree nodes, // and all others as high degree nodes. The distinction is important: low degree nodes are // guaranteed a color, while high degree nodes are not. - // Low-degree nodes are guaranteed a color, so worklist order does not matter. - ArenaDeque<InterferenceNode*> low_degree_worklist( - coloring_attempt_allocator_->Adapter(kArenaAllocRegisterAllocator)); - - // If we have to prune from the high-degree worklist, we cannot guarantee - // the pruned node a color. So, we order the worklist by priority. - ArenaSet<InterferenceNode*, decltype(&GreaterNodePriority)> high_degree_worklist( - GreaterNodePriority, coloring_attempt_allocator_->Adapter(kArenaAllocRegisterAllocator)); - - // Build worklists. - for (InterferenceNode* node : prunable_nodes) { - DCHECK(!node->GetInterval()->HasRegister()) - << "Fixed nodes should never be pruned"; - DCHECK(!node->GetInterval()->IsSlowPathSafepoint()) - << "Safepoint nodes should never be pruned"; - if (node->GetOutDegree() < num_regs) { - low_degree_worklist.push_back(node); - } else { - high_degree_worklist.insert(node); - } - } - - // Helper function to prune an interval from the interference graph, - // which includes updating the worklists. - auto prune_node = [this, - num_regs, - &pruned_nodes, - &low_degree_worklist, - &high_degree_worklist] (InterferenceNode* node) { - DCHECK(!node->GetInterval()->HasRegister()); - pruned_nodes->push(node); - for (InterferenceNode* adjacent : node->GetAdjacentNodes()) { - DCHECK(!adjacent->GetInterval()->IsSlowPathSafepoint()) - << "Nodes should never interfere with synthesized safepoint nodes"; - if (adjacent->GetInterval()->HasRegister()) { - // No effect on pre-colored nodes; they're never pruned. + // Build worklists. Note that the coalesce worklist has already been + // filled by FindCoalesceOpportunities(). 
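// Editorial summary (not part of the patch): the classification below follows the textbook
// scheme. A low-degree node with no associated coalesce opportunities is trivially colorable
// and goes to the simplify worklist; a low-degree, move-related node waits in the freeze
// worklist so coalescing can be attempted first; every high-degree node goes to the spill
// worklist, from which nodes are pruned lowest-priority first, since a node pruned earlier is
// more likely to end up spilled.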
+ for (InterferenceNode* node : prunable_nodes_) { + DCHECK(!node->IsPrecolored()) << "Fixed nodes should never be pruned"; + DCHECK(!node->GetInterval()->IsSlowPathSafepoint()) << "Safepoint nodes should never be pruned"; + if (IsLowDegreeNode(node, num_regs_)) { + if (node->GetCoalesceOpportunities().empty()) { + // Simplify Worklist. + node->stage = NodeStage::kSimplifyWorklist; + simplify_worklist_.push_back(node); } else { - bool was_high_degree = adjacent->GetOutDegree() >= num_regs; - DCHECK(adjacent->ContainsInterference(node)) - << "Missing incoming interference edge from non-fixed node"; - adjacent->RemoveInterference(node); - if (was_high_degree && adjacent->GetOutDegree() < num_regs) { - // This is a transition from high degree to low degree. - DCHECK_EQ(high_degree_worklist.count(adjacent), 1u); - high_degree_worklist.erase(adjacent); - low_degree_worklist.push_back(adjacent); - } + // Freeze Worklist. + node->stage = NodeStage::kFreezeWorklist; + freeze_worklist_.push_back(node); } + } else { + // Spill worklist. + node->stage = NodeStage::kSpillWorklist; + spill_worklist_.push(node); } - }; + } // Prune graph. - while (!low_degree_worklist.empty() || !high_degree_worklist.empty()) { - while (!low_degree_worklist.empty()) { - InterferenceNode* node = low_degree_worklist.front(); - // TODO: pop_back() should work as well, but it doesn't; we get a + // Note that we do not remove a node from its current worklist if it moves to another, so it may + // be in multiple worklists at once; the node's `phase` says which worklist it is really in. + while (true) { + if (!simplify_worklist_.empty()) { + // Prune low-degree nodes. + // TODO: pop_back() should work as well, but it didn't; we get a // failed check while pruning. We should look into this. - low_degree_worklist.pop_front(); - prune_node(node); - } - if (!high_degree_worklist.empty()) { - // We prune the lowest-priority node, because pruning a node earlier + InterferenceNode* node = simplify_worklist_.front(); + simplify_worklist_.pop_front(); + DCHECK_EQ(node->stage, NodeStage::kSimplifyWorklist) << "Cannot move from simplify list"; + DCHECK_LT(node->GetOutDegree(), num_regs_) << "Nodes in simplify list should be low degree"; + DCHECK(!node->IsMoveRelated()) << "Nodes in simplify list should not be move related"; + PruneNode(node); + } else if (!coalesce_worklist_.empty()) { + // Coalesce. + CoalesceOpportunity* opportunity = coalesce_worklist_.top(); + coalesce_worklist_.pop(); + if (opportunity->stage == CoalesceStage::kWorklist) { + Coalesce(opportunity); + } + } else if (!freeze_worklist_.empty()) { + // Freeze moves and prune a low-degree move-related node. + InterferenceNode* node = freeze_worklist_.front(); + freeze_worklist_.pop_front(); + if (node->stage == NodeStage::kFreezeWorklist) { + DCHECK_LT(node->GetOutDegree(), num_regs_) << "Nodes in freeze list should be low degree"; + DCHECK(node->IsMoveRelated()) << "Nodes in freeze list should be move related"; + FreezeMoves(node); + PruneNode(node); + } + } else if (!spill_worklist_.empty()) { + // We spill the lowest-priority node, because pruning a node earlier // gives it a higher chance of being spilled. 
- InterferenceNode* node = *high_degree_worklist.rbegin(); - high_degree_worklist.erase(node); - prune_node(node); + InterferenceNode* node = spill_worklist_.top(); + spill_worklist_.pop(); + if (node->stage == NodeStage::kSpillWorklist) { + DCHECK_GE(node->GetOutDegree(), num_regs_) << "Nodes in spill list should be high degree"; + FreezeMoves(node); + PruneNode(node); + } + } else { + // Pruning complete. + break; + } + } + DCHECK_EQ(prunable_nodes_.size(), pruned_nodes_.size()); +} + +void ColoringIteration::EnableCoalesceOpportunities(InterferenceNode* node) { + for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) { + if (opportunity->stage == CoalesceStage::kActive) { + opportunity->stage = CoalesceStage::kWorklist; + coalesce_worklist_.push(opportunity); + } + } +} + +void ColoringIteration::PruneNode(InterferenceNode* node) { + DCHECK_NE(node->stage, NodeStage::kPruned); + DCHECK(!node->IsPrecolored()); + node->stage = NodeStage::kPruned; + pruned_nodes_.push(node); + + for (InterferenceNode* adj : node->GetAdjacentNodes()) { + DCHECK(!adj->GetInterval()->IsSlowPathSafepoint()) + << "Nodes should never interfere with synthesized safepoint nodes"; + DCHECK_NE(adj->stage, NodeStage::kPruned) << "Should be no interferences with pruned nodes"; + + if (adj->IsPrecolored()) { + // No effect on pre-colored nodes; they're never pruned. + } else { + // Remove the interference. + bool was_high_degree = IsHighDegreeNode(adj, num_regs_); + DCHECK(adj->ContainsInterference(node)) + << "Missing reflexive interference from non-fixed node"; + adj->RemoveInterference(node); + + // Handle transitions from high degree to low degree. + if (was_high_degree && IsLowDegreeNode(adj, num_regs_)) { + EnableCoalesceOpportunities(adj); + for (InterferenceNode* adj_adj : adj->GetAdjacentNodes()) { + EnableCoalesceOpportunities(adj_adj); + } + + DCHECK_EQ(adj->stage, NodeStage::kSpillWorklist); + if (adj->IsMoveRelated()) { + adj->stage = NodeStage::kFreezeWorklist; + freeze_worklist_.push_back(adj); + } else { + adj->stage = NodeStage::kSimplifyWorklist; + simplify_worklist_.push_back(adj); + } + } + } + } +} + +void ColoringIteration::CheckTransitionFromFreezeWorklist(InterferenceNode* node) { + if (IsLowDegreeNode(node, num_regs_) && !node->IsMoveRelated()) { + DCHECK_EQ(node->stage, NodeStage::kFreezeWorklist); + node->stage = NodeStage::kSimplifyWorklist; + simplify_worklist_.push_back(node); + } +} + +void ColoringIteration::FreezeMoves(InterferenceNode* node) { + for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) { + if (opportunity->stage == CoalesceStage::kDefunct) { + // Constrained moves should remain constrained, since they will not be considered + // during last-chance coalescing. + } else { + opportunity->stage = CoalesceStage::kInactive; + } + InterferenceNode* other = opportunity->node_a->GetAlias() == node + ? opportunity->node_b->GetAlias() + : opportunity->node_a->GetAlias(); + if (other != node && other->stage == NodeStage::kFreezeWorklist) { + DCHECK(IsLowDegreeNode(node, num_regs_)); + CheckTransitionFromFreezeWorklist(other); + } + } +} + +bool ColoringIteration::PrecoloredHeuristic(InterferenceNode* from, + InterferenceNode* into) { + if (!into->IsPrecolored()) { + // The uncolored heuristic will cover this case. 
+ return false; + } + if (from->IsPair() || into->IsPair()) { + // TODO: Merging from a pair node is currently not supported, since fixed pair nodes + // are currently represented as two single fixed nodes in the graph, and `into` is + // only one of them. (We may lose the implicit connections to the second one in a merge.) + return false; + } + + // If all adjacent nodes of `from` are "ok", then we can conservatively merge with `into`. + // Reasons an adjacent node `adj` can be "ok": + // (1) If `adj` is low degree, interference with `into` will not affect its existing + // colorable guarantee. (Notice that coalescing cannot increase its degree.) + // (2) If `adj` is pre-colored, it already interferes with `into`. See (3). + // (3) If there's already an interference with `into`, coalescing will not add interferences. + for (InterferenceNode* adj : from->GetAdjacentNodes()) { + if (IsLowDegreeNode(adj, num_regs_) || adj->IsPrecolored() || adj->ContainsInterference(into)) { + // Ok. + } else { + return false; + } + } + return true; +} + +bool ColoringIteration::UncoloredHeuristic(InterferenceNode* from, + InterferenceNode* into) { + if (into->IsPrecolored()) { + // The pre-colored heuristic will handle this case. + return false; + } + + // Arbitrary cap to improve compile time. Tests show that this has negligible affect + // on generated code. + if (from->GetOutDegree() + into->GetOutDegree() > 2 * num_regs_) { + return false; + } + + // It's safe to coalesce two nodes if the resulting node has fewer than `num_regs` neighbors + // of high degree. (Low degree neighbors can be ignored, because they will eventually be + // pruned from the interference graph in the simplify stage.) + size_t high_degree_interferences = 0; + for (InterferenceNode* adj : from->GetAdjacentNodes()) { + if (IsHighDegreeNode(adj, num_regs_)) { + high_degree_interferences += from->EdgeWeightWith(adj); + } + } + for (InterferenceNode* adj : into->GetAdjacentNodes()) { + if (IsHighDegreeNode(adj, num_regs_)) { + if (from->ContainsInterference(adj)) { + // We've already counted this adjacent node. + // Furthermore, its degree will decrease if coalescing succeeds. Thus, it's possible that + // we should not have counted it at all. (This extends the textbook Briggs coalescing test, + // but remains conservative.) + if (adj->GetOutDegree() - into->EdgeWeightWith(adj) < num_regs_) { + high_degree_interferences -= from->EdgeWeightWith(adj); + } + } else { + high_degree_interferences += into->EdgeWeightWith(adj); + } + } + } + + return high_degree_interferences < num_regs_; +} + +void ColoringIteration::Combine(InterferenceNode* from, + InterferenceNode* into) { + from->SetAlias(into); + + // Add interferences. + for (InterferenceNode* adj : from->GetAdjacentNodes()) { + bool was_low_degree = IsLowDegreeNode(adj, num_regs_); + AddPotentialInterference(adj, into, /*guaranteed_not_interfering_yet*/ false); + if (was_low_degree && IsHighDegreeNode(adj, num_regs_)) { + // This is a (temporary) transition to a high degree node. Its degree will decrease again + // when we prune `from`, but it's best to be consistent about the current worklist. + adj->stage = NodeStage::kSpillWorklist; + spill_worklist_.push(adj); + } + } + + // Add coalesce opportunities. + for (CoalesceOpportunity* opportunity : from->GetCoalesceOpportunities()) { + if (opportunity->stage != CoalesceStage::kDefunct) { + into->AddCoalesceOpportunity(opportunity); } } + EnableCoalesceOpportunities(from); + + // Prune and update worklists. 
+  PruneNode(from);
+  if (IsLowDegreeNode(into, num_regs_)) {
+    // Coalesce(...) takes care of checking for a transition to the simplify worklist.
+    DCHECK_EQ(into->stage, NodeStage::kFreezeWorklist);
+  } else if (into->stage == NodeStage::kFreezeWorklist) {
+    // This is a transition to a high degree node.
+    into->stage = NodeStage::kSpillWorklist;
+    spill_worklist_.push(into);
+  } else {
+    DCHECK(into->stage == NodeStage::kSpillWorklist || into->stage == NodeStage::kPrecolored);
+  }
+}
+
+void ColoringIteration::Coalesce(CoalesceOpportunity* opportunity) {
+  InterferenceNode* from = opportunity->node_a->GetAlias();
+  InterferenceNode* into = opportunity->node_b->GetAlias();
+  DCHECK_NE(from->stage, NodeStage::kPruned);
+  DCHECK_NE(into->stage, NodeStage::kPruned);
+
+  if (from->IsPrecolored()) {
+    // If we have one pre-colored node, make sure it's the `into` node.
+    std::swap(from, into);
+  }
+
+  if (from == into) {
+    // These nodes have already been coalesced.
+    opportunity->stage = CoalesceStage::kDefunct;
+    CheckTransitionFromFreezeWorklist(from);
+  } else if (from->IsPrecolored() || from->ContainsInterference(into)) {
+    // These nodes interfere.
+    opportunity->stage = CoalesceStage::kDefunct;
+    CheckTransitionFromFreezeWorklist(from);
+    CheckTransitionFromFreezeWorklist(into);
+  } else if (PrecoloredHeuristic(from, into)
+          || UncoloredHeuristic(from, into)) {
+    // We can coalesce these nodes.
+    opportunity->stage = CoalesceStage::kDefunct;
+    Combine(from, into);
+    CheckTransitionFromFreezeWorklist(into);
+  } else {
+    // We cannot coalesce, but we may be able to later.
+    opportunity->stage = CoalesceStage::kActive;
+  }
 }
 
 // Build a mask with a bit set for each register assigned to some
@@ -888,35 +1783,115 @@ static std::bitset<kMaxNumRegs> BuildConflictMask(Container& intervals) {
   return conflict_mask;
 }
 
-bool RegisterAllocatorGraphColor::ColorInterferenceGraph(
-    ArenaStdStack<InterferenceNode*>* pruned_nodes,
-    size_t num_regs) {
-  DCHECK_LE(num_regs, kMaxNumRegs) << "kMaxNumRegs is too small";
+bool RegisterAllocatorGraphColor::IsCallerSave(size_t reg, bool processing_core_regs) {
+  return processing_core_regs
+      ? !codegen_->IsCoreCalleeSaveRegister(reg)
+      : !codegen_->IsFloatingPointCalleeSaveRegister(reg);
+}
+
+static bool RegisterIsAligned(size_t reg) {
+  return reg % 2 == 0;
+}
+
+static size_t FindFirstZeroInConflictMask(std::bitset<kMaxNumRegs> conflict_mask) {
+  // We use CTZ (count trailing zeros) to quickly find the lowest 0 bit.
+  // Note that CTZ is undefined if all bits are 0, so we special-case it.
+  return conflict_mask.all() ? conflict_mask.size() : CTZ(~conflict_mask.to_ulong());
+}
+
+bool ColoringIteration::ColorInterferenceGraph() {
+  DCHECK_LE(num_regs_, kMaxNumRegs) << "kMaxNumRegs is too small";
   ArenaVector<LiveInterval*> colored_intervals(
-      coloring_attempt_allocator_->Adapter(kArenaAllocRegisterAllocator));
+      allocator_->Adapter(kArenaAllocRegisterAllocator));
   bool successful = true;
 
-  while (!pruned_nodes->empty()) {
-    InterferenceNode* node = pruned_nodes->top();
-    pruned_nodes->pop();
+  while (!pruned_nodes_.empty()) {
+    InterferenceNode* node = pruned_nodes_.top();
+    pruned_nodes_.pop();
     LiveInterval* interval = node->GetInterval();
-
-    // Search for free register(s).
-    // Note that the graph coloring allocator assumes that pair intervals are aligned here,
-    // excluding pre-colored pair intervals (which can currently be unaligned on x86).
- std::bitset<kMaxNumRegs> conflict_mask = BuildConflictMask(node->GetAdjacentNodes()); size_t reg = 0; - if (interval->HasHighInterval()) { - while (reg < num_regs - 1 && (conflict_mask[reg] || conflict_mask[reg + 1])) { - reg += 2; + + InterferenceNode* alias = node->GetAlias(); + if (alias != node) { + // This node was coalesced with another. + LiveInterval* alias_interval = alias->GetInterval(); + if (alias_interval->HasRegister()) { + reg = alias_interval->GetRegister(); + DCHECK(!BuildConflictMask(node->GetAdjacentNodes())[reg]) + << "This node conflicts with the register it was coalesced with"; + } else { + DCHECK(false) << node->GetOutDegree() << " " << alias->GetOutDegree() << " " + << "Move coalescing was not conservative, causing a node to be coalesced " + << "with another node that could not be colored"; + if (interval->RequiresRegister()) { + successful = false; + } } } else { - // We use CTZ (count trailing zeros) to quickly find the lowest available register. - // Note that CTZ is undefined for 0, so we special-case it. - reg = conflict_mask.all() ? conflict_mask.size() : CTZ(~conflict_mask.to_ulong()); + // Search for free register(s). + std::bitset<kMaxNumRegs> conflict_mask = BuildConflictMask(node->GetAdjacentNodes()); + if (interval->HasHighInterval()) { + // Note that the graph coloring allocator assumes that pair intervals are aligned here, + // excluding pre-colored pair intervals (which can currently be unaligned on x86). If we + // change the alignment requirements here, we will have to update the algorithm (e.g., + // be more conservative about the weight of edges adjacent to pair nodes.) + while (reg < num_regs_ - 1 && (conflict_mask[reg] || conflict_mask[reg + 1])) { + reg += 2; + } + + // Try to use a caller-save register first. + for (size_t i = 0; i < num_regs_ - 1; i += 2) { + bool low_caller_save = register_allocator_->IsCallerSave(i, processing_core_regs_); + bool high_caller_save = register_allocator_->IsCallerSave(i + 1, processing_core_regs_); + if (!conflict_mask[i] && !conflict_mask[i + 1]) { + if (low_caller_save && high_caller_save) { + reg = i; + break; + } else if (low_caller_save || high_caller_save) { + reg = i; + // Keep looking to try to get both parts in caller-save registers. + } + } + } + } else { + // Not a pair interval. + reg = FindFirstZeroInConflictMask(conflict_mask); + + // Try to use caller-save registers first. + for (size_t i = 0; i < num_regs_; ++i) { + if (!conflict_mask[i] && register_allocator_->IsCallerSave(i, processing_core_regs_)) { + reg = i; + break; + } + } + } + + // Last-chance coalescing. + for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) { + if (opportunity->stage == CoalesceStage::kDefunct) { + continue; + } + LiveInterval* other_interval = opportunity->node_a->GetAlias() == node + ? opportunity->node_b->GetAlias()->GetInterval() + : opportunity->node_a->GetAlias()->GetInterval(); + if (other_interval->HasRegister()) { + size_t coalesce_register = other_interval->GetRegister(); + if (interval->HasHighInterval()) { + if (!conflict_mask[coalesce_register] && + !conflict_mask[coalesce_register + 1] && + RegisterIsAligned(coalesce_register)) { + reg = coalesce_register; + break; + } + } else if (!conflict_mask[coalesce_register]) { + reg = coalesce_register; + break; + } + } + } } - if (reg < (interval->HasHighInterval() ? num_regs - 1 : num_regs)) { + if (reg < (interval->HasHighInterval() ? num_regs_ - 1 : num_regs_)) { // Assign register. 
DCHECK(!interval->HasRegister()); interval->SetRegister(reg); @@ -930,12 +1905,12 @@ bool RegisterAllocatorGraphColor::ColorInterferenceGraph( // The interference graph is too dense to color. Make it sparser by // splitting this live interval. successful = false; - SplitAtRegisterUses(interval); + register_allocator_->SplitAtRegisterUses(interval); // We continue coloring, because there may be additional intervals that cannot // be colored, and that we should split. } else { // Spill. - AllocateSpillSlotFor(interval); + register_allocator_->AllocateSpillSlotFor(interval); } } diff --git a/compiler/optimizing/register_allocator_graph_color.h b/compiler/optimizing/register_allocator_graph_color.h index 0b5af96b40..9dddcea685 100644 --- a/compiler/optimizing/register_allocator_graph_color.h +++ b/compiler/optimizing/register_allocator_graph_color.h @@ -34,6 +34,8 @@ class HParallelMove; class Location; class SsaLivenessAnalysis; class InterferenceNode; +struct CoalesceOpportunity; +enum class CoalesceKind; /** * A graph coloring register allocator. @@ -60,6 +62,25 @@ class InterferenceNode; * sparser, so that future coloring attempts may succeed. * - If the node does not require a register, we simply assign it a location on the stack. * + * If iterative move coalescing is enabled, the algorithm also attempts to conservatively + * combine nodes in the graph that would prefer to have the same color. (For example, the output + * of a phi instruction would prefer to have the same register as at least one of its inputs.) + * There are several additional steps involved with this: + * - We look for coalesce opportunities by examining each live interval, a step similar to that + * used by linear scan when looking for register hints. + * - When pruning the graph, we maintain a worklist of coalesce opportunities, as well as a worklist + * of low degree nodes that have associated coalesce opportunities. Only when we run out of + * coalesce opportunities do we start pruning coalesce-associated nodes. + * - When pruning a node, if any nodes transition from high degree to low degree, we add + * associated coalesce opportunities to the worklist, since these opportunities may now succeed. + * - Whether two nodes can be combined is decided by two different heuristics--one used when + * coalescing uncolored nodes, and one used for coalescing an uncolored node with a colored node. + * It is vital that we only combine two nodes if the node that remains is guaranteed to receive + * a color. This is because additionally spilling is more costly than failing to coalesce. + * - Even if nodes are not coalesced while pruning, we keep the coalesce opportunities around + * to be used as last-chance register hints when coloring. If nothing else, we try to use + * caller-save registers before callee-save registers. + * * A good reference for graph coloring register allocation is * "Modern Compiler Implementation in Java" (Andrew W. Appel, 2nd Edition). 
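 *
 * (Editorial note, not part of the original comment: the two combining heuristics mentioned
 * above correspond to the conservative coalescing tests described in that reference. The
 * uncolored/uncolored case is essentially the Briggs test, in which the combined node must be
 * left with fewer than k neighbors of significant degree; the uncolored/pre-colored case is
 * essentially the George test, in which every neighbor of the uncolored node must be low
 * degree, pre-colored, or already interfering with the pre-colored node.)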
*/ @@ -67,7 +88,8 @@ class RegisterAllocatorGraphColor : public RegisterAllocator { public: RegisterAllocatorGraphColor(ArenaAllocator* allocator, CodeGenerator* codegen, - const SsaLivenessAnalysis& analysis); + const SsaLivenessAnalysis& analysis, + bool iterative_move_coalescing = true); ~RegisterAllocatorGraphColor() OVERRIDE {} void AllocateRegisters() OVERRIDE; @@ -116,26 +138,7 @@ class RegisterAllocatorGraphColor : public RegisterAllocator { void BlockRegister(Location location, size_t start, size_t end); void BlockRegisters(size_t start, size_t end, bool caller_save_only = false); - // Use the intervals collected from instructions to construct an - // interference graph mapping intervals to adjacency lists. - // Also, collect synthesized safepoint nodes, used to keep - // track of live intervals across safepoints. - void BuildInterferenceGraph(const ArenaVector<LiveInterval*>& intervals, - ArenaVector<InterferenceNode*>* prunable_nodes, - ArenaVector<InterferenceNode*>* safepoints); - - // Prune nodes from the interference graph to be colored later. Build - // a stack (pruned_nodes) containing these intervals in an order determined - // by various heuristics. - void PruneInterferenceGraph(const ArenaVector<InterferenceNode*>& prunable_nodes, - size_t num_registers, - ArenaStdStack<InterferenceNode*>* pruned_nodes); - - // Process pruned_intervals to color the interference graph, spilling when - // necessary. Return true if successful. Else, split some intervals to make - // the interference graph sparser. - bool ColorInterferenceGraph(ArenaStdStack<InterferenceNode*>* pruned_nodes, - size_t num_registers); + bool IsCallerSave(size_t reg, bool processing_core_regs); // Return the maximum number of registers live at safepoints, // based on the outgoing interference edges of safepoint nodes. @@ -145,6 +148,10 @@ class RegisterAllocatorGraphColor : public RegisterAllocator { // and make sure it's ready to be spilled to the stack. void AllocateSpillSlotFor(LiveInterval* interval); + // Whether iterative move coalescing should be performed. Iterative move coalescing + // improves code quality, but increases compile time. + const bool iterative_move_coalescing_; + // Live intervals, split by kind (core and floating point). // These should not contain high intervals, as those are represented by // the corresponding low interval throughout register allocation. @@ -157,10 +164,10 @@ class RegisterAllocatorGraphColor : public RegisterAllocator { // Safepoints, saved for special handling while processing instructions. ArenaVector<HInstruction*> safepoints_; - // Live intervals for specific registers. These become pre-colored nodes + // Interference nodes representing specific registers. These are "pre-colored" nodes // in the interference graph. - ArenaVector<LiveInterval*> physical_core_intervals_; - ArenaVector<LiveInterval*> physical_fp_intervals_; + ArenaVector<InterferenceNode*> physical_core_nodes_; + ArenaVector<InterferenceNode*> physical_fp_nodes_; // Allocated stack slot counters. size_t int_spill_slot_counter_; @@ -184,10 +191,7 @@ class RegisterAllocatorGraphColor : public RegisterAllocator { size_t max_safepoint_live_core_regs_; size_t max_safepoint_live_fp_regs_; - // An arena allocator used for a single graph coloring attempt. - // Many data structures are cleared between graph coloring attempts, so we reduce - // total memory usage by using a new arena allocator for each attempt. 
- ArenaAllocator* coloring_attempt_allocator_; + friend class ColoringIteration; DISALLOW_COPY_AND_ASSIGN(RegisterAllocatorGraphColor); }; diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h index 346753b775..92788fe6b8 100644 --- a/compiler/optimizing/ssa_liveness_analysis.h +++ b/compiler/optimizing/ssa_liveness_analysis.h @@ -514,7 +514,9 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { // Whether the interval requires a register rather than a stack location. // If needed for performance, this could be cached. - bool RequiresRegister() const { return FirstRegisterUse() != kNoLifetime; } + bool RequiresRegister() const { + return !HasRegister() && FirstRegisterUse() != kNoLifetime; + } size_t FirstUseAfter(size_t position) const { if (is_temp_) { diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h index ff0bbafb9a..86548e153b 100644 --- a/compiler/utils/arm/assembler_arm.h +++ b/compiler/utils/arm/assembler_arm.h @@ -680,6 +680,8 @@ class ArmAssembler : public Assembler { virtual void vpushd(DRegister reg, int nregs, Condition cond = AL) = 0; virtual void vpops(SRegister reg, int nregs, Condition cond = AL) = 0; virtual void vpopd(DRegister reg, int nregs, Condition cond = AL) = 0; + virtual void vldmiad(Register base_reg, DRegister reg, int nregs, Condition cond = AL) = 0; + virtual void vstmiad(Register base_reg, DRegister reg, int nregs, Condition cond = AL) = 0; // Branch instructions. virtual void b(Label* label, Condition cond = AL) = 0; diff --git a/compiler/utils/arm/assembler_arm32.cc b/compiler/utils/arm/assembler_arm32.cc index 6f9d5f32af..b8eb60c387 100644 --- a/compiler/utils/arm/assembler_arm32.cc +++ b/compiler/utils/arm/assembler_arm32.cc @@ -1106,6 +1106,18 @@ void Arm32Assembler::vpopd(DRegister reg, int nregs, Condition cond) { } +void Arm32Assembler::vldmiad(Register, DRegister, int, Condition) { + LOG(FATAL) << "Unimplemented."; + UNREACHABLE(); +} + + +void Arm32Assembler::vstmiad(Register, DRegister, int, Condition) { + LOG(FATAL) << "Unimplemented."; + UNREACHABLE(); +} + + void Arm32Assembler::EmitVPushPop(uint32_t reg, int nregs, bool push, bool dbl, Condition cond) { CHECK_NE(cond, kNoCondition); CHECK_GT(nregs, 0); diff --git a/compiler/utils/arm/assembler_arm32.h b/compiler/utils/arm/assembler_arm32.h index 044eaa1edf..0cb6b171ce 100644 --- a/compiler/utils/arm/assembler_arm32.h +++ b/compiler/utils/arm/assembler_arm32.h @@ -212,6 +212,8 @@ class Arm32Assembler FINAL : public ArmAssembler { void vpushd(DRegister reg, int nregs, Condition cond = AL) OVERRIDE; void vpops(SRegister reg, int nregs, Condition cond = AL) OVERRIDE; void vpopd(DRegister reg, int nregs, Condition cond = AL) OVERRIDE; + void vldmiad(Register base_reg, DRegister reg, int nregs, Condition cond = AL) OVERRIDE; + void vstmiad(Register base_reg, DRegister reg, int nregs, Condition cond = AL) OVERRIDE; // Branch instructions. 
void b(Label* label, Condition cond = AL) OVERRIDE; diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc index ee69698ce8..ebdfc98554 100644 --- a/compiler/utils/arm/assembler_thumb2.cc +++ b/compiler/utils/arm/assembler_thumb2.cc @@ -3020,9 +3020,49 @@ void Thumb2Assembler::vpopd(DRegister reg, int nregs, Condition cond) { } +void Thumb2Assembler::vldmiad(Register base_reg, DRegister reg, int nregs, Condition cond) { + int32_t rest = B23; + EmitVLdmOrStm(rest, + static_cast<uint32_t>(reg), + nregs, + base_reg, + /*is_load*/ true, + /*dbl*/ true, + cond); +} + + +void Thumb2Assembler::vstmiad(Register base_reg, DRegister reg, int nregs, Condition cond) { + int32_t rest = B23; + EmitVLdmOrStm(rest, + static_cast<uint32_t>(reg), + nregs, + base_reg, + /*is_load*/ false, + /*dbl*/ true, + cond); +} + + void Thumb2Assembler::EmitVPushPop(uint32_t reg, int nregs, bool push, bool dbl, Condition cond) { + int32_t rest = B21 | (push ? B24 : B23); + EmitVLdmOrStm(rest, reg, nregs, SP, /*is_load*/ !push, dbl, cond); +} + + +void Thumb2Assembler::EmitVLdmOrStm(int32_t rest, + uint32_t reg, + int nregs, + Register rn, + bool is_load, + bool dbl, + Condition cond) { CheckCondition(cond); + DCHECK_GT(nregs, 0); + DCHECK_LE(reg + nregs, 32u); + DCHECK(!dbl || (nregs <= 16)); + uint32_t D; uint32_t Vd; if (dbl) { @@ -3034,14 +3074,17 @@ void Thumb2Assembler::EmitVPushPop(uint32_t reg, int nregs, bool push, bool dbl, D = reg & 1; Vd = (reg >> 1) & 15U /* 0b1111 */; } - int32_t encoding = B27 | B26 | B21 | B19 | B18 | B16 | - B11 | B9 | - (dbl ? B8 : 0) | - (push ? B24 : (B23 | B20)) | - 14U /* 0b1110 */ << 28 | - nregs << (dbl ? 1 : 0) | - D << 22 | - Vd << 12; + + int32_t encoding = rest | + 14U /* 0b1110 */ << 28 | + B27 | B26 | B11 | B9 | + (is_load ? B20 : 0) | + static_cast<int16_t>(rn) << 16 | + D << 22 | + Vd << 12 | + (dbl ? B8 : 0) | + nregs << (dbl ? 1 : 0); + Emit32(encoding); } diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h index 1c1c98b52b..13f3becb6d 100644 --- a/compiler/utils/arm/assembler_thumb2.h +++ b/compiler/utils/arm/assembler_thumb2.h @@ -258,6 +258,8 @@ class Thumb2Assembler FINAL : public ArmAssembler { void vpushd(DRegister reg, int nregs, Condition cond = AL) OVERRIDE; void vpops(SRegister reg, int nregs, Condition cond = AL) OVERRIDE; void vpopd(DRegister reg, int nregs, Condition cond = AL) OVERRIDE; + void vldmiad(Register base_reg, DRegister reg, int nregs, Condition cond = AL) OVERRIDE; + void vstmiad(Register base_reg, DRegister reg, int nregs, Condition cond = AL) OVERRIDE; // Branch instructions. void b(Label* label, Condition cond = AL); @@ -748,6 +750,14 @@ class Thumb2Assembler FINAL : public ArmAssembler { SRegister sn, SRegister sm); + void EmitVLdmOrStm(int32_t rest, + uint32_t reg, + int nregs, + Register rn, + bool is_load, + bool dbl, + Condition cond); + void EmitVFPddd(Condition cond, int32_t opcode, DRegister dd, diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc index 3ca37145d5..d0799d6112 100644 --- a/compiler/utils/arm/assembler_thumb2_test.cc +++ b/compiler/utils/arm/assembler_thumb2_test.cc @@ -1611,4 +1611,46 @@ TEST_F(AssemblerThumb2Test, LoadFromShiftedRegOffset) { DriverStr(expected, "LoadFromShiftedRegOffset"); } +TEST_F(AssemblerThumb2Test, VStmLdmPushPop) { + // Different D register numbers are used here, to test register encoding. 
+ // Source register number is encoded as M:Vm, destination register number is encoded as D:Vd, + // For source and destination registers which use D0..D15, the M bit and D bit should be 0. + // For source and destination registers which use D16..D32, the M bit and D bit should be 1. + // Different data types (signed and unsigned) are also tested. + __ vstmiad(arm::R0, arm::D0, 4); + __ vldmiad(arm::R1, arm::D9, 5); + __ vpopd(arm::D0, 4); + __ vpushd(arm::D9, 5); + __ vpops(arm::S0, 4); + __ vpushs(arm::S9, 5); + __ vpushs(arm::S16, 5); + __ vpushd(arm::D0, 16); + __ vpushd(arm::D1, 15); + __ vpushd(arm::D8, 16); + __ vpushd(arm::D31, 1); + __ vpushs(arm::S0, 32); + __ vpushs(arm::S1, 31); + __ vpushs(arm::S16, 16); + __ vpushs(arm::S31, 1); + + std::string expected = + "vstmia r0, {d0 - d3}\n" + "vldmia r1, {d9 - d13}\n" + "vpop {d0 - d3}\n" + "vpush {d9 - d13}\n" + "vpop {s0 - s3}\n" + "vpush {s9 - s13}\n" + "vpush {s16 - s20}\n" + "vpush {d0 - d15}\n" + "vpush {d1 - d15}\n" + "vpush {d8 - d23}\n" + "vpush {d31}\n" + "vpush {s0 - s31}\n" + "vpush {s1 - s31}\n" + "vpush {s16 - s31}\n" + "vpush {s31}\n"; + + DriverStr(expected, "VStmLdmPushPop"); +} + } // namespace art diff --git a/compiler/utils/arm/jni_macro_assembler_arm.cc b/compiler/utils/arm/jni_macro_assembler_arm.cc index c03981653e..af5ebb4ce8 100644 --- a/compiler/utils/arm/jni_macro_assembler_arm.cc +++ b/compiler/utils/arm/jni_macro_assembler_arm.cc @@ -243,14 +243,16 @@ void ArmJNIMacroAssembler::CopyRef(FrameOffset dest, FrameOffset src, ManagedReg } void ArmJNIMacroAssembler::LoadRef(ManagedRegister mdest, - ManagedRegister base, + ManagedRegister mbase, MemberOffset offs, bool unpoison_reference) { + ArmManagedRegister base = mbase.AsArm(); ArmManagedRegister dst = mdest.AsArm(); - CHECK(dst.IsCoreRegister() && dst.IsCoreRegister()) << dst; + CHECK(base.IsCoreRegister()) << base; + CHECK(dst.IsCoreRegister()) << dst; __ LoadFromOffset(kLoadWord, dst.AsCoreRegister(), - base.AsArm().AsCoreRegister(), + base.AsCoreRegister(), offs.Int32Value()); if (unpoison_reference) { __ MaybeUnpoisonHeapReference(dst.AsCoreRegister()); @@ -263,13 +265,16 @@ void ArmJNIMacroAssembler::LoadRef(ManagedRegister mdest, FrameOffset src) { __ LoadFromOffset(kLoadWord, dst.AsCoreRegister(), SP, src.Int32Value()); } -void ArmJNIMacroAssembler::LoadRawPtr(ManagedRegister mdest, ManagedRegister base, - Offset offs) { +void ArmJNIMacroAssembler::LoadRawPtr(ManagedRegister mdest, + ManagedRegister mbase, + Offset offs) { + ArmManagedRegister base = mbase.AsArm(); ArmManagedRegister dst = mdest.AsArm(); - CHECK(dst.IsCoreRegister() && dst.IsCoreRegister()) << dst; + CHECK(base.IsCoreRegister()) << base; + CHECK(dst.IsCoreRegister()) << dst; __ LoadFromOffset(kLoadWord, dst.AsCoreRegister(), - base.AsArm().AsCoreRegister(), + base.AsCoreRegister(), offs.Int32Value()); } @@ -530,8 +535,9 @@ void ArmJNIMacroAssembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_nul // TODO: not validating references. 
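[Editor's note] Returning to the Thumb2 EmitVLdmOrStm helper and the VStmLdmPushPop expectations above: for the 64-bit forms the register list is encoded as D:Vd plus an imm8 of 2*nregs; vldmia/vstmia pass rest = B23 (increment-after, no writeback), while vpush/vpop pass B21 for writeback and pick pre-decrement or post-increment via B24/B23. The snippet below re-assembles the same bit fields for "vldmia r1, {d9 - d13}"; the printed constant is my own reading of the encoding and should be checked against a disassembler, not taken as authoritative.

// Assumed re-statement of the double-precision EmitVLdmOrStm encoding; illustrative only.
#include <cstdint>
#include <cstdio>

constexpr uint32_t B8 = 1u << 8, B9 = 1u << 9, B11 = 1u << 11, B20 = 1u << 20,
                   B23 = 1u << 23, B26 = 1u << 26, B27 = 1u << 27;

uint32_t EncodeVLdmOrStmD(uint32_t rest, uint32_t reg, uint32_t nregs, uint32_t rn, bool is_load) {
  uint32_t D = (reg >> 4) & 1;   // top bit of the D register number (assumed split)
  uint32_t Vd = reg & 15u;       // low four bits
  return rest |
         (14u << 28) |           // fixed 0b1110 prefix (cond = AL equivalent)
         B27 | B26 | B11 | B9 |
         (is_load ? B20 : 0) |
         (rn << 16) |
         (D << 22) |
         (Vd << 12) |
         B8 |                    // dbl: 64-bit registers
         (nregs << 1);           // imm8 = 2 * nregs for D registers
}

int main() {
  // vldmia r1, {d9 - d13}: rest = B23 (increment-after, no writeback).
  uint32_t enc = EncodeVLdmOrStmD(B23, /*reg=*/9, /*nregs=*/5, /*rn=*/1, /*is_load=*/true);
  std::printf("0x%08X\n", enc);  // I expect 0xEC919B0A here; verify before relying on it.
}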
} -void ArmJNIMacroAssembler::Call(ManagedRegister mbase, Offset offset, - ManagedRegister mscratch) { +void ArmJNIMacroAssembler::Call(ManagedRegister mbase, + Offset offset, + ManagedRegister mscratch) { ArmManagedRegister base = mbase.AsArm(); ArmManagedRegister scratch = mscratch.AsArm(); CHECK(base.IsCoreRegister()) << base; diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc index 22221e752a..19450b3a32 100644 --- a/compiler/utils/arm64/assembler_arm64.cc +++ b/compiler/utils/arm64/assembler_arm64.cc @@ -36,7 +36,7 @@ void Arm64Assembler::FinalizeCode() { } size_t Arm64Assembler::CodeSize() const { - return vixl_masm_.GetBufferCapacity() - vixl_masm_.GetRemainingBufferSpace(); + return vixl_masm_.GetSizeOfCodeGenerated(); } const uint8_t* Arm64Assembler::CodeBufferBaseAddress() const { diff --git a/compiler/utils/arm64/assembler_arm64.h b/compiler/utils/arm64/assembler_arm64.h index 4e88e640e5..2847cb86a8 100644 --- a/compiler/utils/arm64/assembler_arm64.h +++ b/compiler/utils/arm64/assembler_arm64.h @@ -27,13 +27,11 @@ #include "utils/assembler.h" #include "offsets.h" -// TODO: make vixl clean wrt -Wshadow, -Wunknown-pragmas, -Wmissing-noreturn +// TODO(VIXL): Make VIXL compile with -Wshadow. #pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wunknown-pragmas" #pragma GCC diagnostic ignored "-Wshadow" -#pragma GCC diagnostic ignored "-Wmissing-noreturn" -#include "a64/disasm-a64.h" -#include "a64/macro-assembler-a64.h" +#include "aarch64/disasm-aarch64.h" +#include "aarch64/macro-assembler-aarch64.h" #pragma GCC diagnostic pop namespace art { diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.h b/compiler/utils/arm64/jni_macro_assembler_arm64.h index 79ee441144..b9f6854b01 100644 --- a/compiler/utils/arm64/jni_macro_assembler_arm64.h +++ b/compiler/utils/arm64/jni_macro_assembler_arm64.h @@ -29,12 +29,10 @@ #include "utils/jni_macro_assembler.h" #include "offsets.h" -// TODO: make vixl clean wrt -Wshadow, -Wunknown-pragmas, -Wmissing-noreturn +// TODO(VIXL): Make VIXL compile with -Wshadow. 
#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wunknown-pragmas" #pragma GCC diagnostic ignored "-Wshadow" -#pragma GCC diagnostic ignored "-Wmissing-noreturn" -#include "a64/macro-assembler-a64.h" +#include "aarch64/macro-assembler-aarch64.h" #pragma GCC diagnostic pop namespace art { diff --git a/compiler/utils/x86/constants_x86.h b/compiler/utils/x86/constants_x86.h index 2dfb65c479..0bc1560ed7 100644 --- a/compiler/utils/x86/constants_x86.h +++ b/compiler/utils/x86/constants_x86.h @@ -97,6 +97,8 @@ enum Condition { kNotZero = kNotEqual, kNegative = kSign, kPositive = kNotSign, + kCarrySet = kBelow, + kCarryClear = kAboveEqual, kUnordered = kParityEven }; diff --git a/compiler/utils/x86_64/constants_x86_64.h b/compiler/utils/x86_64/constants_x86_64.h index 37db6b1543..cc508a196b 100644 --- a/compiler/utils/x86_64/constants_x86_64.h +++ b/compiler/utils/x86_64/constants_x86_64.h @@ -106,6 +106,8 @@ enum Condition { kNotZero = kNotEqual, kNegative = kSign, kPositive = kNotSign, + kCarrySet = kBelow, + kCarryClear = kAboveEqual, kUnordered = kParityEven }; diff --git a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc index 47fb59b1d8..3e687a7758 100644 --- a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc +++ b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc @@ -288,21 +288,27 @@ void X86_64JNIMacroAssembler::LoadRef(ManagedRegister mdest, FrameOffset src) { } void X86_64JNIMacroAssembler::LoadRef(ManagedRegister mdest, - ManagedRegister base, + ManagedRegister mbase, MemberOffset offs, bool unpoison_reference) { + X86_64ManagedRegister base = mbase.AsX86_64(); X86_64ManagedRegister dest = mdest.AsX86_64(); - CHECK(dest.IsCpuRegister() && dest.IsCpuRegister()); - __ movl(dest.AsCpuRegister(), Address(base.AsX86_64().AsCpuRegister(), offs)); + CHECK(base.IsCpuRegister()); + CHECK(dest.IsCpuRegister()); + __ movl(dest.AsCpuRegister(), Address(base.AsCpuRegister(), offs)); if (unpoison_reference) { __ MaybeUnpoisonHeapReference(dest.AsCpuRegister()); } } -void X86_64JNIMacroAssembler::LoadRawPtr(ManagedRegister mdest, ManagedRegister base, Offset offs) { +void X86_64JNIMacroAssembler::LoadRawPtr(ManagedRegister mdest, + ManagedRegister mbase, + Offset offs) { + X86_64ManagedRegister base = mbase.AsX86_64(); X86_64ManagedRegister dest = mdest.AsX86_64(); - CHECK(dest.IsCpuRegister() && dest.IsCpuRegister()); - __ movq(dest.AsCpuRegister(), Address(base.AsX86_64().AsCpuRegister(), offs)); + CHECK(base.IsCpuRegister()); + CHECK(dest.IsCpuRegister()); + __ movq(dest.AsCpuRegister(), Address(base.AsCpuRegister(), offs)); } void X86_64JNIMacroAssembler::LoadRawPtrFromThread(ManagedRegister mdest, ThreadOffset64 offs) { diff --git a/dex2oat/Android.mk b/dex2oat/Android.mk index f5f02cd966..37acef666e 100644 --- a/dex2oat/Android.mk +++ b/dex2oat/Android.mk @@ -62,7 +62,6 @@ DEX2OAT_STATIC_DEPENDENCIES := \ libnativebridge \ libnativeloader \ libsigchain_dummy \ - libvixl-arm64 \ liblog \ libz \ libbacktrace \ @@ -83,14 +82,14 @@ DEX2OAT_STATIC_DEPENDENCIES := \ ifeq ($(ART_BUILD_HOST_NDEBUG),true) $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libcutils libart-compiler libsigchain libziparchive-host liblz4,art/compiler,host,ndebug,$(dex2oat_host_arch))) ifeq ($(ART_BUILD_HOST_STATIC),true) - $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libart libart-compiler libart $(DEX2OAT_STATIC_DEPENDENCIES),art/compiler,host,ndebug,$(dex2oat_host_arch),static)) + $(eval $(call 
build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libart libart-compiler libart libvixl-arm64 $(DEX2OAT_STATIC_DEPENDENCIES),art/compiler,host,ndebug,$(dex2oat_host_arch),static)) endif endif ifeq ($(ART_BUILD_HOST_DEBUG),true) $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libcutils libartd-compiler libsigchain libziparchive-host liblz4,art/compiler,host,debug,$(dex2oat_host_arch))) ifeq ($(ART_BUILD_HOST_STATIC),true) - $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libartd libartd-compiler libartd $(DEX2OAT_STATIC_DEPENDENCIES),art/compiler,host,debug,$(dex2oat_host_arch),static)) + $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libartd libartd-compiler libartd libvixld-arm64 $(DEX2OAT_STATIC_DEPENDENCIES),art/compiler,host,debug,$(dex2oat_host_arch),static)) endif endif diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc index eb11f6d3d7..cfcfe1c999 100644 --- a/dex2oat/dex2oat.cc +++ b/dex2oat/dex2oat.cc @@ -1576,7 +1576,7 @@ class Dex2Oat FINAL { IsAppImage(), image_classes_.release(), compiled_classes_.release(), - /* compiled_methods */ nullptr, + compiled_methods_.release(), thread_count_, dump_stats_, dump_passes_, diff --git a/disassembler/Android.mk b/disassembler/Android.mk index 778fe8ee96..db327fcdb6 100644 --- a/disassembler/Android.mk +++ b/disassembler/Android.mk @@ -90,9 +90,9 @@ define build-libart-disassembler LOCAL_NATIVE_COVERAGE := $(ART_COVERAGE) # For disassembler_arm64. ifeq ($$(art_ndebug_or_debug),debug) - LOCAL_SHARED_LIBRARIES += libvixl-arm64 + LOCAL_SHARED_LIBRARIES += libvixld-arm64 else - LOCAL_SHARED_LIBRARIES += libvixl-arm64 + LOCAL_SHARED_LIBRARIES += libvixl-arm64 endif ifeq ($$(art_target_or_host),target) include $(BUILD_SHARED_LIBRARY) diff --git a/disassembler/disassembler_arm64.h b/disassembler/disassembler_arm64.h index c64d8eaf9d..7c64792b13 100644 --- a/disassembler/disassembler_arm64.h +++ b/disassembler/disassembler_arm64.h @@ -19,10 +19,11 @@ #include "disassembler.h" +// TODO(VIXL): Make VIXL compile with -Wshadow. 
#pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wshadow" -#include "a64/decoder-a64.h" -#include "a64/disasm-a64.h" +#include "aarch64/decoder-aarch64.h" +#include "aarch64/disasm-aarch64.h" #pragma GCC diagnostic pop namespace art { diff --git a/runtime/Android.mk b/runtime/Android.mk index 2f8b11361c..b31eaf60d8 100644 --- a/runtime/Android.mk +++ b/runtime/Android.mk @@ -164,6 +164,7 @@ LIBART_COMMON_SRC_FILES := \ offsets.cc \ os_linux.cc \ parsed_options.cc \ + plugin.cc \ primitive.cc \ quick_exception_handler.cc \ quick/inline_method_analyser.cc \ @@ -177,6 +178,7 @@ LIBART_COMMON_SRC_FILES := \ thread.cc \ thread_list.cc \ thread_pool.cc \ + ti/agent.cc \ trace.cc \ transaction.cc \ type_lookup_table.cc \ @@ -370,6 +372,7 @@ LIBART_ENUM_OPERATOR_OUT_HEADER_FILES := \ stack.h \ thread.h \ thread_state.h \ + ti/agent.h \ verifier/method_verifier.h LIBOPENJDKJVM_SRC_FILES := openjdkjvm/OpenjdkJvm.cc @@ -419,7 +422,7 @@ define build-runtime-library endif ifneq ($(4),libart) ifneq ($(4),libopenjdkjvm) - $$(error expected libart of libopenjdkjvm for argument 4, received $(4)) + $$(error expected libart or libopenjdkjvm for argument 4, received $(4)) endif endif diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc index 0e2a6720ae..492a12d02b 100644 --- a/runtime/arch/arm/entrypoints_init_arm.cc +++ b/runtime/arch/arm/entrypoints_init_arm.cc @@ -133,7 +133,7 @@ void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { qpoints->pReadBarrierMarkReg09 = art_quick_read_barrier_mark_reg09; qpoints->pReadBarrierMarkReg10 = art_quick_read_barrier_mark_reg10; qpoints->pReadBarrierMarkReg11 = art_quick_read_barrier_mark_reg11; - qpoints->pReadBarrierMarkReg12 = art_quick_read_barrier_mark_reg12; + qpoints->pReadBarrierMarkReg12 = nullptr; // Cannot use register 12 (IP) to pass arguments. qpoints->pReadBarrierMarkReg13 = nullptr; // Cannot use register 13 (SP) to pass arguments. qpoints->pReadBarrierMarkReg14 = nullptr; // Cannot use register 14 (LR) to pass arguments. qpoints->pReadBarrierMarkReg15 = nullptr; // Cannot use register 15 (PC) to pass arguments. diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S index 0fcf866e18..c4ec72685f 100644 --- a/runtime/arch/arm/quick_entrypoints_arm.S +++ b/runtime/arch/arm/quick_entrypoints_arm.S @@ -191,7 +191,7 @@ .cfi_rel_offset r11, 44 .cfi_rel_offset ip, 48 .cfi_rel_offset lr, 52 - vpush {s0-s31} @ 32 words of float args. + vpush {d0-d15} @ 32 words of float args. .cfi_adjust_cfa_offset 128 sub sp, #8 @ 2 words of space, alignment padding and Method* .cfi_adjust_cfa_offset 8 @@ -210,7 +210,7 @@ .macro RESTORE_SAVE_EVERYTHING_FRAME add sp, #8 @ rewind sp .cfi_adjust_cfa_offset -8 - vpop {s0-s31} + vpop {d0-d15} .cfi_adjust_cfa_offset -128 pop {r0-r12, lr} @ 14 words of callee saves .cfi_restore r0 @@ -1246,9 +1246,15 @@ ENTRY art_quick_alloc_object_region_tlab ldr r2, [r2, r0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT] // Read barrier for class load. 
ldr r3, [r9, #THREAD_IS_GC_MARKING_OFFSET] - cbnz r3, .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path + cbnz r3, .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_marking .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit: ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_region_tlab_slow_path +.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_marking: + cbz r2, .Lart_quick_alloc_object_region_tlab_slow_path // Null check for loading lock word. + // Check lock word for mark bit, if marked do the allocation. + ldr r3, [r2, MIRROR_OBJECT_LOCK_WORD_OFFSET] + ands r3, #LOCK_WORD_MARK_BIT_MASK_SHIFTED + bne .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path: // The read barrier slow path. Mark // the class. @@ -1817,6 +1823,39 @@ ENTRY art_quick_l2f pop {pc} END art_quick_l2f +.macro CONDITIONAL_CBZ reg, reg_if, dest +.ifc \reg, \reg_if + cbz \reg, \dest +.endif +.endm + +.macro CONDITIONAL_CMPBZ reg, reg_if, dest +.ifc \reg, \reg_if + cmp \reg, #0 + beq \dest +.endif +.endm + +// Use CBZ if the register is in {r0, r7} otherwise compare and branch. +.macro SMART_CBZ reg, dest + CONDITIONAL_CBZ \reg, r0, \dest + CONDITIONAL_CBZ \reg, r1, \dest + CONDITIONAL_CBZ \reg, r2, \dest + CONDITIONAL_CBZ \reg, r3, \dest + CONDITIONAL_CBZ \reg, r4, \dest + CONDITIONAL_CBZ \reg, r5, \dest + CONDITIONAL_CBZ \reg, r6, \dest + CONDITIONAL_CBZ \reg, r7, \dest + CONDITIONAL_CMPBZ \reg, r8, \dest + CONDITIONAL_CMPBZ \reg, r9, \dest + CONDITIONAL_CMPBZ \reg, r10, \dest + CONDITIONAL_CMPBZ \reg, r11, \dest + CONDITIONAL_CMPBZ \reg, r12, \dest + CONDITIONAL_CMPBZ \reg, r13, \dest + CONDITIONAL_CMPBZ \reg, r14, \dest + CONDITIONAL_CMPBZ \reg, r15, \dest +.endm + /* * Create a function `name` calling the ReadBarrier::Mark routine, * getting its argument and returning its result through register @@ -1835,28 +1874,25 @@ END art_quick_l2f .macro READ_BARRIER_MARK_REG name, reg ENTRY \name // Null check so that we can load the lock word. - cmp \reg, #0 - beq .Lret_rb_\name - // Check lock word for mark bit, if marked return. - push {r0} - ldr r0, [\reg, MIRROR_OBJECT_LOCK_WORD_OFFSET] - and r0, #LOCK_WORD_MARK_BIT_MASK_SHIFTED - cbz r0, .Lslow_rb_\name - // Restore LR and return. - pop {r0} - bx lr + SMART_CBZ \reg, .Lret_rb_\name + // Check lock word for mark bit, if marked return. Use IP for scratch since it is blocked. + ldr ip, [\reg, MIRROR_OBJECT_LOCK_WORD_OFFSET] + ands ip, #LOCK_WORD_MARK_BIT_MASK_SHIFTED + beq .Lslow_rb_\name + // Already marked, return right away. 
+ bx lr .Lslow_rb_\name: - pop {r0} - push {r0-r4, r9, r12, lr} @ save return address and core caller-save registers + push {r0-r5, r9, lr} @ save return address and core caller-save registers + @ also save callee save r5 for 16 byte alignment .cfi_adjust_cfa_offset 32 .cfi_rel_offset r0, 0 .cfi_rel_offset r1, 4 .cfi_rel_offset r2, 8 .cfi_rel_offset r3, 12 .cfi_rel_offset r4, 16 - .cfi_rel_offset r9, 20 - .cfi_rel_offset r12, 24 + .cfi_rel_offset r5, 20 + .cfi_rel_offset r9, 24 .cfi_rel_offset lr, 28 vpush {s0-s15} @ save floating-point caller-save registers .cfi_adjust_cfa_offset 64 @@ -1865,48 +1901,11 @@ ENTRY \name mov r0, \reg @ pass arg1 - obj from `reg` .endif bl artReadBarrierMark @ r0 <- artReadBarrierMark(obj) - + mov ip, r0 @ Save result in IP vpop {s0-s15} @ restore floating-point registers .cfi_adjust_cfa_offset -64 - @ If `reg` is a caller-save register, save the result to its - @ corresponding stack slot; it will be restored by the "pop" - @ instruction below. Otherwise, move result into `reg`. - @ - @ (Note that saving `reg` to its stack slot will overwrite the value - @ previously stored by the "push" instruction above. That is - @ alright, as in that case we know that `reg` is not a live - @ register, as it is used to pass the argument and return the result - @ of this function.) - .ifc \reg, r0 - PUSH_REG r0, 0 @ copy result to r0's stack location - .else - .ifc \reg, r1 - PUSH_REG r0, 4 @ copy result to r1's stack location - .else - .ifc \reg, r2 - PUSH_REG r0, 8 @ copy result to r2's stack location - .else - .ifc \reg, r3 - PUSH_REG r0, 12 @ copy result to r3's stack location - .else - .ifc \reg, r4 - PUSH_REG r0, 16 @ copy result to r4's stack location - .else - .ifc \reg, r9 - PUSH_REG r0, 20 @ copy result to r9's stack location - .else - .ifc \reg, r12 - PUSH_REG r0, 24 @ copy result to r12's stack location - .else - mov \reg, r0 @ return result into `reg` - .endif - .endif - .endif - .endif - .endif - .endif - .endif - pop {r0-r4, r9, r12, pc} @ restore caller-save registers and return + pop {r0-r5, r9, lr} @ restore caller-save registers + mov \reg, ip @ copy result to reg .Lret_rb_\name: bx lr END \name @@ -1924,4 +1923,3 @@ READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, r8 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, r9 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, r10 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg12, r12 diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc index cc5bf29609..55b09c318c 100644 --- a/runtime/arch/arm64/entrypoints_init_arm64.cc +++ b/runtime/arch/arm64/entrypoints_init_arm64.cc @@ -149,7 +149,7 @@ void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { qpoints->pReadBarrierMarkReg13 = art_quick_read_barrier_mark_reg13; qpoints->pReadBarrierMarkReg14 = art_quick_read_barrier_mark_reg14; qpoints->pReadBarrierMarkReg15 = art_quick_read_barrier_mark_reg15; - qpoints->pReadBarrierMarkReg16 = art_quick_read_barrier_mark_reg16; + qpoints->pReadBarrierMarkReg16 = nullptr; // IP0 is used as a temp by the asm stub. 
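[Editor's note] The reworked READ_BARRIER_MARK_REG above (and the matching arm64 and x86_64 stubs later in this patch) all place the same fast path in front of artReadBarrierMark: after a null check, the object's lock word is tested for the mark bit and, if it is already set, the reference is returned without entering the runtime; only otherwise are caller-saves spilled and the runtime called, with the result carried back in IP so the pop can restore r0-r5/r9 unchanged. SMART_CBZ exists because Thumb-2 cbz/cbnz can only encode the low registers r0-r7, so higher registers fall back to cmp/beq. A rough C++ rendering of the stub logic follows; the types, helpers, and the mark-bit constant are stand-ins, not the real runtime interface.

// Sketch of what each art_quick_read_barrier_mark_regNN stub now does; illustrative only.
#include <cstdint>

struct Object { uint32_t lock_word; };                     // stand-in for mirror::Object
Object* artReadBarrierMark(Object* obj) { return obj; }    // stand-in for the runtime slow path

// Placeholder value; the stubs use the generated LOCK_WORD_MARK_BIT_MASK_SHIFTED constant.
constexpr uint32_t kMarkBitMaskShifted = 1u << 29;

Object* ReadBarrierMarkStub(Object* ref) {
  if (ref == nullptr) {
    return nullptr;                                // null check so the lock word load is safe
  }
  if ((ref->lock_word & kMarkBitMaskShifted) != 0) {
    return ref;                                    // mark bit already set: no runtime call
  }
  return artReadBarrierMark(ref);                  // slow path: spill caller-saves and mark
}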
qpoints->pReadBarrierMarkReg17 = art_quick_read_barrier_mark_reg17; qpoints->pReadBarrierMarkReg18 = art_quick_read_barrier_mark_reg18; qpoints->pReadBarrierMarkReg19 = art_quick_read_barrier_mark_reg19; diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S index bdad966496..4289cabbc6 100644 --- a/runtime/arch/arm64/quick_entrypoints_arm64.S +++ b/runtime/arch/arm64/quick_entrypoints_arm64.S @@ -331,22 +331,23 @@ #endif // Save FP registers. - stp d0, d1, [sp, #8] - stp d2, d3, [sp, #24] - stp d4, d5, [sp, #40] - stp d6, d7, [sp, #56] - stp d8, d9, [sp, #72] - stp d10, d11, [sp, #88] - stp d12, d13, [sp, #104] - stp d14, d15, [sp, #120] - stp d16, d17, [sp, #136] - stp d18, d19, [sp, #152] - stp d20, d21, [sp, #168] - stp d22, d23, [sp, #184] - stp d24, d25, [sp, #200] - stp d26, d27, [sp, #216] - stp d28, d29, [sp, #232] - stp d30, d31, [sp, #248] + str d0, [sp, #8] + stp d1, d2, [sp, #16] + stp d3, d4, [sp, #32] + stp d5, d6, [sp, #48] + stp d7, d8, [sp, #64] + stp d9, d10, [sp, #80] + stp d11, d12, [sp, #96] + stp d13, d14, [sp, #112] + stp d15, d16, [sp, #128] + stp d17, d18, [sp, #144] + stp d19, d20, [sp, #160] + stp d21, d22, [sp, #176] + stp d23, d24, [sp, #192] + stp d25, d26, [sp, #208] + stp d27, d28, [sp, #224] + stp d29, d30, [sp, #240] + str d31, [sp, #256] // Save core registers. str x0, [sp, #264] @@ -430,22 +431,23 @@ .macro RESTORE_SAVE_EVERYTHING_FRAME // Restore FP registers. - ldp d0, d1, [sp, #8] - ldp d2, d3, [sp, #24] - ldp d4, d5, [sp, #40] - ldp d6, d7, [sp, #56] - ldp d8, d9, [sp, #72] - ldp d10, d11, [sp, #88] - ldp d12, d13, [sp, #104] - ldp d14, d15, [sp, #120] - ldp d16, d17, [sp, #136] - ldp d18, d19, [sp, #152] - ldp d20, d21, [sp, #168] - ldp d22, d23, [sp, #184] - ldp d24, d25, [sp, #200] - ldp d26, d27, [sp, #216] - ldp d28, d29, [sp, #232] - ldp d30, d31, [sp, #248] + ldr d0, [sp, #8] + ldp d1, d2, [sp, #16] + ldp d3, d4, [sp, #32] + ldp d5, d6, [sp, #48] + ldp d7, d8, [sp, #64] + ldp d9, d10, [sp, #80] + ldp d11, d12, [sp, #96] + ldp d13, d14, [sp, #112] + ldp d15, d16, [sp, #128] + ldp d17, d18, [sp, #144] + ldp d19, d20, [sp, #160] + ldp d21, d22, [sp, #176] + ldp d23, d24, [sp, #192] + ldp d25, d26, [sp, #208] + ldp d27, d28, [sp, #224] + ldp d29, d30, [sp, #240] + ldr d31, [sp, #256] // Restore core registers. ldr x0, [sp, #264] @@ -1939,10 +1941,13 @@ END art_quick_alloc_object_rosalloc // (for 64 bit alignment). and \xTemp0, \xTemp0, #4 add \xTemp1, \xTemp1, \xTemp0 - and \xTemp1, \xTemp1, #OBJECT_ALIGNMENT_MASK_TOGGLED // Round up the object size by the - // object alignment. (addr + 7) & ~7. - // Add by 7 is done above. - + and \xTemp1, \xTemp1, #OBJECT_ALIGNMENT_MASK_TOGGLED64 // Apply alignemnt mask + // (addr + 7) & ~7. The mask must + // be 64 bits to keep high bits in + // case of overflow. + // Negative sized arrays are handled here since xCount holds a zero extended 32 bit value. + // Negative ints become large 64 bit unsigned ints which will always be larger than max signed + // 32 bit int. Since the max shift for arrays is 3, it can not become a negative 64 bit int. cmp \xTemp1, #MIN_LARGE_OBJECT_THRESHOLD // Possibly a large object, go slow bhs \slowPathLabel // path. @@ -1956,7 +1961,6 @@ END art_quick_alloc_object_rosalloc sub \xTemp2, \xTemp2, \xTemp0 cmp \xTemp1, \xTemp2 bhi \slowPathLabel - // "Point of no slow path". Won't go to the slow path from here on. OK to clobber x0 and x1. // Move old thread_local_pos to x0 // for the return value. 
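[Editor's note] The negative-size comment added in the arm64 array fast path above is the subtle part: the element count arrives zero-extended to 64 bits, so a negative Java length becomes a huge unsigned value, and with the maximum component shift of 3 the computed byte size cannot wrap a 64-bit register; it simply fails the MIN_LARGE_OBJECT_THRESHOLD comparison and falls through to the slow path, which rejects the request. The arithmetic sketch below mirrors that reasoning; the constants are placeholders rather than the real generated values, and the extra 4-byte adjustment for 8-byte-aligned element types is omitted.

// Illustrative arithmetic only; constants are assumed, not ART's generated values.
#include <cassert>
#include <cstdint>

constexpr uint64_t kMinLargeObjectThreshold = 12 * 1024;  // assumed large-object cutoff
constexpr uint64_t kArrayHeaderSize = 16;                 // assumed array data offset
constexpr uint64_t kAlignMask = 7;

// size = ((count << shift) + header + 7) & ~7, computed entirely in 64 bits.
uint64_t ComputeArrayAllocSize(int32_t count, unsigned shift) {
  uint64_t ucount = static_cast<uint32_t>(count);         // zero-extension, as in the stub
  uint64_t size = (ucount << shift) + kArrayHeaderSize + kAlignMask;
  return size & ~kAlignMask;
}

int main() {
  // A negative length such as -1 becomes 0xFFFFFFFF; even shifted by 3 it stays far above
  // the threshold, so the fast path bails out and the slow path reports the bad length.
  assert(ComputeArrayAllocSize(-1, 3) >= kMinLargeObjectThreshold);
  // A small positive request stays below the threshold and can be bump-allocated.
  assert(ComputeArrayAllocSize(16, 2) < kMinLargeObjectThreshold);
}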
@@ -2747,7 +2751,7 @@ READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg12, w12, x12 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg13, w13, x13 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg14, w14, x14 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg15, w15, x15 -READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg16, w16, x16 +// READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg16, w16, x16 ip0 is blocked READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg17, w17, x17 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg18, w18, x18 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg19, w19, x19 diff --git a/runtime/arch/instruction_set.cc b/runtime/arch/instruction_set.cc index 81ca010423..b35e0889e4 100644 --- a/runtime/arch/instruction_set.cc +++ b/runtime/arch/instruction_set.cc @@ -18,6 +18,7 @@ // Explicitly include our own elf.h to avoid Linux and other dependencies. #include "../elf.h" +#include "base/bit_utils.h" #include "globals.h" namespace art { @@ -113,14 +114,44 @@ size_t GetInstructionSetAlignment(InstructionSet isa) { } } -static constexpr size_t kDefaultStackOverflowReservedBytes = 16 * KB; -static constexpr size_t kMipsStackOverflowReservedBytes = kDefaultStackOverflowReservedBytes; -static constexpr size_t kMips64StackOverflowReservedBytes = kDefaultStackOverflowReservedBytes; - -static constexpr size_t kArmStackOverflowReservedBytes = 8 * KB; -static constexpr size_t kArm64StackOverflowReservedBytes = 8 * KB; -static constexpr size_t kX86StackOverflowReservedBytes = 8 * KB; -static constexpr size_t kX86_64StackOverflowReservedBytes = 8 * KB; +#if !defined(ART_STACK_OVERFLOW_GAP_arm) || !defined(ART_STACK_OVERFLOW_GAP_arm64) || \ + !defined(ART_STACK_OVERFLOW_GAP_mips) || !defined(ART_STACK_OVERFLOW_GAP_mips64) || \ + !defined(ART_STACK_OVERFLOW_GAP_x86) || !defined(ART_STACK_OVERFLOW_GAP_x86_64) +#error "Missing defines for stack overflow gap" +#endif + +static constexpr size_t kArmStackOverflowReservedBytes = ART_STACK_OVERFLOW_GAP_arm; +static constexpr size_t kArm64StackOverflowReservedBytes = ART_STACK_OVERFLOW_GAP_arm64; +static constexpr size_t kMipsStackOverflowReservedBytes = ART_STACK_OVERFLOW_GAP_mips; +static constexpr size_t kMips64StackOverflowReservedBytes = ART_STACK_OVERFLOW_GAP_mips64; +static constexpr size_t kX86StackOverflowReservedBytes = ART_STACK_OVERFLOW_GAP_x86; +static constexpr size_t kX86_64StackOverflowReservedBytes = ART_STACK_OVERFLOW_GAP_x86_64; + +static_assert(IsAligned<kPageSize>(kArmStackOverflowReservedBytes), "ARM gap not page aligned"); +static_assert(IsAligned<kPageSize>(kArm64StackOverflowReservedBytes), "ARM64 gap not page aligned"); +static_assert(IsAligned<kPageSize>(kMipsStackOverflowReservedBytes), "Mips gap not page aligned"); +static_assert(IsAligned<kPageSize>(kMips64StackOverflowReservedBytes), + "Mips64 gap not page aligned"); +static_assert(IsAligned<kPageSize>(kX86StackOverflowReservedBytes), "X86 gap not page aligned"); +static_assert(IsAligned<kPageSize>(kX86_64StackOverflowReservedBytes), + "X86_64 gap not page aligned"); + +#if !defined(ART_FRAME_SIZE_LIMIT) +#error "ART frame size limit missing" +#endif + +// TODO: Should we require an extra page (RoundUp(SIZE) + kPageSize)? 
+static_assert(ART_FRAME_SIZE_LIMIT < kArmStackOverflowReservedBytes, "Frame size limit too large"); +static_assert(ART_FRAME_SIZE_LIMIT < kArm64StackOverflowReservedBytes, + "Frame size limit too large"); +static_assert(ART_FRAME_SIZE_LIMIT < kMipsStackOverflowReservedBytes, + "Frame size limit too large"); +static_assert(ART_FRAME_SIZE_LIMIT < kMips64StackOverflowReservedBytes, + "Frame size limit too large"); +static_assert(ART_FRAME_SIZE_LIMIT < kX86StackOverflowReservedBytes, + "Frame size limit too large"); +static_assert(ART_FRAME_SIZE_LIMIT < kX86_64StackOverflowReservedBytes, + "Frame size limit too large"); size_t GetStackOverflowReservedBytes(InstructionSet isa) { switch (isa) { diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S index ac8f5233da..32768b0263 100644 --- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S +++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S @@ -910,7 +910,20 @@ MACRO0(RETURN_OR_DELIVER_PENDING_EXCEPTION) END_MACRO // Generate the allocation entrypoints for each allocator. -GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR +GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_REGION_TLAB_ALLOCATORS +// Comment out allocators that have x86_64 specific asm. +// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB) +// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB) +// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB) +// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region_tlab, RegionTLAB) +// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB) +GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_tlab, RegionTLAB) +GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB) +GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB) // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc). DEFINE_FUNCTION art_quick_alloc_object_rosalloc @@ -1003,6 +1016,14 @@ END_FUNCTION art_quick_alloc_object_rosalloc MACRO1(ALLOC_OBJECT_TLAB_FAST_PATH, slowPathLabel) testl %edx, %edx // Check null class jz RAW_VAR(slowPathLabel) + ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH(RAW_VAR(slowPathLabel)) +END_MACRO + +// The common fast path code for art_quick_alloc_object_resolved_region_tlab. +// +// RDI: type_idx, RSI: ArtMethod*, RDX/EDX: the class, RAX: return value. +// RCX: scratch, r8: Thread::Current(). +MACRO1(ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH, slowPathLabel) // Check class status. cmpl LITERAL(MIRROR_CLASS_STATUS_INITIALIZED), MIRROR_CLASS_STATUS_OFFSET(%rdx) jne RAW_VAR(slowPathLabel) @@ -1014,26 +1035,73 @@ MACRO1(ALLOC_OBJECT_TLAB_FAST_PATH, slowPathLabel) // kAccClassIsFinalizable testl LITERAL(ACCESS_FLAGS_CLASS_IS_FINALIZABLE), MIRROR_CLASS_ACCESS_FLAGS_OFFSET(%rdx) jnz RAW_VAR(slowPathLabel) - movq %gs:THREAD_SELF_OFFSET, %r8 // r8 = thread - movq THREAD_LOCAL_END_OFFSET(%r8), %rax // Load thread_local_end. - subq THREAD_LOCAL_POS_OFFSET(%r8), %rax // Compute the remaining buffer size. - movl MIRROR_CLASS_OBJECT_SIZE_OFFSET(%rdx), %ecx // Load the object size. - cmpq %rax, %rcx // Check if it fits. 
OK to do this - // before rounding up the object size - // assuming the buf size alignment. + ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH(RAW_VAR(slowPathLabel)) +END_MACRO + +// The fast path code for art_quick_alloc_object_initialized_region_tlab. +// +// RDI: type_idx, RSI: ArtMethod*, RDX/EDX: the class, RAX: return value. +// RCX: scratch, r8: Thread::Current(). +MACRO1(ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH, slowPathLabel) + movq %gs:THREAD_SELF_OFFSET, %r8 // r8 = thread + movl MIRROR_CLASS_OBJECT_SIZE_OFFSET(%rdx), %ecx // Load the object size. + movq THREAD_LOCAL_POS_OFFSET(%r8), %rax + leaq OBJECT_ALIGNMENT_MASK(%rax, %rcx), %rcx // Add size to pos, note that these + // are both 32 bit ints, overflow + // will cause the add to be past the + // end of the thread local region. + // Also sneak in alignment mask add. + andq LITERAL(OBJECT_ALIGNMENT_MASK_TOGGLED64), %rcx // Align the size by 8. (addr + 7) & + // ~7. + cmpq THREAD_LOCAL_END_OFFSET(%r8), %rcx // Check if it fits. ja RAW_VAR(slowPathLabel) - addl LITERAL(OBJECT_ALIGNMENT_MASK), %ecx // Align the size by 8. (addr + 7) & ~7. - andl LITERAL(OBJECT_ALIGNMENT_MASK_TOGGLED), %ecx - movq THREAD_LOCAL_POS_OFFSET(%r8), %rax // Load thread_local_pos - // as allocated object. - addq %rax, %rcx // Add the object size. - movq %rcx, THREAD_LOCAL_POS_OFFSET(%r8) // Update thread_local_pos. - addq LITERAL(1), THREAD_LOCAL_OBJECTS_OFFSET(%r8) // Increase thread_local_objects. - // Store the class pointer in the header. - // No fence needed for x86. + movq %rcx, THREAD_LOCAL_POS_OFFSET(%r8) // Update thread_local_pos. + addq LITERAL(1), THREAD_LOCAL_OBJECTS_OFFSET(%r8) // Increase thread_local_objects. + // Store the class pointer in the + // header. + // No fence needed for x86. POISON_HEAP_REF edx movl %edx, MIRROR_OBJECT_CLASS_OFFSET(%rax) - ret // Fast path succeeded. + ret // Fast path succeeded. +END_MACRO + +// The fast path code for art_quick_alloc_array_region_tlab. +// Inputs: RDI: uint32_t type_idx, RSI: int32_t component_count, RDX: ArtMethod* method +// Temps: RCX: the class, r8, r9 +// Output: RAX: return value. +MACRO1(ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED, slowPathLabel) + movq %rcx, %r8 // Save class for later + movl MIRROR_CLASS_COMPONENT_TYPE_OFFSET(%rcx), %ecx // Load component type. + UNPOISON_HEAP_REF ecx + movl MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET(%rcx), %ecx // Load primitive type. + shrq LITERAL(PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT), %rcx // Get component size shift. + movq %rsi, %r9 + salq %cl, %r9 // Calculate array count shifted. + // Add array header + alignment rounding. + addq LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9 + // Add 4 extra bytes if we are doing a long array. + addq LITERAL(1), %rcx + andq LITERAL(4), %rcx + addq %rcx, %r9 + movq %gs:THREAD_SELF_OFFSET, %rcx // rcx = thread +#if MIRROR_LONG_ARRAY_DATA_OFFSET != MIRROR_INT_ARRAY_DATA_OFFSET + 4 +#error Long array data offset must be 4 greater than int array data offset. +#endif + // Mask out the unaligned part to make sure we are 8 byte aligned. + andq LITERAL(OBJECT_ALIGNMENT_MASK_TOGGLED64), %r9 + movq THREAD_LOCAL_POS_OFFSET(%rcx), %rax + addq %rax, %r9 + cmpq THREAD_LOCAL_END_OFFSET(%rcx), %r9 // Check if it fits. + ja RAW_VAR(slowPathLabel) + movq %r9, THREAD_LOCAL_POS_OFFSET(%rcx) // Update thread_local_pos. + addq LITERAL(1), THREAD_LOCAL_OBJECTS_OFFSET(%rcx) // Increase thread_local_objects. + // Store the class pointer in the + // header. + // No fence needed for x86. 
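[Editor's note] Both the object and array fast paths being introduced here use the same bump-pointer sequence: a single leaq adds the object size and the alignment mask to thread_local_pos, a 64-bit andq clears the low bits, and because pos and size are 32-bit quantities the addition cannot wrap the 64-bit register, so an over-large size simply compares above thread_local_end and takes the slow path. A rough C++ rendering follows; the structure and field names are stand-ins for the real Thread offsets, not ART code.

// Illustrative bump-pointer TLAB fast path; names and layout are assumptions.
#include <cstdint>

struct ThreadTlab {
  uint64_t pos;       // stand-in for THREAD_LOCAL_POS_OFFSET
  uint64_t end;       // stand-in for THREAD_LOCAL_END_OFFSET
  uint64_t objects;   // stand-in for THREAD_LOCAL_OBJECTS_OFFSET
};

constexpr uint64_t kObjectAlignmentMask = 7;

// Returns the new object's address, or 0 to signal "take the slow path".
uint64_t AllocObjectTlabFastPath(ThreadTlab* self, uint32_t object_size) {
  uint64_t old_pos = self->pos;
  // leaq OBJECT_ALIGNMENT_MASK(pos, size) followed by andq ~mask:
  // round (pos + size) up to the next 8-byte boundary in one go.
  uint64_t new_pos = (old_pos + object_size + kObjectAlignmentMask) & ~kObjectAlignmentMask;
  if (new_pos > self->end) {
    return 0;                       // does not fit in the TLAB: slow path
  }
  self->pos = new_pos;              // bump the allocation pointer
  self->objects += 1;               // bookkeeping, as in the stub
  // The stub then stores the (possibly poisoned) class pointer into the object header;
  // no fence is needed on x86 for this publication.
  return old_pos;                   // the old thread_local_pos is the new object
}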
+ POISON_HEAP_REF r8d + movl %r8d, MIRROR_OBJECT_CLASS_OFFSET(%rax) + movl %esi, MIRROR_ARRAY_LENGTH_OFFSET(%rax) + ret // Fast path succeeded. END_MACRO // The common slow path code for art_quick_alloc_object_tlab and art_quick_alloc_object_region_tlab. @@ -1046,6 +1114,16 @@ MACRO1(ALLOC_OBJECT_TLAB_SLOW_PATH, cxx_name) RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER // return or deliver exception END_MACRO +// The slow path code for art_quick_alloc_array_region_tlab. +MACRO1(ALLOC_ARRAY_TLAB_SLOW_PATH, cxx_name) + SETUP_SAVE_REFS_ONLY_FRAME // save ref containing registers for GC + // Outgoing argument set up + movq %gs:THREAD_SELF_OFFSET, %rcx // pass Thread::Current() + call CALLVAR(cxx_name) // cxx_name(arg0, arg1, arg2, Thread*) + RESTORE_SAVE_REFS_ONLY_FRAME // restore frame up to return address + RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER // return or deliver exception +END_MACRO + // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB). DEFINE_FUNCTION art_quick_alloc_object_tlab // Fast path tlab allocation. @@ -1065,6 +1143,82 @@ DEFINE_FUNCTION art_quick_alloc_object_tlab ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeTLAB END_FUNCTION art_quick_alloc_object_tlab +// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region_tlab, RegionTLAB). +DEFINE_FUNCTION art_quick_alloc_array_region_tlab + // Fast path region tlab allocation. + // RDI: uint32_t type_idx, RSI: int32_t component_count, RDX: ArtMethod* + // RCX: klass, R8, R9: free. RAX: return val. +#if !defined(USE_READ_BARRIER) + int3 + int3 +#endif + movq ART_METHOD_DEX_CACHE_TYPES_OFFSET_64(%rdx), %rcx // Load dex cache resolved types array + movl 0(%rcx, %rdi, COMPRESSED_REFERENCE_SIZE), %ecx // Load the class + // Null check so that we can load the lock word. + testl %ecx, %ecx + jz .Lart_quick_alloc_array_region_tlab_slow_path + + cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET + jne .Lart_quick_alloc_array_region_tlab_class_load_read_barrier_marking +.Lart_quick_alloc_array_region_tlab_class_load_read_barrier_slow_path_exit: + ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED .Lart_quick_alloc_array_region_tlab_slow_path +.Lart_quick_alloc_array_region_tlab_class_load_read_barrier_marking: + // Check the mark bit, if it is 1 return. + testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%ecx) + jnz .Lart_quick_alloc_array_region_tlab_class_load_read_barrier_slow_path_exit +.Lart_quick_alloc_array_region_tlab_class_load_read_barrier_slow_path: + // The read barrier slow path. Mark the class. + PUSH rdi + PUSH rsi + PUSH rdx + // Outgoing argument set up + movq %rcx, %rdi // Pass the class as the first param. + call SYMBOL(artReadBarrierMark) // cxx_name(mirror::Object* obj) + movq %rax, %rcx + POP rdx + POP rsi + POP rdi + jmp .Lart_quick_alloc_array_region_tlab_class_load_read_barrier_slow_path_exit +.Lart_quick_alloc_array_region_tlab_slow_path: + ALLOC_ARRAY_TLAB_SLOW_PATH artAllocArrayFromCodeRegionTLAB +END_FUNCTION art_quick_alloc_array_region_tlab + +// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB). +DEFINE_FUNCTION art_quick_alloc_array_resolved_region_tlab + // Fast path region tlab allocation. + // RDI: mirror::Class* klass, RSI: int32_t component_count, RDX: ArtMethod* + // RCX: mirror::Class* klass, R8, R9: free. RAX: return val. +#if !defined(USE_READ_BARRIER) + int3 + int3 +#endif + movq %rdi, %rcx + // Already resolved, no null check. 
+ cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET + jne .Lart_quick_alloc_array_resolved_region_tlab_class_load_read_barrier_marking +.Lart_quick_alloc_array_resolved_region_tlab_class_load_read_barrier_slow_path_exit: + ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED .Lart_quick_alloc_array_resolved_region_tlab_slow_path +.Lart_quick_alloc_array_resolved_region_tlab_class_load_read_barrier_marking: + // Check the mark bit, if it is 1 return. + testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%ecx) + jnz .Lart_quick_alloc_array_region_tlab_class_load_read_barrier_slow_path_exit +.Lart_quick_alloc_array_resolved_region_tlab_class_load_read_barrier_slow_path: + // The read barrier slow path. Mark the class. + PUSH rdi + PUSH rsi + PUSH rdx + // Outgoing argument set up + movq %rcx, %rdi // Pass the class as the first param. + call SYMBOL(artReadBarrierMark) // cxx_name(mirror::Object* obj) + movq %rax, %rcx + POP rdx + POP rsi + POP rdi + jmp .Lart_quick_alloc_array_resolved_region_tlab_class_load_read_barrier_slow_path_exit +.Lart_quick_alloc_array_resolved_region_tlab_slow_path: + ALLOC_ARRAY_TLAB_SLOW_PATH artAllocArrayFromCodeResolvedRegionTLAB +END_FUNCTION art_quick_alloc_array_resolved_region_tlab + // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB). DEFINE_FUNCTION art_quick_alloc_object_region_tlab // Fast path region tlab allocation. @@ -1074,29 +1228,30 @@ DEFINE_FUNCTION art_quick_alloc_object_region_tlab int3 int3 #endif - // Might need a special macro since rsi and edx is 32b/64b mismatched. movq ART_METHOD_DEX_CACHE_TYPES_OFFSET_64(%rsi), %rdx // Load dex cache resolved types array - // Might need to break down into multiple instructions to get the base address in a register. - // Load the class - movl 0(%rdx, %rdi, COMPRESSED_REFERENCE_SIZE), %edx - cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET - jz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit + movl 0(%rdx, %rdi, COMPRESSED_REFERENCE_SIZE), %edx // Load the class // Null check so that we can load the lock word. testl %edx, %edx - jz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit - // Check the mark bit, if it is 1 return. - testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx) - jz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path + jz .Lart_quick_alloc_object_region_tlab_slow_path + // Test if the GC is marking. + cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET + jne .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_marking .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit: ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_region_tlab_slow_path +.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_marking: + // Check the mark bit, if it is 1 avoid the read barrier. + testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx) + jnz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path: // The read barrier slow path. Mark the class. PUSH rdi PUSH rsi + subq LITERAL(8), %rsp // 16 byte alignment // Outgoing argument set up movq %rdx, %rdi // Pass the class as the first param. 
call SYMBOL(artReadBarrierMark) // cxx_name(mirror::Object* obj) movq %rax, %rdx + addq LITERAL(8), %rsp POP rsi POP rdi jmp .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit @@ -1104,6 +1259,77 @@ DEFINE_FUNCTION art_quick_alloc_object_region_tlab ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeRegionTLAB END_FUNCTION art_quick_alloc_object_region_tlab +// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB). +DEFINE_FUNCTION art_quick_alloc_object_resolved_region_tlab + // Fast path region tlab allocation. + // RDI: mirror::Class* klass, RSI: ArtMethod* + // RDX, RCX, R8, R9: free. RAX: return val. +#if !defined(USE_READ_BARRIER) + int3 + int3 +#endif + movq %rdi, %rdx + cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET + jne .Lart_quick_alloc_object_resolved_region_tlab_class_load_read_barrier_marking +.Lart_quick_alloc_object_resolved_region_tlab_class_load_read_barrier_slow_path_exit: + ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_region_tlab_slow_path +.Lart_quick_alloc_object_resolved_region_tlab_class_load_read_barrier_marking: + // Check the mark bit, if it is 1 avoid the read barrier. + testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx) + jnz .Lart_quick_alloc_object_resolved_region_tlab_class_load_read_barrier_slow_path_exit +.Lart_quick_alloc_object_resolved_region_tlab_class_load_read_barrier_slow_path: + // The read barrier slow path. Mark the class. + PUSH rdi + PUSH rsi + subq LITERAL(8), %rsp // 16 byte alignment + // Outgoing argument set up + movq %rdx, %rdi // Pass the class as the first param. + call SYMBOL(artReadBarrierMark) // cxx_name(mirror::Object* obj) + movq %rax, %rdx + addq LITERAL(8), %rsp + POP rsi + POP rdi + jmp .Lart_quick_alloc_object_resolved_region_tlab_class_load_read_barrier_slow_path_exit +.Lart_quick_alloc_object_resolved_region_tlab_slow_path: + ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeResolvedRegionTLAB +END_FUNCTION art_quick_alloc_object_resolved_region_tlab + +// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB). +DEFINE_FUNCTION art_quick_alloc_object_initialized_region_tlab + // Fast path region tlab allocation. + // RDI: mirror::Class* klass, RSI: ArtMethod* + // RDX, RCX, R8, R9: free. RAX: return val. +#if !defined(USE_READ_BARRIER) + int3 + int3 +#endif + // Might need a special macro since rsi and edx is 32b/64b mismatched. + movq %rdi, %rdx + cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET + jne .Lart_quick_alloc_object_initialized_region_tlab_class_load_read_barrier_marking +.Lart_quick_alloc_object_initialized_region_tlab_class_load_read_barrier_slow_path_exit: + ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH .Lart_quick_alloc_object_initialized_region_tlab_slow_path +.Lart_quick_alloc_object_initialized_region_tlab_class_load_read_barrier_marking: + // Check the mark bit, if it is 1 avoid the read barrier. + testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx) + jnz .Lart_quick_alloc_object_initialized_region_tlab_class_load_read_barrier_slow_path +.Lart_quick_alloc_object_initialized_region_tlab_class_load_read_barrier_slow_path: + // The read barrier slow path. Mark the class. + PUSH rdi + PUSH rsi + subq LITERAL(8), %rsp // 16 byte alignment + // Outgoing argument set up + movq %rdx, %rdi // Pass the class as the first param. 
+ call SYMBOL(artReadBarrierMark) // cxx_name(mirror::Object* obj) + movq %rax, %rdx + addq LITERAL(8), %rsp + POP rsi + POP rdi + jmp .Lart_quick_alloc_object_initialized_region_tlab_class_load_read_barrier_slow_path_exit +.Lart_quick_alloc_object_initialized_region_tlab_slow_path: + ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeInitializedRegionTLAB +END_FUNCTION art_quick_alloc_object_initialized_region_tlab + ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER ONE_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER diff --git a/runtime/base/logging.h b/runtime/base/logging.h index 6323eee53a..ac21a3f0ea 100644 --- a/runtime/base/logging.h +++ b/runtime/base/logging.h @@ -57,6 +57,7 @@ struct LogVerbosity { bool verifier; bool image; bool systrace_lock_logging; // Enabled with "-verbose:sys-locks". + bool agents; }; // Global log verbosity setting, initialized by InitLogging. diff --git a/runtime/base/macros.h b/runtime/base/macros.h index 3c43253e67..5a50247f5a 100644 --- a/runtime/base/macros.h +++ b/runtime/base/macros.h @@ -75,7 +75,7 @@ template<typename T> ART_FRIEND_TEST(test_set_name, individual_test) ALWAYS_INLINE void* operator new(size_t, void* ptr) noexcept { return ptr; } \ ALWAYS_INLINE void operator delete(void*, void*) noexcept { } \ private: \ - void* operator new(size_t) = delete // NOLINT + void* operator new(size_t) = delete // NOLINT // The arraysize(arr) macro returns the # of elements in an array arr. // The expression is a compile-time constant, and therefore can be @@ -135,13 +135,13 @@ char (&ArraySizeHelper(T (&array)[N]))[N]; #define ARRAYSIZE_UNSAFE(a) \ ((sizeof(a) / sizeof(*(a))) / static_cast<size_t>(!(sizeof(a) % sizeof(*(a))))) -#define SIZEOF_MEMBER(t, f) sizeof((reinterpret_cast<t*>(4096))->f) // NOLINT +#define SIZEOF_MEMBER(t, f) sizeof((reinterpret_cast<t*>(4096))->f) // NOLINT #define OFFSETOF_MEMBER(t, f) \ - (reinterpret_cast<uintptr_t>(&reinterpret_cast<t*>(16)->f) - static_cast<uintptr_t>(16u)) // NOLINT + (reinterpret_cast<uintptr_t>(&reinterpret_cast<t*>(16)->f) - static_cast<uintptr_t>(16u)) // NOLINT #define OFFSETOF_MEMBERPTR(t, f) \ - (reinterpret_cast<uintptr_t>(&(reinterpret_cast<t*>(16)->*f)) - static_cast<uintptr_t>(16)) // NOLINT + (reinterpret_cast<uintptr_t>(&(reinterpret_cast<t*>(16)->*f)) - static_cast<uintptr_t>(16)) // NOLINT #define PACKED(x) __attribute__ ((__aligned__(x), __packed__)) diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc index 46722ecad7..4d48da6a83 100644 --- a/runtime/class_linker.cc +++ b/runtime/class_linker.cc @@ -4630,18 +4630,23 @@ bool ClassLinker::InitializeClass(Thread* self, Handle<mirror::Class> klass, } else { value_it.ReadValueToField<false>(field); } + if (self->IsExceptionPending()) { + break; + } DCHECK(!value_it.HasNext() || field_it.HasNextStaticField()); } } } - ArtMethod* clinit = klass->FindClassInitializer(image_pointer_size_); - if (clinit != nullptr) { - CHECK(can_init_statics); - JValue result; - clinit->Invoke(self, nullptr, 0, &result, "V"); - } + if (!self->IsExceptionPending()) { + ArtMethod* clinit = klass->FindClassInitializer(image_pointer_size_); + if (clinit != nullptr) { + CHECK(can_init_statics); + JValue result; + clinit->Invoke(self, nullptr, 0, &result, "V"); + } + } self->AllowThreadSuspension(); uint64_t t1 = 
NanoTime(); diff --git a/runtime/experimental_flags.h b/runtime/experimental_flags.h index fde1a5f3ab..7faa2dc7e3 100644 --- a/runtime/experimental_flags.h +++ b/runtime/experimental_flags.h @@ -26,6 +26,8 @@ struct ExperimentalFlags { // The actual flag values. enum { kNone = 0x0000, + kAgents = 0x0001, // 0b00000001 + kRuntimePlugins = 0x0002, // 0b00000010 }; constexpr ExperimentalFlags() : value_(0x0000) {} @@ -61,9 +63,19 @@ struct ExperimentalFlags { uint32_t value_; }; -inline std::ostream& operator<<(std::ostream& stream, - const ExperimentalFlags& e ATTRIBUTE_UNUSED) { - stream << "kNone"; +inline std::ostream& operator<<(std::ostream& stream, const ExperimentalFlags& e) { + bool started = false; + if (e & ExperimentalFlags::kAgents) { + stream << (started ? "|" : "") << "kAgents"; + started = true; + } + if (e & ExperimentalFlags::kRuntimePlugins) { + stream << (started ? "|" : "") << "kRuntimePlugins"; + started = true; + } + if (!started) { + stream << "kNone"; + } return stream; } diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc index 88fbf781bc..b574c3bf3a 100644 --- a/runtime/gc/heap.cc +++ b/runtime/gc/heap.cc @@ -230,6 +230,9 @@ Heap::Heap(size_t initial_size, total_wait_time_(0), verify_object_mode_(kVerifyObjectModeDisabled), disable_moving_gc_count_(0), + semi_space_collector_(nullptr), + mark_compact_collector_(nullptr), + concurrent_copying_collector_(nullptr), is_running_on_memory_tool_(Runtime::Current()->IsRunningOnMemoryTool()), use_tlab_(use_tlab), main_space_backup_(nullptr), diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc index c2e2a1edd2..6fcad295bb 100644 --- a/runtime/gc/space/image_space.cc +++ b/runtime/gc/space/image_space.cc @@ -384,30 +384,7 @@ ImageSpace* ImageSpace::CreateBootImage(const char* image_location, &has_system, &cache_filename, &dalvik_cache_exists, &has_cache, &is_global_cache); - // If we're starting with the global cache, and we're the zygote, try to see whether there are - // OTA artifacts from the A/B OTA preopting to move over. - // (It is structurally simpler to check this here, instead of complicating the compile/relocate - // logic below.) const bool is_zygote = Runtime::Current()->IsZygote(); - if (is_global_cache && is_zygote) { - VLOG(startup) << "Checking for A/B OTA data."; - TryMoveOTAArtifacts(cache_filename, dalvik_cache_exists); - - // Retry. There are two cases where the old info is outdated: - // * There wasn't a boot image before (e.g., some failure on boot), but now the OTA preopted - // image has been moved in-place. - // * There was a boot image before, and we tried to move the OTA preopted image, but a failure - // happened and there is no file anymore. - found_image = FindImageFilename(image_location, - image_isa, - &system_filename, - &has_system, - &cache_filename, - &dalvik_cache_exists, - &has_cache, - &is_global_cache); - } - if (is_zygote && !secondary_image) { MarkZygoteStart(image_isa, Runtime::Current()->GetZygoteMaxFailedBoots()); } @@ -529,6 +506,17 @@ ImageSpace* ImageSpace::CreateBootImage(const char* image_location, error_msg); } if (space != nullptr) { + // Check whether there is enough space left over in the data partition. Even if we can load + // the image, we need to be conservative, as some parts of the platform are not very tolerant + // of space constraints. + // ImageSpace doesn't know about the data partition per se, it relies on the FindImageFilename + // helper (which relies on GetDalvikCache). 
So for now, if we load an image out of /system, + // ignore the check (as it would test for free space in /system instead). + if (!is_system && !CheckSpace(*image_filename, error_msg)) { + // No. Delete the generated image and try to run out of the dex files. + PruneDalvikCache(image_isa); + return nullptr; + } return space; } diff --git a/runtime/gc/space/image_space_fs.h b/runtime/gc/space/image_space_fs.h index 8e852fa54b..fa941c0376 100644 --- a/runtime/gc/space/image_space_fs.h +++ b/runtime/gc/space/image_space_fs.h @@ -79,115 +79,6 @@ static void DeleteDirectoryContents(const std::string& dir, bool recurse) { CHECK_EQ(0, closedir(c_dir)) << "Unable to close directory."; } -static bool HasContent(const char* dir) { - if (!OS::DirectoryExists(dir)) { - return false; - } - DIR* c_dir = opendir(dir); - if (c_dir == nullptr) { - PLOG(WARNING) << "Unable to open " << dir << " to delete it if empty"; - return false; - } - - for (struct dirent* de = readdir(c_dir); de != nullptr; de = readdir(c_dir)) { - const char* name = de->d_name; - if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0) { - continue; - } - // Something here. - CHECK_EQ(0, closedir(c_dir)) << "Unable to close directory."; - return true; - } - CHECK_EQ(0, closedir(c_dir)) << "Unable to close directory."; - return false; -} - -// Delete this directory, if empty. Then repeat with the parents. Skips non-existing directories. -// If stop_at isn't null, the recursion will stop when a directory with the given name is found. -static void DeleteEmptyDirectoriesUpTo(const std::string& dir, const char* stop_at) { - if (HasContent(dir.c_str())) { - return; - } - if (stop_at != nullptr) { - // This check isn't precise, but good enough in practice. - if (EndsWith(dir, stop_at)) { - return; - } - } - if (OS::DirectoryExists(dir.c_str())) { - if (rmdir(dir.c_str()) != 0) { - PLOG(ERROR) << "Unable to rmdir " << dir; - return; - } - } - size_t last_slash = dir.rfind('/'); - if (last_slash != std::string::npos) { - DeleteEmptyDirectoriesUpTo(dir.substr(0, last_slash), stop_at); - } -} - -static void MoveOTAArtifacts(const char* src, const char* trg) { - DCHECK(OS::DirectoryExists(src)); - DCHECK(OS::DirectoryExists(trg)); - - if (HasContent(trg)) { - LOG(WARNING) << "We do not support merging caches, but the target isn't empty: " << src - << " to " << trg; - return; - } - - if (rename(src, trg) != 0) { - PLOG(ERROR) << "Could not rename OTA cache " << src << " to target " << trg; - } -} - -// This is some dlopen/dlsym and hardcoded data to avoid a dependency on libselinux. Make sure -// this stays in sync! -static bool RelabelOTAFiles(const std::string& dalvik_cache_dir) { - // We only expect selinux on devices. Don't even attempt this on the host. - if (!kIsTargetBuild) { - return true; - } - - // Custom deleter, so we can use std::unique_ptr. - struct HandleDeleter { - void operator()(void* in) { - if (in != nullptr && dlclose(in) != 0) { - PLOG(ERROR) << "Could not close selinux handle."; - } - } - }; - - // Look for selinux library. - std::unique_ptr<void, HandleDeleter> selinux_handle(dlopen("libselinux.so", RTLD_NOW)); - if (selinux_handle == nullptr) { - // Assume everything's OK if we can't open the library. - return true; - } - dlerror(); // Clean dlerror string. - - void* restorecon_ptr = dlsym(selinux_handle.get(), "selinux_android_restorecon"); - if (restorecon_ptr == nullptr) { - // Can't find the relabel function. That's bad. Make sure the zygote fails, as we have no - // other recourse to make this error obvious. 
- const char* error_string = dlerror(); - LOG(FATAL) << "Could not find selinux restorecon function: " - << ((error_string != nullptr) ? error_string : "(unknown error)"); - UNREACHABLE(); - } - - using RestoreconFn = int (*)(const char*, unsigned int); - constexpr unsigned int kRecursive = 4U; - - RestoreconFn restorecon_fn = reinterpret_cast<RestoreconFn>(restorecon_ptr); - if (restorecon_fn(dalvik_cache_dir.c_str(), kRecursive) != 0) { - LOG(ERROR) << "Failed to restorecon " << dalvik_cache_dir; - return false; - } - - return true; -} - } // namespace impl @@ -226,8 +117,21 @@ static void MarkZygoteStart(const InstructionSet isa, const uint32_t max_failed_ file.reset(OS::CreateEmptyFile(file_name)); if (file.get() == nullptr) { + int saved_errno = errno; PLOG(WARNING) << "Failed to create boot marker."; - return; + if (saved_errno != ENOSPC) { + return; + } + + LOG(WARNING) << "Pruning dalvik cache because of low-memory situation."; + impl::DeleteDirectoryContents(isa_subdir, false); + + // Try once more. + file.reset(OS::OpenFileReadWrite(file_name)); + if (file == nullptr) { + PLOG(WARNING) << "Failed to create boot marker."; + return; + } } } else { if (!file->ReadFully(&num_failed_boots, sizeof(num_failed_boots))) { @@ -262,53 +166,6 @@ static void MarkZygoteStart(const InstructionSet isa, const uint32_t max_failed_ } } -static void TryMoveOTAArtifacts(const std::string& cache_filename, bool dalvik_cache_exists) { - // We really assume here global means /data/dalvik-cache, and we'll inject 'ota.' Make sure - // that's true. - CHECK(StartsWith(cache_filename, "/data/dalvik-cache")) << cache_filename; - - // Inject ota subdirectory. - std::string ota_filename(cache_filename); - ota_filename = ota_filename.insert(strlen("/data/"), "ota/"); - CHECK(StartsWith(ota_filename, "/data/ota/dalvik-cache")) << ota_filename; - - // See if the file exists. - if (OS::FileExists(ota_filename.c_str())) { - VLOG(startup) << "OTA directory does exist, checking for artifacts"; - - size_t last_slash = ota_filename.rfind('/'); - CHECK_NE(last_slash, std::string::npos); - std::string ota_source_dir = ota_filename.substr(0, last_slash); - - // We need the dalvik cache now, really. - if (dalvik_cache_exists) { - size_t last_cache_slash = cache_filename.rfind('/'); - DCHECK_NE(last_cache_slash, std::string::npos); - std::string dalvik_cache_target_dir = cache_filename.substr(0, last_cache_slash); - - // First clean the target cache. - impl::DeleteDirectoryContents(dalvik_cache_target_dir.c_str(), false); - - // Now move things over. - impl::MoveOTAArtifacts(ota_source_dir.c_str(), dalvik_cache_target_dir.c_str()); - - // Last step: ensure the files have the right selinux label. - if (!impl::RelabelOTAFiles(dalvik_cache_target_dir)) { - // This isn't good. We potentially moved files, but they have the wrong label. Delete the - // files. - LOG(WARNING) << "Could not relabel files, must delete dalvik-cache."; - impl::DeleteDirectoryContents(dalvik_cache_target_dir.c_str(), false); - } - } - - // Cleanup. 
- impl::DeleteDirectoryContents(ota_source_dir.c_str(), true); - impl::DeleteEmptyDirectoriesUpTo(ota_source_dir, "ota"); - } else { - VLOG(startup) << "No OTA directory."; - } -} - } // namespace space } // namespace gc } // namespace art diff --git a/runtime/generated/asm_support_gen.h b/runtime/generated/asm_support_gen.h index 96924722d8..716c23d1b0 100644 --- a/runtime/generated/asm_support_gen.h +++ b/runtime/generated/asm_support_gen.h @@ -98,6 +98,8 @@ DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_MARK_BIT_MASK_SHIFTED), (static_ DEFINE_CHECK_EQ(static_cast<size_t>(OBJECT_ALIGNMENT_MASK), (static_cast<size_t>(art::kObjectAlignment - 1))) #define OBJECT_ALIGNMENT_MASK_TOGGLED 0xfffffff8 DEFINE_CHECK_EQ(static_cast<uint32_t>(OBJECT_ALIGNMENT_MASK_TOGGLED), (static_cast<uint32_t>(~static_cast<uint32_t>(art::kObjectAlignment - 1)))) +#define OBJECT_ALIGNMENT_MASK_TOGGLED64 0xfffffffffffffff8 +DEFINE_CHECK_EQ(static_cast<uint64_t>(OBJECT_ALIGNMENT_MASK_TOGGLED64), (static_cast<uint64_t>(~static_cast<uint64_t>(art::kObjectAlignment - 1)))) #define ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE 128 DEFINE_CHECK_EQ(static_cast<int32_t>(ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE), (static_cast<int32_t>((art::gc::allocator::RosAlloc::kMaxThreadLocalBracketSize)))) #define ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT 3 diff --git a/runtime/java_vm_ext.cc b/runtime/java_vm_ext.cc index c644cde5db..2401bec9f3 100644 --- a/runtime/java_vm_ext.cc +++ b/runtime/java_vm_ext.cc @@ -48,7 +48,7 @@ static size_t gGlobalsMax = 51200; // Arbitrary sanity check. (Must fit in 16 b static const size_t kWeakGlobalsInitial = 16; // Arbitrary. static const size_t kWeakGlobalsMax = 51200; // Arbitrary sanity check. (Must fit in 16 bits.) -static bool IsBadJniVersion(int version) { +bool JavaVMExt::IsBadJniVersion(int version) { // We don't support JNI_VERSION_1_1. These are the only other valid versions. return version != JNI_VERSION_1_2 && version != JNI_VERSION_1_4 && version != JNI_VERSION_1_6; } @@ -344,13 +344,6 @@ class JII { } static jint GetEnv(JavaVM* vm, void** env, jint version) { - // GetEnv always returns a JNIEnv* for the most current supported JNI version, - // and unlike other calls that take a JNI version doesn't care if you supply - // JNI_VERSION_1_1, which we don't otherwise support. - if (IsBadJniVersion(version) && version != JNI_VERSION_1_1) { - LOG(ERROR) << "Bad JNI version passed to GetEnv: " << version; - return JNI_EVERSION; - } if (vm == nullptr || env == nullptr) { return JNI_ERR; } @@ -359,8 +352,8 @@ class JII { *env = nullptr; return JNI_EDETACHED; } - *env = thread->GetJniEnv(); - return JNI_OK; + JavaVMExt* raw_vm = reinterpret_cast<JavaVMExt*>(vm); + return raw_vm->HandleGetEnv(env, version); } private: @@ -388,7 +381,7 @@ class JII { const char* thread_name = nullptr; jobject thread_group = nullptr; if (args != nullptr) { - if (IsBadJniVersion(args->version)) { + if (JavaVMExt::IsBadJniVersion(args->version)) { LOG(ERROR) << "Bad JNI version passed to " << (as_daemon ? 
"AttachCurrentThreadAsDaemon" : "AttachCurrentThread") << ": " << args->version; @@ -436,7 +429,8 @@ JavaVMExt::JavaVMExt(Runtime* runtime, const RuntimeArgumentMap& runtime_options weak_globals_lock_("JNI weak global reference table lock", kJniWeakGlobalsLock), weak_globals_(kWeakGlobalsInitial, kWeakGlobalsMax, kWeakGlobal), allow_accessing_weak_globals_(true), - weak_globals_add_condition_("weak globals add condition", weak_globals_lock_) { + weak_globals_add_condition_("weak globals add condition", weak_globals_lock_), + env_hooks_() { functions = unchecked_functions_; SetCheckJniEnabled(runtime_options.Exists(RuntimeArgumentMap::CheckJni)); } @@ -444,6 +438,26 @@ JavaVMExt::JavaVMExt(Runtime* runtime, const RuntimeArgumentMap& runtime_options JavaVMExt::~JavaVMExt() { } +jint JavaVMExt::HandleGetEnv(/*out*/void** env, jint version) { + for (GetEnvHook hook : env_hooks_) { + jint res = hook(this, env, version); + if (res == JNI_OK) { + return JNI_OK; + } else if (res != JNI_EVERSION) { + LOG(ERROR) << "Error returned from a plugin GetEnv handler! " << res; + return res; + } + } + LOG(ERROR) << "Bad JNI version passed to GetEnv: " << version; + return JNI_EVERSION; +} + +// Add a hook to handle getting environments from the GetEnv call. +void JavaVMExt::AddEnvironmentHook(GetEnvHook hook) { + CHECK(hook != nullptr) << "environment hooks shouldn't be null!"; + env_hooks_.push_back(hook); +} + void JavaVMExt::JniAbort(const char* jni_function_name, const char* msg) { Thread* self = Thread::Current(); ScopedObjectAccess soa(self); @@ -866,7 +880,7 @@ bool JavaVMExt::LoadNativeLibrary(JNIEnv* env, if (version == JNI_ERR) { StringAppendF(error_msg, "JNI_ERR returned from JNI_OnLoad in \"%s\"", path.c_str()); - } else if (IsBadJniVersion(version)) { + } else if (JavaVMExt::IsBadJniVersion(version)) { StringAppendF(error_msg, "Bad JNI version returned from JNI_OnLoad in \"%s\": %d", path.c_str(), version); // It's unwise to call dlclose() here, but we can mark it @@ -939,7 +953,7 @@ void JavaVMExt::VisitRoots(RootVisitor* visitor) { extern "C" jint JNI_CreateJavaVM(JavaVM** p_vm, JNIEnv** p_env, void* vm_args) { ScopedTrace trace(__FUNCTION__); const JavaVMInitArgs* args = static_cast<JavaVMInitArgs*>(vm_args); - if (IsBadJniVersion(args->version)) { + if (JavaVMExt::IsBadJniVersion(args->version)) { LOG(ERROR) << "Bad JNI version passed to CreateJavaVM: " << args->version; return JNI_EVERSION; } diff --git a/runtime/java_vm_ext.h b/runtime/java_vm_ext.h index 3d055cd7ce..ed9d3abfe2 100644 --- a/runtime/java_vm_ext.h +++ b/runtime/java_vm_ext.h @@ -36,6 +36,10 @@ class ParsedOptions; class Runtime; struct RuntimeArgumentMap; +class JavaVMExt; +// Hook definition for runtime plugins. +using GetEnvHook = jint (*)(JavaVMExt* vm, /*out*/void** new_env, jint version); + class JavaVMExt : public JavaVM { public: JavaVMExt(Runtime* runtime, const RuntimeArgumentMap& runtime_options); @@ -171,6 +175,12 @@ class JavaVMExt : public JavaVM { void TrimGlobals() SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!globals_lock_); + jint HandleGetEnv(/*out*/void** env, jint version); + + void AddEnvironmentHook(GetEnvHook hook); + + static bool IsBadJniVersion(int version); + private: // Return true if self can currently access weak globals. 
bool MayAccessWeakGlobalsUnlocked(Thread* self) const SHARED_REQUIRES(Locks::mutator_lock_); @@ -215,6 +225,9 @@ class JavaVMExt : public JavaVM { Atomic<bool> allow_accessing_weak_globals_; ConditionVariable weak_globals_add_condition_ GUARDED_BY(weak_globals_lock_); + // TODO Maybe move this to Runtime. + std::vector<GetEnvHook> env_hooks_; + DISALLOW_COPY_AND_ASSIGN(JavaVMExt); }; diff --git a/runtime/jit/profile_saver.cc b/runtime/jit/profile_saver.cc index 5a469e51b4..b35c958b0b 100644 --- a/runtime/jit/profile_saver.cc +++ b/runtime/jit/profile_saver.cc @@ -176,14 +176,13 @@ void ProfileSaver::NotifyJitActivityInternal() { MutexLock wait_mutex(Thread::Current(), wait_lock_); if ((NanoTime() - last_time_ns_saver_woke_up_) > MsToNs(options_.GetMinSavePeriodMs())) { WakeUpSaver(); + } else if (jit_activity_notifications_ > options_.GetMaxNotificationBeforeWake()) { + // Make sure to wake up the saver if we see a spike in the number of notifications. + // This is a precaution to avoid losing a big number of methods in case + // this is a spike with no jit after. + total_number_of_hot_spikes_++; + WakeUpSaver(); } - } else if (jit_activity_notifications_ > options_.GetMaxNotificationBeforeWake()) { - // Make sure to wake up the saver if we see a spike in the number of notifications. - // This is a precaution to avoid "loosing" a big number of methods in case - // this is a spike with no jit after. - total_number_of_hot_spikes_++; - MutexLock wait_mutex(Thread::Current(), wait_lock_); - WakeUpSaver(); } } diff --git a/runtime/jni_env_ext.cc b/runtime/jni_env_ext.cc index 1ee1611ef7..40efc898b8 100644 --- a/runtime/jni_env_ext.cc +++ b/runtime/jni_env_ext.cc @@ -45,6 +45,20 @@ static bool CheckLocalsValid(JNIEnvExt* in) NO_THREAD_SAFETY_ANALYSIS { return in->locals.IsValid(); } +jint JNIEnvExt::GetEnvHandler(JavaVMExt* vm, /*out*/void** env, jint version) { + UNUSED(vm); + // GetEnv always returns a JNIEnv* for the most current supported JNI version, + // and unlike other calls that take a JNI version doesn't care if you supply + // JNI_VERSION_1_1, which we don't otherwise support. 
+ if (JavaVMExt::IsBadJniVersion(version) && version != JNI_VERSION_1_1) { + return JNI_EVERSION; + } + Thread* thread = Thread::Current(); + CHECK(thread != nullptr); + *env = thread->GetJniEnv(); + return JNI_OK; +} + JNIEnvExt* JNIEnvExt::Create(Thread* self_in, JavaVMExt* vm_in) { std::unique_ptr<JNIEnvExt> ret(new JNIEnvExt(self_in, vm_in)); if (CheckLocalsValid(ret.get())) { diff --git a/runtime/jni_env_ext.h b/runtime/jni_env_ext.h index d4accc342b..ac287d488a 100644 --- a/runtime/jni_env_ext.h +++ b/runtime/jni_env_ext.h @@ -54,6 +54,8 @@ struct JNIEnvExt : public JNIEnv { static Offset LocalRefCookieOffset(size_t pointer_size); static Offset SelfOffset(size_t pointer_size); + static jint GetEnvHandler(JavaVMExt* vm, /*out*/void** out, jint version); + jobject NewLocalRef(mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_); void DeleteLocalRef(jobject obj) SHARED_REQUIRES(Locks::mutator_lock_); diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc index c7e4f8b343..174da79030 100644 --- a/runtime/parsed_options.cc +++ b/runtime/parsed_options.cc @@ -23,6 +23,7 @@ #include "gc/heap.h" #include "monitor.h" #include "runtime.h" +#include "ti/agent.h" #include "trace.h" #include "utils.h" @@ -90,6 +91,13 @@ std::unique_ptr<RuntimeParser> ParsedOptions::MakeParser(bool ignore_unrecognize .Define({"-Xrunjdwp:_", "-agentlib:jdwp=_"}) .WithType<JDWP::JdwpOptions>() .IntoKey(M::JdwpOptions) + // TODO Re-enable -agentlib: once I have a good way to transform the values. + // .Define("-agentlib:_") + // .WithType<std::vector<ti::Agent>>().AppendValues() + // .IntoKey(M::AgentLib) + .Define("-agentpath:_") + .WithType<std::vector<ti::Agent>>().AppendValues() + .IntoKey(M::AgentPath) .Define("-Xms_") .WithType<MemoryKiB>() .IntoKey(M::MemoryInitialSize) @@ -289,6 +297,9 @@ std::unique_ptr<RuntimeParser> ParsedOptions::MakeParser(bool ignore_unrecognize .IntoKey(M::Experimental) .Define("-Xforce-nb-testing") .IntoKey(M::ForceNativeBridge) + .Define("-Xplugin:_") + .WithType<std::vector<Plugin>>().AppendValues() + .IntoKey(M::Plugins) .Ignore({ "-ea", "-da", "-enableassertions", "-disableassertions", "--runtime-arg", "-esa", "-dsa", "-enablesystemassertions", "-disablesystemassertions", "-Xrs", "-Xint:_", @@ -583,6 +594,42 @@ bool ParsedOptions::DoParse(const RuntimeOptions& options, args.Set(M::HeapGrowthLimit, args.GetOrDefault(M::MemoryMaximumSize)); } + if (args.GetOrDefault(M::Experimental) & ExperimentalFlags::kRuntimePlugins) { + LOG(WARNING) << "Experimental runtime plugin support has been enabled. No guarantees are made " + << "about stability or usage of this plugin support. Use at your own risk. Do " + << "not attempt to write shipping code that relies on the implementation of " + << "runtime plugins."; + } else if (!args.GetOrDefault(M::Plugins).empty()) { + LOG(WARNING) << "Experimental runtime plugin support has not been enabled. Ignored options: "; + for (auto& op : args.GetOrDefault(M::Plugins)) { + LOG(WARNING) << " -plugin:" << op.GetLibrary(); + } + } + + if (args.GetOrDefault(M::Experimental) & ExperimentalFlags::kAgents) { + LOG(WARNING) << "Experimental runtime agent support has been enabled. No guarantees are made " + << "the completeness, accuracy, reliability, or stability of the agent " + << "implementation. Use at your own risk. 
Do not attempt to write shipping code " + << "that relies on the implementation of any part of this api."; + } else if (!args.GetOrDefault(M::AgentLib).empty() || !args.GetOrDefault(M::AgentPath).empty()) { + LOG(WARNING) << "agent support has not been enabled. Enable experimental agent " + << " support with '-XExperimental:agent'. Ignored options are:"; + for (auto op : args.GetOrDefault(M::AgentLib)) { + if (op.HasArgs()) { + LOG(WARNING) << " -agentlib:" << op.GetName() << "=" << op.GetArgs(); + } else { + LOG(WARNING) << " -agentlib:" << op.GetName(); + } + } + for (auto op : args.GetOrDefault(M::AgentPath)) { + if (op.HasArgs()) { + LOG(WARNING) << " -agentpath:" << op.GetName() << "=" << op.GetArgs(); + } else { + LOG(WARNING) << " -agentpath:" << op.GetName(); + } + } + } + *runtime_options = std::move(args); return true; } @@ -627,6 +674,11 @@ void ParsedOptions::Usage(const char* fmt, ...) { UsageMessage(stream, " -showversion\n"); UsageMessage(stream, " -help\n"); UsageMessage(stream, " -agentlib:jdwp=options\n"); + // TODO add back in once -agentlib actually does something. + // UsageMessage(stream, " -agentlib:library=options (Experimental feature, " + // "requires -Xexperimental:agent, some features might not be supported)\n"); + UsageMessage(stream, " -agentpath:library_path=options (Experimental feature, " + "requires -Xexperimental:agent, some features might not be supported)\n"); UsageMessage(stream, "\n"); UsageMessage(stream, "The following extended options are supported:\n"); @@ -703,6 +755,12 @@ void ParsedOptions::Usage(const char* fmt, ...) { UsageMessage(stream, " -X[no]image-dex2oat (Whether to create and use a boot image)\n"); UsageMessage(stream, " -Xno-dex-file-fallback " "(Don't fall back to dex files without oat files)\n"); + UsageMessage(stream, " -Xplugin:<library.so> " + "(Load a runtime plugin, requires -Xexperimental:runtime-plugins)\n"); + UsageMessage(stream, " -Xexperimental:runtime-plugins" + "(Enable new and experimental agent support)\n"); + UsageMessage(stream, " -Xexperimental:agents" + "(Enable new and experimental agent support)\n"); UsageMessage(stream, "\n"); UsageMessage(stream, "The following previously supported Dalvik options are ignored:\n"); diff --git a/runtime/plugin.cc b/runtime/plugin.cc new file mode 100644 index 0000000000..481b1caa15 --- /dev/null +++ b/runtime/plugin.cc @@ -0,0 +1,88 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "plugin.h" + +#include <dlfcn.h> +#include "base/stringprintf.h" +#include "base/logging.h" + +namespace art { + +const char* PLUGIN_INITIALIZATION_FUNCTION_NAME = "ArtPlugin_Initialize"; +const char* PLUGIN_DEINITIALIZATION_FUNCTION_NAME = "ArtPlugin_Deinitialize"; + +Plugin::Plugin(const Plugin& other) : library_(other.library_), dlopen_handle_(nullptr) { + if (other.IsLoaded()) { + std::string err; + Load(&err); + } +} + +bool Plugin::Load(/*out*/std::string* error_msg) { + DCHECK(!IsLoaded()); + void* res = dlopen(library_.c_str(), RTLD_LAZY); + if (res == nullptr) { + *error_msg = StringPrintf("dlopen failed: %s", dlerror()); + return false; + } + // Get the initializer function + PluginInitializationFunction init = reinterpret_cast<PluginInitializationFunction>( + dlsym(res, PLUGIN_INITIALIZATION_FUNCTION_NAME)); + if (init != nullptr) { + if (!init()) { + dlclose(res); + *error_msg = StringPrintf("Initialization of plugin failed"); + return false; + } + } else { + LOG(WARNING) << this << " does not include an initialization function"; + } + dlopen_handle_ = res; + return true; +} + +bool Plugin::Unload() { + DCHECK(IsLoaded()); + bool ret = true; + void* handle = dlopen_handle_; + PluginDeinitializationFunction deinit = reinterpret_cast<PluginDeinitializationFunction>( + dlsym(handle, PLUGIN_DEINITIALIZATION_FUNCTION_NAME)); + if (deinit != nullptr) { + if (!deinit()) { + LOG(WARNING) << this << " failed deinitialization"; + ret = false; + } + } else { + LOG(WARNING) << this << " does not include a deinitialization function"; + } + dlopen_handle_ = nullptr; + if (dlclose(handle) != 0) { + LOG(ERROR) << this << " failed to dlclose: " << dlerror(); + ret = false; + } + return ret; +} + +std::ostream& operator<<(std::ostream &os, const Plugin* m) { + return os << *m; +} + +std::ostream& operator<<(std::ostream &os, Plugin const& m) { + return os << "Plugin { library=\"" << m.library_ << "\", handle=" << m.dlopen_handle_ << " }"; +} + +} // namespace art diff --git a/runtime/plugin.h b/runtime/plugin.h new file mode 100644 index 0000000000..18f3977bd5 --- /dev/null +++ b/runtime/plugin.h @@ -0,0 +1,82 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_RUNTIME_PLUGIN_H_ +#define ART_RUNTIME_PLUGIN_H_ + +#include <string> +#include "base/logging.h" + +namespace art { + +// This function is loaded from the plugin (if present) and called during runtime initialization. +// By the time this has been called the runtime has been fully initialized but not other native +// libraries have been loaded yet. Failure to initialize is considered a fatal error. +// TODO might want to give initialization function some arguments +using PluginInitializationFunction = bool (*)(); +using PluginDeinitializationFunction = bool (*)(); + +// A class encapsulating a plugin. There is no stable plugin ABI or API and likely never will be. 
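
[Editor's aside] The entry points a plugin exports are the two functions named in plugin.cc above, ArtPlugin_Initialize and ArtPlugin_Deinitialize, both returning bool; Plugin::Load()/Plugin::Unload() look them up with dlsym and treat a false return as failure. Test 900 later in this change exercises exactly this interface. A minimal, hedged sketch of such a shared library (file and message names are hypothetical):

// plugin_example.cc - hypothetical minimal runtime plugin, built as a shared
// library and loaded with -Xplugin:<library.so> (requires
// -Xexperimental:runtime-plugins).
#include <cstdio>

extern "C" bool ArtPlugin_Initialize() {
  // Called once the runtime is largely initialized, before other native
  // libraries are loaded; a real plugin could register a GetEnv hook here.
  std::printf("example plugin initialized\n");
  return true;  // Returning false makes Plugin::Load() fail.
}

extern "C" bool ArtPlugin_Deinitialize() {
  std::printf("example plugin deinitialized\n");
  return true;  // Returning false is logged as a failed deinitialization.
}
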
+// TODO Might want to put some locking in this but ATM we only load these at initialization in a +// single-threaded fashion so not much need +class Plugin { + public: + static Plugin Create(std::string lib) { + return Plugin(lib); + } + + bool IsLoaded() const { + return dlopen_handle_ != nullptr; + } + + const std::string& GetLibrary() const { + return library_; + } + + bool Load(/*out*/std::string* error_msg); + bool Unload(); + + + ~Plugin() { + if (IsLoaded() && !Unload()) { + LOG(ERROR) << "Error unloading " << this; + } + } + + Plugin(const Plugin& other); + + // Create move constructor for putting this in a list + Plugin(Plugin&& other) + : library_(other.library_), + dlopen_handle_(other.dlopen_handle_) { + other.dlopen_handle_ = nullptr; + } + + private: + explicit Plugin(std::string library) : library_(library), dlopen_handle_(nullptr) { } + + std::string library_; + void* dlopen_handle_; + + friend std::ostream& operator<<(std::ostream &os, Plugin const& m); +}; + +std::ostream& operator<<(std::ostream &os, Plugin const& m); +std::ostream& operator<<(std::ostream &os, const Plugin* m); + +} // namespace art + +#endif // ART_RUNTIME_PLUGIN_H_ diff --git a/runtime/runtime.cc b/runtime/runtime.cc index 68fa0d32be..ddcfb6d5aa 100644 --- a/runtime/runtime.cc +++ b/runtime/runtime.cc @@ -130,6 +130,7 @@ #include "signal_set.h" #include "thread.h" #include "thread_list.h" +#include "ti/agent.h" #include "trace.h" #include "transaction.h" #include "utils.h" @@ -281,6 +282,16 @@ Runtime::~Runtime() { jit_->StopProfileSaver(); } + // TODO Maybe do some locking. + for (auto& agent : agents_) { + agent.Unload(); + } + + // TODO Maybe do some locking + for (auto& plugin : plugins_) { + plugin.Unload(); + } + // Make sure our internal threads are dead before we start tearing down things they're using. Dbg::StopJdwp(); delete signal_catcher_; @@ -960,6 +971,16 @@ bool Runtime::Init(RuntimeArgumentMap&& runtime_options_in) { experimental_flags_ = runtime_options.GetOrDefault(Opt::Experimental); is_low_memory_mode_ = runtime_options.Exists(Opt::LowMemoryMode); + if (experimental_flags_ & ExperimentalFlags::kRuntimePlugins) { + plugins_ = runtime_options.ReleaseOrDefault(Opt::Plugins); + } + if (experimental_flags_ & ExperimentalFlags::kAgents) { + agents_ = runtime_options.ReleaseOrDefault(Opt::AgentPath); + // TODO Add back in -agentlib + // for (auto lib : runtime_options.ReleaseOrDefault(Opt::AgentLib)) { + // agents_.push_back(lib); + // } + } XGcOption xgc_option = runtime_options.GetOrDefault(Opt::GcOption); heap_ = new gc::Heap(runtime_options.GetOrDefault(Opt::MemoryInitialSize), runtime_options.GetOrDefault(Opt::HeapGrowthLimit), @@ -1084,6 +1105,10 @@ bool Runtime::Init(RuntimeArgumentMap&& runtime_options_in) { java_vm_ = new JavaVMExt(this, runtime_options); + // Add the JniEnv handler. + // TODO Refactor this stuff. + java_vm_->AddEnvironmentHook(JNIEnvExt::GetEnvHandler); + Thread::Startup(); // ClassLinker needs an attached thread, but we can't fully attach a thread without creating @@ -1200,6 +1225,16 @@ bool Runtime::Init(RuntimeArgumentMap&& runtime_options_in) { pre_allocated_NoClassDefFoundError_ = GcRoot<mirror::Throwable>(self->GetException()); self->ClearException(); + // Runtime initialization is largely done now. + // We load plugins first since that can modify the runtime state slightly. 
+ // Load all plugins + for (auto& plugin : plugins_) { + std::string err; + if (!plugin.Load(&err)) { + LOG(FATAL) << plugin << " failed to load: " << err; + } + } + // Look for a native bridge. // // The intended flow here is, in the case of a running system: @@ -1232,6 +1267,20 @@ bool Runtime::Init(RuntimeArgumentMap&& runtime_options_in) { is_native_bridge_loaded_ = LoadNativeBridge(native_bridge_file_name); } + // Startup agents + // TODO Maybe we should start a new thread to run these on. Investigate RI behavior more. + for (auto& agent : agents_) { + // TODO Check err + int res = 0; + std::string err = ""; + ti::Agent::LoadError result = agent.Load(&res, &err); + if (result == ti::Agent::kInitializationError) { + LOG(FATAL) << "Unable to initialize agent!"; + } else if (result != ti::Agent::kNoError) { + LOG(ERROR) << "Unable to load an agent: " << err; + } + } + VLOG(startup) << "Runtime::Init exiting"; return true; diff --git a/runtime/runtime.h b/runtime/runtime.h index c971646195..6da60f27a3 100644 --- a/runtime/runtime.h +++ b/runtime/runtime.h @@ -63,6 +63,9 @@ namespace mirror { class String; class Throwable; } // namespace mirror +namespace ti { + class Agent; +} // namespace ti namespace verifier { class MethodVerifier; enum class VerifyMode : int8_t; @@ -80,6 +83,7 @@ class MonitorList; class MonitorPool; class NullPointerHandler; class OatFileManager; +class Plugin; struct RuntimeArgumentMap; class SignalCatcher; class StackOverflowHandler; @@ -698,6 +702,9 @@ class Runtime { std::string class_path_string_; std::vector<std::string> properties_; + std::vector<ti::Agent> agents_; + std::vector<Plugin> plugins_; + // The default stack size for managed threads created by the runtime. size_t default_stack_size_; diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def index b95dfad550..146afc7ad8 100644 --- a/runtime/runtime_options.def +++ b/runtime/runtime_options.def @@ -117,7 +117,10 @@ RUNTIME_OPTIONS_KEY (unsigned int, ZygoteMaxFailedBoots, 10) RUNTIME_OPTIONS_KEY (Unit, NoDexFileFallback) RUNTIME_OPTIONS_KEY (std::string, CpuAbiList) RUNTIME_OPTIONS_KEY (std::string, Fingerprint) -RUNTIME_OPTIONS_KEY (ExperimentalFlags, Experimental, ExperimentalFlags::kNone) // -Xexperimental:{none} +RUNTIME_OPTIONS_KEY (ExperimentalFlags, Experimental, ExperimentalFlags::kNone) // -Xexperimental:{none, agents} +RUNTIME_OPTIONS_KEY (std::vector<ti::Agent>, AgentLib) // -agentlib:<libname>=<options>, Requires -Xexperimental:agents +RUNTIME_OPTIONS_KEY (std::vector<ti::Agent>, AgentPath) // -agentpath:<libname>=<options>, Requires -Xexperimental:agents +RUNTIME_OPTIONS_KEY (std::vector<Plugin>, Plugins) // -Xplugin:<library> Requires -Xexperimental:runtime-plugins // Not parse-able from command line, but can be provided explicitly. // (Do not add anything here that is defined in ParsedOptions::MakeParser) diff --git a/runtime/simulator/Android.mk b/runtime/simulator/Android.mk index 953a37733d..a34a84100a 100644 --- a/runtime/simulator/Android.mk +++ b/runtime/simulator/Android.mk @@ -88,9 +88,9 @@ define build-libart-simulator LOCAL_NATIVE_COVERAGE := $(ART_COVERAGE) # For simulator_arm64. 
ifeq ($$(art_ndebug_or_debug),debug) - LOCAL_SHARED_LIBRARIES += libvixl-arm64 + LOCAL_SHARED_LIBRARIES += libvixld-arm64 else - LOCAL_SHARED_LIBRARIES += libvixl-arm64 + LOCAL_SHARED_LIBRARIES += libvixl-arm64 endif ifeq ($$(art_target_or_host),target) include $(BUILD_SHARED_LIBRARY) diff --git a/runtime/simulator/code_simulator_arm64.h b/runtime/simulator/code_simulator_arm64.h index 69388b122c..59ea34fb80 100644 --- a/runtime/simulator/code_simulator_arm64.h +++ b/runtime/simulator/code_simulator_arm64.h @@ -20,10 +20,10 @@ #include "memory" #include "simulator/code_simulator.h" -// TODO: make vixl clean wrt -Wshadow. +// TODO(VIXL): Make VIXL compile with -Wshadow. #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wshadow" -#include "a64/simulator-a64.h" +#include "aarch64/simulator-aarch64.h" #pragma GCC diagnostic pop namespace art { diff --git a/runtime/stack_map.h b/runtime/stack_map.h index 4647d67699..dd7e53100f 100644 --- a/runtime/stack_map.h +++ b/runtime/stack_map.h @@ -1050,7 +1050,7 @@ struct CodeInfoEncoding { inline_info_encoding = *reinterpret_cast<const InlineInfoEncoding*>(ptr); ptr += sizeof(InlineInfoEncoding); } else { - inline_info_encoding = InlineInfoEncoding{}; // NOLINT. + inline_info_encoding = InlineInfoEncoding{}; // NOLINT. } header_size = dchecked_integral_cast<uint8_t>(ptr - reinterpret_cast<const uint8_t*>(data)); } diff --git a/runtime/ti/agent.cc b/runtime/ti/agent.cc new file mode 100644 index 0000000000..41a21f70f3 --- /dev/null +++ b/runtime/ti/agent.cc @@ -0,0 +1,138 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "agent.h" +#include "java_vm_ext.h" +#include "runtime.h" + +namespace art { +namespace ti { + +const char* AGENT_ON_LOAD_FUNCTION_NAME = "Agent_OnLoad"; +const char* AGENT_ON_ATTACH_FUNCTION_NAME = "Agent_OnAttach"; +const char* AGENT_ON_UNLOAD_FUNCTION_NAME = "Agent_OnUnload"; + +Agent Agent::Create(std::string arg) { + size_t eq = arg.find_first_of('='); + if (eq == std::string::npos) { + return Agent(arg, ""); + } else { + return Agent(arg.substr(0, eq), arg.substr(eq + 1, arg.length())); + } +} + +// TODO We need to acquire some locks probably. +Agent::LoadError Agent::Load(/*out*/jint* call_res, /*out*/ std::string* error_msg) { + DCHECK(call_res != nullptr); + DCHECK(error_msg != nullptr); + if (IsStarted()) { + *error_msg = StringPrintf("the agent at %s has already been started!", name_.c_str()); + VLOG(agents) << "err: " << *error_msg; + return kAlreadyStarted; + } + LoadError err = DoDlOpen(error_msg); + if (err != kNoError) { + VLOG(agents) << "err: " << *error_msg; + return err; + } + if (onload_ == nullptr) { + *error_msg = StringPrintf("Unable to start agent %s: No Agent_OnLoad function found", + name_.c_str()); + VLOG(agents) << "err: " << *error_msg; + return kLoadingError; + } + // TODO Need to do some checks that we are at a good spot etc. 
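
[Editor's aside] Agent::Create() above splits the raw -agentpath value at the first '=': the part before it becomes the library name handed to dlopen and the rest becomes the argument string later forwarded to Agent_OnLoad. A small, hypothetical usage sketch (library name and options invented for illustration; assumes "ti/agent.h"):

// #include "ti/agent.h"
art::ti::Agent agent = art::ti::Agent::Create("libtiexample.so=opt1,opt2");
// agent.GetName() == "libtiexample.so"
// agent.GetArgs() == "opt1,opt2"
// agent.HasArgs() == true
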
+ *call_res = onload_(static_cast<JavaVM*>(Runtime::Current()->GetJavaVM()), + args_.c_str(), + nullptr); + if (*call_res != 0) { + *error_msg = StringPrintf("Initialization of %s returned non-zero value of %d", + name_.c_str(), *call_res); + VLOG(agents) << "err: " << *error_msg; + return kInitializationError; + } else { + return kNoError; + } +} + +Agent::LoadError Agent::DoDlOpen(/*out*/std::string* error_msg) { + DCHECK(error_msg != nullptr); + dlopen_handle_ = dlopen(name_.c_str(), RTLD_LAZY); + if (dlopen_handle_ == nullptr) { + *error_msg = StringPrintf("Unable to dlopen %s: %s", name_.c_str(), dlerror()); + return kLoadingError; + } + + onload_ = reinterpret_cast<AgentOnLoadFunction>(dlsym(dlopen_handle_, + AGENT_ON_LOAD_FUNCTION_NAME)); + if (onload_ == nullptr) { + VLOG(agents) << "Unable to find 'Agent_OnLoad' symbol in " << this; + } + onattach_ = reinterpret_cast<AgentOnAttachFunction>(dlsym(dlopen_handle_, + AGENT_ON_ATTACH_FUNCTION_NAME)); + if (onattach_ == nullptr) { + VLOG(agents) << "Unable to find 'Agent_OnAttach' symbol in " << this; + } + onunload_= reinterpret_cast<AgentOnUnloadFunction>(dlsym(dlopen_handle_, + AGENT_ON_UNLOAD_FUNCTION_NAME)); + if (onunload_ == nullptr) { + VLOG(agents) << "Unable to find 'Agent_OnUnload' symbol in " << this; + } + return kNoError; +} + +// TODO Lock some stuff probably. +void Agent::Unload() { + if (dlopen_handle_ != nullptr) { + if (onunload_ != nullptr) { + onunload_(Runtime::Current()->GetJavaVM()); + } + dlclose(dlopen_handle_); + dlopen_handle_ = nullptr; + } else { + VLOG(agents) << this << " is not currently loaded!"; + } +} + +Agent::Agent(const Agent& other) + : name_(other.name_), + args_(other.args_), + dlopen_handle_(other.dlopen_handle_), + onload_(other.onload_), + onattach_(other.onattach_), + onunload_(other.onunload_) { + if (other.dlopen_handle_ != nullptr) { + dlopen(other.name_.c_str(), 0); + } +} + +Agent::~Agent() { + if (dlopen_handle_ != nullptr) { + dlclose(dlopen_handle_); + } +} + +std::ostream& operator<<(std::ostream &os, const Agent* m) { + return os << *m; +} + +std::ostream& operator<<(std::ostream &os, Agent const& m) { + return os << "Agent { name=\"" << m.name_ << "\", args=\"" << m.args_ << "\", handle=" + << m.dlopen_handle_ << " }"; +} + +} // namespace ti +} // namespace art diff --git a/runtime/ti/agent.h b/runtime/ti/agent.h new file mode 100644 index 0000000000..521e21e4e4 --- /dev/null +++ b/runtime/ti/agent.h @@ -0,0 +1,130 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_RUNTIME_TI_AGENT_H_ +#define ART_RUNTIME_TI_AGENT_H_ + +#include <dlfcn.h> +#include <jni.h> // for jint, JavaVM* etc declarations + +#include "base/stringprintf.h" +#include "runtime.h" +#include "utils.h" + +namespace art { +namespace ti { + +using AgentOnLoadFunction = jint (*)(JavaVM*, const char*, void*); +using AgentOnAttachFunction = jint (*)(JavaVM*, const char*, void*); +using AgentOnUnloadFunction = void (*)(JavaVM*); + +class Agent { + public: + enum LoadError { + kNoError, // No error occurred.. + kAlreadyStarted, // The agent has already been loaded. + kLoadingError, // dlopen or dlsym returned an error. + kInitializationError, // The entrypoint did not return 0. This might require an abort. + }; + + bool IsStarted() const { + return dlopen_handle_ != nullptr; + } + + const std::string& GetName() const { + return name_; + } + + const std::string& GetArgs() const { + return args_; + } + + bool HasArgs() const { + return !GetArgs().empty(); + } + + // TODO We need to acquire some locks probably. + LoadError Load(/*out*/jint* call_res, /*out*/std::string* error_msg); + + // TODO We need to acquire some locks probably. + void Unload(); + + // Tries to attach the agent using its OnAttach method. Returns true on success. + // TODO We need to acquire some locks probably. + LoadError Attach(std::string* error_msg) { + // TODO + *error_msg = "Attach has not yet been implemented!"; + return kLoadingError; + } + + static Agent Create(std::string arg); + + static Agent Create(std::string name, std::string args) { + return Agent(name, args); + } + + ~Agent(); + + // We need move constructor and copy for vectors + Agent(const Agent& other); + + Agent(Agent&& other) + : name_(other.name_), + args_(other.args_), + dlopen_handle_(nullptr), + onload_(nullptr), + onattach_(nullptr), + onunload_(nullptr) { + other.dlopen_handle_ = nullptr; + other.onload_ = nullptr; + other.onattach_ = nullptr; + other.onunload_ = nullptr; + } + + // We don't need an operator= + void operator=(const Agent&) = delete; + + private: + Agent(std::string name, std::string args) + : name_(name), + args_(args), + dlopen_handle_(nullptr), + onload_(nullptr), + onattach_(nullptr), + onunload_(nullptr) { } + + LoadError DoDlOpen(/*out*/std::string* error_msg); + + const std::string name_; + const std::string args_; + void* dlopen_handle_; + + // The entrypoints. 
+ AgentOnLoadFunction onload_; + AgentOnAttachFunction onattach_; + AgentOnUnloadFunction onunload_; + + friend std::ostream& operator<<(std::ostream &os, Agent const& m); +}; + +std::ostream& operator<<(std::ostream &os, Agent const& m); +std::ostream& operator<<(std::ostream &os, const Agent* m); + +} // namespace ti +} // namespace art + +#endif // ART_RUNTIME_TI_AGENT_H_ + diff --git a/test/617-clinit-oome/expected.txt b/test/617-clinit-oome/expected.txt new file mode 100644 index 0000000000..c1d33ff9e6 --- /dev/null +++ b/test/617-clinit-oome/expected.txt @@ -0,0 +1 @@ +Filling heap diff --git a/test/617-clinit-oome/info.txt b/test/617-clinit-oome/info.txt new file mode 100644 index 0000000000..ece35b28cd --- /dev/null +++ b/test/617-clinit-oome/info.txt @@ -0,0 +1 @@ +Regression test for encoded static strings caussing OOME b/30690988 diff --git a/test/617-clinit-oome/src/Main.java b/test/617-clinit-oome/src/Main.java new file mode 100644 index 0000000000..749a2325ef --- /dev/null +++ b/test/617-clinit-oome/src/Main.java @@ -0,0 +1,43 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class Main { + public static void main(String[] args) { + Class klass = Other.class; + Object[] data = new Object[100000]; + try { + System.out.println("Filling heap"); + int size = 256 * 1024 * 1024; + int index = 0; + while (true) { + try { + data[index] = new byte[size]; + index++; + } catch (OutOfMemoryError e) { + size /= 2; + if (size == 0) { + break; + } + } + } + // Initialize now that the heap is full. + Other.print(); + } catch (OutOfMemoryError e) { + } catch (Exception e) { + System.err.println(e); + } + } +} diff --git a/test/617-clinit-oome/src/Other.java b/test/617-clinit-oome/src/Other.java new file mode 100644 index 0000000000..20306ee4c4 --- /dev/null +++ b/test/617-clinit-oome/src/Other.java @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +public final class Other { + public static final String string1 = "ABCDEFG1"; + public static final String string2 = "ABCDEFG2"; + public static final String string3 = "ABCDEFG3"; + public static final String string4 = "ABCDEFG4"; + public static final String string5 = "ABCDEFG5"; + public static final int int1 = 12; + + public static void print() { + System.out.println(string2); + } +} diff --git a/test/900-hello-plugin/build b/test/900-hello-plugin/build new file mode 100755 index 0000000000..898e2e54a2 --- /dev/null +++ b/test/900-hello-plugin/build @@ -0,0 +1,17 @@ +#!/bin/bash +# +# Copyright 2016 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +./default-build "$@" --experimental agents diff --git a/test/900-hello-plugin/expected.txt b/test/900-hello-plugin/expected.txt new file mode 100644 index 0000000000..43db31c722 --- /dev/null +++ b/test/900-hello-plugin/expected.txt @@ -0,0 +1,8 @@ +ArtPlugin_Initialize called in test 900 +Agent_OnLoad called with options "test_900" +GetEnvHandler called in test 900 +GetEnvHandler called with version 0x900fffff +GetEnv returned '900' environment! +Hello, world! +Agent_OnUnload called +ArtPlugin_Deinitialize called in test 900 diff --git a/test/900-hello-plugin/info.txt b/test/900-hello-plugin/info.txt new file mode 100644 index 0000000000..47b15c2e6a --- /dev/null +++ b/test/900-hello-plugin/info.txt @@ -0,0 +1,2 @@ +Tests that agents and plugins are loaded. + diff --git a/test/900-hello-plugin/load_unload.cc b/test/900-hello-plugin/load_unload.cc new file mode 100644 index 0000000000..a38cc3d6ac --- /dev/null +++ b/test/900-hello-plugin/load_unload.cc @@ -0,0 +1,68 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <jni.h> +#include <stdio.h> + +#include "art_method-inl.h" +#include "base/logging.h" +#include "base/macros.h" + +namespace art { + +constexpr jint TEST_900_ENV_VERSION_NUMBER = 0x900FFFFF; +constexpr uintptr_t ENV_VALUE = 900; + +// Allow this library to be used as a plugin too so we can test the stack. 
+static jint GetEnvHandler(JavaVMExt* vm ATTRIBUTE_UNUSED, void** new_env, jint version) { + printf("%s called in test 900\n", __func__); + if (version != TEST_900_ENV_VERSION_NUMBER) { + return JNI_EVERSION; + } + printf("GetEnvHandler called with version 0x%x\n", version); + *new_env = reinterpret_cast<void*>(ENV_VALUE); + return JNI_OK; +} + +extern "C" bool ArtPlugin_Initialize() { + printf("%s called in test 900\n", __func__); + Runtime::Current()->GetJavaVM()->AddEnvironmentHook(GetEnvHandler); + return true; +} + +extern "C" bool ArtPlugin_Deinitialize() { + printf("%s called in test 900\n", __func__); + return true; +} + +extern "C" JNIEXPORT jint JNICALL Agent_OnLoad(JavaVM* vm, + char* options, + void* reserved ATTRIBUTE_UNUSED) { + printf("Agent_OnLoad called with options \"%s\"\n", options); + uintptr_t env = 0; + jint res = vm->GetEnv(reinterpret_cast<void**>(&env), TEST_900_ENV_VERSION_NUMBER); + if (res != JNI_OK) { + printf("GetEnv(TEST_900_ENV_VERSION_NUMBER) returned non-zero\n"); + } + printf("GetEnv returned '%" PRIdPTR "' environment!\n", env); + return 0; +} + +extern "C" JNIEXPORT void JNICALL Agent_OnUnload(JavaVM* vm ATTRIBUTE_UNUSED) { + printf("Agent_OnUnload called\n"); +} + +} // namespace art diff --git a/test/900-hello-plugin/run b/test/900-hello-plugin/run new file mode 100755 index 0000000000..35b08715a1 --- /dev/null +++ b/test/900-hello-plugin/run @@ -0,0 +1,24 @@ +#!/bin/bash +# +# Copyright 2016 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +plugin=libartagentd.so +if [[ "$@" == *"-O"* ]]; then + plugin=libartagent.so +fi +./default-run "$@" --experimental agents \ + --experimental runtime-plugins \ + --runtime-option -agentpath:${plugin}=test_900 \ + --android-runtime-option -Xplugin:${plugin} diff --git a/test/900-hello-plugin/src/Main.java b/test/900-hello-plugin/src/Main.java new file mode 100644 index 0000000000..1ef6289559 --- /dev/null +++ b/test/900-hello-plugin/src/Main.java @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2011 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +public class Main { + public static void main(String[] args) { + System.out.println("Hello, world!"); + } +} diff --git a/test/Android.libartagent.mk b/test/Android.libartagent.mk new file mode 100644 index 0000000000..729de3f7ae --- /dev/null +++ b/test/Android.libartagent.mk @@ -0,0 +1,101 @@ +# +# Copyright (C) 2016 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + +LOCAL_PATH := $(call my-dir) + +include art/build/Android.common_build.mk + +LIBARTAGENT_COMMON_SRC_FILES := \ + 900-hello-plugin/load_unload.cc + +# $(1): target or host +# $(2): debug or <empty> +define build-libartagent + ifneq ($(1),target) + ifneq ($(1),host) + $$(error expected target or host for argument 1, received $(1)) + endif + endif + ifneq ($(2),debug) + ifneq ($(2),) + $$(error d or empty for argument 2, received $(2)) + endif + suffix := d + else + suffix := + endif + + art_target_or_host := $(1) + + include $(CLEAR_VARS) + LOCAL_CPP_EXTENSION := $(ART_CPP_EXTENSION) + LOCAL_MODULE := libartagent$$(suffix) + ifeq ($$(art_target_or_host),target) + LOCAL_MODULE_TAGS := tests + endif + LOCAL_SRC_FILES := $(LIBARTAGENT_COMMON_SRC_FILES) + LOCAL_SHARED_LIBRARIES += libart$$(suffix) libbacktrace libnativehelper + LOCAL_C_INCLUDES += $(ART_C_INCLUDES) art/runtime + LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common_build.mk + LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.libartagent.mk + ifeq ($$(art_target_or_host),target) + $(call set-target-local-clang-vars) + ifeq ($$(suffix),d) + $(call set-target-local-cflags-vars,debug) + else + $(call set-target-local-cflags-vars,ndebug) + endif + LOCAL_SHARED_LIBRARIES += libdl + LOCAL_MULTILIB := both + LOCAL_MODULE_PATH_32 := $(ART_TARGET_TEST_OUT)/$(ART_TARGET_ARCH_32) + LOCAL_MODULE_PATH_64 := $(ART_TARGET_TEST_OUT)/$(ART_TARGET_ARCH_64) + LOCAL_MODULE_TARGET_ARCH := $(ART_SUPPORTED_ARCH) + include $(BUILD_SHARED_LIBRARY) + else # host + LOCAL_CLANG := $(ART_HOST_CLANG) + LOCAL_CFLAGS := $(ART_HOST_CFLAGS) + LOCAL_ASFLAGS := $(ART_HOST_ASFLAGS) + ifeq ($$(suffix),d) + LOCAL_CFLAGS += $(ART_HOST_DEBUG_CFLAGS) + LOCAL_ASFLAGS += $(ART_HOST_DEBUG_ASFLAGS) + else + LOCAL_CFLAGS += $(ART_HOST_NON_DEBUG_CFLAGS) + LOCAL_ASFLAGS += $(ART_HOST_NON_DEBUG_ASFLAGS) + endif + LOCAL_LDLIBS := $(ART_HOST_LDLIBS) -ldl -lpthread + LOCAL_IS_HOST_MODULE := true + LOCAL_MULTILIB := both + include $(BUILD_HOST_SHARED_LIBRARY) + endif + + # Clear locally used variables. + art_target_or_host := + suffix := +endef + +ifeq ($(ART_BUILD_TARGET),true) + $(eval $(call build-libartagent,target,)) + $(eval $(call build-libartagent,target,debug)) +endif +ifeq ($(ART_BUILD_HOST),true) + $(eval $(call build-libartagent,host,)) + $(eval $(call build-libartagent,host,debug)) +endif + +# Clear locally used variables. 
+LOCAL_PATH := +LIBARTAGENT_COMMON_SRC_FILES := diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk index bba6f8e721..b87e142811 100644 --- a/test/Android.run-test.mk +++ b/test/Android.run-test.mk @@ -555,10 +555,13 @@ TEST_ART_BROKEN_INTERPRETER_READ_BARRIER_RUN_TESTS := # Tests that should fail in the read barrier configuration with the Optimizing compiler (AOT). # 484: Baker's fast path based read barrier compiler instrumentation generates code containing # more parallel moves on x86, thus some Checker assertions may fail. +# 527: On ARM64 and ARM, the read barrier instrumentation does not support the HIntermediateAddress +# instruction yet (b/26601270). # 537: Expects an array copy to be intrinsified on x86-64, but calling-on-slowpath intrinsics are # not yet handled in the read barrier configuration. TEST_ART_BROKEN_OPTIMIZING_READ_BARRIER_RUN_TESTS := \ 484-checker-register-hints \ + 527-checker-array-access-split \ 537-checker-arraycopy # Tests that should fail in the read barrier configuration with JIT (Optimizing compiler). @@ -653,6 +656,14 @@ $(foreach target, $(TARGET_TYPES), \ # only once). TEST_ART_TARGET_SYNC_DEPS += $(ART_TARGET_EXECUTABLES) $(TARGET_CORE_IMG_OUTS) +# Also need libartagent. +TEST_ART_TARGET_SYNC_DEPS += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libartagent.so +TEST_ART_TARGET_SYNC_DEPS += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libartagentd.so +ifdef TARGET_2ND_ARCH +TEST_ART_TARGET_SYNC_DEPS += $(ART_TARGET_TEST_OUT)/$(TARGET_2ND_ARCH)/libartagent.so +TEST_ART_TARGET_SYNC_DEPS += $(ART_TARGET_TEST_OUT)/$(TARGET_2ND_ARCH)/libartagentd.so +endif + # Also need libarttest. TEST_ART_TARGET_SYNC_DEPS += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libarttest.so TEST_ART_TARGET_SYNC_DEPS += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libarttestd.so @@ -671,6 +682,8 @@ endif # specific version depending on the compiler. 
ART_TEST_HOST_RUN_TEST_DEPENDENCIES := \ $(ART_HOST_EXECUTABLES) \ + $(ART_HOST_OUT_SHARED_LIBRARIES)/libartagent$(ART_HOST_SHLIB_EXTENSION) \ + $(ART_HOST_OUT_SHARED_LIBRARIES)/libartagentd$(ART_HOST_SHLIB_EXTENSION) \ $(ART_HOST_OUT_SHARED_LIBRARIES)/libarttest$(ART_HOST_SHLIB_EXTENSION) \ $(ART_HOST_OUT_SHARED_LIBRARIES)/libarttestd$(ART_HOST_SHLIB_EXTENSION) \ $(ART_HOST_OUT_SHARED_LIBRARIES)/libnativebridgetest$(ART_HOST_SHLIB_EXTENSION) \ @@ -680,6 +693,8 @@ ART_TEST_HOST_RUN_TEST_DEPENDENCIES := \ ifneq ($(HOST_PREFER_32_BIT),true) ART_TEST_HOST_RUN_TEST_DEPENDENCIES += \ + $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libartagent$(ART_HOST_SHLIB_EXTENSION) \ + $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libartagentd$(ART_HOST_SHLIB_EXTENSION) \ $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libarttest$(ART_HOST_SHLIB_EXTENSION) \ $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libarttestd$(ART_HOST_SHLIB_EXTENSION) \ $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libnativebridgetest$(ART_HOST_SHLIB_EXTENSION) \ @@ -1092,5 +1107,9 @@ ALL_ADDRESS_SIZES := RUN_TYPES := DEBUGGABLE_TYPES := -include $(LOCAL_PATH)/Android.libarttest.mk -include art/test/Android.libnativebridgetest.mk +MY_LOCAL_PATH := $(LOCAL_PATH) +include $(MY_LOCAL_PATH)/Android.libartagent.mk +include $(MY_LOCAL_PATH)/Android.libarttest.mk +include $(MY_LOCAL_PATH)/Android.libnativebridgetest.mk +MY_LOCAL_PATH := +LOCAL_PATH := diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar index c6c9380412..d12bd79b3a 100755 --- a/test/etc/run-test-jar +++ b/test/etc/run-test-jar @@ -21,6 +21,7 @@ DEX2OAT="" EXPERIMENTAL="" FALSE_BIN="/system/bin/false" FLAGS="" +ANDROID_FLAGS="" GDB="" GDB_ARGS="" GDB_SERVER="gdbserver" @@ -59,6 +60,9 @@ while true; do if [ "x$1" = "x--quiet" ]; then QUIET="y" shift + elif [ "x$1" = "x-O" ]; then + # Ignore this option. + shift elif [ "x$1" = "x--lib" ]; then shift if [ "x$1" = "x" ]; then @@ -93,6 +97,11 @@ while true; do FLAGS="${FLAGS} -Xcompiler-option $option" COMPILE_FLAGS="${COMPILE_FLAGS} $option" shift + elif [ "x$1" = "x--android-runtime-option" ]; then + shift + option="$1" + ANDROID_FLAGS="${ANDROID_FLAGS} $option" + shift elif [ "x$1" = "x--runtime-option" ]; then shift option="$1" @@ -233,6 +242,7 @@ while true; do done if [ "$USE_JVM" = "n" ]; then + FLAGS="${FLAGS} ${ANDROID_FLAGS}" for feature in ${EXPERIMENTAL}; do FLAGS="${FLAGS} -Xexperimental:${feature} -Xcompiler-option --runtime-arg -Xcompiler-option -Xexperimental:${feature}" COMPILE_FLAGS="${COMPILE_FLAGS} --runtime-arg -Xexperimental:${feature}" @@ -469,12 +479,12 @@ if [ "$HOST" = "n" ]; then adb push $TEST_NAME-ex.jar $DEX_LOCATION >/dev/null 2>&1 fi - LD_LIBRARY_PATH= + LD_LIBRARY_PATH=/data/art-test/$ISA if [ "$ANDROID_ROOT" != "/system" ]; then # Current default installation is dalvikvm 64bits and dex2oat 32bits, # so we can only use LD_LIBRARY_PATH when testing on a local # installation. 
- LD_LIBRARY_PATH=$ANDROID_ROOT/$LIBRARY_DIRECTORY + LD_LIBRARY_PATH=$ANDROID_ROOT/$LIBRARY_DIRECTORY:$LD_LIBRARY_PATH fi PUBLIC_LIBS=libart.so:libartd.so diff --git a/test/run-test b/test/run-test index edee4ae31f..621fc24a26 100755 --- a/test/run-test +++ b/test/run-test @@ -165,6 +165,7 @@ while true; do elif [ "x$1" = "x-O" ]; then lib="libart.so" testlib="arttest" + run_args="${run_args} -O" shift elif [ "x$1" = "x--dalvik" ]; then lib="libdvm.so" diff --git a/tools/cpp-define-generator/constant_globals.def b/tools/cpp-define-generator/constant_globals.def index 1e24d64dda..a3ccc72bb6 100644 --- a/tools/cpp-define-generator/constant_globals.def +++ b/tools/cpp-define-generator/constant_globals.def @@ -25,6 +25,7 @@ DEFINE_OBJECT_EXPR(ALIGNMENT_MASK, size_t, art::kObjectAlignment - 1) DEFINE_OBJECT_EXPR(ALIGNMENT_MASK_TOGGLED, uint32_t, ~static_cast<uint32_t>(art::kObjectAlignment - 1)) +DEFINE_OBJECT_EXPR(ALIGNMENT_MASK_TOGGLED64, uint64_t, ~static_cast<uint64_t>(art::kObjectAlignment - 1)) #undef DEFINE_OBJECT_EXPR |
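
[Editor's aside] As a sanity check on the new OBJECT_ALIGNMENT_MASK_TOGGLED64 constant: with art::kObjectAlignment equal to 8, which is what the existing 32-bit value 0xfffffff8 implies, the 64-bit toggled mask comes out as 0xfffffffffffffff8. A hedged compile-time sketch (not part of the change, alignment value assumed from the generated constants above):

#include <cstdint>

// Assumes kObjectAlignment == 8, matching the 32-bit OBJECT_ALIGNMENT_MASK_TOGGLED above.
constexpr std::uint64_t kAssumedObjectAlignment = 8;
constexpr std::uint64_t kMaskToggled64 =
    ~static_cast<std::uint64_t>(kAssumedObjectAlignment - 1);
static_assert(kMaskToggled64 == UINT64_C(0xfffffffffffffff8),
              "matches OBJECT_ALIGNMENT_MASK_TOGGLED64");
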