Diffstat (limited to 'compiler')
69 files changed, 3597 insertions, 1861 deletions
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
index b5fd1e074f..afc8463878 100644
--- a/compiler/common_compiler_test.cc
+++ b/compiler/common_compiler_test.cc
@@ -168,6 +168,12 @@ std::unordered_set<std::string>* CommonCompilerTest::GetCompiledMethods() {
   return nullptr;
 }
 
+// Get ProfileCompilationInfo that should be passed to the driver.
+ProfileCompilationInfo* CommonCompilerTest::GetProfileCompilationInfo() {
+  // Null, profile information will not be taken into account.
+  return nullptr;
+}
+
 void CommonCompilerTest::SetUp() {
   CommonRuntimeTest::SetUp();
   {
@@ -204,12 +210,10 @@ void CommonCompilerTest::CreateCompilerDriver(Compiler::Kind kind, InstructionSe
                                             2,
                                             true,
                                             true,
-                                            "",
-                                            false,
                                             timer_.get(),
                                             -1,
                                             /* dex_to_oat_map */ nullptr,
-                                            /* profile_compilation_info */ nullptr));
+                                            GetProfileCompilationInfo()));
   // We typically don't generate an image in unit tests, disable this optimization by default.
   compiler_driver_->SetSupportBootImageFixup(false);
 }
diff --git a/compiler/common_compiler_test.h b/compiler/common_compiler_test.h
index b491946dc3..7e0fbabff8 100644
--- a/compiler/common_compiler_test.h
+++ b/compiler/common_compiler_test.h
@@ -23,6 +23,7 @@
 
 #include "common_runtime_test.h"
 #include "compiler.h"
+#include "jit/offline_profiling_info.h"
 #include "oat_file.h"
 
 namespace art {
@@ -75,6 +76,8 @@ class CommonCompilerTest : public CommonRuntimeTest {
   // driver assumes ownership of the set, so the test should properly release the set.
   virtual std::unordered_set<std::string>* GetCompiledMethods();
 
+  virtual ProfileCompilationInfo* GetProfileCompilationInfo();
+
   virtual void TearDown();
 
   void CompileClass(mirror::ClassLoader* class_loader, const char* class_name)
diff --git a/compiler/dex/mir_method_info.cc b/compiler/dex/mir_method_info.cc
index 658e7d67a0..c250bd9fd2 100644
--- a/compiler/dex/mir_method_info.cc
+++ b/compiler/dex/mir_method_info.cc
@@ -100,8 +100,12 @@ void MirMethodLoweringInfo::Resolve(CompilerDriver* compiler_driver,
     } else {
       // The method index is actually the dex PC in this case.
       // Calculate the proper dex file and target method idx.
+
+      // We must be in JIT mode if we get here.
       CHECK(use_jit);
-      CHECK_EQ(invoke_type, kVirtual);
+
+      // The invoke type better be virtual, except for the string init special case above.
+      CHECK_EQ(invoke_type, string_init ? kDirect : kVirtual);
       // Don't devirt if we are in a different dex file since we can't have direct invokes in
       // another dex file unless we always put a direct / patch pointer.
       devirt_target = nullptr;
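The GetProfileCompilationInfo() hook above is deliberately virtual: a test fixture can hand the driver a profile simply by overriding it, which is exactly what the CompilerDriverProfileTest added further down does. A minimal sketch of such an override (the fixture name here is made up for illustration):

    // Hypothetical fixture: a non-null profile switches CreateCompilerDriver()
    // to profile-guided method selection.
    class MyProfileTest : public CommonCompilerTest {
     protected:
      ProfileCompilationInfo* GetProfileCompilationInfo() OVERRIDE {
        return &profile_info_;  // Populated via ProfileCompilationInfo::AddData().
      }

     private:
      ProfileCompilationInfo profile_info_;
    };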
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc
index 32d751861a..3766093fa8 100644
--- a/compiler/dex/quick/dex_file_method_inliner.cc
+++ b/compiler/dex/quick/dex_file_method_inliner.cc
@@ -39,6 +39,7 @@ static constexpr bool kIntrinsicIsStatic[] = {
     true,   // kIntrinsicFloatCvt
     true,   // kIntrinsicReverseBits
     true,   // kIntrinsicReverseBytes
+    true,   // kIntrinsicBitCount
     true,   // kIntrinsicNumberOfLeadingZeros
     true,   // kIntrinsicNumberOfTrailingZeros
     true,   // kIntrinsicRotateRight
@@ -99,6 +100,7 @@ static_assert(kIntrinsicIsStatic[kIntrinsicDoubleCvt], "DoubleCvt must be static
 static_assert(kIntrinsicIsStatic[kIntrinsicFloatCvt], "FloatCvt must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicReverseBits], "ReverseBits must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicReverseBytes], "ReverseBytes must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicBitCount], "BitCount must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicNumberOfLeadingZeros],
               "NumberOfLeadingZeros must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicNumberOfTrailingZeros],
@@ -110,9 +112,9 @@ static_assert(kIntrinsicIsStatic[kIntrinsicAbsLong], "AbsLong must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicAbsFloat], "AbsFloat must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicAbsDouble], "AbsDouble must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxInt], "MinMaxInt must be static");
-static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxLong], "MinMaxLong_must_be_static");
-static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxFloat], "MinMaxFloat_must_be_static");
-static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxDouble], "MinMaxDouble_must_be_static");
+static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxLong], "MinMaxLong must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxFloat], "MinMaxFloat must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicMinMaxDouble], "MinMaxDouble must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicCos], "Cos must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicSin], "Sin must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicAcos], "Acos must be static");
@@ -153,7 +155,7 @@ static_assert(kIntrinsicIsStatic[kIntrinsicCurrentThread], "CurrentThread must b
 static_assert(kIntrinsicIsStatic[kIntrinsicPeek], "Peek must be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicPoke], "Poke must be static");
 static_assert(!kIntrinsicIsStatic[kIntrinsicCas], "Cas must not be static");
-static_assert(!kIntrinsicIsStatic[kIntrinsicUnsafeGet], "UnsafeGet_must_not_be_static");
+static_assert(!kIntrinsicIsStatic[kIntrinsicUnsafeGet], "UnsafeGet must not be static");
 static_assert(!kIntrinsicIsStatic[kIntrinsicUnsafePut], "UnsafePut must not be static");
 static_assert(kIntrinsicIsStatic[kIntrinsicSystemArrayCopyCharArray],
               "SystemArrayCopyCharArray must be static");
@@ -293,6 +295,7 @@ const char* const DexFileMethodInliner::kNameCacheNames[] = {
     "putObjectVolatile",      // kNameCachePutObjectVolatile
     "putOrderedObject",       // kNameCachePutOrderedObject
     "arraycopy",              // kNameCacheArrayCopy
+    "bitCount",               // kNameCacheBitCount
     "numberOfLeadingZeros",   // kNameCacheNumberOfLeadingZeros
     "numberOfTrailingZeros",  // kNameCacheNumberOfTrailingZeros
     "rotateRight",            // kNameCacheRotateRight
@@ -447,6 +450,8 @@ const DexFileMethodInliner::IntrinsicDef DexFileMethodInliner::kIntrinsicMethods
     INTRINSIC(JavaLangInteger, Reverse, I_I, kIntrinsicReverseBits, k32),
     INTRINSIC(JavaLangLong, Reverse, J_J, kIntrinsicReverseBits, k64),
+    INTRINSIC(JavaLangInteger, BitCount, I_I, kIntrinsicBitCount, k32),
+    INTRINSIC(JavaLangLong, BitCount, J_I, kIntrinsicBitCount, k64),
     INTRINSIC(JavaLangInteger, NumberOfLeadingZeros, I_I, kIntrinsicNumberOfLeadingZeros, k32),
     INTRINSIC(JavaLangLong, NumberOfLeadingZeros, J_I, kIntrinsicNumberOfLeadingZeros, k64),
     INTRINSIC(JavaLangInteger, NumberOfTrailingZeros, I_I, kIntrinsicNumberOfTrailingZeros, k32),
@@ -745,6 +750,7 @@ bool DexFileMethodInliner::GenIntrinsic(Mir2Lir* backend, CallInfo* info) {
                                intrinsic.d.data & kIntrinsicFlagIsOrdered);
     case kIntrinsicSystemArrayCopyCharArray:
       return backend->GenInlinedArrayCopyCharArray(info);
+    case kIntrinsicBitCount:
     case kIntrinsicNumberOfLeadingZeros:
     case kIntrinsicNumberOfTrailingZeros:
     case kIntrinsicRotateRight:
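For context, kIntrinsicBitCount maps java.lang.Integer.bitCount() and java.lang.Long.bitCount(), i.e. population count. Backends are free to lower it to a hardware popcount where one exists; the portable fallback is the classic SWAR reduction, shown here as plain C++ purely for illustration (this is not the code a backend emits):

    #include <cstdint>

    // Equivalent of Integer.bitCount(): sum bits in progressively wider fields.
    int BitCount32(uint32_t x) {
      x = x - ((x >> 1) & 0x55555555u);                  // 2-bit partial sums
      x = (x & 0x33333333u) + ((x >> 2) & 0x33333333u);  // 4-bit partial sums
      x = (x + (x >> 4)) & 0x0f0f0f0fu;                  // 8-bit partial sums
      return static_cast<int>((x * 0x01010101u) >> 24);  // add the four bytes
    }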
diff --git a/compiler/dex/quick/dex_file_method_inliner.h b/compiler/dex/quick/dex_file_method_inliner.h
index ac70577b48..28036237d7 100644
--- a/compiler/dex/quick/dex_file_method_inliner.h
+++ b/compiler/dex/quick/dex_file_method_inliner.h
@@ -224,6 +224,7 @@ class DexFileMethodInliner {
       kNameCachePutObjectVolatile,
       kNameCachePutOrderedObject,
       kNameCacheArrayCopy,
+      kNameCacheBitCount,
       kNameCacheNumberOfLeadingZeros,
       kNameCacheNumberOfTrailingZeros,
       kNameCacheRotateRight,
diff --git a/compiler/dex/quick/quick_cfi_test.cc b/compiler/dex/quick/quick_cfi_test.cc
index 12568a4ad4..c5df134493 100644
--- a/compiler/dex/quick/quick_cfi_test.cc
+++ b/compiler/dex/quick/quick_cfi_test.cc
@@ -69,6 +69,8 @@ class QuickCFITest : public CFITest {
       false,
       nullptr,
       nullptr,
+      false,
+      "",
       false);
     VerificationResults verification_results(&compiler_options);
     DexFileToMethodInlinerMap method_inliner_map;
@@ -88,8 +90,6 @@ class QuickCFITest : public CFITest {
       0,
       false,
       false,
-      "",
-      false,
       0,
       -1,
       nullptr,
diff --git a/compiler/dex/quick/x86/quick_assemble_x86_test.cc b/compiler/dex/quick/x86/quick_assemble_x86_test.cc
index b39fe4da4f..d63878d6b9 100644
--- a/compiler/dex/quick/x86/quick_assemble_x86_test.cc
+++ b/compiler/dex/quick/x86/quick_assemble_x86_test.cc
@@ -52,6 +52,8 @@ class QuickAssembleX86TestBase : public testing::Test {
         false,
         nullptr,
         nullptr,
+        false,
+        "",
         false));
     verification_results_.reset(new VerificationResults(compiler_options_.get()));
     method_inliner_map_.reset(new DexFileToMethodInlinerMap());
@@ -69,8 +71,6 @@ class QuickAssembleX86TestBase : public testing::Test {
         0,
         false,
         false,
-        "",
-        false,
         0,
         -1,
         nullptr,
diff --git a/compiler/driver/compiled_method_storage_test.cc b/compiler/driver/compiled_method_storage_test.cc
index f18fa67ea5..2e2d1f99f3 100644
--- a/compiler/driver/compiled_method_storage_test.cc
+++ b/compiler/driver/compiled_method_storage_test.cc
@@ -41,8 +41,6 @@ TEST(CompiledMethodStorage, Deduplicate) {
                                 1u,
                                 false,
                                 false,
-                                "",
-                                false,
                                 nullptr,
                                 -1,
                                 nullptr,
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 043bd93bd7..d0215255e8 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -345,7 +345,6 @@ CompilerDriver::CompilerDriver(
     std::unordered_set<std::string>* compiled_classes,
     std::unordered_set<std::string>* compiled_methods,
     size_t thread_count, bool dump_stats, bool dump_passes,
-    const std::string& dump_cfg_file_name, bool dump_cfg_append,
     CumulativeLogger* timer, int swap_fd,
     const std::unordered_map<const DexFile*, const char*>* dex_to_oat_map,
     const ProfileCompilationInfo* profile_compilation_info)
@@ -370,8 +369,6 @@ CompilerDriver::CompilerDriver(
       stats_(new AOTCompilationStats),
       dump_stats_(dump_stats),
       dump_passes_(dump_passes),
-      dump_cfg_file_name_(dump_cfg_file_name),
-      dump_cfg_append_(dump_cfg_append),
       timings_logger_(timer),
       compiler_context_(nullptr),
       support_boot_image_fixup_(instruction_set != kMips && instruction_set != kMips64),
@@ -1197,15 +1194,18 @@ bool CompilerDriver::CanAccessTypeWithoutChecks(uint32_t referrer_idx, const Dex
   if (equals_referrers_class != nullptr) {
     *equals_referrers_class = (method_id.class_idx_ == type_idx);
   }
-  mirror::Class* referrer_class = dex_cache->GetResolvedType(method_id.class_idx_);
-  if (referrer_class == nullptr) {
-    stats_->TypeNeedsAccessCheck();
-    return false;  // Incomplete referrer knowledge needs access check.
+  bool is_accessible = resolved_class->IsPublic();  // Public classes are always accessible.
+  if (!is_accessible) {
+    mirror::Class* referrer_class = dex_cache->GetResolvedType(method_id.class_idx_);
+    if (referrer_class == nullptr) {
+      stats_->TypeNeedsAccessCheck();
+      return false;  // Incomplete referrer knowledge needs access check.
+    }
+    // Perform access check, will return true if access is ok or false if we're going to have to
+    // check this at runtime (for example for class loaders).
+    is_accessible = referrer_class->CanAccess(resolved_class);
   }
-  // Perform access check, will return true if access is ok or false if we're going to have to
-  // check this at runtime (for example for class loaders).
-  bool result = referrer_class->CanAccess(resolved_class);
-  if (result) {
+  if (is_accessible) {
     stats_->TypeDoesntNeedAccessCheck();
     if (type_known_final != nullptr) {
       *type_known_final = resolved_class->IsFinal() && !resolved_class->IsArrayClass();
@@ -1216,7 +1216,7 @@ bool CompilerDriver::CanAccessTypeWithoutChecks(uint32_t referrer_idx, const Dex
   } else {
     stats_->TypeNeedsAccessCheck();
   }
-  return result;
+  return is_accessible;
 }
 
 bool CompilerDriver::CanAccessInstantiableTypeWithoutChecks(uint32_t referrer_idx,
@@ -1236,14 +1236,18 @@ bool CompilerDriver::CanAccessInstantiableTypeWithoutChecks(uint32_t referrer_id
   }
   *finalizable = resolved_class->IsFinalizable();
   const DexFile::MethodId& method_id = dex_file.GetMethodId(referrer_idx);
-  mirror::Class* referrer_class = dex_cache->GetResolvedType(method_id.class_idx_);
-  if (referrer_class == nullptr) {
-    stats_->TypeNeedsAccessCheck();
-    return false;  // Incomplete referrer knowledge needs access check.
-  }
-  // Perform access and instantiable checks, will return true if access is ok or false if we're
-  // going to have to check this at runtime (for example for class loaders).
-  bool result = referrer_class->CanAccess(resolved_class) && resolved_class->IsInstantiable();
+  bool is_accessible = resolved_class->IsPublic();  // Public classes are always accessible.
+  if (!is_accessible) {
+    mirror::Class* referrer_class = dex_cache->GetResolvedType(method_id.class_idx_);
+    if (referrer_class == nullptr) {
+      stats_->TypeNeedsAccessCheck();
+      return false;  // Incomplete referrer knowledge needs access check.
+    }
+    // Perform access and instantiable checks, will return true if access is ok or false if we're
+    // going to have to check this at runtime (for example for class loaders).
+    is_accessible = referrer_class->CanAccess(resolved_class);
+  }
+  bool result = is_accessible && resolved_class->IsInstantiable();
   if (result) {
     stats_->TypeDoesntNeedAccessCheck();
   } else {
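Both rewritten checks now test visibility before touching the dex cache: a public resolved class is accessible from any referrer, so the referrer class only has to be resolved on the non-public path. The control flow, stripped to its skeleton (a sketch, not the ART code; Class stands in for mirror::Class):

    // Order of checks in the refactored CanAccess*WithoutChecks helpers.
    bool IsAccessible(const Class* resolved, const Class* referrer) {
      if (resolved->IsPublic()) {
        return true;               // Fast path: no referrer lookup needed.
      }
      if (referrer == nullptr) {
        return false;              // Incomplete knowledge: re-check at runtime.
      }
      return referrer->CanAccess(resolved);
    }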
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 17b2f5e98d..6a2f7bfd4e 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -95,7 +95,6 @@ class CompilerDriver {
                  std::unordered_set<std::string>* compiled_classes,
                  std::unordered_set<std::string>* compiled_methods,
                  size_t thread_count, bool dump_stats, bool dump_passes,
-                 const std::string& dump_cfg_file_name, bool dump_cfg_append,
                  CumulativeLogger* timer, int swap_fd,
                  const std::unordered_map<const DexFile*, const char*>* dex_to_oat_map,
                  const ProfileCompilationInfo* profile_compilation_info);
@@ -423,14 +422,6 @@ class CompilerDriver {
     return dump_passes_;
   }
 
-  const std::string& GetDumpCfgFileName() const {
-    return dump_cfg_file_name_;
-  }
-
-  bool GetDumpCfgAppend() const {
-    return dump_cfg_append_;
-  }
-
   CumulativeLogger* GetTimingsLogger() const {
     return timings_logger_;
   }
@@ -668,8 +659,6 @@ class CompilerDriver {
 
   bool dump_stats_;
   const bool dump_passes_;
-  const std::string dump_cfg_file_name_;
-  const bool dump_cfg_append_;
 
   CumulativeLogger* const timings_logger_;
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc
index 82c0e86b25..4c03e5ddfe 100644
--- a/compiler/driver/compiler_driver_test.cc
+++ b/compiler/driver/compiler_driver_test.cc
@@ -31,6 +31,7 @@
 #include "mirror/object_array-inl.h"
 #include "mirror/object-inl.h"
 #include "handle_scope-inl.h"
+#include "jit/offline_profiling_info.h"
 #include "scoped_thread_state_change.h"
 
 namespace art {
@@ -240,6 +241,94 @@ TEST_F(CompilerDriverMethodsTest, Selection) {
   EXPECT_TRUE(expected->empty());
 }
 
+class CompilerDriverProfileTest : public CompilerDriverTest {
+ protected:
+  ProfileCompilationInfo* GetProfileCompilationInfo() OVERRIDE {
+    ScopedObjectAccess soa(Thread::Current());
+    std::vector<std::unique_ptr<const DexFile>> dex_files = OpenTestDexFiles("ProfileTestMultiDex");
+
+    ProfileCompilationInfo info;
+    for (const std::unique_ptr<const DexFile>& dex_file : dex_files) {
+      std::cout << std::string(dex_file->GetLocation());
+      profile_info_.AddData(dex_file->GetLocation(), dex_file->GetLocationChecksum(), 1);
+      profile_info_.AddData(dex_file->GetLocation(), dex_file->GetLocationChecksum(), 2);
+    }
+    return &profile_info_;
+  }
+
+  std::unordered_set<std::string> GetExpectedMethodsForClass(const std::string& clazz) {
+    if (clazz == "Main") {
+      return std::unordered_set<std::string>({
+          "java.lang.String Main.getA()",
+          "java.lang.String Main.getB()"});
+    } else if (clazz == "Second") {
+      return std::unordered_set<std::string>({
+          "java.lang.String Second.getX()",
+          "java.lang.String Second.getY()"});
+    } else {
+      return std::unordered_set<std::string>();
+    }
+  }
+
+  void CheckCompiledMethods(jobject class_loader,
+                            const std::string& clazz,
+                            const std::unordered_set<std::string>& expected_methods) {
+    ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+    Thread* self = Thread::Current();
+    ScopedObjectAccess soa(self);
+    StackHandleScope<1> hs(self);
+    Handle<mirror::ClassLoader> h_loader(hs.NewHandle(
+        reinterpret_cast<mirror::ClassLoader*>(self->DecodeJObject(class_loader))));
+    mirror::Class* klass = class_linker->FindClass(self, clazz.c_str(), h_loader);
+    ASSERT_NE(klass, nullptr);
+
+    const auto pointer_size = class_linker->GetImagePointerSize();
+    size_t number_of_compiled_methods = 0;
+    for (auto& m : klass->GetVirtualMethods(pointer_size)) {
+      std::string name = PrettyMethod(&m, true);
+      const void* code = m.GetEntryPointFromQuickCompiledCodePtrSize(pointer_size);
+      ASSERT_NE(code, nullptr);
+      if (expected_methods.find(name) != expected_methods.end()) {
+        number_of_compiled_methods++;
+        EXPECT_FALSE(class_linker->IsQuickToInterpreterBridge(code));
+      } else {
+        EXPECT_TRUE(class_linker->IsQuickToInterpreterBridge(code));
+      }
+    }
+    EXPECT_EQ(expected_methods.size(), number_of_compiled_methods);
+  }
+
+ private:
+  ProfileCompilationInfo profile_info_;
+};
+
+TEST_F(CompilerDriverProfileTest, ProfileGuidedCompilation) {
+  TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING_WITH_QUICK();
+  TEST_DISABLED_FOR_READ_BARRIER_WITH_QUICK();
+  TEST_DISABLED_FOR_READ_BARRIER_WITH_OPTIMIZING_FOR_UNSUPPORTED_INSTRUCTION_SETS();
+  Thread* self = Thread::Current();
+  jobject class_loader;
+  {
+    ScopedObjectAccess soa(self);
+    class_loader = LoadDex("ProfileTestMultiDex");
+  }
+  ASSERT_NE(class_loader, nullptr);
+
+  // Need to enable dex-file writability. Methods rejected to be compiled will run through the
+  // dex-to-dex compiler.
+  ProfileCompilationInfo info;
+  for (const DexFile* dex_file : GetDexFiles(class_loader)) {
+    ASSERT_TRUE(dex_file->EnableWrite());
+  }
+
+  CompileAll(class_loader);
+
+  std::unordered_set<std::string> m = GetExpectedMethodsForClass("Main");
+  std::unordered_set<std::string> s = GetExpectedMethodsForClass("Second");
+  CheckCompiledMethods(class_loader, "LMain;", m);
+  CheckCompiledMethods(class_loader, "LSecond;", s);
+}
+
 // TODO: need check-cast test (when stub complete & we can throw/catch
 
 }  // namespace art
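As a reading aid for the fixture above: ProfileCompilationInfo::AddData takes a dex location, its checksum, and a method index, so the two calls per dex file mark dex method indices 1 and 2 as profiled; with the layout of ProfileTestMultiDex those resolve to the getter pairs listed in GetExpectedMethodsForClass. Schematically:

    // (location, checksum, method_idx): flag one dex method as worth compiling.
    // Which Java method index 1 maps to depends on the dex file's method_ids order.
    profile_info_.AddData(dex_file->GetLocation(), dex_file->GetLocationChecksum(), 1);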
diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc
index 385f34a9f9..2644528e56 100644
--- a/compiler/driver/compiler_options.cc
+++ b/compiler/driver/compiler_options.cc
@@ -44,7 +44,9 @@ CompilerOptions::CompilerOptions()
       verbose_methods_(nullptr),
       pass_manager_options_(),
       abort_on_hard_verifier_failure_(false),
-      init_failure_output_(nullptr) {
+      init_failure_output_(nullptr),
+      dump_cfg_file_name_(""),
+      dump_cfg_append_(false) {
 }
 
 CompilerOptions::~CompilerOptions() {
@@ -71,7 +73,9 @@ CompilerOptions::CompilerOptions(CompilerFilter compiler_filter,
                                  bool compile_pic,
                                  const std::vector<std::string>* verbose_methods,
                                  std::ostream* init_failure_output,
-                                 bool abort_on_hard_verifier_failure
+                                 bool abort_on_hard_verifier_failure,
+                                 const std::string& dump_cfg_file_name,
+                                 bool dump_cfg_append
                                  ) :  // NOLINT(whitespace/parens)
     compiler_filter_(compiler_filter),
     huge_method_threshold_(huge_method_threshold),
@@ -94,7 +98,9 @@ CompilerOptions::CompilerOptions(CompilerFilter compiler_filter,
     verbose_methods_(verbose_methods),
     pass_manager_options_(),
     abort_on_hard_verifier_failure_(abort_on_hard_verifier_failure),
-    init_failure_output_(init_failure_output) {
+    init_failure_output_(init_failure_output),
+    dump_cfg_file_name_(dump_cfg_file_name),
+    dump_cfg_append_(dump_cfg_append) {
 }
 
 void CompilerOptions::ParseHugeMethodMax(const StringPiece& option, UsageFn Usage) {
@@ -238,6 +244,10 @@ bool CompilerOptions::ParseCompilerOption(const StringPiece& option, UsageFn Usa
     ParsePassOptions(option, Usage);
   } else if (option.starts_with("--dump-init-failures=")) {
     ParseDumpInitFailures(option, Usage);
+  } else if (option.starts_with("--dump-cfg=")) {
+    dump_cfg_file_name_ = option.substr(strlen("--dump-cfg=")).data();
+  } else if (option.starts_with("--dump-cfg-append")) {
+    dump_cfg_append_ = true;
   } else {
     // Option not recognized.
     return false;
   }
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index f14bdc4a2f..d47fc2ad4b 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -83,7 +83,9 @@ class CompilerOptions FINAL {
                   bool compile_pic,
                   const std::vector<std::string>* verbose_methods,
                   std::ostream* init_failure_output,
-                  bool abort_on_hard_verifier_failure);
+                  bool abort_on_hard_verifier_failure,
+                  const std::string& dump_cfg_file_name,
+                  bool dump_cfg_append);
 
   CompilerFilter GetCompilerFilter() const {
     return compiler_filter_;
@@ -224,6 +226,14 @@ class CompilerOptions FINAL {
 
   bool ParseCompilerOption(const StringPiece& option, UsageFn Usage);
 
+  const std::string& GetDumpCfgFileName() const {
+    return dump_cfg_file_name_;
+  }
+
+  bool GetDumpCfgAppend() const {
+    return dump_cfg_append_;
+  }
+
  private:
   void ParseDumpInitFailures(const StringPiece& option, UsageFn Usage);
   void ParsePassOptions(const StringPiece& option, UsageFn Usage);
@@ -273,6 +283,9 @@ class CompilerOptions FINAL {
   // Log initialization of initialization failures to this stream if not null.
   std::unique_ptr<std::ostream> init_failure_output_;
 
+  std::string dump_cfg_file_name_;
+  bool dump_cfg_append_;
+
   friend class Dex2Oat;
 
   DISALLOW_COPY_AND_ASSIGN(CompilerOptions);
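Because the two flags are now handled by CompilerOptions::ParseCompilerOption, any caller that forwards compiler options can enable CFG dumping; the JIT does this for the runtime's compiler-option list, as the jit_compiler.cc hunk below shows. A hedged sketch of the new path (the no-op Usage callback and the file name are stand-ins, not ART code):

    static void NoUsage(const char* /* fmt */, ...) {}

    void ConfigureCfgDump(CompilerOptions* options) {
      options->ParseCompilerOption("--dump-cfg=/tmp/jit.cfg", NoUsage);  // hypothetical path
      options->ParseCompilerOption("--dump-cfg-append", NoUsage);
      CHECK(options->GetDumpCfgFileName() == "/tmp/jit.cfg");
      CHECK(options->GetDumpCfgAppend());
    }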
diff --git a/compiler/dwarf/register.h b/compiler/dwarf/register.h
index b67e8ddc9d..35b3e15d83 100644
--- a/compiler/dwarf/register.h
+++ b/compiler/dwarf/register.h
@@ -29,7 +29,7 @@ class Reg {
   // TODO: Arm S0–S31 register mapping is obsolescent.
   // We should use VFP-v3/Neon D0-D31 mapping instead.
   // However, D0 is aliased to pair of S0 and S1, so using that
-  // mapping we can not easily say S0 is spilled and S1 is not.
+  // mapping we cannot easily say S0 is spilled and S1 is not.
   // There are ways around this in DWARF but they are complex.
   // It would be much simpler to always spill whole D registers.
   // Arm64 mapping is correct since we already do this there.
diff --git a/compiler/elf_builder.h b/compiler/elf_builder.h
index a7461a5525..46484b1cd6 100644
--- a/compiler/elf_builder.h
+++ b/compiler/elf_builder.h
@@ -100,12 +100,6 @@ class ElfBuilder FINAL {
       header_.sh_entsize = entsize;
     }
 
-    ~Section() OVERRIDE {
-      if (started_) {
-        CHECK(finished_);
-      }
-    }
-
     // Start writing of this section.
     void Start() {
       CHECK(!started_);
diff --git a/compiler/elf_writer_debug.cc b/compiler/elf_writer_debug.cc
index 99d2b84a8f..f3baf67463 100644
--- a/compiler/elf_writer_debug.cc
+++ b/compiler/elf_writer_debug.cc
@@ -213,7 +213,7 @@ static void WriteCIE(InstructionSet isa,
     case kNone:
       break;
   }
-  LOG(FATAL) << "Can not write CIE frame for ISA " << isa;
+  LOG(FATAL) << "Cannot write CIE frame for ISA " << isa;
   UNREACHABLE();
 }
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index 6859605095..12132c0cd0 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -95,25 +95,37 @@ void ImageTest::TestWriteRead(ImageHeader::StorageMode storage_mode) {
     t.NewTiming("WriteElf");
     SafeMap<std::string, std::string> key_value_store;
-    OatWriter oat_writer(class_linker->GetBootClassPath(),
-                         0,
-                         0,
-                         0,
-                         compiler_driver_.get(),
-                         writer.get(),
-                         /*compiling_boot_image*/true,
-                         &timings,
-                         &key_value_store);
+    const std::vector<const DexFile*>& dex_files = class_linker->GetBootClassPath();
     std::unique_ptr<ElfWriter> elf_writer = CreateElfWriterQuick(
         compiler_driver_->GetInstructionSet(),
        &compiler_driver_->GetCompilerOptions(),
         oat_file.GetFile());
-    bool success = writer->PrepareImageAddressSpace();
-    ASSERT_TRUE(success);
-    elf_writer->Start();
-
+    OatWriter oat_writer(/*compiling_boot_image*/true, &timings);
     OutputStream* rodata = elf_writer->StartRoData();
+    for (const DexFile* dex_file : dex_files) {
+      ArrayRef<const uint8_t> raw_dex_file(
+          reinterpret_cast<const uint8_t*>(&dex_file->GetHeader()),
+          dex_file->GetHeader().file_size_);
+      oat_writer.AddRawDexFileSource(raw_dex_file,
+                                     dex_file->GetLocation().c_str(),
+                                     dex_file->GetLocationChecksum());
+    }
+    std::unique_ptr<MemMap> opened_dex_files_map;
+    std::vector<std::unique_ptr<const DexFile>> opened_dex_files;
+    bool dex_files_ok = oat_writer.WriteAndOpenDexFiles(
+        rodata,
+        oat_file.GetFile(),
+        compiler_driver_->GetInstructionSet(),
+        compiler_driver_->GetInstructionSetFeatures(),
+        &key_value_store,
+        &opened_dex_files_map,
+        &opened_dex_files);
+    ASSERT_TRUE(dex_files_ok);
+    oat_writer.PrepareLayout(compiler_driver_.get(), writer.get(), dex_files);
+    bool image_space_ok = writer->PrepareImageAddressSpace();
+    ASSERT_TRUE(image_space_ok);
+
+    bool rodata_ok = oat_writer.WriteRodata(rodata);
     ASSERT_TRUE(rodata_ok);
     elf_writer->EndRoData(rodata);
@@ -123,12 +135,15 @@ void ImageTest::TestWriteRead(ImageHeader::StorageMode storage_mode) {
     ASSERT_TRUE(text_ok);
     elf_writer->EndText(text);
 
+    bool header_ok = oat_writer.WriteHeader(elf_writer->GetStream(), 0u, 0u, 0u);
+    ASSERT_TRUE(header_ok);
+
     elf_writer->SetBssSize(oat_writer.GetBssSize());
     elf_writer->WriteDynamicSection();
     elf_writer->WriteDebugInfo(oat_writer.GetMethodDebugInfo());
     elf_writer->WritePatchLocations(oat_writer.GetAbsolutePatchLocations());
 
-    success = elf_writer->End();
+    bool success = elf_writer->End();
     ASSERT_TRUE(success);
   }
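The reworked image_test above is the clearest statement of the new OatWriter contract: register sources, write and reopen the dex files, prepare the layout, then write rodata, text and finally the header, in that order. Condensed into the bare call sequence (a sketch only; error handling and most arguments elided, and the local variable names are placeholders):

    OatWriter oat_writer(/* compiling_boot_image */ false, &timings);
    oat_writer.AddDexFileSource(filename, location);        // or AddZippedDexFilesSource()
    oat_writer.WriteAndOpenDexFiles(rodata, file, isa, isa_features, &key_value_store,
                                    &opened_dex_files_map, &opened_dex_files);
    oat_writer.PrepareLayout(compiler_driver, image_writer, dex_files);
    oat_writer.WriteRodata(rodata);                         // state: kWriteRoData
    oat_writer.WriteCode(text);                             // state: kWriteText
    oat_writer.WriteHeader(stream, image_checksum, image_begin, image_patch_delta);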
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index 8fdbf4a3f7..3a3275a5f4 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -110,7 +110,9 @@ JitCompiler::JitCompiler() : total_time_(0) {
       /* pic */ true,  // TODO: Support non-PIC in optimizing.
       /* verbose_methods */ nullptr,
       /* init_failure_output */ nullptr,
-      /* abort_on_hard_verifier_failure */ false));
+      /* abort_on_hard_verifier_failure */ false,
+      /* dump_cfg_file_name */ "",
+      /* dump_cfg_append */ false));
   for (const std::string& argument : Runtime::Current()->GetCompilerOptions()) {
     compiler_options_->ParseCompilerOption(argument, Usage);
   }
@@ -166,8 +168,6 @@ JitCompiler::JitCompiler() : total_time_(0) {
       /* thread_count */ 1,
       /* dump_stats */ false,
       /* dump_passes */ false,
-      /* dump_cfg_file_name */ "",
-      /* dump_cfg_append */ false,
       cumulative_logger_.get(),
       /* swap_fd */ -1,
       /* dex to oat map */ nullptr,
diff --git a/compiler/linker/relative_patcher_test.h b/compiler/linker/relative_patcher_test.h
index b10cc3534c..bf8e786f64 100644
--- a/compiler/linker/relative_patcher_test.h
+++ b/compiler/linker/relative_patcher_test.h
@@ -47,7 +47,7 @@ class RelativePatcherTest : public testing::Test {
         driver_(&compiler_options_, &verification_results_, &inliner_map_,
                 Compiler::kQuick, instruction_set, nullptr,
                 false, nullptr, nullptr, nullptr, 1u,
-                false, false, "", false, nullptr, -1, nullptr, nullptr),
+                false, false, nullptr, -1, nullptr, nullptr),
         error_msg_(),
         instruction_set_(instruction_set),
         features_(InstructionSetFeatures::FromVariant(instruction_set, variant, &error_msg_)),
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 9f7ffa5ace..c0d15f3439 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -38,6 +38,7 @@
 #include "oat_file-inl.h"
 #include "oat_writer.h"
 #include "scoped_thread_state_change.h"
+#include "utils/test_dex_file_builder.h"
 
 namespace art {
 
@@ -117,8 +118,6 @@ class OatTest : public CommonCompilerTest {
                                               2,
                                               true,
                                               true,
-                                              "",
-                                              false,
                                               timer_.get(),
                                               -1,
                                               nullptr,
@@ -129,23 +128,74 @@ class OatTest : public CommonCompilerTest {
                 const std::vector<const DexFile*>& dex_files,
                 SafeMap<std::string, std::string>& key_value_store) {
     TimingLogger timings("WriteElf", false, false);
-    OatWriter oat_writer(dex_files,
-                         42U,
-                         4096U,
-                         0,
-                         compiler_driver_.get(),
-                         nullptr,
-                         /*compiling_boot_image*/false,
-                         &timings,
-                         &key_value_store);
+    OatWriter oat_writer(/*compiling_boot_image*/false, &timings);
+    for (const DexFile* dex_file : dex_files) {
+      ArrayRef<const uint8_t> raw_dex_file(
+          reinterpret_cast<const uint8_t*>(&dex_file->GetHeader()),
+          dex_file->GetHeader().file_size_);
+      if (!oat_writer.AddRawDexFileSource(raw_dex_file,
+                                          dex_file->GetLocation().c_str(),
+                                          dex_file->GetLocationChecksum())) {
+        return false;
+      }
+    }
+    return DoWriteElf(file, oat_writer, key_value_store);
+  }
+
+  bool WriteElf(File* file,
+                const std::vector<const char*>& dex_filenames,
+                SafeMap<std::string, std::string>& key_value_store) {
+    TimingLogger timings("WriteElf", false, false);
+    OatWriter oat_writer(/*compiling_boot_image*/false, &timings);
+    for (const char* dex_filename : dex_filenames) {
+      if (!oat_writer.AddDexFileSource(dex_filename, dex_filename)) {
+        return false;
+      }
+    }
+    return DoWriteElf(file, oat_writer, key_value_store);
+  }
+
+  bool WriteElf(File* file,
+                ScopedFd&& zip_fd,
+                const char* location,
+                SafeMap<std::string, std::string>& key_value_store) {
+    TimingLogger timings("WriteElf", false, false);
+    OatWriter oat_writer(/*compiling_boot_image*/false, &timings);
+    if (!oat_writer.AddZippedDexFilesSource(std::move(zip_fd), location)) {
+      return false;
+    }
+    return DoWriteElf(file, oat_writer, key_value_store);
+  }
+
+  bool DoWriteElf(File* file,
+                  OatWriter& oat_writer,
+                  SafeMap<std::string, std::string>& key_value_store) {
    std::unique_ptr<ElfWriter> elf_writer = CreateElfWriterQuick(
        compiler_driver_->GetInstructionSet(),
        &compiler_driver_->GetCompilerOptions(),
        file);
-    elf_writer->Start();
-
     OutputStream* rodata = elf_writer->StartRoData();
+    std::unique_ptr<MemMap> opened_dex_files_map;
+    std::vector<std::unique_ptr<const DexFile>> opened_dex_files;
+    if (!oat_writer.WriteAndOpenDexFiles(rodata,
+                                         file,
+                                         compiler_driver_->GetInstructionSet(),
+                                         compiler_driver_->GetInstructionSetFeatures(),
+                                         &key_value_store,
+                                         &opened_dex_files_map,
+                                         &opened_dex_files)) {
+      return false;
+    }
+    Runtime* runtime = Runtime::Current();
+    ClassLinker* const class_linker = runtime->GetClassLinker();
+    std::vector<const DexFile*> dex_files;
+    for (const std::unique_ptr<const DexFile>& dex_file : opened_dex_files) {
+      dex_files.push_back(dex_file.get());
+      ScopedObjectAccess soa(Thread::Current());
+      class_linker->RegisterDexFile(*dex_file, runtime->GetLinearAlloc());
+    }
+    oat_writer.PrepareLayout(compiler_driver_.get(), nullptr, dex_files);
     if (!oat_writer.WriteRodata(rodata)) {
       return false;
     }
@@ -157,6 +207,10 @@ class OatTest : public CommonCompilerTest {
     }
     elf_writer->EndText(text);
 
+    if (!oat_writer.WriteHeader(elf_writer->GetStream(), 42U, 4096U, 0)) {
+      return false;
+    }
+
     elf_writer->SetBssSize(oat_writer.GetBssSize());
     elf_writer->WriteDynamicSection();
     elf_writer->WriteDebugInfo(oat_writer.GetMethodDebugInfo());
@@ -169,6 +223,117 @@ class OatTest : public CommonCompilerTest {
   std::unique_ptr<QuickCompilerCallbacks> callbacks_;
 };
 
+class ZipBuilder {
+ public:
+  explicit ZipBuilder(File* zip_file) : zip_file_(zip_file) { }
+
+  bool AddFile(const char* location, const void* data, size_t size) {
+    off_t offset = lseek(zip_file_->Fd(), 0, SEEK_CUR);
+    if (offset == static_cast<off_t>(-1)) {
+      return false;
+    }
+
+    ZipFileHeader file_header;
+    file_header.crc32 = crc32(0u, reinterpret_cast<const Bytef*>(data), size);
+    file_header.compressed_size = size;
+    file_header.uncompressed_size = size;
+    file_header.filename_length = strlen(location);
+
+    if (!zip_file_->WriteFully(&file_header, sizeof(file_header)) ||
+        !zip_file_->WriteFully(location, file_header.filename_length) ||
+        !zip_file_->WriteFully(data, size)) {
+      return false;
+    }
+
+    CentralDirectoryFileHeader cdfh;
+    cdfh.crc32 = file_header.crc32;
+    cdfh.compressed_size = size;
+    cdfh.uncompressed_size = size;
+    cdfh.filename_length = file_header.filename_length;
+    cdfh.relative_offset_of_local_file_header = offset;
+    file_data_.push_back(FileData { cdfh, location });
+    return true;
+  }
+
+  bool Finish() {
+    off_t offset = lseek(zip_file_->Fd(), 0, SEEK_CUR);
+    if (offset == static_cast<off_t>(-1)) {
+      return false;
+    }
+
+    size_t central_directory_size = 0u;
+    for (const FileData& file_data : file_data_) {
+      if (!zip_file_->WriteFully(&file_data.cdfh, sizeof(file_data.cdfh)) ||
+          !zip_file_->WriteFully(file_data.location, file_data.cdfh.filename_length)) {
+        return false;
+      }
+      central_directory_size += sizeof(file_data.cdfh) + file_data.cdfh.filename_length;
+    }
+    EndOfCentralDirectoryRecord eocd_record;
+    eocd_record.number_of_central_directory_records_on_this_disk = file_data_.size();
+    eocd_record.total_number_of_central_directory_records = file_data_.size();
+    eocd_record.size_of_central_directory = central_directory_size;
+    eocd_record.offset_of_start_of_central_directory = offset;
+    return
+        zip_file_->WriteFully(&eocd_record, sizeof(eocd_record)) &&
+        zip_file_->Flush() == 0;
+  }
+
+ private:
+  struct PACKED(1) ZipFileHeader {
+    uint32_t signature = 0x04034b50;
+    uint16_t version_needed_to_extract = 10;
+    uint16_t general_purpose_bit_flag = 0;
+    uint16_t compression_method = 0;            // 0 = store only.
+    uint16_t file_last_modification_time = 0u;
+    uint16_t file_last_modification_date = 0u;
+    uint32_t crc32;
+    uint32_t compressed_size;
+    uint32_t uncompressed_size;
+    uint16_t filename_length;
+    uint16_t extra_field_length = 0u;           // No extra fields.
+  };
+
+  struct PACKED(1) CentralDirectoryFileHeader {
+    uint32_t signature = 0x02014b50;
+    uint16_t version_made_by = 10;
+    uint16_t version_needed_to_extract = 10;
+    uint16_t general_purpose_bit_flag = 0;
+    uint16_t compression_method = 0;            // 0 = store only.
+    uint16_t file_last_modification_time = 0u;
+    uint16_t file_last_modification_date = 0u;
+    uint32_t crc32;
+    uint32_t compressed_size;
+    uint32_t uncompressed_size;
+    uint16_t filename_length;
+    uint16_t extra_field_length = 0u;           // No extra fields.
+    uint16_t file_comment_length = 0u;          // No file comment.
+    uint16_t disk_number_where_file_starts = 0u;
+    uint16_t internal_file_attributes = 0u;
+    uint32_t external_file_attributes = 0u;
+    uint32_t relative_offset_of_local_file_header;
+  };
+
+  struct PACKED(1) EndOfCentralDirectoryRecord {
+    uint32_t signature = 0x06054b50;
+    uint16_t number_of_this_disk = 0u;
+    uint16_t disk_where_central_directory_starts = 0u;
+    uint16_t number_of_central_directory_records_on_this_disk;
+    uint16_t total_number_of_central_directory_records;
+    uint32_t size_of_central_directory;
+    uint32_t offset_of_start_of_central_directory;
+    uint16_t comment_length = 0u;               // No file comment.
+  };
+
+  struct FileData {
+    CentralDirectoryFileHeader cdfh;
+    const char* location;
+  };
+
+  File* zip_file_;
+  std::vector<FileData> file_data_;
+};
+
 TEST_F(OatTest, WriteRead) {
   TimingLogger timings("OatTest::WriteRead", false, false);
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
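ZipBuilder above writes the smallest ZIP that tools will accept: stored (compression method 0, i.e. uncompressed) entries, each as local file header + name + payload, followed by the central directory and the end-of-central-directory record; the crc32() call is zlib's. Usage, as the ZipFileInput test below exercises it (the payload pointers and sizes here are placeholders):

    ScratchFile zip_file;
    ZipBuilder zip_builder(zip_file.GetFile());
    // dex1_data/dex1_size etc. are assumed to point at valid dex images.
    CHECK(zip_builder.AddFile("classes.dex", dex1_data, dex1_size));
    CHECK(zip_builder.AddFile("classes2.dex", dex2_data, dex2_size));
    CHECK(zip_builder.Finish());  // central directory + EOCD record, then flush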
@@ -329,4 +494,189 @@ TEST_F(OatTest, EmptyTextSection) {
   EXPECT_LT(static_cast<size_t>(oat_file->Size()),
             static_cast<size_t>(tmp.GetFile()->GetLength()));
 }
 
+TEST_F(OatTest, DexFileInput) {
+  TimingLogger timings("OatTest::DexFileInput", false, false);
+
+  std::vector<const char*> input_filenames;
+
+  ScratchFile dex_file1;
+  TestDexFileBuilder builder1;
+  builder1.AddField("Lsome.TestClass;", "int", "someField");
+  builder1.AddMethod("Lsome.TestClass;", "()I", "foo");
+  std::unique_ptr<const DexFile> dex_file1_data = builder1.Build(dex_file1.GetFilename());
+  bool success = dex_file1.GetFile()->WriteFully(&dex_file1_data->GetHeader(),
+                                                 dex_file1_data->GetHeader().file_size_);
+  ASSERT_TRUE(success);
+  success = dex_file1.GetFile()->Flush() == 0;
+  ASSERT_TRUE(success);
+  input_filenames.push_back(dex_file1.GetFilename().c_str());
+
+  ScratchFile dex_file2;
+  TestDexFileBuilder builder2;
+  builder2.AddField("Land.AnotherTestClass;", "boolean", "someOtherField");
+  builder2.AddMethod("Land.AnotherTestClass;", "()J", "bar");
+  std::unique_ptr<const DexFile> dex_file2_data = builder2.Build(dex_file2.GetFilename());
+  success = dex_file2.GetFile()->WriteFully(&dex_file2_data->GetHeader(),
+                                            dex_file2_data->GetHeader().file_size_);
+  ASSERT_TRUE(success);
+  success = dex_file2.GetFile()->Flush() == 0;
+  ASSERT_TRUE(success);
+  input_filenames.push_back(dex_file2.GetFilename().c_str());
+
+  ScratchFile oat_file;
+  SafeMap<std::string, std::string> key_value_store;
+  key_value_store.Put(OatHeader::kImageLocationKey, "test.art");
+  success = WriteElf(oat_file.GetFile(), input_filenames, key_value_store);
+  ASSERT_TRUE(success);
+
+  std::string error_msg;
+  std::unique_ptr<OatFile> opened_oat_file(OatFile::Open(oat_file.GetFilename(),
+                                                         oat_file.GetFilename(),
+                                                         nullptr,
+                                                         nullptr,
+                                                         false,
+                                                         nullptr,
+                                                         &error_msg));
+  ASSERT_TRUE(opened_oat_file != nullptr);
+  ASSERT_EQ(2u, opened_oat_file->GetOatDexFiles().size());
+  std::unique_ptr<const DexFile> opened_dex_file1 =
+      opened_oat_file->GetOatDexFiles()[0]->OpenDexFile(&error_msg);
+  std::unique_ptr<const DexFile> opened_dex_file2 =
+      opened_oat_file->GetOatDexFiles()[1]->OpenDexFile(&error_msg);
+
+  ASSERT_EQ(dex_file1_data->GetHeader().file_size_, opened_dex_file1->GetHeader().file_size_);
+  ASSERT_EQ(0, memcmp(&dex_file1_data->GetHeader(),
+                      &opened_dex_file1->GetHeader(),
+                      dex_file1_data->GetHeader().file_size_));
+  ASSERT_EQ(dex_file1_data->GetLocation(), opened_dex_file1->GetLocation());
+
+  ASSERT_EQ(dex_file2_data->GetHeader().file_size_, opened_dex_file2->GetHeader().file_size_);
+  ASSERT_EQ(0, memcmp(&dex_file2_data->GetHeader(),
+                      &opened_dex_file2->GetHeader(),
+                      dex_file2_data->GetHeader().file_size_));
+  ASSERT_EQ(dex_file2_data->GetLocation(), opened_dex_file2->GetLocation());
+}
+
+TEST_F(OatTest, ZipFileInput) {
+  TimingLogger timings("OatTest::DexFileInput", false, false);
+
+  ScratchFile zip_file;
+  ZipBuilder zip_builder(zip_file.GetFile());
+
+  ScratchFile dex_file1;
+  TestDexFileBuilder builder1;
+  builder1.AddField("Lsome.TestClass;", "long", "someField");
+  builder1.AddMethod("Lsome.TestClass;", "()D", "foo");
+  std::unique_ptr<const DexFile> dex_file1_data = builder1.Build(dex_file1.GetFilename());
+  bool success = dex_file1.GetFile()->WriteFully(&dex_file1_data->GetHeader(),
+                                                 dex_file1_data->GetHeader().file_size_);
+  ASSERT_TRUE(success);
+  success = dex_file1.GetFile()->Flush() == 0;
+  ASSERT_TRUE(success);
+  success = zip_builder.AddFile("classes.dex",
+                                &dex_file1_data->GetHeader(),
+                                dex_file1_data->GetHeader().file_size_);
+  ASSERT_TRUE(success);
+
+  ScratchFile dex_file2;
+  TestDexFileBuilder builder2;
+  builder2.AddField("Land.AnotherTestClass;", "boolean", "someOtherField");
+  builder2.AddMethod("Land.AnotherTestClass;", "()J", "bar");
+  std::unique_ptr<const DexFile> dex_file2_data = builder2.Build(dex_file2.GetFilename());
+  success = dex_file2.GetFile()->WriteFully(&dex_file2_data->GetHeader(),
+                                            dex_file2_data->GetHeader().file_size_);
+  ASSERT_TRUE(success);
+  success = dex_file2.GetFile()->Flush() == 0;
+  ASSERT_TRUE(success);
+  success = zip_builder.AddFile("classes2.dex",
+                                &dex_file2_data->GetHeader(),
+                                dex_file2_data->GetHeader().file_size_);
+  ASSERT_TRUE(success);
+
+  success = zip_builder.Finish();
+  ASSERT_TRUE(success) << strerror(errno);
+
+  SafeMap<std::string, std::string> key_value_store;
+  key_value_store.Put(OatHeader::kImageLocationKey, "test.art");
+  {
+    // Test using the AddDexFileSource() interface with the zip file.
+    std::vector<const char*> input_filenames { zip_file.GetFilename().c_str() };  // NOLINT [readability/braces] [4]
+
+    ScratchFile oat_file;
+    success = WriteElf(oat_file.GetFile(), input_filenames, key_value_store);
+    ASSERT_TRUE(success);
+
+    std::string error_msg;
+    std::unique_ptr<OatFile> opened_oat_file(OatFile::Open(oat_file.GetFilename(),
+                                                           oat_file.GetFilename(),
+                                                           nullptr,
+                                                           nullptr,
+                                                           false,
+                                                           nullptr,
+                                                           &error_msg));
+    ASSERT_TRUE(opened_oat_file != nullptr);
+    ASSERT_EQ(2u, opened_oat_file->GetOatDexFiles().size());
+    std::unique_ptr<const DexFile> opened_dex_file1 =
+        opened_oat_file->GetOatDexFiles()[0]->OpenDexFile(&error_msg);
+    std::unique_ptr<const DexFile> opened_dex_file2 =
+        opened_oat_file->GetOatDexFiles()[1]->OpenDexFile(&error_msg);
+
+    ASSERT_EQ(dex_file1_data->GetHeader().file_size_, opened_dex_file1->GetHeader().file_size_);
+    ASSERT_EQ(0, memcmp(&dex_file1_data->GetHeader(),
+                        &opened_dex_file1->GetHeader(),
+                        dex_file1_data->GetHeader().file_size_));
+    ASSERT_EQ(DexFile::GetMultiDexLocation(0, zip_file.GetFilename().c_str()),
+              opened_dex_file1->GetLocation());
+
+    ASSERT_EQ(dex_file2_data->GetHeader().file_size_, opened_dex_file2->GetHeader().file_size_);
+    ASSERT_EQ(0, memcmp(&dex_file2_data->GetHeader(),
+                        &opened_dex_file2->GetHeader(),
+                        dex_file2_data->GetHeader().file_size_));
+    ASSERT_EQ(DexFile::GetMultiDexLocation(1, zip_file.GetFilename().c_str()),
+              opened_dex_file2->GetLocation());
+  }
+
+  {
+    // Test using the AddZipDexFileSource() interface with the zip file handle.
+    ScopedFd zip_fd(dup(zip_file.GetFd()));
+    ASSERT_NE(-1, zip_fd.get());
+
+    ScratchFile oat_file;
+    success = WriteElf(oat_file.GetFile(),
+                       std::move(zip_fd),
+                       zip_file.GetFilename().c_str(),
+                       key_value_store);
+    ASSERT_TRUE(success);
+
+    std::string error_msg;
+    std::unique_ptr<OatFile> opened_oat_file(OatFile::Open(oat_file.GetFilename(),
+                                                           oat_file.GetFilename(),
+                                                           nullptr,
+                                                           nullptr,
+                                                           false,
+                                                           nullptr,
+                                                           &error_msg));
+    ASSERT_TRUE(opened_oat_file != nullptr);
+    ASSERT_EQ(2u, opened_oat_file->GetOatDexFiles().size());
+    std::unique_ptr<const DexFile> opened_dex_file1 =
+        opened_oat_file->GetOatDexFiles()[0]->OpenDexFile(&error_msg);
+    std::unique_ptr<const DexFile> opened_dex_file2 =
+        opened_oat_file->GetOatDexFiles()[1]->OpenDexFile(&error_msg);
+
+    ASSERT_EQ(dex_file1_data->GetHeader().file_size_, opened_dex_file1->GetHeader().file_size_);
+    ASSERT_EQ(0, memcmp(&dex_file1_data->GetHeader(),
+                        &opened_dex_file1->GetHeader(),
+                        dex_file1_data->GetHeader().file_size_));
+    ASSERT_EQ(DexFile::GetMultiDexLocation(0, zip_file.GetFilename().c_str()),
+              opened_dex_file1->GetLocation());
+
+    ASSERT_EQ(dex_file2_data->GetHeader().file_size_, opened_dex_file2->GetHeader().file_size_);
+    ASSERT_EQ(0, memcmp(&dex_file2_data->GetHeader(),
+                        &opened_dex_file2->GetHeader(),
+                        dex_file2_data->GetHeader().file_size_));
+    ASSERT_EQ(DexFile::GetMultiDexLocation(1, zip_file.GetFilename().c_str()),
+              opened_dex_file2->GetLocation());
+  }
+}
+
 }  // namespace art
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 025e35e178..c74c41f0c9 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -16,12 +16,14 @@
 
 #include "oat_writer.h"
 
+#include <unistd.h>
 #include <zlib.h>
 
 #include "arch/arm64/instruction_set_features_arm64.h"
 #include "art_method-inl.h"
 #include "base/allocator.h"
 #include "base/bit_vector.h"
+#include "base/file_magic.h"
 #include "base/stl_util.h"
 #include "base/unix_file/fd_file.h"
 #include "class_linker.h"
@@ -49,9 +51,77 @@
"type_lookup_table.h" #include "utils/dex_cache_arrays_layout-inl.h" #include "verifier/method_verifier.h" +#include "zip_archive.h" namespace art { +namespace { // anonymous namespace + +typedef DexFile::Header __attribute__((aligned(1))) UnalignedDexFileHeader; + +const UnalignedDexFileHeader* AsUnalignedDexFileHeader(const uint8_t* raw_data) { + return reinterpret_cast<const UnalignedDexFileHeader*>(raw_data); +} + +} // anonymous namespace + +// Defines the location of the raw dex file to write. +class OatWriter::DexFileSource { + public: + explicit DexFileSource(ZipEntry* zip_entry) + : type_(kZipEntry), source_(zip_entry) { + DCHECK(source_ != nullptr); + } + + explicit DexFileSource(File* raw_file) + : type_(kRawFile), source_(raw_file) { + DCHECK(source_ != nullptr); + } + + explicit DexFileSource(const uint8_t* dex_file) + : type_(kRawData), source_(dex_file) { + DCHECK(source_ != nullptr); + } + + bool IsZipEntry() const { return type_ == kZipEntry; } + bool IsRawFile() const { return type_ == kRawFile; } + bool IsRawData() const { return type_ == kRawData; } + + ZipEntry* GetZipEntry() const { + DCHECK(IsZipEntry()); + DCHECK(source_ != nullptr); + return static_cast<ZipEntry*>(const_cast<void*>(source_)); + } + + File* GetRawFile() const { + DCHECK(IsRawFile()); + DCHECK(source_ != nullptr); + return static_cast<File*>(const_cast<void*>(source_)); + } + + const uint8_t* GetRawData() const { + DCHECK(IsRawData()); + DCHECK(source_ != nullptr); + return static_cast<const uint8_t*>(source_); + } + + void Clear() { + type_ = kNone; + source_ = nullptr; + } + + private: + enum Type { + kNone, + kZipEntry, + kRawFile, + kRawData, + }; + + Type type_; + const void* source_; +}; + class OatWriter::OatClass { public: OatClass(size_t offset, @@ -116,11 +186,30 @@ class OatWriter::OatClass { class OatWriter::OatDexFile { public: - OatDexFile(size_t offset, const DexFile& dex_file); + OatDexFile(const char* dex_file_location, + DexFileSource source, + CreateTypeLookupTable create_type_lookup_table); OatDexFile(OatDexFile&& src) = default; + const char* GetLocation() const { + return dex_file_location_data_; + } + + void ReserveTypeLookupTable(OatWriter* oat_writer); + void ReserveClassOffsets(OatWriter* oat_writer); + size_t SizeOf() const; - bool Write(OatWriter* oat_writer, OutputStream* out, const size_t file_offset) const; + bool Write(OatWriter* oat_writer, OutputStream* out) const; + bool WriteClassOffsets(OatWriter* oat_writer, OutputStream* out); + + // The source of the dex file. + DexFileSource source_; + + // Whether to create the type lookup table. + CreateTypeLookupTable create_type_lookup_table_; + + // Dex file size. Initialized when writing the dex file. + size_t dex_file_size_; // Offset of start of OatDexFile from beginning of OatHeader. It is // used to validate file position when writing. @@ -128,11 +217,13 @@ class OatWriter::OatDexFile { // Data to write. uint32_t dex_file_location_size_; - const uint8_t* dex_file_location_data_; + const char* dex_file_location_data_; uint32_t dex_file_location_checksum_; uint32_t dex_file_offset_; + uint32_t class_offsets_offset_; uint32_t lookup_table_offset_; - TypeLookupTable* lookup_table_; // Owned by the dex file. + + // Data to write to a separate section. 
+  dchecked_vector<uint32_t> class_offsets_;
 
  private:
@@ -151,26 +242,20 @@ class OatWriter::OatDexFile {
   DCHECK_EQ(static_cast<off_t>(file_offset + offset_), out->Seek(0, kSeekCurrent)) \
     << "file_offset=" << file_offset << " offset_=" << offset_
 
-OatWriter::OatWriter(const std::vector<const DexFile*>& dex_files,
-                     uint32_t image_file_location_oat_checksum,
-                     uintptr_t image_file_location_oat_begin,
-                     int32_t image_patch_delta,
-                     const CompilerDriver* compiler,
-                     ImageWriter* image_writer,
-                     bool compiling_boot_image,
-                     TimingLogger* timings,
-                     SafeMap<std::string, std::string>* key_value_store)
-    : compiler_driver_(compiler),
-      image_writer_(image_writer),
+OatWriter::OatWriter(bool compiling_boot_image, TimingLogger* timings)
+    : write_state_(WriteState::kAddingDexFileSources),
+      timings_(timings),
+      raw_dex_files_(),
+      zip_archives_(),
+      zipped_dex_files_(),
+      zipped_dex_file_locations_(),
+      compiler_driver_(nullptr),
+      image_writer_(nullptr),
       compiling_boot_image_(compiling_boot_image),
-      dex_files_(&dex_files),
+      dex_files_(nullptr),
       size_(0u),
       bss_size_(0u),
       oat_data_offset_(0u),
-      image_file_location_oat_checksum_(image_file_location_oat_checksum),
-      image_file_location_oat_begin_(image_file_location_oat_begin),
-      image_patch_delta_(image_patch_delta),
-      key_value_store_(key_value_store),
       oat_header_(nullptr),
       size_dex_file_alignment_(0),
       size_executable_offset_alignment_(0),
@@ -197,55 +282,192 @@ OatWriter::OatWriter(const std::vector<const DexFile*>& dex_files,
       size_oat_dex_file_location_data_(0),
       size_oat_dex_file_location_checksum_(0),
       size_oat_dex_file_offset_(0),
+      size_oat_dex_file_class_offsets_offset_(0),
       size_oat_dex_file_lookup_table_offset_(0),
-      size_oat_dex_file_class_offsets_(0),
       size_oat_lookup_table_alignment_(0),
       size_oat_lookup_table_(0),
+      size_oat_class_offsets_alignment_(0),
+      size_oat_class_offsets_(0),
      size_oat_class_type_(0),
       size_oat_class_status_(0),
       size_oat_class_method_bitmaps_(0),
       size_oat_class_method_offsets_(0),
       method_offset_map_() {
-  CHECK(key_value_store != nullptr);
-  if (compiling_boot_image) {
-    CHECK(image_writer != nullptr);
+}
+
+bool OatWriter::AddDexFileSource(const char* filename,
+                                 const char* location,
+                                 CreateTypeLookupTable create_type_lookup_table) {
+  DCHECK(write_state_ == WriteState::kAddingDexFileSources);
+  uint32_t magic;
+  std::string error_msg;
+  ScopedFd fd(OpenAndReadMagic(filename, &magic, &error_msg));
+  if (fd.get() == -1) {
+    PLOG(ERROR) << "Failed to read magic number from dex file: '" << filename << "'";
+    return false;
+  } else if (IsDexMagic(magic)) {
+    // The file is open for reading, not writing, so it's OK to let the File destructor
+    // close it without checking for explicit Close(), so pass checkUsage = false.
+    raw_dex_files_.emplace_back(new File(fd.release(), location, /* checkUsage */ false));
+    oat_dex_files_.emplace_back(location,
+                                DexFileSource(raw_dex_files_.back().get()),
+                                create_type_lookup_table);
+  } else if (IsZipMagic(magic)) {
+    if (!AddZippedDexFilesSource(std::move(fd), location, create_type_lookup_table)) {
+      return false;
+    }
+  } else {
+    LOG(ERROR) << "Expected valid zip or dex file: '" << filename << "'";
+    return false;
   }
-  InstructionSet instruction_set = compiler_driver_->GetInstructionSet();
-  const InstructionSetFeatures* features = compiler_driver_->GetInstructionSetFeatures();
-  relative_patcher_ = linker::RelativePatcher::Create(instruction_set, features,
-                                                      &method_offset_map_);
+  return true;
+}
 
-  size_t offset;
-  {
-    TimingLogger::ScopedTiming split("InitOatHeader", timings);
-    offset = InitOatHeader();
+// Add dex file source(s) from a zip file specified by a file handle.
+bool OatWriter::AddZippedDexFilesSource(ScopedFd&& zip_fd,
+                                        const char* location,
+                                        CreateTypeLookupTable create_type_lookup_table) {
+  DCHECK(write_state_ == WriteState::kAddingDexFileSources);
+  std::string error_msg;
+  zip_archives_.emplace_back(ZipArchive::OpenFromFd(zip_fd.release(), location, &error_msg));
+  ZipArchive* zip_archive = zip_archives_.back().get();
+  if (zip_archive == nullptr) {
+    LOG(ERROR) << "Failed to open zip from file descriptor for '" << location << "': "
+               << error_msg;
+    return false;
   }
-  {
-    TimingLogger::ScopedTiming split("InitOatDexFiles", timings);
-    offset = InitOatDexFiles(offset);
+  for (size_t i = 0; ; ++i) {
+    std::string entry_name = DexFile::GetMultiDexClassesDexName(i);
+    std::unique_ptr<ZipEntry> entry(zip_archive->Find(entry_name.c_str(), &error_msg));
+    if (entry == nullptr) {
+      break;
+    }
+    zipped_dex_files_.push_back(std::move(entry));
+    zipped_dex_file_locations_.push_back(DexFile::GetMultiDexLocation(i, location));
+    const char* full_location = zipped_dex_file_locations_.back().c_str();
+    oat_dex_files_.emplace_back(full_location,
+                                DexFileSource(zipped_dex_files_.back().get()),
+                                create_type_lookup_table);
+  }
+  if (zipped_dex_file_locations_.empty()) {
+    LOG(ERROR) << "No dex files in zip file '" << location << "': " << error_msg;
+    return false;
   }
-  {
-    TimingLogger::ScopedTiming split("InitDexFiles", timings);
-    offset = InitDexFiles(offset);
+  return true;
+}
+
+// Add dex file source from raw memory.
+bool OatWriter::AddRawDexFileSource(const ArrayRef<const uint8_t>& data,
+                                    const char* location,
+                                    uint32_t location_checksum,
+                                    CreateTypeLookupTable create_type_lookup_table) {
+  DCHECK(write_state_ == WriteState::kAddingDexFileSources);
+  if (data.size() < sizeof(DexFile::Header)) {
+    LOG(ERROR) << "Provided data is shorter than dex file header. size: "
+               << data.size() << " File: " << location;
+    return false;
   }
-  {
-    TimingLogger::ScopedTiming split("InitLookupTables", timings);
-    offset = InitLookupTables(offset);
+  if (!ValidateDexFileHeader(data.data(), location)) {
+    return false;
+  }
+  const UnalignedDexFileHeader* header = AsUnalignedDexFileHeader(data.data());
+  if (data.size() < header->file_size_) {
Data size: " << data.size() + << " file size from header: " << header->file_size_ << " File: " << location; + return false; + } + + oat_dex_files_.emplace_back(location, DexFileSource(data.data()), create_type_lookup_table); + oat_dex_files_.back().dex_file_location_checksum_ = location_checksum; + return true; +} + +dchecked_vector<const char*> OatWriter::GetSourceLocations() const { + dchecked_vector<const char*> locations; + locations.reserve(oat_dex_files_.size()); + for (const OatDexFile& oat_dex_file : oat_dex_files_) { + locations.push_back(oat_dex_file.GetLocation()); } + return locations; +} + +bool OatWriter::WriteAndOpenDexFiles( + OutputStream* rodata, + File* file, + InstructionSet instruction_set, + const InstructionSetFeatures* instruction_set_features, + SafeMap<std::string, std::string>* key_value_store, + /*out*/ std::unique_ptr<MemMap>* opened_dex_files_map, + /*out*/ std::vector<std::unique_ptr<const DexFile>>* opened_dex_files) { + CHECK(write_state_ == WriteState::kAddingDexFileSources); + + size_t offset = InitOatHeader(instruction_set, + instruction_set_features, + dchecked_integral_cast<uint32_t>(oat_dex_files_.size()), + key_value_store); + offset = InitOatDexFiles(offset); + size_ = offset; + + std::unique_ptr<MemMap> dex_files_map; + std::vector<std::unique_ptr<const DexFile>> dex_files; + if (!WriteDexFiles(rodata, file)) { + return false; + } + // Reserve space for type lookup tables and update type_lookup_table_offset_. + for (OatDexFile& oat_dex_file : oat_dex_files_) { + oat_dex_file.ReserveTypeLookupTable(this); + } + size_t size_after_type_lookup_tables = size_; + // Reserve space for class offsets and update class_offsets_offset_. + for (OatDexFile& oat_dex_file : oat_dex_files_) { + oat_dex_file.ReserveClassOffsets(this); + } + if (!WriteOatDexFiles(rodata) || + !ExtendForTypeLookupTables(rodata, file, size_after_type_lookup_tables) || + !OpenDexFiles(file, &dex_files_map, &dex_files) || + !WriteTypeLookupTables(dex_files_map.get(), dex_files)) { + return false; + } + + *opened_dex_files_map = std::move(dex_files_map); + *opened_dex_files = std::move(dex_files); + write_state_ = WriteState::kPrepareLayout; + return true; +} + +void OatWriter::PrepareLayout(const CompilerDriver* compiler, + ImageWriter* image_writer, + const std::vector<const DexFile*>& dex_files) { + CHECK(write_state_ == WriteState::kPrepareLayout); + + dex_files_ = &dex_files; + + compiler_driver_ = compiler; + image_writer_ = image_writer; + if (compiling_boot_image_) { + CHECK(image_writer_ != nullptr); + } + InstructionSet instruction_set = compiler_driver_->GetInstructionSet(); + CHECK_EQ(instruction_set, oat_header_->GetInstructionSet()); + const InstructionSetFeatures* features = compiler_driver_->GetInstructionSetFeatures(); + relative_patcher_ = linker::RelativePatcher::Create(instruction_set, features, + &method_offset_map_); + + uint32_t offset = size_; { - TimingLogger::ScopedTiming split("InitOatClasses", timings); + TimingLogger::ScopedTiming split("InitOatClasses", timings_); offset = InitOatClasses(offset); } { - TimingLogger::ScopedTiming split("InitOatMaps", timings); + TimingLogger::ScopedTiming split("InitOatMaps", timings_); offset = InitOatMaps(offset); } { - TimingLogger::ScopedTiming split("InitOatCode", timings); + TimingLogger::ScopedTiming split("InitOatCode", timings_); offset = InitOatCode(offset); } { - TimingLogger::ScopedTiming split("InitOatCodeDexFiles", timings); + TimingLogger::ScopedTiming split("InitOatCodeDexFiles", timings_); offset = 
     offset = InitOatCodeDexFiles(offset);
   }
   size_ = offset;
@@ -255,7 +477,7 @@ OatWriter::OatWriter(const std::vector<const DexFile*>& dex_files,
   size_t bss_start = RoundUp(size_, kPageSize);
   size_t pointer_size = GetInstructionSetPointerSize(instruction_set);
   bss_size_ = 0u;
-  for (const DexFile* dex_file : dex_files) {
+  for (const DexFile* dex_file : *dex_files_) {
     dex_cache_arrays_offsets_.Put(dex_file, bss_start + bss_size_);
     DexCacheArraysLayout layout(pointer_size, dex_file);
     bss_size_ += layout.Size();
@@ -265,9 +487,10 @@
   CHECK_EQ(dex_files_->size(), oat_dex_files_.size());
   if (compiling_boot_image_) {
     CHECK_EQ(image_writer_ != nullptr,
-             key_value_store_->find(OatHeader::kImageLocationKey) == key_value_store_->end());
+             oat_header_->GetStoreValueByKey(OatHeader::kImageLocationKey) == nullptr);
   }
-  CHECK_ALIGNED(image_patch_delta_, kPageSize);
+
+  write_state_ = WriteState::kWriteRoData;
 }
 
 OatWriter::~OatWriter() {
@@ -1134,59 +1357,26 @@ bool OatWriter::VisitDexMethods(DexMethodVisitor* visitor) {
   return true;
 }
 
-size_t OatWriter::InitOatHeader() {
-  oat_header_.reset(OatHeader::Create(compiler_driver_->GetInstructionSet(),
-                                      compiler_driver_->GetInstructionSetFeatures(),
-                                      dchecked_integral_cast<uint32_t>(dex_files_->size()),
-                                      key_value_store_));
-  oat_header_->SetImageFileLocationOatChecksum(image_file_location_oat_checksum_);
-  oat_header_->SetImageFileLocationOatDataBegin(image_file_location_oat_begin_);
-
+size_t OatWriter::InitOatHeader(InstructionSet instruction_set,
+                                const InstructionSetFeatures* instruction_set_features,
+                                uint32_t num_dex_files,
+                                SafeMap<std::string, std::string>* key_value_store) {
+  TimingLogger::ScopedTiming split("InitOatHeader", timings_);
+  oat_header_.reset(OatHeader::Create(instruction_set,
+                                      instruction_set_features,
+                                      num_dex_files,
+                                      key_value_store));
+  size_oat_header_ += sizeof(OatHeader);
+  size_oat_header_key_value_store_ += oat_header_->GetHeaderSize() - sizeof(OatHeader);
   return oat_header_->GetHeaderSize();
 }
 
 size_t OatWriter::InitOatDexFiles(size_t offset) {
-  // create the OatDexFiles
-  for (size_t i = 0; i != dex_files_->size(); ++i) {
-    const DexFile* dex_file = (*dex_files_)[i];
-    CHECK(dex_file != nullptr);
-    oat_dex_files_.emplace_back(offset, *dex_file);
-    offset += oat_dex_files_.back().SizeOf();
-  }
-  return offset;
-}
-
-size_t OatWriter::InitDexFiles(size_t offset) {
-  // calculate the offsets within OatDexFiles to the DexFiles
-  for (size_t i = 0; i != dex_files_->size(); ++i) {
-    // dex files are required to be 4 byte aligned
-    size_t original_offset = offset;
-    offset = RoundUp(offset, 4);
-    size_dex_file_alignment_ += offset - original_offset;
-
-    // set offset in OatDexFile to DexFile
-    oat_dex_files_[i].dex_file_offset_ = offset;
-
-    const DexFile* dex_file = (*dex_files_)[i];
-
-    // Initialize type lookup table
-    oat_dex_files_[i].lookup_table_ = dex_file->GetTypeLookupTable();
-
-    offset += dex_file->GetHeader().file_size_;
-  }
-  return offset;
-}
-
-size_t OatWriter::InitLookupTables(size_t offset) {
+  TimingLogger::ScopedTiming split("InitOatDexFiles", timings_);
+  // Initialize offsets of dex files.
for (OatDexFile& oat_dex_file : oat_dex_files_) { - if (oat_dex_file.lookup_table_ != nullptr) { - uint32_t aligned_offset = RoundUp(offset, 4); - oat_dex_file.lookup_table_offset_ = aligned_offset; - size_oat_lookup_table_alignment_ += aligned_offset - offset; - offset = aligned_offset + oat_dex_file.lookup_table_->RawDataLength(); - } else { - oat_dex_file.lookup_table_offset_ = 0; - } + oat_dex_file.offset_ = offset; + offset += oat_dex_file.SizeOf(); } return offset; } @@ -1239,7 +1429,6 @@ size_t OatWriter::InitOatCode(size_t offset) { oat_header_->SetExecutableOffset(offset); size_executable_offset_alignment_ = offset - old_offset; if (compiler_driver_->IsBootImage()) { - CHECK_EQ(image_patch_delta_, 0); InstructionSet instruction_set = compiler_driver_->GetInstructionSet(); #define DO_TRAMPOLINE(field, fn_name) \ @@ -1264,7 +1453,6 @@ size_t OatWriter::InitOatCode(size_t offset) { oat_header_->SetQuickImtConflictTrampolineOffset(0); oat_header_->SetQuickResolutionTrampolineOffset(0); oat_header_->SetQuickToInterpreterBridgeOffset(0); - oat_header_->SetImagePatchDelta(image_patch_delta_); } return offset; } @@ -1289,22 +1477,15 @@ size_t OatWriter::InitOatCodeDexFiles(size_t offset) { } bool OatWriter::WriteRodata(OutputStream* out) { - if (!GetOatDataOffset(out)) { - return false; - } - const size_t file_offset = oat_data_offset_; + CHECK(write_state_ == WriteState::kWriteRoData); - // Reserve space for header. It will be written last - after updating the checksum. - size_t header_size = oat_header_->GetHeaderSize(); - if (out->Seek(header_size, kSeekCurrent) == static_cast<off_t>(-1)) { - PLOG(ERROR) << "Failed to reserve space for oat header in " << out->GetLocation(); + if (!WriteClassOffsets(out)) { + LOG(ERROR) << "Failed to write class offsets to " << out->GetLocation(); return false; } - size_oat_header_ += sizeof(OatHeader); - size_oat_header_key_value_store_ += oat_header_->GetHeaderSize() - sizeof(OatHeader); - if (!WriteTables(out, file_offset)) { - LOG(ERROR) << "Failed to write oat tables to " << out->GetLocation(); + if (!WriteClasses(out)) { + LOG(ERROR) << "Failed to write classes to " << out->GetLocation(); return false; } @@ -1313,6 +1494,7 @@ bool OatWriter::WriteRodata(OutputStream* out) { LOG(ERROR) << "Failed to seek to oat code position in " << out->GetLocation(); return false; } + size_t file_offset = oat_data_offset_; size_t relative_offset = static_cast<size_t>(tables_end_offset) - file_offset; relative_offset = WriteMaps(out, file_offset, relative_offset); if (relative_offset == 0) { @@ -1332,11 +1514,13 @@ bool OatWriter::WriteRodata(OutputStream* out) { } DCHECK_OFFSET(); + write_state_ = WriteState::kWriteText; return true; } bool OatWriter::WriteCode(OutputStream* out) { - size_t header_size = oat_header_->GetHeaderSize(); + CHECK(write_state_ == WriteState::kWriteText); + const size_t file_offset = oat_data_offset_; size_t relative_offset = oat_header_->GetExecutableOffset(); DCHECK_OFFSET(); @@ -1390,10 +1574,12 @@ bool OatWriter::WriteCode(OutputStream* out) { DO_STAT(size_oat_dex_file_location_data_); DO_STAT(size_oat_dex_file_location_checksum_); DO_STAT(size_oat_dex_file_offset_); + DO_STAT(size_oat_dex_file_class_offsets_offset_); DO_STAT(size_oat_dex_file_lookup_table_offset_); - DO_STAT(size_oat_dex_file_class_offsets_); DO_STAT(size_oat_lookup_table_alignment_); DO_STAT(size_oat_lookup_table_); + DO_STAT(size_oat_class_offsets_alignment_); + DO_STAT(size_oat_class_offsets_); DO_STAT(size_oat_class_type_); DO_STAT(size_oat_class_status_); 
DO_STAT(size_oat_class_method_bitmaps_); @@ -1408,89 +1594,91 @@ bool OatWriter::WriteCode(OutputStream* out) { CHECK_EQ(file_offset + size_, static_cast<size_t>(oat_end_file_offset)); CHECK_EQ(size_, relative_offset); - // Finalize the header checksum. + write_state_ = WriteState::kWriteHeader; + return true; +} + +bool OatWriter::WriteHeader(OutputStream* out, + uint32_t image_file_location_oat_checksum, + uintptr_t image_file_location_oat_begin, + int32_t image_patch_delta) { + CHECK(write_state_ == WriteState::kWriteHeader); + + oat_header_->SetImageFileLocationOatChecksum(image_file_location_oat_checksum); + oat_header_->SetImageFileLocationOatDataBegin(image_file_location_oat_begin); + if (compiler_driver_->IsBootImage()) { + CHECK_EQ(image_patch_delta, 0); + CHECK_EQ(oat_header_->GetImagePatchDelta(), 0); + } else { + CHECK_ALIGNED(image_patch_delta, kPageSize); + oat_header_->SetImagePatchDelta(image_patch_delta); + } oat_header_->UpdateChecksumWithHeaderData(); - // Write the header now that the checksum is final. + const size_t file_offset = oat_data_offset_; + + off_t current_offset = out->Seek(0, kSeekCurrent); + if (current_offset == static_cast<off_t>(-1)) { + PLOG(ERROR) << "Failed to get current offset from " << out->GetLocation(); + return false; + } if (out->Seek(file_offset, kSeekSet) == static_cast<off_t>(-1)) { PLOG(ERROR) << "Failed to seek to oat header position in " << out->GetLocation(); return false; } DCHECK_EQ(file_offset, static_cast<size_t>(out->Seek(0, kSeekCurrent))); + + // Flush all other data before writing the header. + if (!out->Flush()) { + PLOG(ERROR) << "Failed to flush before writing oat header to " << out->GetLocation(); + return false; + } + // Write the header. + size_t header_size = oat_header_->GetHeaderSize(); if (!out->WriteFully(oat_header_.get(), header_size)) { PLOG(ERROR) << "Failed to write oat header to " << out->GetLocation(); return false; } - if (out->Seek(oat_end_file_offset, kSeekSet) == static_cast<off_t>(-1)) { - PLOG(ERROR) << "Failed to seek to end after writing oat header to " << out->GetLocation(); + // Flush the header data. + if (!out->Flush()) { + PLOG(ERROR) << "Failed to flush after writing oat header to " << out->GetLocation(); + return false; + } + + if (out->Seek(current_offset, kSeekSet) == static_cast<off_t>(-1)) { + PLOG(ERROR) << "Failed to seek back after writing oat header to " << out->GetLocation(); return false; } - DCHECK_EQ(oat_end_file_offset, out->Seek(0, kSeekCurrent)); + DCHECK_EQ(current_offset, out->Seek(0, kSeekCurrent)); + write_state_ = WriteState::kDone; return true; } -bool OatWriter::WriteTables(OutputStream* out, const size_t file_offset) { - for (size_t i = 0; i != oat_dex_files_.size(); ++i) { - if (!oat_dex_files_[i].Write(this, out, file_offset)) { - PLOG(ERROR) << "Failed to write oat dex information to " << out->GetLocation(); - return false; - } - } - for (size_t i = 0; i != oat_dex_files_.size(); ++i) { - uint32_t expected_offset = file_offset + oat_dex_files_[i].dex_file_offset_; - off_t actual_offset = out->Seek(expected_offset, kSeekSet); - if (static_cast<uint32_t>(actual_offset) != expected_offset) { - const DexFile* dex_file = (*dex_files_)[i]; - PLOG(ERROR) << "Failed to seek to dex file section. 
Actual: " << actual_offset - << " Expected: " << expected_offset << " File: " << dex_file->GetLocation(); - return false; - } - const DexFile* dex_file = (*dex_files_)[i]; - if (!out->WriteFully(&dex_file->GetHeader(), dex_file->GetHeader().file_size_)) { - PLOG(ERROR) << "Failed to write dex file " << dex_file->GetLocation() - << " to " << out->GetLocation(); - return false; - } - size_dex_file_ += dex_file->GetHeader().file_size_; - } - if (!WriteLookupTables(out, file_offset)) { - return false; - } - for (size_t i = 0; i != oat_classes_.size(); ++i) { - if (!oat_classes_[i].Write(this, out, file_offset)) { - PLOG(ERROR) << "Failed to write oat methods information to " << out->GetLocation(); - return false; +bool OatWriter::WriteClassOffsets(OutputStream* out) { + for (OatDexFile& oat_dex_file : oat_dex_files_) { + if (oat_dex_file.class_offsets_offset_ != 0u) { + uint32_t expected_offset = oat_data_offset_ + oat_dex_file.class_offsets_offset_; + off_t actual_offset = out->Seek(expected_offset, kSeekSet); + if (static_cast<uint32_t>(actual_offset) != expected_offset) { + PLOG(ERROR) << "Failed to seek to oat class offsets section. Actual: " << actual_offset + << " Expected: " << expected_offset << " File: " << oat_dex_file.GetLocation(); + return false; + } + if (!oat_dex_file.WriteClassOffsets(this, out)) { + return false; + } } } return true; } -bool OatWriter::WriteLookupTables(OutputStream* out, const size_t file_offset) { - for (size_t i = 0; i < oat_dex_files_.size(); ++i) { - const uint32_t lookup_table_offset = oat_dex_files_[i].lookup_table_offset_; - const TypeLookupTable* table = oat_dex_files_[i].lookup_table_; - DCHECK_EQ(lookup_table_offset == 0, table == nullptr); - if (lookup_table_offset == 0) { - continue; - } - const uint32_t expected_offset = file_offset + lookup_table_offset; - off_t actual_offset = out->Seek(expected_offset, kSeekSet); - if (static_cast<uint32_t>(actual_offset) != expected_offset) { - const DexFile* dex_file = (*dex_files_)[i]; - PLOG(ERROR) << "Failed to seek to lookup table section. Actual: " << actual_offset - << " Expected: " << expected_offset << " File: " << dex_file->GetLocation(); +bool OatWriter::WriteClasses(OutputStream* out) { + for (OatClass& oat_class : oat_classes_) { + if (!oat_class.Write(this, out, oat_data_offset_)) { + PLOG(ERROR) << "Failed to write oat methods information to " << out->GetLocation(); return false; } - if (table != nullptr) { - if (!WriteData(out, table->RawData(), table->RawDataLength())) { - const DexFile* dex_file = (*dex_files_)[i]; - PLOG(ERROR) << "Failed to write lookup table for " << dex_file->GetLocation() - << " to " << out->GetLocation(); - return false; - } - size_oat_lookup_table_ += table->RawDataLength(); - } } return true; } @@ -1585,6 +1773,455 @@ bool OatWriter::GetOatDataOffset(OutputStream* out) { return true; } +bool OatWriter::ReadDexFileHeader(File* file, OatDexFile* oat_dex_file) { + // Read the dex file header and perform minimal verification. + uint8_t raw_header[sizeof(DexFile::Header)]; + if (!file->ReadFully(&raw_header, sizeof(DexFile::Header))) { + PLOG(ERROR) << "Failed to read dex file header. 
Actual: " + << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath(); + return false; + } + if (!ValidateDexFileHeader(raw_header, oat_dex_file->GetLocation())) { + return false; + } + + const UnalignedDexFileHeader* header = AsUnalignedDexFileHeader(raw_header); + oat_dex_file->dex_file_size_ = header->file_size_; + oat_dex_file->dex_file_location_checksum_ = header->checksum_; + oat_dex_file->class_offsets_.resize(header->class_defs_size_); + return true; +} + +bool OatWriter::ValidateDexFileHeader(const uint8_t* raw_header, const char* location) { + if (!DexFile::IsMagicValid(raw_header)) { + LOG(ERROR) << "Invalid magic number in dex file header. " << " File: " << location; + return false; + } + if (!DexFile::IsVersionValid(raw_header)) { + LOG(ERROR) << "Invalid version number in dex file header. " << " File: " << location; + return false; + } + const UnalignedDexFileHeader* header = AsUnalignedDexFileHeader(raw_header); + if (header->file_size_ < sizeof(DexFile::Header)) { + LOG(ERROR) << "Dex file header specifies file size insufficient to contain the header." + << " File: " << location; + return false; + } + return true; +} + +bool OatWriter::WriteDexFiles(OutputStream* rodata, File* file) { + TimingLogger::ScopedTiming split("WriteDexFiles", timings_); + + // Get the elf file offset of the oat file. + if (!GetOatDataOffset(rodata)) { + return false; + } + + // Write dex files. + for (OatDexFile& oat_dex_file : oat_dex_files_) { + if (!WriteDexFile(rodata, file, &oat_dex_file)) { + return false; + } + } + + // Close sources. + for (OatDexFile& oat_dex_file : oat_dex_files_) { + oat_dex_file.source_.Clear(); // Get rid of the reference, it's about to be invalidated. + } + zipped_dex_files_.clear(); + zip_archives_.clear(); + raw_dex_files_.clear(); + return true; +} + +bool OatWriter::WriteDexFile(OutputStream* rodata, File* file, OatDexFile* oat_dex_file) { + if (!SeekToDexFile(rodata, file, oat_dex_file)) { + return false; + } + if (oat_dex_file->source_.IsZipEntry()) { + if (!WriteDexFile(rodata, file, oat_dex_file, oat_dex_file->source_.GetZipEntry())) { + return false; + } + } else if (oat_dex_file->source_.IsRawFile()) { + if (!WriteDexFile(rodata, file, oat_dex_file, oat_dex_file->source_.GetRawFile())) { + return false; + } + } else { + DCHECK(oat_dex_file->source_.IsRawData()); + if (!WriteDexFile(rodata, oat_dex_file, oat_dex_file->source_.GetRawData())) { + return false; + } + } + + // Update current size and account for the written data. + DCHECK_EQ(size_, oat_dex_file->dex_file_offset_); + size_ += oat_dex_file->dex_file_size_; + size_dex_file_ += oat_dex_file->dex_file_size_; + return true; +} + +bool OatWriter::SeekToDexFile(OutputStream* out, File* file, OatDexFile* oat_dex_file) { + // Dex files are required to be 4 byte aligned. + size_t original_offset = size_; + size_t offset = RoundUp(original_offset, 4); + size_dex_file_alignment_ += offset - original_offset; + + // Seek to the start of the dex file and flush any pending operations in the stream. + // Verify that, after flushing the stream, the file is at the same offset as the stream. + uint32_t start_offset = oat_data_offset_ + offset; + off_t actual_offset = out->Seek(start_offset, kSeekSet); + if (actual_offset != static_cast<off_t>(start_offset)) { + PLOG(ERROR) << "Failed to seek to dex file section. 
Actual: " << actual_offset + << " Expected: " << start_offset + << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath(); + return false; + } + if (!out->Flush()) { + PLOG(ERROR) << "Failed to flush before writing dex file." + << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath(); + return false; + } + actual_offset = lseek(file->Fd(), 0, SEEK_CUR); + if (actual_offset != static_cast<off_t>(start_offset)) { + PLOG(ERROR) << "Stream/file position mismatch! Actual: " << actual_offset + << " Expected: " << start_offset + << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath(); + return false; + } + + size_ = offset; + oat_dex_file->dex_file_offset_ = offset; + return true; +} + +bool OatWriter::WriteDexFile(OutputStream* rodata, + File* file, + OatDexFile* oat_dex_file, + ZipEntry* dex_file) { + size_t start_offset = oat_data_offset_ + size_; + DCHECK_EQ(static_cast<off_t>(start_offset), rodata->Seek(0, kSeekCurrent)); + + // Extract the dex file and get the extracted size. + std::string error_msg; + if (!dex_file->ExtractToFile(*file, &error_msg)) { + LOG(ERROR) << "Failed to extract dex file from ZIP entry: " << error_msg + << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath(); + return false; + } + if (file->Flush() != 0) { + PLOG(ERROR) << "Failed to flush dex file from ZIP entry." + << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath(); + return false; + } + off_t extracted_end = lseek(file->Fd(), 0, SEEK_CUR); + if (extracted_end == static_cast<off_t>(-1)) { + PLOG(ERROR) << "Failed get end offset after writing dex file from ZIP entry." + << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath(); + return false; + } + if (extracted_end < static_cast<off_t>(start_offset)) { + LOG(ERROR) << "Dex file end position is before start position! End: " << extracted_end + << " Start: " << start_offset + << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath(); + return false; + } + uint64_t extracted_size = static_cast<uint64_t>(extracted_end - start_offset); + if (extracted_size < sizeof(DexFile::Header)) { + LOG(ERROR) << "Extracted dex file is shorter than dex file header. size: " + << extracted_size << " File: " << oat_dex_file->GetLocation(); + return false; + } + + // Read the dex file header and extract required data to OatDexFile. + off_t actual_offset = lseek(file->Fd(), start_offset, SEEK_SET); + if (actual_offset != static_cast<off_t>(start_offset)) { + PLOG(ERROR) << "Failed to seek back to dex file header. Actual: " << actual_offset + << " Expected: " << start_offset + << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath(); + return false; + } + if (!ReadDexFileHeader(file, oat_dex_file)) { + return false; + } + if (extracted_size < oat_dex_file->dex_file_size_) { + LOG(ERROR) << "Extracted truncated dex file. Extracted size: " << extracted_size + << " file size from header: " << oat_dex_file->dex_file_size_ + << " File: " << oat_dex_file->GetLocation(); + return false; + } + + // Override the checksum from header with the CRC from ZIP entry. + oat_dex_file->dex_file_location_checksum_ = dex_file->GetCrc32(); + + // Seek both file and stream to the end offset. + size_t end_offset = start_offset + oat_dex_file->dex_file_size_; + actual_offset = lseek(file->Fd(), end_offset, SEEK_SET); + if (actual_offset != static_cast<off_t>(end_offset)) { + PLOG(ERROR) << "Failed to seek to end of dex file. 
Actual: " << actual_offset + << " Expected: " << end_offset + << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath(); + return false; + } + actual_offset = rodata->Seek(end_offset, kSeekSet); + if (actual_offset != static_cast<off_t>(end_offset)) { + PLOG(ERROR) << "Failed to seek stream to end of dex file. Actual: " << actual_offset + << " Expected: " << end_offset << " File: " << oat_dex_file->GetLocation(); + return false; + } + if (!rodata->Flush()) { + PLOG(ERROR) << "Failed to flush stream after seeking over dex file." + << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath(); + return false; + } + + // If we extracted more than the size specified in the header, truncate the file. + if (extracted_size > oat_dex_file->dex_file_size_) { + if (file->SetLength(end_offset) != 0) { + PLOG(ERROR) << "Failed to truncate excessive dex file length." + << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath(); + return false; + } + } + + return true; +} + +bool OatWriter::WriteDexFile(OutputStream* rodata, + File* file, + OatDexFile* oat_dex_file, + File* dex_file) { + size_t start_offset = oat_data_offset_ + size_; + DCHECK_EQ(static_cast<off_t>(start_offset), rodata->Seek(0, kSeekCurrent)); + + off_t input_offset = lseek(dex_file->Fd(), 0, SEEK_SET); + if (input_offset != static_cast<off_t>(0)) { + PLOG(ERROR) << "Failed to seek to dex file header. Actual: " << input_offset + << " Expected: 0" + << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath(); + return false; + } + if (!ReadDexFileHeader(dex_file, oat_dex_file)) { + return false; + } + + // Copy the input dex file using sendfile(). + if (!file->Copy(dex_file, 0, oat_dex_file->dex_file_size_)) { + PLOG(ERROR) << "Failed to copy dex file to oat file." + << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath(); + return false; + } + if (file->Flush() != 0) { + PLOG(ERROR) << "Failed to flush dex file." + << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath(); + return false; + } + + // Check file position and seek the stream to the end offset. + size_t end_offset = start_offset + oat_dex_file->dex_file_size_; + off_t actual_offset = lseek(file->Fd(), 0, SEEK_CUR); + if (actual_offset != static_cast<off_t>(end_offset)) { + PLOG(ERROR) << "Unexpected file position after copying dex file. Actual: " << actual_offset + << " Expected: " << end_offset + << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath(); + return false; + } + actual_offset = rodata->Seek(end_offset, kSeekSet); + if (actual_offset != static_cast<off_t>(end_offset)) { + PLOG(ERROR) << "Failed to seek stream to end of dex file. Actual: " << actual_offset + << " Expected: " << end_offset << " File: " << oat_dex_file->GetLocation(); + return false; + } + if (!rodata->Flush()) { + PLOG(ERROR) << "Failed to flush stream after seeking over dex file." + << " File: " << oat_dex_file->GetLocation() << " Output: " << file->GetPath(); + return false; + } + + return true; +} + +bool OatWriter::WriteDexFile(OutputStream* rodata, + OatDexFile* oat_dex_file, + const uint8_t* dex_file) { + // Note: The raw data has already been checked to contain the header + // and all the data that the header specifies as the file size. 
+ DCHECK(dex_file != nullptr); + DCHECK(ValidateDexFileHeader(dex_file, oat_dex_file->GetLocation())); + const UnalignedDexFileHeader* header = AsUnalignedDexFileHeader(dex_file); + + if (!rodata->WriteFully(dex_file, header->file_size_)) { + PLOG(ERROR) << "Failed to write dex file " << oat_dex_file->GetLocation() + << " to " << rodata->GetLocation(); + return false; + } + if (!rodata->Flush()) { + PLOG(ERROR) << "Failed to flush stream after writing dex file." + << " File: " << oat_dex_file->GetLocation(); + return false; + } + + // Update dex file size and resize class offsets in the OatDexFile. + // Note: For raw data, the checksum is passed directly to AddRawDexFileSource(). + oat_dex_file->dex_file_size_ = header->file_size_; + oat_dex_file->class_offsets_.resize(header->class_defs_size_); + return true; +} + +bool OatWriter::WriteOatDexFiles(OutputStream* rodata) { + TimingLogger::ScopedTiming split("WriteOatDexFiles", timings_); + + // Seek to the start of OatDexFiles, i.e. to the end of the OatHeader. If there are + // no OatDexFiles, no data is actually written to .rodata before WriteHeader() and + // this Seek() ensures that we reserve the space for OatHeader in .rodata. + DCHECK(oat_dex_files_.empty() || oat_dex_files_[0u].offset_ == oat_header_->GetHeaderSize()); + uint32_t expected_offset = oat_data_offset_ + oat_header_->GetHeaderSize(); + off_t actual_offset = rodata->Seek(expected_offset, kSeekSet); + if (static_cast<uint32_t>(actual_offset) != expected_offset) { + PLOG(ERROR) << "Failed to seek to OatDexFile table section. Actual: " << actual_offset + << " Expected: " << expected_offset << " File: " << rodata->GetLocation(); + return false; + } + + for (size_t i = 0, size = oat_dex_files_.size(); i != size; ++i) { + OatDexFile* oat_dex_file = &oat_dex_files_[i]; + + DCHECK_EQ(oat_data_offset_ + oat_dex_file->offset_, + static_cast<size_t>(rodata->Seek(0, kSeekCurrent))); + + // Write OatDexFile. + if (!oat_dex_file->Write(this, rodata)) { + PLOG(ERROR) << "Failed to write oat dex information to " << rodata->GetLocation(); + return false; + } + } + + return true; +} + +bool OatWriter::ExtendForTypeLookupTables(OutputStream* rodata, File* file, size_t offset) { + TimingLogger::ScopedTiming split("ExtendForTypeLookupTables", timings_); + + int64_t new_length = oat_data_offset_ + dchecked_integral_cast<int64_t>(offset); + if (file->SetLength(new_length) != 0) { + PLOG(ERROR) << "Failed to extend file for type lookup tables. new_length: " << new_length + << " File: " << file->GetPath(); + return false; + } + off_t actual_offset = rodata->Seek(new_length, kSeekSet); + if (actual_offset != static_cast<off_t>(new_length)) { + PLOG(ERROR) << "Failed to seek stream after extending file for type lookup tables." + << " Actual: " << actual_offset << " Expected: " << new_length + << " File: " << rodata->GetLocation(); + return false; + } + if (!rodata->Flush()) { + PLOG(ERROR) << "Failed to flush stream after extending for type lookup tables." + << " File: " << rodata->GetLocation(); + return false; + } + return true; +} + +bool OatWriter::OpenDexFiles( + File* file, + /*out*/ std::unique_ptr<MemMap>* opened_dex_files_map, + /*out*/ std::vector<std::unique_ptr<const DexFile>>* opened_dex_files) { + TimingLogger::ScopedTiming split("OpenDexFiles", timings_); + + if (oat_dex_files_.empty()) { + // Nothing to do. 
+ return true; + } + + size_t map_offset = oat_dex_files_[0].dex_file_offset_; + size_t length = size_ - map_offset; + std::string error_msg; + std::unique_ptr<MemMap> dex_files_map(MemMap::MapFile(length, + PROT_READ | PROT_WRITE, + MAP_SHARED, + file->Fd(), + oat_data_offset_ + map_offset, + /* low_4gb */ false, + file->GetPath().c_str(), + &error_msg)); + if (dex_files_map == nullptr) { + LOG(ERROR) << "Failed to mmap() dex files from oat file. File: " << file->GetPath() + << " error: " << error_msg; + return false; + } + std::vector<std::unique_ptr<const DexFile>> dex_files; + for (OatDexFile& oat_dex_file : oat_dex_files_) { + // Make sure no one messed with input files while we were copying data. + // At the very least we need consistent file size and number of class definitions. + const uint8_t* raw_dex_file = + dex_files_map->Begin() + oat_dex_file.dex_file_offset_ - map_offset; + if (!ValidateDexFileHeader(raw_dex_file, oat_dex_file.GetLocation())) { + // Note: ValidateDexFileHeader() already logged an error message. + LOG(ERROR) << "Failed to verify written dex file header!" + << " Output: " << file->GetPath() << " ~ " << std::hex << map_offset + << " ~ " << static_cast<const void*>(raw_dex_file); + return false; + } + const UnalignedDexFileHeader* header = AsUnalignedDexFileHeader(raw_dex_file); + if (header->file_size_ != oat_dex_file.dex_file_size_) { + LOG(ERROR) << "File size mismatch in written dex file header! Expected: " + << oat_dex_file.dex_file_size_ << " Actual: " << header->file_size_ + << " Output: " << file->GetPath(); + return false; + } + if (header->class_defs_size_ != oat_dex_file.class_offsets_.size()) { + LOG(ERROR) << "Class defs size mismatch in written dex file header! Expected: " + << oat_dex_file.class_offsets_.size() << " Actual: " << header->class_defs_size_ + << " Output: " << file->GetPath(); + return false; + } + + // Now, open the dex file. + dex_files.emplace_back(DexFile::Open(raw_dex_file, + oat_dex_file.dex_file_size_, + oat_dex_file.GetLocation(), + oat_dex_file.dex_file_location_checksum_, + /* oat_dex_file */ nullptr, + &error_msg)); + if (dex_files.back() == nullptr) { + LOG(ERROR) << "Failed to open dex file from oat file. File:" << oat_dex_file.GetLocation(); + return false; + } + } + + *opened_dex_files_map = std::move(dex_files_map); + *opened_dex_files = std::move(dex_files); + return true; +} + +bool OatWriter::WriteTypeLookupTables( + MemMap* opened_dex_files_map, + const std::vector<std::unique_ptr<const DexFile>>& opened_dex_files) { + TimingLogger::ScopedTiming split("WriteTypeLookupTables", timings_); + + DCHECK_EQ(opened_dex_files.size(), oat_dex_files_.size()); + for (size_t i = 0, size = opened_dex_files.size(); i != size; ++i) { + OatDexFile* oat_dex_file = &oat_dex_files_[i]; + if (oat_dex_file->lookup_table_offset_ != 0u) { + DCHECK(oat_dex_file->create_type_lookup_table_ == CreateTypeLookupTable::kCreate); + DCHECK_NE(oat_dex_file->class_offsets_.size(), 0u); + size_t map_offset = oat_dex_files_[0].dex_file_offset_; + size_t lookup_table_offset = oat_dex_file->lookup_table_offset_; + uint8_t* lookup_table = opened_dex_files_map->Begin() + (lookup_table_offset - map_offset); + opened_dex_files[i]->CreateTypeLookupTable(lookup_table); + } + } + + DCHECK_EQ(opened_dex_files_map == nullptr, opened_dex_files.empty()); + if (opened_dex_files_map != nullptr && !opened_dex_files_map->Sync()) { + PLOG(ERROR) << "Failed to Sync() type lookup tables. 
Map: " << opened_dex_files_map->GetName(); + return false; + } + + return true; +} + bool OatWriter::WriteCodeAlignment(OutputStream* out, uint32_t aligned_code_delta) { static const uint8_t kPadding[] = { 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u @@ -1611,15 +2248,20 @@ std::pair<bool, uint32_t> OatWriter::MethodOffsetMap::FindMethodOffset(MethodRef } } -OatWriter::OatDexFile::OatDexFile(size_t offset, const DexFile& dex_file) { - offset_ = offset; - const std::string& location(dex_file.GetLocation()); - dex_file_location_size_ = location.size(); - dex_file_location_data_ = reinterpret_cast<const uint8_t*>(location.data()); - dex_file_location_checksum_ = dex_file.GetLocationChecksum(); - dex_file_offset_ = 0; - lookup_table_offset_ = 0; - class_offsets_.resize(dex_file.NumClassDefs()); +OatWriter::OatDexFile::OatDexFile(const char* dex_file_location, + DexFileSource source, + CreateTypeLookupTable create_type_lookup_table) + : source_(source), + create_type_lookup_table_(create_type_lookup_table), + dex_file_size_(0), + offset_(0), + dex_file_location_size_(strlen(dex_file_location)), + dex_file_location_data_(dex_file_location), + dex_file_location_checksum_(0u), + dex_file_offset_(0u), + class_offsets_offset_(0u), + lookup_table_offset_(0u), + class_offsets_() { } size_t OatWriter::OatDexFile::SizeOf() const { @@ -1627,24 +2269,54 @@ size_t OatWriter::OatDexFile::SizeOf() const { + dex_file_location_size_ + sizeof(dex_file_location_checksum_) + sizeof(dex_file_offset_) - + sizeof(lookup_table_offset_) - + (sizeof(class_offsets_[0]) * class_offsets_.size()); + + sizeof(class_offsets_offset_) + + sizeof(lookup_table_offset_); +} + +void OatWriter::OatDexFile::ReserveTypeLookupTable(OatWriter* oat_writer) { + DCHECK_EQ(lookup_table_offset_, 0u); + if (create_type_lookup_table_ == CreateTypeLookupTable::kCreate && !class_offsets_.empty()) { + size_t table_size = TypeLookupTable::RawDataLength(class_offsets_.size()); + if (table_size != 0u) { + // Type tables are required to be 4 byte aligned. + size_t original_offset = oat_writer->size_; + size_t offset = RoundUp(original_offset, 4); + oat_writer->size_oat_lookup_table_alignment_ += offset - original_offset; + lookup_table_offset_ = offset; + oat_writer->size_ = offset + table_size; + oat_writer->size_oat_lookup_table_ += table_size; + } + } } -bool OatWriter::OatDexFile::Write(OatWriter* oat_writer, - OutputStream* out, - const size_t file_offset) const { +void OatWriter::OatDexFile::ReserveClassOffsets(OatWriter* oat_writer) { + DCHECK_EQ(class_offsets_offset_, 0u); + if (!class_offsets_.empty()) { + // Class offsets are required to be 4 byte aligned. 
+ size_t original_offset = oat_writer->size_; + size_t offset = RoundUp(original_offset, 4); + oat_writer->size_oat_class_offsets_alignment_ += offset - original_offset; + class_offsets_offset_ = offset; + oat_writer->size_ = offset + GetClassOffsetsRawSize(); + } +} + +bool OatWriter::OatDexFile::Write(OatWriter* oat_writer, OutputStream* out) const { + const size_t file_offset = oat_writer->oat_data_offset_; DCHECK_OFFSET_(); + if (!oat_writer->WriteData(out, &dex_file_location_size_, sizeof(dex_file_location_size_))) { PLOG(ERROR) << "Failed to write dex file location length to " << out->GetLocation(); return false; } oat_writer->size_oat_dex_file_location_size_ += sizeof(dex_file_location_size_); + if (!oat_writer->WriteData(out, dex_file_location_data_, dex_file_location_size_)) { PLOG(ERROR) << "Failed to write dex file location data to " << out->GetLocation(); return false; } oat_writer->size_oat_dex_file_location_data_ += dex_file_location_size_; + if (!oat_writer->WriteData(out, &dex_file_location_checksum_, sizeof(dex_file_location_checksum_))) { @@ -1652,21 +2324,35 @@ bool OatWriter::OatDexFile::Write(OatWriter* oat_writer, return false; } oat_writer->size_oat_dex_file_location_checksum_ += sizeof(dex_file_location_checksum_); + if (!oat_writer->WriteData(out, &dex_file_offset_, sizeof(dex_file_offset_))) { PLOG(ERROR) << "Failed to write dex file offset to " << out->GetLocation(); return false; } oat_writer->size_oat_dex_file_offset_ += sizeof(dex_file_offset_); + + if (!oat_writer->WriteData(out, &class_offsets_offset_, sizeof(class_offsets_offset_))) { + PLOG(ERROR) << "Failed to write class offsets offset to " << out->GetLocation(); + return false; + } + oat_writer->size_oat_dex_file_class_offsets_offset_ += sizeof(class_offsets_offset_); + if (!oat_writer->WriteData(out, &lookup_table_offset_, sizeof(lookup_table_offset_))) { PLOG(ERROR) << "Failed to write lookup table offset to " << out->GetLocation(); return false; } oat_writer->size_oat_dex_file_lookup_table_offset_ += sizeof(lookup_table_offset_); + + return true; +} + +bool OatWriter::OatDexFile::WriteClassOffsets(OatWriter* oat_writer, OutputStream* out) { if (!oat_writer->WriteData(out, class_offsets_.data(), GetClassOffsetsRawSize())) { - PLOG(ERROR) << "Failed to write methods offsets to " << out->GetLocation(); + PLOG(ERROR) << "Failed to write oat class offsets for " << GetLocation() + << " to " << out->GetLocation(); return false; } - oat_writer->size_oat_dex_file_class_offsets_ += GetClassOffsetsRawSize(); + oat_writer->size_oat_class_offsets_ += GetClassOffsetsRawSize(); return true; } diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h index 5feb5fc516..d681998774 100644 --- a/compiler/oat_writer.h +++ b/compiler/oat_writer.h @@ -27,7 +27,9 @@ #include "method_reference.h" #include "mirror/class.h" #include "oat.h" +#include "os.h" #include "safe_map.h" +#include "ScopedFd.h" #include "utils/array_ref.h" namespace art { @@ -39,6 +41,7 @@ class ImageWriter; class OutputStream; class TimingLogger; class TypeLookupTable; +class ZipEntry; namespace dwarf { struct MethodDebugInfo; @@ -61,6 +64,11 @@ struct MethodDebugInfo; // ... // TypeLookupTable[D] // +// ClassOffsets[0] one table of OatClass offsets for each class def for each OatDexFile. +// ClassOffsets[1] +// ... +// ClassOffsets[D] +// // OatClass[0] one variable sized OatClass for each of C DexFile::ClassDefs // OatClass[1] contains OatClass entries with class status, offsets to code, etc. // ... 
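The next hunk replaces the monolithic OatWriter constructor with a staged API whose required call order is documented in the header. A minimal sketch of that order, assuming an ElfWriter-style setup: the WriteOatFile() wrapper, the rodata/text streams, and the dex path are illustrative placeholders, while the OatWriter calls match the declarations below (compiling the methods through the CompilerDriver is elided).

bool WriteOatFile(File* file,
                  OutputStream* rodata,  // .rodata section stream (placeholder)
                  OutputStream* text,    // .text section stream (placeholder)
                  const CompilerDriver* driver,
                  TimingLogger* timings) {
  OatWriter oat_writer(/* compiling_boot_image */ false, timings);
  // Stage 1: register dex file sources (zipped and raw-memory variants also exist).
  if (!oat_writer.AddDexFileSource("/path/to/classes.dex", "classes.dex")) {
    return false;
  }
  // Stage 2: write the dex files into .rodata and reopen them from the oat file.
  SafeMap<std::string, std::string> key_value_store;
  std::unique_ptr<MemMap> opened_dex_files_map;
  std::vector<std::unique_ptr<const DexFile>> opened_dex_files;
  if (!oat_writer.WriteAndOpenDexFiles(rodata,
                                       file,
                                       driver->GetInstructionSet(),
                                       driver->GetInstructionSetFeatures(),
                                       &key_value_store,
                                       &opened_dex_files_map,
                                       &opened_dex_files)) {
    return false;
  }
  // Stage 3: lay out the remaining data using non-owning pointers to the
  // dex files that were just opened.
  std::vector<const DexFile*> dex_files;
  for (const std::unique_ptr<const DexFile>& dex_file : opened_dex_files) {
    dex_files.push_back(dex_file.get());
  }
  oat_writer.PrepareLayout(driver, /* image_writer */ nullptr, dex_files);
  // Stage 4: write the sections in order; WriteHeader() runs last so the
  // checksum can be finalized before the header is written.
  return oat_writer.WriteRodata(rodata) &&
         oat_writer.WriteCode(text) &&
         oat_writer.WriteHeader(rodata,
                                /* image_file_location_oat_checksum */ 0u,
                                /* image_file_location_oat_begin */ 0u,
                                /* image_patch_delta */ 0);
}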
@@ -93,15 +101,65 @@ struct MethodDebugInfo; // class OatWriter { public: - OatWriter(const std::vector<const DexFile*>& dex_files, - uint32_t image_file_location_oat_checksum, - uintptr_t image_file_location_oat_begin, - int32_t image_patch_delta, - const CompilerDriver* compiler, - ImageWriter* image_writer, - bool compiling_boot_image, - TimingLogger* timings, - SafeMap<std::string, std::string>* key_value_store); + enum class CreateTypeLookupTable { + kCreate, + kDontCreate, + kDefault = kCreate + }; + + OatWriter(bool compiling_boot_image, TimingLogger* timings); + + // To produce a valid oat file, the user must first add sources with any combination of + // - AddDexFileSource(), + // - AddZippedDexFilesSource(), + // - AddRawDexFileSource(). + // Then the user must call in order + // - WriteAndOpenDexFiles() + // - PrepareLayout(), + // - WriteRodata(), + // - WriteCode(), + // - WriteHeader(). + + // Add dex file source(s) from a file, either a plain dex file or + // a zip file with one or more dex files. + bool AddDexFileSource( + const char* filename, + const char* location, + CreateTypeLookupTable create_type_lookup_table = CreateTypeLookupTable::kDefault); + // Add dex file source(s) from a zip file specified by a file handle. + bool AddZippedDexFilesSource( + ScopedFd&& zip_fd, + const char* location, + CreateTypeLookupTable create_type_lookup_table = CreateTypeLookupTable::kDefault); + // Add dex file source from raw memory. + bool AddRawDexFileSource( + const ArrayRef<const uint8_t>& data, + const char* location, + uint32_t location_checksum, + CreateTypeLookupTable create_type_lookup_table = CreateTypeLookupTable::kDefault); + dchecked_vector<const char*> GetSourceLocations() const; + + // Write raw dex files to the .rodata section and open them from the oat file. + bool WriteAndOpenDexFiles(OutputStream* rodata, + File* file, + InstructionSet instruction_set, + const InstructionSetFeatures* instruction_set_features, + SafeMap<std::string, std::string>* key_value_store, + /*out*/ std::unique_ptr<MemMap>* opened_dex_files_map, + /*out*/ std::vector<std::unique_ptr<const DexFile>>* opened_dex_files); + // Prepare layout of remaining data. + void PrepareLayout(const CompilerDriver* compiler, + ImageWriter* image_writer, + const std::vector<const DexFile*>& dex_files); + // Write the rest of .rodata section (ClassOffsets[], OatClass[], maps). + bool WriteRodata(OutputStream* out); + // Write the code to the .text section. + bool WriteCode(OutputStream* out); + // Write the oat header. This finalizes the oat file. + bool WriteHeader(OutputStream* out, + uint32_t image_file_location_oat_checksum, + uintptr_t image_file_location_oat_begin, + int32_t image_patch_delta); // Returns whether the oat file has an associated image. bool HasImage() const { @@ -130,9 +188,6 @@ class OatWriter { return ArrayRef<const uintptr_t>(absolute_patch_locations_); } - bool WriteRodata(OutputStream* out); - bool WriteCode(OutputStream* out); - ~OatWriter(); ArrayRef<const dwarf::MethodDebugInfo> GetMethodDebugInfo() const { @@ -144,6 +199,7 @@ class OatWriter { } private: + class DexFileSource; class OatClass; class OatDexFile; @@ -174,29 +230,65 @@ class OatWriter { // with a given DexMethodVisitor. 
bool VisitDexMethods(DexMethodVisitor* visitor); - size_t InitOatHeader(); + size_t InitOatHeader(InstructionSet instruction_set, + const InstructionSetFeatures* instruction_set_features, + uint32_t num_dex_files, + SafeMap<std::string, std::string>* key_value_store); size_t InitOatDexFiles(size_t offset); - size_t InitLookupTables(size_t offset); - size_t InitDexFiles(size_t offset); size_t InitOatClasses(size_t offset); size_t InitOatMaps(size_t offset); size_t InitOatCode(size_t offset); size_t InitOatCodeDexFiles(size_t offset); - bool WriteTables(OutputStream* out, const size_t file_offset); - bool WriteLookupTables(OutputStream* out, const size_t file_offset); + bool WriteClassOffsets(OutputStream* out); + bool WriteClasses(OutputStream* out); size_t WriteMaps(OutputStream* out, const size_t file_offset, size_t relative_offset); size_t WriteCode(OutputStream* out, const size_t file_offset, size_t relative_offset); size_t WriteCodeDexFiles(OutputStream* out, const size_t file_offset, size_t relative_offset); bool GetOatDataOffset(OutputStream* out); + bool ReadDexFileHeader(File* file, OatDexFile* oat_dex_file); + bool ValidateDexFileHeader(const uint8_t* raw_header, const char* location); + bool WriteDexFiles(OutputStream* rodata, File* file); + bool WriteDexFile(OutputStream* rodata, File* file, OatDexFile* oat_dex_file); + bool SeekToDexFile(OutputStream* rodata, File* file, OatDexFile* oat_dex_file); + bool WriteDexFile(OutputStream* rodata, File* file, OatDexFile* oat_dex_file, ZipEntry* dex_file); + bool WriteDexFile(OutputStream* rodata, File* file, OatDexFile* oat_dex_file, File* dex_file); + bool WriteDexFile(OutputStream* rodata, OatDexFile* oat_dex_file, const uint8_t* dex_file); + bool WriteOatDexFiles(OutputStream* rodata); + bool ExtendForTypeLookupTables(OutputStream* rodata, File* file, size_t offset); + bool OpenDexFiles(File* file, + /*out*/ std::unique_ptr<MemMap>* opened_dex_files_map, + /*out*/ std::vector<std::unique_ptr<const DexFile>>* opened_dex_files); + bool WriteTypeLookupTables(MemMap* opened_dex_files_map, + const std::vector<std::unique_ptr<const DexFile>>& opened_dex_files); bool WriteCodeAlignment(OutputStream* out, uint32_t aligned_code_delta); bool WriteData(OutputStream* out, const void* data, size_t size); + enum class WriteState { + kAddingDexFileSources, + kPrepareLayout, + kWriteRoData, + kWriteText, + kWriteHeader, + kDone + }; + + WriteState write_state_; + TimingLogger* timings_; + + std::vector<std::unique_ptr<File>> raw_dex_files_; + std::vector<std::unique_ptr<ZipArchive>> zip_archives_; + std::vector<std::unique_ptr<ZipEntry>> zipped_dex_files_; + + // Using std::list<> which doesn't move elements around on push/emplace_back(). + // We need this because we keep plain pointers to the strings' c_str(). + std::list<std::string> zipped_dex_file_locations_; + dchecked_vector<dwarf::MethodDebugInfo> method_info_; - const CompilerDriver* const compiler_driver_; - ImageWriter* const image_writer_; + const CompilerDriver* compiler_driver_; + ImageWriter* image_writer_; const bool compiling_boot_image_; // note OatFile does not take ownership of the DexFiles @@ -215,13 +307,7 @@ class OatWriter { // Offset of the oat data from the start of the mmapped region of the elf file. size_t oat_data_offset_; - // dependencies on the image. 
- uint32_t image_file_location_oat_checksum_; - uintptr_t image_file_location_oat_begin_; - int32_t image_patch_delta_; - // data to write - SafeMap<std::string, std::string>* key_value_store_; std::unique_ptr<OatHeader> oat_header_; dchecked_vector<OatDexFile> oat_dex_files_; dchecked_vector<OatClass> oat_classes_; @@ -257,10 +343,12 @@ class OatWriter { uint32_t size_oat_dex_file_location_data_; uint32_t size_oat_dex_file_location_checksum_; uint32_t size_oat_dex_file_offset_; + uint32_t size_oat_dex_file_class_offsets_offset_; uint32_t size_oat_dex_file_lookup_table_offset_; - uint32_t size_oat_dex_file_class_offsets_; uint32_t size_oat_lookup_table_alignment_; uint32_t size_oat_lookup_table_; + uint32_t size_oat_class_offsets_alignment_; + uint32_t size_oat_class_offsets_; uint32_t size_oat_class_type_; uint32_t size_oat_class_status_; uint32_t size_oat_class_method_bitmaps_; @@ -269,7 +357,7 @@ class OatWriter { std::unique_ptr<linker::RelativePatcher> relative_patcher_; // The locations of absolute patches relative to the start of the executable section. - std::vector<uintptr_t> absolute_patch_locations_; + dchecked_vector<uintptr_t> absolute_patch_locations_; // Map method reference to assigned offset. // Wrap the map in a class implementing linker::RelativePatcherTargetProvider. diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h index 26bf1cbc75..1d604e7135 100644 --- a/compiler/optimizing/builder.h +++ b/compiler/optimizing/builder.h @@ -56,7 +56,6 @@ class HGraphBuilder : public ValueObject { return_type_(Primitive::GetType(dex_compilation_unit_->GetShorty()[0])), code_start_(nullptr), latest_result_(nullptr), - can_use_baseline_for_string_init_(true), compilation_stats_(compiler_stats), interpreter_metadata_(interpreter_metadata), dex_cache_(dex_cache) {} @@ -77,7 +76,6 @@ class HGraphBuilder : public ValueObject { return_type_(return_type), code_start_(nullptr), latest_result_(nullptr), - can_use_baseline_for_string_init_(true), compilation_stats_(nullptr), interpreter_metadata_(nullptr), null_dex_cache_(), @@ -85,10 +83,6 @@ class HGraphBuilder : public ValueObject { bool BuildGraph(const DexFile::CodeItem& code); - bool CanUseBaselineForStringInit() const { - return can_use_baseline_for_string_init_; - } - static constexpr const char* kBuilderPassName = "builder"; // The number of entries in a packed switch before we use a jump table or specified @@ -363,11 +357,6 @@ class HGraphBuilder : public ValueObject { // used by move-result instructions. HInstruction* latest_result_; - // We need to know whether we have built a graph that has calls to StringFactory - // and hasn't gone through the verifier. If the following flag is `false`, then - // we cannot compile with baseline. 
- bool can_use_baseline_for_string_init_; - OptimizingCompilerStats* compilation_stats_; const uint8_t* interpreter_metadata_; diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index ea0b9eca9a..a3bbfdbd27 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -142,23 +142,6 @@ size_t CodeGenerator::GetCachePointerOffset(uint32_t index) { return pointer_size * index; } -void CodeGenerator::CompileBaseline(CodeAllocator* allocator, bool is_leaf) { - Initialize(); - if (!is_leaf) { - MarkNotLeaf(); - } - const bool is_64_bit = Is64BitInstructionSet(GetInstructionSet()); - InitializeCodeGeneration(GetGraph()->GetNumberOfLocalVRegs() - + GetGraph()->GetTemporariesVRegSlots() - + 1 /* filler */, - 0, /* the baseline compiler does not have live registers at slow path */ - 0, /* the baseline compiler does not have live registers at slow path */ - GetGraph()->GetMaximumNumberOfOutVRegs() - + (is_64_bit ? 2 : 1) /* current method */, - GetGraph()->GetBlocks()); - CompileInternal(allocator, /* is_baseline */ true); -} - bool CodeGenerator::GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const { DCHECK_EQ((*block_order_)[current_block_index_], current); return GetNextBlockToEmit() == FirstNonEmptyBlock(next); @@ -220,8 +203,12 @@ void CodeGenerator::GenerateSlowPaths() { current_slow_path_ = nullptr; } -void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline) { - is_baseline_ = is_baseline; +void CodeGenerator::Compile(CodeAllocator* allocator) { + // The register allocator already called `InitializeCodeGeneration`, + // where the frame size has been computed. + DCHECK(block_order_ != nullptr); + Initialize(); + HGraphVisitor* instruction_visitor = GetInstructionVisitor(); DCHECK_EQ(current_block_index_, 0u); @@ -242,9 +229,6 @@ void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline) for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* current = it.Current(); DisassemblyScope disassembly_scope(current, *this); - if (is_baseline) { - InitLocationsBaseline(current); - } DCHECK(CheckTypeConsistency(current)); current->Accept(instruction_visitor); } @@ -254,7 +238,7 @@ void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline) // Emit catch stack maps at the end of the stack map stream as expected by the // runtime exception handler. - if (!is_baseline && graph_->HasTryCatch()) { + if (graph_->HasTryCatch()) { RecordCatchBlockInfo(); } @@ -262,14 +246,6 @@ void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline) Finalize(allocator); } -void CodeGenerator::CompileOptimized(CodeAllocator* allocator) { - // The register allocator already called `InitializeCodeGeneration`, - // where the frame size has been computed. - DCHECK(block_order_ != nullptr); - Initialize(); - CompileInternal(allocator, /* is_baseline */ false); -} - void CodeGenerator::Finalize(CodeAllocator* allocator) { size_t code_size = GetAssembler()->CodeSize(); uint8_t* buffer = allocator->Allocate(code_size); @@ -282,29 +258,6 @@ void CodeGenerator::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches A // No linker patches by default. 
} -size_t CodeGenerator::FindFreeEntry(bool* array, size_t length) { - for (size_t i = 0; i < length; ++i) { - if (!array[i]) { - array[i] = true; - return i; - } - } - LOG(FATAL) << "Could not find a register in baseline register allocator"; - UNREACHABLE(); -} - -size_t CodeGenerator::FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t length) { - for (size_t i = 0; i < length - 1; i += 2) { - if (!array[i] && !array[i + 1]) { - array[i] = true; - array[i + 1] = true; - return i; - } - } - LOG(FATAL) << "Could not find a register in baseline register allocator"; - UNREACHABLE(); -} - void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots, size_t maximum_number_of_live_core_registers, size_t maximum_number_of_live_fpu_registers, @@ -592,123 +545,6 @@ void CodeGenerator::BlockIfInRegister(Location location, bool is_out) const { } } -void CodeGenerator::AllocateRegistersLocally(HInstruction* instruction) const { - LocationSummary* locations = instruction->GetLocations(); - if (locations == nullptr) return; - - for (size_t i = 0, e = GetNumberOfCoreRegisters(); i < e; ++i) { - blocked_core_registers_[i] = false; - } - - for (size_t i = 0, e = GetNumberOfFloatingPointRegisters(); i < e; ++i) { - blocked_fpu_registers_[i] = false; - } - - for (size_t i = 0, e = number_of_register_pairs_; i < e; ++i) { - blocked_register_pairs_[i] = false; - } - - // Mark all fixed input, temp and output registers as used. - for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) { - BlockIfInRegister(locations->InAt(i)); - } - - for (size_t i = 0, e = locations->GetTempCount(); i < e; ++i) { - Location loc = locations->GetTemp(i); - BlockIfInRegister(loc); - } - Location result_location = locations->Out(); - if (locations->OutputCanOverlapWithInputs()) { - BlockIfInRegister(result_location, /* is_out */ true); - } - - SetupBlockedRegisters(/* is_baseline */ true); - - // Allocate all unallocated input locations. - for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) { - Location loc = locations->InAt(i); - HInstruction* input = instruction->InputAt(i); - if (loc.IsUnallocated()) { - if ((loc.GetPolicy() == Location::kRequiresRegister) - || (loc.GetPolicy() == Location::kRequiresFpuRegister)) { - loc = AllocateFreeRegister(input->GetType()); - } else { - DCHECK_EQ(loc.GetPolicy(), Location::kAny); - HLoadLocal* load = input->AsLoadLocal(); - if (load != nullptr) { - loc = GetStackLocation(load); - } else { - loc = AllocateFreeRegister(input->GetType()); - } - } - locations->SetInAt(i, loc); - } - } - - // Allocate all unallocated temp locations. - for (size_t i = 0, e = locations->GetTempCount(); i < e; ++i) { - Location loc = locations->GetTemp(i); - if (loc.IsUnallocated()) { - switch (loc.GetPolicy()) { - case Location::kRequiresRegister: - // Allocate a core register (large enough to fit a 32-bit integer). - loc = AllocateFreeRegister(Primitive::kPrimInt); - break; - - case Location::kRequiresFpuRegister: - // Allocate a core register (large enough to fit a 64-bit double). 
- loc = AllocateFreeRegister(Primitive::kPrimDouble); - break; - - default: - LOG(FATAL) << "Unexpected policy for temporary location " - << loc.GetPolicy(); - } - locations->SetTempAt(i, loc); - } - } - if (result_location.IsUnallocated()) { - switch (result_location.GetPolicy()) { - case Location::kAny: - case Location::kRequiresRegister: - case Location::kRequiresFpuRegister: - result_location = AllocateFreeRegister(instruction->GetType()); - break; - case Location::kSameAsFirstInput: - result_location = locations->InAt(0); - break; - } - locations->UpdateOut(result_location); - } -} - -void CodeGenerator::InitLocationsBaseline(HInstruction* instruction) { - AllocateLocations(instruction); - if (instruction->GetLocations() == nullptr) { - if (instruction->IsTemporary()) { - HInstruction* previous = instruction->GetPrevious(); - Location temp_location = GetTemporaryLocation(instruction->AsTemporary()); - Move(previous, temp_location, instruction); - } - return; - } - AllocateRegistersLocally(instruction); - for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) { - Location location = instruction->GetLocations()->InAt(i); - HInstruction* input = instruction->InputAt(i); - if (location.IsValid()) { - // Move the input to the desired location. - if (input->GetNext()->IsTemporary()) { - // If the input was stored in a temporary, use that temporary to - // perform the move. - Move(input->GetNext(), location, instruction); - } else { - Move(input, location, instruction); - } - } - } -} - void CodeGenerator::AllocateLocations(HInstruction* instruction) { instruction->Accept(GetLocationBuilder()); DCHECK(CheckTypeConsistency(instruction)); @@ -789,132 +625,6 @@ CodeGenerator* CodeGenerator::Create(HGraph* graph, } } -void CodeGenerator::BuildNativeGCMap( - ArenaVector<uint8_t>* data, const CompilerDriver& compiler_driver) const { - const std::vector<uint8_t>& gc_map_raw = - compiler_driver.GetVerifiedMethod(&GetGraph()->GetDexFile(), GetGraph()->GetMethodIdx()) - ->GetDexGcMap(); - verifier::DexPcToReferenceMap dex_gc_map(&(gc_map_raw)[0]); - - uint32_t max_native_offset = stack_map_stream_.ComputeMaxNativePcOffset(); - - size_t num_stack_maps = stack_map_stream_.GetNumberOfStackMaps(); - GcMapBuilder builder(data, num_stack_maps, max_native_offset, dex_gc_map.RegWidth()); - for (size_t i = 0; i != num_stack_maps; ++i) { - const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i); - uint32_t native_offset = stack_map_entry.native_pc_offset; - uint32_t dex_pc = stack_map_entry.dex_pc; - const uint8_t* references = dex_gc_map.FindBitMap(dex_pc, false); - CHECK(references != nullptr) << "Missing ref for dex pc 0x" << std::hex << dex_pc; - builder.AddEntry(native_offset, references); - } -} - -void CodeGenerator::BuildMappingTable(ArenaVector<uint8_t>* data) const { - uint32_t pc2dex_data_size = 0u; - uint32_t pc2dex_entries = stack_map_stream_.GetNumberOfStackMaps(); - uint32_t pc2dex_offset = 0u; - int32_t pc2dex_dalvik_offset = 0; - uint32_t dex2pc_data_size = 0u; - uint32_t dex2pc_entries = 0u; - uint32_t dex2pc_offset = 0u; - int32_t dex2pc_dalvik_offset = 0; - - for (size_t i = 0; i < pc2dex_entries; i++) { - const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i); - pc2dex_data_size += UnsignedLeb128Size(stack_map_entry.native_pc_offset - pc2dex_offset); - pc2dex_data_size += SignedLeb128Size(stack_map_entry.dex_pc - pc2dex_dalvik_offset); - pc2dex_offset = stack_map_entry.native_pc_offset; - pc2dex_dalvik_offset = 
stack_map_entry.dex_pc; - } - - // Walk over the blocks and find which ones correspond to catch block entries. - for (HBasicBlock* block : graph_->GetBlocks()) { - if (block->IsCatchBlock()) { - intptr_t native_pc = GetAddressOf(block); - ++dex2pc_entries; - dex2pc_data_size += UnsignedLeb128Size(native_pc - dex2pc_offset); - dex2pc_data_size += SignedLeb128Size(block->GetDexPc() - dex2pc_dalvik_offset); - dex2pc_offset = native_pc; - dex2pc_dalvik_offset = block->GetDexPc(); - } - } - - uint32_t total_entries = pc2dex_entries + dex2pc_entries; - uint32_t hdr_data_size = UnsignedLeb128Size(total_entries) + UnsignedLeb128Size(pc2dex_entries); - uint32_t data_size = hdr_data_size + pc2dex_data_size + dex2pc_data_size; - data->resize(data_size); - - uint8_t* data_ptr = &(*data)[0]; - uint8_t* write_pos = data_ptr; - - write_pos = EncodeUnsignedLeb128(write_pos, total_entries); - write_pos = EncodeUnsignedLeb128(write_pos, pc2dex_entries); - DCHECK_EQ(static_cast<size_t>(write_pos - data_ptr), hdr_data_size); - uint8_t* write_pos2 = write_pos + pc2dex_data_size; - - pc2dex_offset = 0u; - pc2dex_dalvik_offset = 0u; - dex2pc_offset = 0u; - dex2pc_dalvik_offset = 0u; - - for (size_t i = 0; i < pc2dex_entries; i++) { - const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i); - DCHECK(pc2dex_offset <= stack_map_entry.native_pc_offset); - write_pos = EncodeUnsignedLeb128(write_pos, stack_map_entry.native_pc_offset - pc2dex_offset); - write_pos = EncodeSignedLeb128(write_pos, stack_map_entry.dex_pc - pc2dex_dalvik_offset); - pc2dex_offset = stack_map_entry.native_pc_offset; - pc2dex_dalvik_offset = stack_map_entry.dex_pc; - } - - for (HBasicBlock* block : graph_->GetBlocks()) { - if (block->IsCatchBlock()) { - intptr_t native_pc = GetAddressOf(block); - write_pos2 = EncodeUnsignedLeb128(write_pos2, native_pc - dex2pc_offset); - write_pos2 = EncodeSignedLeb128(write_pos2, block->GetDexPc() - dex2pc_dalvik_offset); - dex2pc_offset = native_pc; - dex2pc_dalvik_offset = block->GetDexPc(); - } - } - - - DCHECK_EQ(static_cast<size_t>(write_pos - data_ptr), hdr_data_size + pc2dex_data_size); - DCHECK_EQ(static_cast<size_t>(write_pos2 - data_ptr), data_size); - - if (kIsDebugBuild) { - // Verify the encoded table holds the expected data. - MappingTable table(data_ptr); - CHECK_EQ(table.TotalSize(), total_entries); - CHECK_EQ(table.PcToDexSize(), pc2dex_entries); - auto it = table.PcToDexBegin(); - auto it2 = table.DexToPcBegin(); - for (size_t i = 0; i < pc2dex_entries; i++) { - const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i); - CHECK_EQ(stack_map_entry.native_pc_offset, it.NativePcOffset()); - CHECK_EQ(stack_map_entry.dex_pc, it.DexPc()); - ++it; - } - for (HBasicBlock* block : graph_->GetBlocks()) { - if (block->IsCatchBlock()) { - CHECK_EQ(GetAddressOf(block), it2.NativePcOffset()); - CHECK_EQ(block->GetDexPc(), it2.DexPc()); - ++it2; - } - } - CHECK(it == table.PcToDexEnd()); - CHECK(it2 == table.DexToPcEnd()); - } -} - -void CodeGenerator::BuildVMapTable(ArenaVector<uint8_t>* data) const { - Leb128Encoder<ArenaVector<uint8_t>> vmap_encoder(data); - // We currently don't use callee-saved registers. - size_t size = 0 + 1 /* marker */ + 0; - vmap_encoder.Reserve(size + 1u); // All values are likely to be one byte in ULEB128 (<128). 
- vmap_encoder.PushBackUnsigned(size); - vmap_encoder.PushBackUnsigned(VmapTable::kAdjustedFpMarker); -} - size_t CodeGenerator::ComputeStackMapsSize() { return stack_map_stream_.PrepareForFillIn(); } diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 5958cd89bc..4f8f146753 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -158,10 +158,8 @@ class FieldAccessCallingConvention { class CodeGenerator { public: - // Compiles the graph to executable instructions. Returns whether the compilation - // succeeded. - void CompileBaseline(CodeAllocator* allocator, bool is_leaf = false); - void CompileOptimized(CodeAllocator* allocator); + // Compiles the graph to executable instructions. + void Compile(CodeAllocator* allocator); static CodeGenerator* Create(HGraph* graph, InstructionSet instruction_set, const InstructionSetFeatures& isa_features, @@ -214,7 +212,7 @@ class CodeGenerator { size_t GetNumberOfCoreRegisters() const { return number_of_core_registers_; } size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; } - virtual void SetupBlockedRegisters(bool is_baseline) const = 0; + virtual void SetupBlockedRegisters() const = 0; virtual void ComputeSpillMask() { core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_; @@ -290,17 +288,9 @@ class CodeGenerator { slow_paths_.push_back(slow_path); } - void BuildMappingTable(ArenaVector<uint8_t>* vector) const; - void BuildVMapTable(ArenaVector<uint8_t>* vector) const; - void BuildNativeGCMap( - ArenaVector<uint8_t>* vector, const CompilerDriver& compiler_driver) const; void BuildStackMaps(MemoryRegion region); size_t ComputeStackMapsSize(); - bool IsBaseline() const { - return is_baseline_; - } - bool IsLeafMethod() const { return is_leaf_; } @@ -489,7 +479,6 @@ class CodeGenerator { fpu_callee_save_mask_(fpu_callee_save_mask), stack_map_stream_(graph->GetArena()), block_order_(nullptr), - is_baseline_(false), disasm_info_(nullptr), stats_(stats), graph_(graph), @@ -502,15 +491,6 @@ class CodeGenerator { slow_paths_.reserve(8); } - // Register allocation logic. - void AllocateRegistersLocally(HInstruction* instruction) const; - - // Backend specific implementation for allocating a register. - virtual Location AllocateFreeRegister(Primitive::Type type) const = 0; - - static size_t FindFreeEntry(bool* array, size_t length); - static size_t FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t length); - virtual Location GetStackLocation(HLoadLocal* load) const = 0; virtual HGraphVisitor* GetLocationBuilder() = 0; @@ -593,16 +573,11 @@ class CodeGenerator { // The order to use for code generation. const ArenaVector<HBasicBlock*>* block_order_; - // Whether we are using baseline. 
- bool is_baseline_; - DisassemblyInformation* disasm_info_; private: - void InitLocationsBaseline(HInstruction* instruction); size_t GetStackOffsetOfSavedRegister(size_t index); void GenerateSlowPaths(); - void CompileInternal(CodeAllocator* allocator, bool is_baseline); void BlockIfInRegister(Location location, bool is_out = false) const; void EmitEnvironment(HEnvironment* environment, SlowPathCode* slow_path); diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index a11ceb9bd9..272579219f 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -47,9 +47,7 @@ static bool ExpectedPairLayout(Location location) { static constexpr int kCurrentMethodStackOffset = 0; static constexpr Register kMethodRegisterArgument = R0; -// We unconditionally allocate R5 to ensure we can do long operations -// with baseline. -static constexpr Register kCoreSavedRegisterForBaseline = R5; +static constexpr Register kCoreAlwaysSpillRegister = R5; static constexpr Register kCoreCalleeSaves[] = { R5, R6, R7, R8, R10, R11, LR }; static constexpr SRegister kFpuCalleeSaves[] = @@ -728,6 +726,24 @@ inline Condition ARMUnsignedCondition(IfCondition cond) { UNREACHABLE(); } +inline Condition ARMFPCondition(IfCondition cond, bool gt_bias) { + // The ARM condition codes can express all the necessary branches, see the + // "Meaning (floating-point)" column in the table A8-1 of the ARMv7 reference manual. + // There is no dex instruction or HIR that would need the missing conditions + // "equal or unordered" or "not equal". + switch (cond) { + case kCondEQ: return EQ; + case kCondNE: return NE /* unordered */; + case kCondLT: return gt_bias ? CC : LT /* unordered */; + case kCondLE: return gt_bias ? LS : LE /* unordered */; + case kCondGT: return gt_bias ? HI /* unordered */ : GT; + case kCondGE: return gt_bias ? CS /* unordered */ : GE; + default: + LOG(FATAL) << "UNREACHABLE"; + UNREACHABLE(); + } +} + void CodeGeneratorARM::DumpCoreRegister(std::ostream& stream, int reg) const { stream << Register(reg); } @@ -815,58 +831,7 @@ void CodeGeneratorARM::Finalize(CodeAllocator* allocator) { CodeGenerator::Finalize(allocator); } -Location CodeGeneratorARM::AllocateFreeRegister(Primitive::Type type) const { - switch (type) { - case Primitive::kPrimLong: { - size_t reg = FindFreeEntry(blocked_register_pairs_, kNumberOfRegisterPairs); - ArmManagedRegister pair = - ArmManagedRegister::FromRegisterPair(static_cast<RegisterPair>(reg)); - DCHECK(!blocked_core_registers_[pair.AsRegisterPairLow()]); - DCHECK(!blocked_core_registers_[pair.AsRegisterPairHigh()]); - - blocked_core_registers_[pair.AsRegisterPairLow()] = true; - blocked_core_registers_[pair.AsRegisterPairHigh()] = true; - UpdateBlockedPairRegisters(); - return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh()); - } - - case Primitive::kPrimByte: - case Primitive::kPrimBoolean: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimNot: { - int reg = FindFreeEntry(blocked_core_registers_, kNumberOfCoreRegisters); - // Block all register pairs that contain `reg`. 
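The gt_bias flag taken by ARMFPCondition above carries the dex-level distinction between the cmpg and cmpl float comparisons, that is, whether a NaN operand behaves as greater or as less. A sketch of the semantics the chosen condition codes must preserve (plain C++, not ART code):

// Returns -1, 0 or 1 like the dex cmpl/cmpg bytecodes; an unordered
// (NaN) comparison yields 1 under gt bias and -1 under lt bias.
int CompareFloat(float a, float b, bool gt_bias) {
  if (a < b) return -1;
  if (a > b) return 1;
  if (a == b) return 0;
  return gt_bias ? 1 : -1;  // Unordered: at least one operand is NaN.
}

This is why kCondLT maps to CC (ordered "less than") under gt bias, so a NaN operand does not take the branch, but to LT otherwise, which on ARM also fires for unordered operands.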
- for (int i = 0; i < kNumberOfRegisterPairs; i++) { - ArmManagedRegister current = - ArmManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i)); - if (current.AsRegisterPairLow() == reg || current.AsRegisterPairHigh() == reg) { - blocked_register_pairs_[i] = true; - } - } - return Location::RegisterLocation(reg); - } - - case Primitive::kPrimFloat: { - int reg = FindFreeEntry(blocked_fpu_registers_, kNumberOfSRegisters); - return Location::FpuRegisterLocation(reg); - } - - case Primitive::kPrimDouble: { - int reg = FindTwoFreeConsecutiveAlignedEntries(blocked_fpu_registers_, kNumberOfSRegisters); - DCHECK_EQ(reg % 2, 0); - return Location::FpuRegisterPairLocation(reg, reg + 1); - } - - case Primitive::kPrimVoid: - LOG(FATAL) << "Unreachable type " << type; - } - - return Location::NoLocation(); -} - -void CodeGeneratorARM::SetupBlockedRegisters(bool is_baseline) const { +void CodeGeneratorARM::SetupBlockedRegisters() const { // Don't allocate the dalvik style register pair passing. blocked_register_pairs_[R1_R2] = true; @@ -881,15 +846,7 @@ void CodeGeneratorARM::SetupBlockedRegisters(bool is_baseline) const { // Reserve temp register. blocked_core_registers_[IP] = true; - if (is_baseline) { - for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) { - blocked_core_registers_[kCoreCalleeSaves[i]] = true; - } - - blocked_core_registers_[kCoreSavedRegisterForBaseline] = false; - } - - if (is_baseline || GetGraph()->IsDebuggable()) { + if (GetGraph()->IsDebuggable()) { // Stubs do not save callee-save floating point registers. If the graph // is debuggable, we need to deal with these registers differently. For // now, just block them. @@ -919,11 +876,10 @@ InstructionCodeGeneratorARM::InstructionCodeGeneratorARM(HGraph* graph, CodeGene void CodeGeneratorARM::ComputeSpillMask() { core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_; - // Save one extra register for baseline. Note that on thumb2, there is no easy - // instruction to restore just the PC, so this actually helps both baseline - // and non-baseline to save and restore at least two registers at entry and exit. - core_spill_mask_ |= (1 << kCoreSavedRegisterForBaseline); DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved"; + // There is no easy instruction to restore just the PC on thumb2. We spill and + // restore another arbitrary register. + core_spill_mask_ |= (1 << kCoreAlwaysSpillRegister); fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_; // We use vpush and vpop for saving and restoring floating point registers, which take // a SRegister and the number of registers to save/restore after that SRegister. We @@ -1416,15 +1372,9 @@ void InstructionCodeGeneratorARM::VisitExit(HExit* exit ATTRIBUTE_UNUSED) { void InstructionCodeGeneratorARM::GenerateFPJumps(HCondition* cond, Label* true_label, - Label* false_label) { + Label* false_label ATTRIBUTE_UNUSED) { __ vmstat(); // transfer FP status register to ARM APSR. - // TODO: merge into a single branch (except "equal or unordered" and "not equal") - if (cond->IsFPConditionTrueIfNaN()) { - __ b(true_label, VS); // VS for unordered. - } else if (cond->IsFPConditionFalseIfNaN()) { - __ b(false_label, VS); // VS for unordered. 
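The ComputeSpillMask hunk above keeps the unconditional R5 spill but re-motivates it: thumb2 has no cheap instruction to restore only the PC, so always pairing it with one fixed register keeps the push/pop sequences simple. A sketch of the mask computation (hypothetical standalone helper, not ART code):

#include <cstdint>

// Spill the callee-saved core registers the allocator actually used,
// plus the always-spilled register (R5 here), so entry and exit save
// and restore at least two registers (e.g. push {r5, lr} ... pop {r5, pc}).
uint32_t ComputeCoreSpillMask(uint32_t allocated_registers,
                              uint32_t core_callee_save_mask,
                              int always_spill_reg) {
  uint32_t mask = allocated_registers & core_callee_save_mask;
  mask |= 1u << always_spill_reg;
  return mask;
}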
- } - __ b(true_label, ARMCondition(cond->GetCondition())); + __ b(true_label, ARMFPCondition(cond->GetCondition(), cond->IsGtBias())); } void InstructionCodeGeneratorARM::GenerateLongComparesAndJumps(HCondition* cond, @@ -1972,9 +1922,9 @@ void InstructionCodeGeneratorARM::VisitInvokeUnresolved(HInvokeUnresolved* invok } void LocationsBuilderARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { - // When we do not run baseline, explicit clinit checks triggered by static - // invokes must have been pruned by art::PrepareForRegisterAllocation. - DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck()); + // Explicit clinit checks triggered by static invokes must have been pruned by + // art::PrepareForRegisterAllocation. + DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); IntrinsicLocationsBuilderARM intrinsic(GetGraph()->GetArena(), codegen_->GetAssembler(), @@ -2004,9 +1954,9 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM* codegen) } void InstructionCodeGeneratorARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { - // When we do not run baseline, explicit clinit checks triggered by static - // invokes must have been pruned by art::PrepareForRegisterAllocation. - DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck()); + // Explicit clinit checks triggered by static invokes must have been pruned by + // art::PrepareForRegisterAllocation. + DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); if (TryGenerateIntrinsicCode(invoke, codegen_)) { return; @@ -3803,6 +3753,7 @@ void InstructionCodeGeneratorARM::VisitCompare(HCompare* compare) { Label less, greater, done; Primitive::Type type = compare->InputAt(0)->GetType(); + Condition less_cond; switch (type) { case Primitive::kPrimLong: { __ cmp(left.AsRegisterPairHigh<Register>(), @@ -3813,6 +3764,7 @@ void InstructionCodeGeneratorARM::VisitCompare(HCompare* compare) { __ LoadImmediate(out, 0); __ cmp(left.AsRegisterPairLow<Register>(), ShifterOperand(right.AsRegisterPairLow<Register>())); // Unsigned compare. + less_cond = LO; break; } case Primitive::kPrimFloat: @@ -3825,14 +3777,15 @@ void InstructionCodeGeneratorARM::VisitCompare(HCompare* compare) { FromLowSToD(right.AsFpuRegisterPairLow<SRegister>())); } __ vmstat(); // transfer FP status register to ARM APSR. - __ b(compare->IsGtBias() ? &greater : &less, VS); // VS for unordered. + less_cond = ARMFPCondition(kCondLT, compare->IsGtBias()); break; } default: LOG(FATAL) << "Unexpected compare type " << type; + UNREACHABLE(); } __ b(&done, EQ); - __ b(&less, LO); // LO is for both: unsigned compare for longs and 'less than' for floats. + __ b(&less, less_cond); __ Bind(&greater); __ LoadImmediate(out, 1); @@ -5530,7 +5483,7 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: { // Note that we indeed only call on slow path, but we always go - // into the slow path for the unresolved & interface check + // into the slow path for the unresolved and interface check // cases. // // We cannot directly call the InstanceofNonTrivial runtime @@ -5740,8 +5693,8 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: - // We always go into the type check slow path for the unresolved & - // interface check cases. + // We always go into the type check slow path for the unresolved + // and interface check cases. 
// // We cannot directly call the CheckCast runtime entry point // without resorting to a type checking slow path here (i.e. by @@ -6027,6 +5980,7 @@ void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruct new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, root, root); codegen_->AddSlowPath(slow_path); + // IP = Thread::Current()->GetIsGcMarking() __ LoadFromOffset( kLoadWord, IP, TR, Thread::IsGcMarkingOffset<kArmWordSize>().Int32Value()); __ CompareAndBranchIfNonZero(IP, slow_path->GetEntryLabel()); @@ -6105,11 +6059,8 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i // } // // Note: the original implementation in ReadBarrier::Barrier is - // slightly more complex as: - // - it implements the load-load fence using a data dependency on - // the high-bits of rb_state, which are expected to be all zeroes; - // - it performs additional checks that we do not do here for - // performance reasons. + // slightly more complex as it performs additional checks that we do + // not do here for performance reasons. Register ref_reg = ref.AsRegister<Register>(); Register temp_reg = temp.AsRegister<Register>(); diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 26d6d63b31..d45ea973f9 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -340,9 +340,7 @@ class CodeGeneratorARM : public CodeGenerator { return GetLabelOf(block)->Position(); } - void SetupBlockedRegisters(bool is_baseline) const OVERRIDE; - - Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE; + void SetupBlockedRegisters() const OVERRIDE; Location GetStackLocation(HLoadLocal* load) const OVERRIDE; @@ -444,7 +442,7 @@ class CodeGeneratorARM : public CodeGenerator { // Fast path implementation of ReadBarrier::Barrier for a heap // reference field load when Baker's read barriers are used. void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, - Location out, + Location ref, Register obj, uint32_t offset, Location temp, @@ -452,7 +450,7 @@ class CodeGeneratorARM : public CodeGenerator { // Fast path implementation of ReadBarrier::Barrier for a heap // reference array load when Baker's read barriers are used. void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, - Location out, + Location ref, Register obj, uint32_t data_offset, Location index, diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 6ed2c5ab38..c0e3959933 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -93,6 +93,24 @@ inline Condition ARM64Condition(IfCondition cond) { UNREACHABLE(); } +inline Condition ARM64FPCondition(IfCondition cond, bool gt_bias) { + // The ARM64 condition codes can express all the necessary branches, see the + // "Meaning (floating-point)" column in the table C1-1 in the ARMv8 reference manual. + // There is no dex instruction or HIR that would need the missing conditions + // "equal or unordered" or "not equal". + switch (cond) { + case kCondEQ: return eq; + case kCondNE: return ne /* unordered */; + case kCondLT: return gt_bias ? cc : lt /* unordered */; + case kCondLE: return gt_bias ? ls : le /* unordered */; + case kCondGT: return gt_bias ? hi /* unordered */ : gt; + case kCondGE: return gt_bias ? 
cs /* unordered */ : ge; + default: + LOG(FATAL) << "UNREACHABLE"; + UNREACHABLE(); + } +} + Location ARM64ReturnLocation(Primitive::Type return_type) { // Note that in practice, `LocationFrom(x0)` and `LocationFrom(w0)` create the // same Location object, and so do `LocationFrom(d0)` and `LocationFrom(s0)`, @@ -566,6 +584,56 @@ void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) { } } +// Slow path marking an object during a read barrier. +class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 { + public: + ReadBarrierMarkSlowPathARM64(HInstruction* instruction, Location out, Location obj) + : instruction_(instruction), out_(out), obj_(obj) { + DCHECK(kEmitCompilerReadBarrier); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathARM64"; } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + Primitive::Type type = Primitive::kPrimNot; + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg())); + DCHECK(instruction_->IsInstanceFieldGet() || + instruction_->IsStaticFieldGet() || + instruction_->IsArrayGet() || + instruction_->IsLoadClass() || + instruction_->IsLoadString() || + instruction_->IsInstanceOf() || + instruction_->IsCheckCast()) + << "Unexpected instruction in read barrier marking slow path: " + << instruction_->DebugName(); + + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConvention calling_convention; + CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); + arm64_codegen->MoveLocation(LocationFrom(calling_convention.GetRegisterAt(0)), obj_, type); + arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierMark), + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes<kQuickReadBarrierMark, mirror::Object*, mirror::Object*>(); + arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type); + + RestoreLiveRegisters(codegen, locations); + __ B(GetExitLabel()); + } + + private: + HInstruction* const instruction_; + const Location out_; + const Location obj_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM64); +}; + // Slow path generating a read barrier for a heap reference. class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { public: @@ -587,7 +655,7 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { // to be instrumented, e.g.: // // __ Ldr(out, HeapOperand(out, class_offset); - // codegen_->GenerateReadBarrier(instruction, out_loc, out_loc, out_loc, offset); + // codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset); // // In that case, we have lost the information about the original // object, and the emitted read barrier cannot work properly. @@ -603,7 +671,9 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg())); DCHECK(!instruction_->IsInvoke() || (instruction_->IsInvokeStaticOrDirect() && - instruction_->GetLocations()->Intrinsified())); + instruction_->GetLocations()->Intrinsified())) + << "Unexpected instruction in read barrier for heap reference slow path: " + << instruction_->DebugName(); // The read barrier instrumentation does not support the // HArm64IntermediateAddress instruction yet. 
DCHECK(!(instruction_->IsArrayGet() && @@ -751,14 +821,18 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 { public: ReadBarrierForRootSlowPathARM64(HInstruction* instruction, Location out, Location root) - : instruction_(instruction), out_(out), root_(root) {} + : instruction_(instruction), out_(out), root_(root) { + DCHECK(kEmitCompilerReadBarrier); + } void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); Primitive::Type type = Primitive::kPrimNot; DCHECK(locations->CanCall()); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg())); - DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()); + DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()) + << "Unexpected instruction in read barrier for GC root slow path: " + << instruction_->DebugName(); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); @@ -1094,7 +1168,7 @@ void CodeGeneratorARM64::MarkGCCard(Register object, Register value, bool value_ } } -void CodeGeneratorARM64::SetupBlockedRegisters(bool is_baseline) const { +void CodeGeneratorARM64::SetupBlockedRegisters() const { // Blocked core registers: // lr : Runtime reserved. // tr : Runtime reserved. @@ -1115,40 +1189,17 @@ void CodeGeneratorARM64::SetupBlockedRegisters(bool is_baseline) const { blocked_fpu_registers_[reserved_fp_registers.PopLowestIndex().code()] = true; } - if (is_baseline) { - CPURegList reserved_core_baseline_registers = callee_saved_core_registers; - while (!reserved_core_baseline_registers.IsEmpty()) { - blocked_core_registers_[reserved_core_baseline_registers.PopLowestIndex().code()] = true; - } - } - - if (is_baseline || GetGraph()->IsDebuggable()) { + if (GetGraph()->IsDebuggable()) { // Stubs do not save callee-save floating point registers. If the graph // is debuggable, we need to deal with these registers differently. For // now, just block them. 
- CPURegList reserved_fp_baseline_registers = callee_saved_fp_registers; - while (!reserved_fp_baseline_registers.IsEmpty()) { - blocked_fpu_registers_[reserved_fp_baseline_registers.PopLowestIndex().code()] = true; + CPURegList reserved_fp_registers_debuggable = callee_saved_fp_registers; + while (!reserved_fp_registers_debuggable.IsEmpty()) { + blocked_fpu_registers_[reserved_fp_registers_debuggable.PopLowestIndex().code()] = true; } } } -Location CodeGeneratorARM64::AllocateFreeRegister(Primitive::Type type) const { - if (type == Primitive::kPrimVoid) { - LOG(FATAL) << "Unreachable type " << type; - } - - if (Primitive::IsFloatingPointType(type)) { - ssize_t reg = FindFreeEntry(blocked_fpu_registers_, kNumberOfAllocatableFPRegisters); - DCHECK_NE(reg, -1); - return Location::FpuRegisterLocation(reg); - } else { - ssize_t reg = FindFreeEntry(blocked_core_registers_, kNumberOfAllocatableRegisters); - DCHECK_NE(reg, -1); - return Location::RegisterLocation(reg); - } -} - size_t CodeGeneratorARM64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) { Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize); __ Str(reg, MemOperand(sp, stack_index)); @@ -1343,7 +1394,8 @@ void CodeGeneratorARM64::Load(Primitive::Type type, void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction, CPURegister dst, - const MemOperand& src) { + const MemOperand& src, + bool needs_null_check) { MacroAssembler* masm = GetVIXLAssembler(); BlockPoolsScope block_pools(masm); UseScratchRegisterScope temps(masm); @@ -1359,20 +1411,28 @@ void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction, switch (type) { case Primitive::kPrimBoolean: __ Ldarb(Register(dst), base); - MaybeRecordImplicitNullCheck(instruction); + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } break; case Primitive::kPrimByte: __ Ldarb(Register(dst), base); - MaybeRecordImplicitNullCheck(instruction); + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } __ Sbfx(Register(dst), Register(dst), 0, Primitive::ComponentSize(type) * kBitsPerByte); break; case Primitive::kPrimChar: __ Ldarh(Register(dst), base); - MaybeRecordImplicitNullCheck(instruction); + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } break; case Primitive::kPrimShort: __ Ldarh(Register(dst), base); - MaybeRecordImplicitNullCheck(instruction); + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } __ Sbfx(Register(dst), Register(dst), 0, Primitive::ComponentSize(type) * kBitsPerByte); break; case Primitive::kPrimInt: @@ -1380,7 +1440,9 @@ void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction, case Primitive::kPrimLong: DCHECK_EQ(dst.Is64Bits(), Primitive::Is64BitType(type)); __ Ldar(Register(dst), base); - MaybeRecordImplicitNullCheck(instruction); + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } break; case Primitive::kPrimFloat: case Primitive::kPrimDouble: { @@ -1389,7 +1451,9 @@ void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction, Register temp = dst.Is64Bits() ? 
temps.AcquireX() : temps.AcquireW(); __ Ldar(temp, base); - MaybeRecordImplicitNullCheck(instruction); + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } __ Fmov(FPRegister(dst), temp); break; } @@ -1510,7 +1574,7 @@ void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCod __ Bind(slow_path->GetExitLabel()); } -void InstructionCodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) { +void CodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) { BarrierType type = BarrierAll; switch (kind) { @@ -1646,33 +1710,62 @@ void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction) { void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) { DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); + LocationSummary* locations = instruction->GetLocations(); + Location base_loc = locations->InAt(0); + Location out = locations->Out(); + uint32_t offset = field_info.GetFieldOffset().Uint32Value(); Primitive::Type field_type = field_info.GetFieldType(); BlockPoolsScope block_pools(GetVIXLAssembler()); MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), field_info.GetFieldOffset()); bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease(); - if (field_info.IsVolatile()) { - if (use_acquire_release) { - // NB: LoadAcquire will record the pc info if needed. - codegen_->LoadAcquire(instruction, OutputCPURegister(instruction), field); + if (field_type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // Object FieldGet with Baker's read barrier case. + MacroAssembler* masm = GetVIXLAssembler(); + UseScratchRegisterScope temps(masm); + // /* HeapReference<Object> */ out = *(base + offset) + Register base = RegisterFrom(base_loc, Primitive::kPrimNot); + Register temp = temps.AcquireW(); + // Note that potential implicit null checks are handled in this + // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier call. + codegen_->GenerateFieldLoadWithBakerReadBarrier( + instruction, + out, + base, + offset, + temp, + /* needs_null_check */ true, + field_info.IsVolatile() && use_acquire_release); + if (field_info.IsVolatile() && !use_acquire_release) { + // For IRIW sequential consistency kLoadAny is not sufficient. + codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + } + } else { + // General case. + if (field_info.IsVolatile()) { + if (use_acquire_release) { + // Note that a potential implicit null check is handled in this + // CodeGeneratorARM64::LoadAcquire call. + // NB: LoadAcquire will record the pc info if needed. + codegen_->LoadAcquire( + instruction, OutputCPURegister(instruction), field, /* needs_null_check */ true); + } else { + codegen_->Load(field_type, OutputCPURegister(instruction), field); + codegen_->MaybeRecordImplicitNullCheck(instruction); + // For IRIW sequential consistency kLoadAny is not sufficient. + codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + } } else { codegen_->Load(field_type, OutputCPURegister(instruction), field); codegen_->MaybeRecordImplicitNullCheck(instruction); - // For IRIW sequential consistency kLoadAny is not sufficient. 
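The restructured HandleFieldGet keeps the two existing lowerings of a volatile field get. In C++ atomics terms they look roughly like this (a sketch of the intended semantics, not ART code):

#include <atomic>
#include <cstdint>

// With acquire/release preferred, a single load-acquire (ldar) suffices;
// otherwise a plain load is followed by a full kAnyAny barrier (dmb ish),
// since the acquire-only kLoadAny barrier would not provide the IRIW
// sequential consistency that Java volatiles require.
int32_t VolatileFieldGet(std::atomic<int32_t>& field, bool use_acquire_release) {
  if (use_acquire_release) {
    return field.load(std::memory_order_acquire);
  }
  int32_t value = field.load(std::memory_order_relaxed);
  std::atomic_thread_fence(std::memory_order_seq_cst);
  return value;
}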
- GenerateMemoryBarrier(MemBarrierKind::kAnyAny); } - } else { - codegen_->Load(field_type, OutputCPURegister(instruction), field); - codegen_->MaybeRecordImplicitNullCheck(instruction); - } - - if (field_type == Primitive::kPrimNot) { - LocationSummary* locations = instruction->GetLocations(); - Location base = locations->InAt(0); - Location out = locations->Out(); - uint32_t offset = field_info.GetFieldOffset().Uint32Value(); - codegen_->MaybeGenerateReadBarrier(instruction, out, out, base, offset); + if (field_type == Primitive::kPrimNot) { + // If read barriers are enabled, emit read barriers other than + // Baker's using a slow path (and also unpoison the loaded + // reference, if heap poisoning is enabled). + codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset); + } } } @@ -1718,10 +1811,10 @@ void InstructionCodeGeneratorARM64::HandleFieldSet(HInstruction* instruction, codegen_->StoreRelease(field_type, source, HeapOperand(obj, offset)); codegen_->MaybeRecordImplicitNullCheck(instruction); } else { - GenerateMemoryBarrier(MemBarrierKind::kAnyStore); + codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore); codegen_->Store(field_type, source, HeapOperand(obj, offset)); codegen_->MaybeRecordImplicitNullCheck(instruction); - GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny); } } else { codegen_->Store(field_type, source, HeapOperand(obj, offset)); @@ -2026,50 +2119,62 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { LocationSummary* locations = instruction->GetLocations(); Location index = locations->InAt(1); uint32_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value(); - MemOperand source = HeapOperand(obj); - CPURegister dest = OutputCPURegister(instruction); + Location out = locations->Out(); MacroAssembler* masm = GetVIXLAssembler(); UseScratchRegisterScope temps(masm); // Block pools between `Load` and `MaybeRecordImplicitNullCheck`. BlockPoolsScope block_pools(masm); - if (index.IsConstant()) { - offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type); - source = HeapOperand(obj, offset); + if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // Object ArrayGet with Baker's read barrier case. + Register temp = temps.AcquireW(); + // The read barrier instrumentation does not support the + // HArm64IntermediateAddress instruction yet. + DCHECK(!instruction->GetArray()->IsArm64IntermediateAddress()); + // Note that a potential implicit null check is handled in the + // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call. + codegen_->GenerateArrayLoadWithBakerReadBarrier( + instruction, out, obj.W(), offset, index, temp, /* needs_null_check */ true); } else { - Register temp = temps.AcquireSameSizeAs(obj); - if (instruction->GetArray()->IsArm64IntermediateAddress()) { - // The read barrier instrumentation does not support the - // HArm64IntermediateAddress instruction yet. - DCHECK(!kEmitCompilerReadBarrier); - // We do not need to compute the intermediate address from the array: the - // input instruction has done it already. See the comment in - // `InstructionSimplifierArm64::TryExtractArrayAccessAddress()`. - if (kIsDebugBuild) { - HArm64IntermediateAddress* tmp = instruction->GetArray()->AsArm64IntermediateAddress(); - DCHECK(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64() == offset); - } - temp = obj; + // General case. 
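In the general case that follows, the element address is data_offset plus the index scaled by the component size; the HArm64IntermediateAddress special case only means the base-plus-offset addition was already folded into the input instruction. The address arithmetic as a sketch (hypothetical helper):

#include <cstdint>

// addr = obj + data_offset + (index << ComponentSizeShift(type)),
// matching HeapOperand(temp, XRegisterFrom(index), LSL, shift) below.
uint64_t ArrayElementAddress(uint64_t obj, uint32_t data_offset,
                             uint64_t index, int component_size_shift) {
  return obj + data_offset + (index << component_size_shift);
}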
+ MemOperand source = HeapOperand(obj); + if (index.IsConstant()) { + offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type); + source = HeapOperand(obj, offset); } else { - __ Add(temp, obj, offset); + Register temp = temps.AcquireSameSizeAs(obj); + if (instruction->GetArray()->IsArm64IntermediateAddress()) { + // The read barrier instrumentation does not support the + // HArm64IntermediateAddress instruction yet. + DCHECK(!kEmitCompilerReadBarrier); + // We do not need to compute the intermediate address from the array: the + // input instruction has done it already. See the comment in + // `InstructionSimplifierArm64::TryExtractArrayAccessAddress()`. + if (kIsDebugBuild) { + HArm64IntermediateAddress* tmp = instruction->GetArray()->AsArm64IntermediateAddress(); + DCHECK_EQ(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64(), offset); + } + temp = obj; + } else { + __ Add(temp, obj, offset); + } + source = HeapOperand(temp, XRegisterFrom(index), LSL, Primitive::ComponentSizeShift(type)); } - source = HeapOperand(temp, XRegisterFrom(index), LSL, Primitive::ComponentSizeShift(type)); - } - codegen_->Load(type, dest, source); - codegen_->MaybeRecordImplicitNullCheck(instruction); + codegen_->Load(type, OutputCPURegister(instruction), source); + codegen_->MaybeRecordImplicitNullCheck(instruction); - if (type == Primitive::kPrimNot) { - static_assert( - sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); - Location obj_loc = locations->InAt(0); - Location out = locations->Out(); - if (index.IsConstant()) { - codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset); - } else { - codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset, index); + if (type == Primitive::kPrimNot) { + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + Location obj_loc = locations->InAt(0); + if (index.IsConstant()) { + codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset); + } else { + codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset, index); + } } } } @@ -2199,12 +2304,12 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { // __ Mov(temp2, temp); // // /* HeapReference<Class> */ temp = temp->component_type_ // __ Ldr(temp, HeapOperand(temp, component_offset)); - // codegen_->GenerateReadBarrier( + // codegen_->GenerateReadBarrierSlow( // instruction, temp_loc, temp_loc, temp2_loc, component_offset); // // // /* HeapReference<Class> */ temp2 = value->klass_ // __ Ldr(temp2, HeapOperand(Register(value), class_offset)); - // codegen_->GenerateReadBarrier( + // codegen_->GenerateReadBarrierSlow( // instruction, temp2_loc, temp2_loc, value_loc, class_offset, temp_loc); // // __ Cmp(temp, temp2); @@ -2381,12 +2486,8 @@ void InstructionCodeGeneratorARM64::VisitCompare(HCompare* compare) { } else { __ Fcmp(left, InputFPRegisterAt(compare, 1)); } - if (compare->IsGtBias()) { - __ Cset(result, ne); - } else { - __ Csetm(result, ne); - } - __ Cneg(result, result, compare->IsGtBias() ? 
mi : gt); + __ Cset(result, ne); + __ Cneg(result, result, ARM64FPCondition(kCondLT, compare->IsGtBias())); break; } default: @@ -2422,7 +2523,6 @@ void InstructionCodeGeneratorARM64::HandleCondition(HCondition* instruction) { LocationSummary* locations = instruction->GetLocations(); Register res = RegisterFrom(locations->Out(), instruction->GetType()); IfCondition if_cond = instruction->GetCondition(); - Condition arm64_cond = ARM64Condition(if_cond); if (Primitive::IsFloatingPointType(instruction->InputAt(0)->GetType())) { FPRegister lhs = InputFPRegisterAt(instruction, 0); @@ -2433,20 +2533,13 @@ void InstructionCodeGeneratorARM64::HandleCondition(HCondition* instruction) { } else { __ Fcmp(lhs, InputFPRegisterAt(instruction, 1)); } - __ Cset(res, arm64_cond); - if (instruction->IsFPConditionTrueIfNaN()) { - // res = IsUnordered(arm64_cond) ? 1 : res <=> res = IsNotUnordered(arm64_cond) ? res : 1 - __ Csel(res, res, Operand(1), vc); // VC for "not unordered". - } else if (instruction->IsFPConditionFalseIfNaN()) { - // res = IsUnordered(arm64_cond) ? 0 : res <=> res = IsNotUnordered(arm64_cond) ? res : 0 - __ Csel(res, res, Operand(0), vc); // VC for "not unordered". - } + __ Cset(res, ARM64FPCondition(if_cond, instruction->IsGtBias())); } else { // Integer cases. Register lhs = InputRegisterAt(instruction, 0); Operand rhs = InputOperandAt(instruction, 1); __ Cmp(lhs, rhs); - __ Cset(res, arm64_cond); + __ Cset(res, ARM64Condition(if_cond)); } } @@ -2816,15 +2909,11 @@ void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruct } else { __ Fcmp(lhs, InputFPRegisterAt(condition, 1)); } - if (condition->IsFPConditionTrueIfNaN()) { - __ B(vs, true_target == nullptr ? &fallthrough_target : true_target); - } else if (condition->IsFPConditionFalseIfNaN()) { - __ B(vs, false_target == nullptr ? &fallthrough_target : false_target); - } if (true_target == nullptr) { - __ B(ARM64Condition(condition->GetOppositeCondition()), false_target); + IfCondition opposite_condition = condition->GetOppositeCondition(); + __ B(ARM64FPCondition(opposite_condition, condition->IsGtBias()), false_target); } else { - __ B(ARM64Condition(condition->GetCondition()), true_target); + __ B(ARM64FPCondition(condition->GetCondition(), condition->IsGtBias()), true_target); } } else { // Integer cases. 
@@ -2841,7 +2930,8 @@ void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruct non_fallthrough_target = true_target; } - if ((arm64_cond != gt && arm64_cond != le) && rhs.IsImmediate() && (rhs.immediate() == 0)) { + if ((arm64_cond == eq || arm64_cond == ne || arm64_cond == lt || arm64_cond == ge) && + rhs.IsImmediate() && (rhs.immediate() == 0)) { switch (arm64_cond) { case eq: __ Cbz(lhs, non_fallthrough_target); @@ -2942,6 +3032,14 @@ void InstructionCodeGeneratorARM64::VisitInstanceFieldSet(HInstanceFieldSet* ins HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); } +static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) { + return kEmitCompilerReadBarrier && + (kUseBakerReadBarrier || + type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck); +} + void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); @@ -2968,21 +3066,22 @@ void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) { locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); // When read barriers are enabled, we need a temporary register for // some cases. - if (kEmitCompilerReadBarrier && - (type_check_kind == TypeCheckKind::kAbstractClassCheck || - type_check_kind == TypeCheckKind::kClassHierarchyCheck || - type_check_kind == TypeCheckKind::kArrayObjectCheck)) { + if (TypeCheckNeedsATemporary(type_check_kind)) { locations->AddTemp(Location::RequiresRegister()); } } void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); Register obj = InputRegisterAt(instruction, 0); Register cls = InputRegisterAt(instruction, 1); Location out_loc = locations->Out(); Register out = OutputRegister(instruction); + Location maybe_temp_loc = TypeCheckNeedsATemporary(type_check_kind) ? + locations->GetTemp(0) : + Location::NoLocation(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); @@ -2998,10 +3097,9 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { } // /* HeapReference<Class> */ out = obj->klass_ - __ Ldr(out, HeapOperand(obj.W(), class_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc); - switch (instruction->GetTypeCheckKind()) { + switch (type_check_kind) { case TypeCheckKind::kExactCheck: { __ Cmp(out, cls); __ Cset(out, eq); @@ -3016,17 +3114,8 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { // object to avoid doing a comparison we know will fail. vixl::Label loop, success; __ Bind(&loop); - Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `out` into `temp` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. 
- Register temp = WRegisterFrom(temp_loc); - __ Mov(temp, out); - } // /* HeapReference<Class> */ out = out->super_class_ - __ Ldr(out, HeapOperand(out, super_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset); + GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc); // If `out` is null, we use it for the result, and jump to `done`. __ Cbz(out, &done); __ Cmp(out, cls); @@ -3044,17 +3133,8 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { __ Bind(&loop); __ Cmp(out, cls); __ B(eq, &success); - Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `out` into `temp` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. - Register temp = WRegisterFrom(temp_loc); - __ Mov(temp, out); - } // /* HeapReference<Class> */ out = out->super_class_ - __ Ldr(out, HeapOperand(out, super_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset); + GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc); __ Cbnz(out, &loop); // If `out` is null, we use it for the result, and jump to `done`. __ B(&done); @@ -3072,17 +3152,8 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { __ Cmp(out, cls); __ B(eq, &exact_check); // Otherwise, we need to check that the object's class is a non-primitive array. - Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `out` into `temp` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. - Register temp = WRegisterFrom(temp_loc); - __ Mov(temp, out); - } // /* HeapReference<Class> */ out = out->component_type_ - __ Ldr(out, HeapOperand(out, component_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, component_offset); + GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, maybe_temp_loc); // If `out` is null, we use it for the result, and jump to `done`. __ Cbz(out, &done); __ Ldrh(out, HeapOperand(out, primitive_offset)); @@ -3121,6 +3192,13 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { // HInstanceOf instruction (following the runtime calling // convention), which might be cluttered by the potential first // read barrier emission at the beginning of this method. + // + // TODO: Introduce a new runtime entry point taking the object + // to test (instead of its class) as argument, and let it deal + // with the read barrier issues. This will let us refactor this + // case of the `switch` code as it was previously (with a direct + // call to the runtime not using a type checking slow path). + // This should also be beneficial for the other cases above. DCHECK(locations->OnlyCallsOnSlowPath()); slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction, /* is_fatal */ false); @@ -3173,30 +3251,29 @@ void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) { locations->SetInAt(1, Location::RequiresRegister()); // Note that TypeCheckSlowPathARM64 uses this "temp" register too. 
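The kAbstractClassCheck path above now routes each super_class_ load through GenerateReferenceLoadOneRegister, but the loop it emits is still the plain superclass walk. Its semantics in standalone C++ (a sketch; Klass is a stand-in for mirror::Class):

// Minimal stand-in for the class metadata the loop walks.
struct Klass {
  const Klass* super_class;
  const Klass* GetSuperClass() const { return super_class; }
};

// Fetch the super class first (an abstract class never equals the
// object's exact class), then walk up until a match or null.
bool IsInstanceOfAbstractClass(const Klass* klass, const Klass* cls) {
  do {
    klass = klass->GetSuperClass();  // Read-barrier-instrumented in ART.
  } while (klass != nullptr && klass != cls);
  return klass != nullptr;
}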
locations->AddTemp(Location::RequiresRegister()); - locations->AddTemp(Location::RequiresRegister()); // When read barriers are enabled, we need an additional temporary // register for some cases. - if (kEmitCompilerReadBarrier && - (type_check_kind == TypeCheckKind::kAbstractClassCheck || - type_check_kind == TypeCheckKind::kClassHierarchyCheck || - type_check_kind == TypeCheckKind::kArrayObjectCheck)) { - locations->AddTemp(Location::RequiresRegister()); + if (TypeCheckNeedsATemporary(type_check_kind)) { + locations->AddTemp(Location::RequiresRegister()); } } void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); Register obj = InputRegisterAt(instruction, 0); Register cls = InputRegisterAt(instruction, 1); Location temp_loc = locations->GetTemp(0); + Location maybe_temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ? + locations->GetTemp(1) : + Location::NoLocation(); Register temp = WRegisterFrom(temp_loc); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); - TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); bool is_type_check_slow_path_fatal = (type_check_kind == TypeCheckKind::kExactCheck || type_check_kind == TypeCheckKind::kAbstractClassCheck || @@ -3215,8 +3292,7 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { } // /* HeapReference<Class> */ temp = obj->klass_ - __ Ldr(temp, HeapOperand(obj, class_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); switch (type_check_kind) { case TypeCheckKind::kExactCheck: @@ -3233,18 +3309,8 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { // object to avoid doing a comparison we know will fail. vixl::Label loop, compare_classes; __ Bind(&loop); - Location temp2_loc = - kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `temp` into `temp2` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. - Register temp2 = WRegisterFrom(temp2_loc); - __ Mov(temp2, temp); - } // /* HeapReference<Class> */ temp = temp->super_class_ - __ Ldr(temp, HeapOperand(temp, super_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset); + GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc); // If the class reference currently in `temp` is not null, jump // to the `compare_classes` label to compare it with the checked @@ -3256,8 +3322,8 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { // going into the slow path, as it has been overwritten in the // meantime. 
// /* HeapReference<Class> */ temp = obj->klass_ - __ Ldr(temp, HeapOperand(obj, class_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters( + instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); __ B(type_check_slow_path->GetEntryLabel()); __ Bind(&compare_classes); @@ -3273,18 +3339,8 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { __ Cmp(temp, cls); __ B(eq, &done); - Location temp2_loc = - kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `temp` into `temp2` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. - Register temp2 = WRegisterFrom(temp2_loc); - __ Mov(temp2, temp); - } // /* HeapReference<Class> */ temp = temp->super_class_ - __ Ldr(temp, HeapOperand(temp, super_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset); + GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc); // If the class reference currently in `temp` is not null, jump // back at the beginning of the loop. @@ -3295,8 +3351,8 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { // going into the slow path, as it has been overwritten in the // meantime. // /* HeapReference<Class> */ temp = obj->klass_ - __ Ldr(temp, HeapOperand(obj, class_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters( + instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); __ B(type_check_slow_path->GetEntryLabel()); break; } @@ -3308,19 +3364,8 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { __ B(eq, &done); // Otherwise, we need to check that the object's class is a non-primitive array. - Location temp2_loc = - kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `temp` into `temp2` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. - Register temp2 = WRegisterFrom(temp2_loc); - __ Mov(temp2, temp); - } // /* HeapReference<Class> */ temp = temp->component_type_ - __ Ldr(temp, HeapOperand(temp, component_offset)); - codegen_->MaybeGenerateReadBarrier( - instruction, temp_loc, temp_loc, temp2_loc, component_offset); + GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, maybe_temp2_loc); // If the component type is not null (i.e. the object is indeed // an array), jump to label `check_non_primitive_component_type` @@ -3333,8 +3378,8 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { // going into the slow path, as it has been overwritten in the // meantime. // /* HeapReference<Class> */ temp = obj->klass_ - __ Ldr(temp, HeapOperand(obj, class_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters( + instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); __ B(type_check_slow_path->GetEntryLabel()); __ Bind(&check_non_primitive_component_type); @@ -3343,8 +3388,8 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { __ Cbz(temp, &done); // Same comment as above regarding `temp` and the slow path. 
// /* HeapReference<Class> */ temp = obj->klass_ - __ Ldr(temp, HeapOperand(obj, class_offset)); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters( + instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc); __ B(type_check_slow_path->GetEntryLabel()); break; } @@ -3361,6 +3406,13 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { // instruction (following the runtime calling convention), which // might be cluttered by the potential first read barrier // emission at the beginning of this method. + // + // TODO: Introduce a new runtime entry point taking the object + // to test (instead of its class) as argument, and let it deal + // with the read barrier issues. This will let us refactor this + // case of the `switch` code as it was previously (with a direct + // call to the runtime not using a type checking slow path). + // This should also be beneficial for the other cases above. __ B(type_check_slow_path->GetEntryLabel()); break; } @@ -3462,9 +3514,9 @@ void LocationsBuilderARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) { } void LocationsBuilderARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { - // When we do not run baseline, explicit clinit checks triggered by static - // invokes must have been pruned by art::PrepareForRegisterAllocation. - DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck()); + // Explicit clinit checks triggered by static invokes must have been pruned by + // art::PrepareForRegisterAllocation. + DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena()); if (intrinsic.TryDispatch(invoke)) { @@ -3486,7 +3538,7 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM64* codege HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM64::GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, MethodReference target_method ATTRIBUTE_UNUSED) { - // On arm64 we support all dispatch types. + // On ARM64 we support all dispatch types. return desired_dispatch_info; } @@ -3712,9 +3764,9 @@ vixl::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateMethodCodeLiteral( void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { - // When we do not run baseline, explicit clinit checks triggered by static - // invokes must have been pruned by art::PrepareForRegisterAllocation. - DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck()); + // Explicit clinit checks triggered by static invokes must have been pruned by + // art::PrepareForRegisterAllocation. 
+ DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); if (TryGenerateIntrinsicCode(invoke, codegen_)) { return; @@ -3763,32 +3815,17 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) { if (cls->IsReferrersClass()) { DCHECK(!cls->CanCallRuntime()); DCHECK(!cls->MustGenerateClinitCheck()); - uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value(); - if (kEmitCompilerReadBarrier) { - // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_) - __ Add(out.X(), current_method.X(), declaring_class_offset); - // /* mirror::Class* */ out = out->Read() - codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc); - } else { - // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ - __ Ldr(out, MemOperand(current_method, declaring_class_offset)); - } + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + GenerateGcRootFieldLoad( + cls, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); } else { MemberOffset resolved_types_offset = ArtMethod::DexCacheResolvedTypesOffset(kArm64PointerSize); // /* GcRoot<mirror::Class>[] */ out = // current_method.ptr_sized_fields_->dex_cache_resolved_types_ __ Ldr(out.X(), MemOperand(current_method, resolved_types_offset.Int32Value())); - - size_t cache_offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex()); - if (kEmitCompilerReadBarrier) { - // /* GcRoot<mirror::Class>* */ out = &out[type_index] - __ Add(out.X(), out.X(), cache_offset); - // /* mirror::Class* */ out = out->Read() - codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc); - } else { - // /* GcRoot<mirror::Class> */ out = out[type_index] - __ Ldr(out, MemOperand(out.X(), cache_offset)); - } + // /* GcRoot<mirror::Class> */ out = out[type_index] + GenerateGcRootFieldLoad( + cls, out_loc, out.X(), CodeGenerator::GetCacheOffset(cls->GetTypeIndex())); if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) { DCHECK(cls->CanCallRuntime()); @@ -3851,30 +3888,14 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) { Register out = OutputRegister(load); Register current_method = InputRegisterAt(load, 0); - uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value(); - if (kEmitCompilerReadBarrier) { - // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_) - __ Add(out.X(), current_method.X(), declaring_class_offset); - // /* mirror::Class* */ out = out->Read() - codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc); - } else { - // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ - __ Ldr(out, MemOperand(current_method, declaring_class_offset)); - } - + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + GenerateGcRootFieldLoad( + load, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_ __ Ldr(out.X(), HeapOperand(out, mirror::Class::DexCacheStringsOffset().Uint32Value())); - - size_t cache_offset = CodeGenerator::GetCacheOffset(load->GetStringIndex()); - if (kEmitCompilerReadBarrier) { - // /* GcRoot<mirror::String>* */ out = &out[string_index] - __ Add(out.X(), out.X(), cache_offset); - // /* mirror::String* */ out = out->Read() - codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc); - } else { - // /* GcRoot<mirror::String> */ out = out[string_index] - __ Ldr(out, MemOperand(out.X(), cache_offset)); - } + // /* GcRoot<mirror::String> */ out = out[string_index] + 
GenerateGcRootFieldLoad( + load, out_loc, out.X(), CodeGenerator::GetCacheOffset(load->GetStringIndex())); if (!load->IsInDexCache()) { SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load); @@ -4243,7 +4264,7 @@ void LocationsBuilderARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { } void InstructionCodeGeneratorARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { - GenerateMemoryBarrier(memory_barrier->GetBarrierKind()); + codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind()); } void LocationsBuilderARM64::VisitReturn(HReturn* instruction) { @@ -4628,14 +4649,288 @@ void InstructionCodeGeneratorARM64::VisitPackedSwitch(HPackedSwitch* switch_inst } } -void CodeGeneratorARM64::GenerateReadBarrier(HInstruction* instruction, - Location out, - Location ref, - Location obj, - uint32_t offset, - Location index) { +void InstructionCodeGeneratorARM64::GenerateReferenceLoadOneRegister(HInstruction* instruction, + Location out, + uint32_t offset, + Location maybe_temp) { + Primitive::Type type = Primitive::kPrimNot; + Register out_reg = RegisterFrom(out, type); + if (kEmitCompilerReadBarrier) { + Register temp_reg = RegisterFrom(maybe_temp, type); + if (kUseBakerReadBarrier) { + // Load with fast path based Baker's read barrier. + // /* HeapReference<Object> */ out = *(out + offset) + codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, + out, + out_reg, + offset, + temp_reg, + /* needs_null_check */ false, + /* use_load_acquire */ false); + } else { + // Load with slow path based read barrier. + // Save the value of `out` into `maybe_temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + __ Mov(temp_reg, out_reg); + // /* HeapReference<Object> */ out = *(out + offset) + __ Ldr(out_reg, HeapOperand(out_reg, offset)); + codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset); + } + } else { + // Plain load with no read barrier. + // /* HeapReference<Object> */ out = *(out + offset) + __ Ldr(out_reg, HeapOperand(out_reg, offset)); + GetAssembler()->MaybeUnpoisonHeapReference(out_reg); + } +} + +void InstructionCodeGeneratorARM64::GenerateReferenceLoadTwoRegisters(HInstruction* instruction, + Location out, + Location obj, + uint32_t offset, + Location maybe_temp) { + Primitive::Type type = Primitive::kPrimNot; + Register out_reg = RegisterFrom(out, type); + Register obj_reg = RegisterFrom(obj, type); + if (kEmitCompilerReadBarrier) { + if (kUseBakerReadBarrier) { + // Load with fast path based Baker's read barrier. + Register temp_reg = RegisterFrom(maybe_temp, type); + // /* HeapReference<Object> */ out = *(obj + offset) + codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, + out, + obj_reg, + offset, + temp_reg, + /* needs_null_check */ false, + /* use_load_acquire */ false); + } else { + // Load with slow path based read barrier. + // /* HeapReference<Object> */ out = *(obj + offset) + __ Ldr(out_reg, HeapOperand(obj_reg, offset)); + codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset); + } + } else { + // Plain load with no read barrier. 
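These helpers differ only in whether the destination may alias the base: the one-register form reuses out as the base, which is why its slow-path case must first copy out into the temp. The strategy selection they share, as a sketch (flag names mirror kEmitCompilerReadBarrier and kUseBakerReadBarrier):

enum class RefLoadKind { kBakerFastPath, kSlowPathBarrier, kPlainLoad };

constexpr RefLoadKind ChooseRefLoadKind(bool emit_read_barrier,
                                        bool use_baker_read_barrier) {
  return emit_read_barrier
             ? (use_baker_read_barrier ? RefLoadKind::kBakerFastPath
                                       : RefLoadKind::kSlowPathBarrier)
             : RefLoadKind::kPlainLoad;
}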
+ // /* HeapReference<Object> */ out = *(obj + offset) + __ Ldr(out_reg, HeapOperand(obj_reg, offset)); + GetAssembler()->MaybeUnpoisonHeapReference(out_reg); + } +} + +void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(HInstruction* instruction, + Location root, + vixl::Register obj, + uint32_t offset) { + Register root_reg = RegisterFrom(root, Primitive::kPrimNot); + if (kEmitCompilerReadBarrier) { + if (kUseBakerReadBarrier) { + // Fast path implementation of art::ReadBarrier::BarrierForRoot when + // Baker's read barriers are used: + // + // root = obj.field; + // if (Thread::Current()->GetIsGcMarking()) { + // root = ReadBarrier::Mark(root) + // } + + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + __ Ldr(root_reg, MemOperand(obj, offset)); + static_assert( + sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), + "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " + "have different sizes."); + static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::CompressedReference<mirror::Object> and int32_t " + "have different sizes."); + + // Slow path used to mark the GC root `root`. + SlowPathCodeARM64* slow_path = + new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, root, root); + codegen_->AddSlowPath(slow_path); + + MacroAssembler* masm = GetVIXLAssembler(); + UseScratchRegisterScope temps(masm); + Register temp = temps.AcquireW(); + // temp = Thread::Current()->GetIsGcMarking() + __ Ldr(temp, MemOperand(tr, Thread::IsGcMarkingOffset<kArm64WordSize>().Int32Value())); + __ Cbnz(temp, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + } else { + // GC root loaded through a slow path for read barriers other + // than Baker's. + // /* GcRoot<mirror::Object>* */ root = obj + offset + __ Add(root_reg.X(), obj.X(), offset); + // /* mirror::Object* */ root = root->Read() + codegen_->GenerateReadBarrierForRootSlow(instruction, root, root); + } + } else { + // Plain GC root load with no read barrier. + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + __ Ldr(root_reg, MemOperand(obj, offset)); + // Note that GC roots are not affected by heap poisoning, thus we + // do not have to unpoison `root_reg` here. + } +} + +void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + vixl::Register obj, + uint32_t offset, + Register temp, + bool needs_null_check, + bool use_load_acquire) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + + // /* HeapReference<Object> */ ref = *(obj + offset) + Location no_index = Location::NoLocation(); + GenerateReferenceLoadWithBakerReadBarrier( + instruction, ref, obj, offset, no_index, temp, needs_null_check, use_load_acquire); +} + +void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + vixl::Register obj, + uint32_t data_offset, + Location index, + Register temp, + bool needs_null_check) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + + // Array cells are never volatile variables, therefore array loads + // never use Load-Acquire instructions on ARM64.
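The four-line comment inside GenerateGcRootFieldLoad compresses the whole Baker root barrier. Spelled out as plain C++ it is roughly the following; ReadBarrier::Mark and Thread::Current()->GetIsGcMarking() are the facilities the comment itself names, the rest is an illustrative fragment, not ART code:

    // Fast path: a single plain load of the root.
    mirror::Object* root = *root_addr;
    // Only while the collector is concurrently marking do we take the
    // slow path (ReadBarrierMarkSlowPathARM64 above), which marks the
    // object and returns the possibly forwarded reference.
    if (Thread::Current()->GetIsGcMarking()) {
      root = ReadBarrier::Mark(root);
    }

The emitted code tests the TLS flag with one Ldr from tr plus a Cbnz, so the non-marking case costs two instructions beyond the load itself.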
+ const bool use_load_acquire = false; + + // /* HeapReference<Object> */ ref = + // *(obj + data_offset + index * sizeof(HeapReference<Object>)) + GenerateReferenceLoadWithBakerReadBarrier( + instruction, ref, obj, data_offset, index, temp, needs_null_check, use_load_acquire); +} + +void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + vixl::Register obj, + uint32_t offset, + Location index, + Register temp, + bool needs_null_check, + bool use_load_acquire) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + // If `index` is a valid location, then we are emitting an array + // load, so we shouldn't be using a Load Acquire instruction. + // In other words: `index.IsValid()` => `!use_load_acquire`. + DCHECK(!index.IsValid() || !use_load_acquire); + + MacroAssembler* masm = GetVIXLAssembler(); + UseScratchRegisterScope temps(masm); + + // In slow path based read barriers, the read barrier call is + // inserted after the original load. However, in fast path based + // Baker's read barriers, we need to perform the load of + // mirror::Object::monitor_ *before* the original reference load. + // This load-load ordering is required by the read barrier. + // The fast path/slow path (for Baker's algorithm) should look like: + // + // uint32_t rb_state = LockWord(obj->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // HeapReference<Object> ref = *src; // Original reference load. + // bool is_gray = (rb_state == ReadBarrier::gray_ptr_); + // if (is_gray) { + // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path. + // } + // + // Note: the original implementation in ReadBarrier::Barrier is + // slightly more complex as it performs additional checks that we do + // not do here for performance reasons. + + Primitive::Type type = Primitive::kPrimNot; + Register ref_reg = RegisterFrom(ref, type); + DCHECK(obj.IsW()); + uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); + + // /* int32_t */ monitor = obj->monitor_ + __ Ldr(temp, HeapOperand(obj, monitor_offset)); + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } + // /* LockWord */ lock_word = LockWord(monitor) + static_assert(sizeof(LockWord) == sizeof(int32_t), + "art::LockWord and int32_t have different sizes."); + // /* uint32_t */ rb_state = lock_word.ReadBarrierState() + __ Lsr(temp, temp, LockWord::kReadBarrierStateShift); + __ And(temp, temp, Operand(LockWord::kReadBarrierStateMask)); + static_assert( + LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_, + "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_."); + + // Introduce a dependency on the high bits of rb_state, which shall + // be all zeroes, to prevent load-load reordering, and without using + // a memory barrier (which would be more expensive). + // temp2 = rb_state & ~LockWord::kReadBarrierStateMask = 0 + Register temp2 = temps.AcquireW(); + __ Bic(temp2, temp, Operand(LockWord::kReadBarrierStateMask)); + // obj is unchanged by this operation, but its value now depends on + // temp2, which depends on temp. + __ Add(obj, obj, Operand(temp2)); + temps.Release(temp2); + + // The actual reference load.
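The Bic/Add pair just emitted is the subtle part: it manufactures an address dependency out of bits that are architecturally guaranteed to be zero, which orders the two loads on ARM without paying for a DMB. As a C-like sketch of what the registers hold (a gloss on the code above, not additional ART logic):

    uint32_t rb_state = (monitor >> LockWord::kReadBarrierStateShift)
                        & LockWord::kReadBarrierStateMask;       // after Lsr/And
    uint32_t zero = rb_state & ~LockWord::kReadBarrierStateMask; // Bic: always 0
    obj = obj + zero;        // Add: value unchanged, but `obj` now data-depends
                             // on the monitor load
    ref = *(obj + offset);   // so this load cannot be hoisted above it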
+ if (index.IsValid()) { + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + temp2 = temps.AcquireW(); + // /* HeapReference<Object> */ ref = + // *(obj + offset + index * sizeof(HeapReference<Object>)) + MemOperand source = HeapOperand(obj); + if (index.IsConstant()) { + uint32_t computed_offset = + offset + (Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type)); + source = HeapOperand(obj, computed_offset); + } else { + __ Add(temp2, obj, offset); + source = HeapOperand(temp2, XRegisterFrom(index), LSL, Primitive::ComponentSizeShift(type)); + } + Load(type, ref_reg, source); + temps.Release(temp2); + } else { + // /* HeapReference<Object> */ ref = *(obj + offset) + MemOperand field = HeapOperand(obj, offset); + if (use_load_acquire) { + LoadAcquire(instruction, ref_reg, field, /* needs_null_check */ false); + } else { + Load(type, ref_reg, field); + } + } + + // Object* ref = ref_addr->AsMirrorPtr() + GetAssembler()->MaybeUnpoisonHeapReference(ref_reg); + + // Slow path used to mark the object `ref` when it is gray. + SlowPathCodeARM64* slow_path = + new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, ref, ref); + AddSlowPath(slow_path); + + // if (rb_state == ReadBarrier::gray_ptr_) + // ref = ReadBarrier::Mark(ref); + __ Cmp(temp, ReadBarrier::gray_ptr_); + __ B(eq, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + +void CodeGeneratorARM64::GenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { DCHECK(kEmitCompilerReadBarrier); + // Insert a slow path based read barrier *after* the reference load. + // // If heap poisoning is enabled, the unpoisoning of the loaded // reference will be carried out by the runtime within the slow // path. @@ -4649,57 +4944,41 @@ void CodeGeneratorARM64::GenerateReadBarrier(HInstruction* instruction, ReadBarrierForHeapReferenceSlowPathARM64(instruction, out, ref, obj, offset, index); AddSlowPath(slow_path); - // TODO: When read barrier has a fast path, add it here. - /* Currently the read barrier call is inserted after the original load. - * However, if we have a fast path, we need to perform the load of obj.LockWord *before* the - * original load. This load-load ordering is required by the read barrier. - * The fast path/slow path (for Baker's algorithm) should look like: - * - * bool isGray = obj.LockWord & kReadBarrierMask; - * lfence; // load fence or artificial data dependence to prevent load-load reordering - * ref = obj.field; // this is the original load - * if (isGray) { - * ref = Mark(ref); // ideally the slow path just does Mark(ref) - * } - */ - __ B(slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } -void CodeGeneratorARM64::MaybeGenerateReadBarrier(HInstruction* instruction, - Location out, - Location ref, - Location obj, - uint32_t offset, - Location index) { +void CodeGeneratorARM64::MaybeGenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { if (kEmitCompilerReadBarrier) { + // Baker's read barriers shall be handled by the fast path + // (CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier). + DCHECK(!kUseBakerReadBarrier); // If heap poisoning is enabled, unpoisoning will be taken care of // by the runtime within the slow path. 
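Taken together, the ARM64 fast path added above behaves like this hedged C++ rendering; the accessor names are illustrative, while the gray check and the Mark call are the ones in the emitted code:

    mirror::Object* BakerFieldLoad(mirror::Object* obj, uint32_t offset) {
      // Load #1: the lock word, carrying the read barrier state.
      uint32_t rb_state = LockWord(obj->GetMonitor()).ReadBarrierState();
      // Load-load ordering between #1 and #2 (the address dependency above).
      mirror::Object* ref = obj->GetFieldObject(offset);  // load #2
      if (rb_state == ReadBarrier::gray_ptr_) {
        ref = ReadBarrier::Mark(ref);  // ReadBarrierMarkSlowPathARM64
      }
      return ref;
    }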
- GenerateReadBarrier(instruction, out, ref, obj, offset, index); + GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index); } else if (kPoisonHeapReferences) { GetAssembler()->UnpoisonHeapReference(WRegisterFrom(out)); } } -void CodeGeneratorARM64::GenerateReadBarrierForRoot(HInstruction* instruction, - Location out, - Location root) { +void CodeGeneratorARM64::GenerateReadBarrierForRootSlow(HInstruction* instruction, + Location out, + Location root) { DCHECK(kEmitCompilerReadBarrier); + // Insert a slow path based read barrier *after* the GC root load. + // // Note that GC roots are not affected by heap poisoning, so we do // not need to do anything special for this here. SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathARM64(instruction, out, root); AddSlowPath(slow_path); - // TODO: Implement a fast path for ReadBarrierForRoot, performing - // the following operation (for Baker's algorithm): - // - // if (thread.tls32_.is_gc_marking) { - // root = Mark(root); - // } - __ B(slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index f2ff89488e..a9d1bbde98 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -208,14 +208,53 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator { private: void GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path, vixl::Register class_reg); - void GenerateMemoryBarrier(MemBarrierKind kind); void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor); void HandleBinaryOp(HBinaryOperation* instr); + void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, bool value_can_be_null); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); void HandleCondition(HCondition* instruction); + + // Generate a heap reference load using one register `out`: + // + // out <- *(out + offset) + // + // while honoring heap poisoning and/or read barriers (if any). + // + // Location `maybe_temp` is used when generating a read barrier and + // shall be a register in that case; it may be an invalid location + // otherwise. + void GenerateReferenceLoadOneRegister(HInstruction* instruction, + Location out, + uint32_t offset, + Location maybe_temp); + // Generate a heap reference load using two different registers + // `out` and `obj`: + // + // out <- *(obj + offset) + // + // while honoring heap poisoning and/or read barriers (if any). + // + // Location `maybe_temp` is used when generating a Baker's (fast + // path) read barrier and shall be a register in that case; it may + // be an invalid location otherwise. + void GenerateReferenceLoadTwoRegisters(HInstruction* instruction, + Location out, + Location obj, + uint32_t offset, + Location maybe_temp); + // Generate a GC root reference load: + // + // root <- *(obj + offset) + // + // while honoring read barriers (if any). + void GenerateGcRootFieldLoad(HInstruction* instruction, + Location root, + vixl::Register obj, + uint32_t offset); + void HandleShift(HBinaryOperation* instr); void GenerateImplicitNullCheck(HNullCheck* instruction); void GenerateExplicitNullCheck(HNullCheck* instruction); @@ -337,12 +376,11 @@ class CodeGeneratorARM64 : public CodeGenerator { // Emit a write barrier. 
void MarkGCCard(vixl::Register object, vixl::Register value, bool value_can_be_null); + void GenerateMemoryBarrier(MemBarrierKind kind); + // Register allocation. - void SetupBlockedRegisters(bool is_baseline) const OVERRIDE; - // AllocateFreeRegister() is only used when allocating registers locally - // during CompileBaseline(). - Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE; + void SetupBlockedRegisters() const OVERRIDE; Location GetStackLocation(HLoadLocal* load) const OVERRIDE; @@ -389,9 +427,12 @@ class CodeGeneratorARM64 : public CodeGenerator { void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE; void Load(Primitive::Type type, vixl::CPURegister dst, const vixl::MemOperand& src); - void Store(Primitive::Type type, vixl::CPURegister rt, const vixl::MemOperand& dst); - void LoadAcquire(HInstruction* instruction, vixl::CPURegister dst, const vixl::MemOperand& src); - void StoreRelease(Primitive::Type type, vixl::CPURegister rt, const vixl::MemOperand& dst); + void Store(Primitive::Type type, vixl::CPURegister src, const vixl::MemOperand& dst); + void LoadAcquire(HInstruction* instruction, + vixl::CPURegister dst, + const vixl::MemOperand& src, + bool needs_null_check); + void StoreRelease(Primitive::Type type, vixl::CPURegister src, const vixl::MemOperand& dst); // Generate code to invoke a runtime entry point. void InvokeRuntime(QuickEntrypointEnum entrypoint, @@ -426,7 +467,27 @@ class CodeGeneratorARM64 : public CodeGenerator { void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE; - // Generate a read barrier for a heap reference within `instruction`. + // Fast path implementation of ReadBarrier::Barrier for a heap + // reference field load when Baker's read barriers are used. + void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + vixl::Register obj, + uint32_t offset, + vixl::Register temp, + bool needs_null_check, + bool use_load_acquire); + // Fast path implementation of ReadBarrier::Barrier for a heap + // reference array load when Baker's read barriers are used. + void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + vixl::Register obj, + uint32_t data_offset, + Location index, + vixl::Register temp, + bool needs_null_check); + + // Generate a read barrier for a heap reference within `instruction` + // using a slow path. // // A read barrier for an object reference read from the heap is // implemented as a call to the artReadBarrierSlow runtime entry @@ -443,23 +504,25 @@ class CodeGeneratorARM64 : public CodeGenerator { // When `index` is provided (i.e. for array accesses), the offset // value passed to artReadBarrierSlow is adjusted to take `index` // into account. - void GenerateReadBarrier(HInstruction* instruction, - Location out, - Location ref, - Location obj, - uint32_t offset, - Location index = Location::NoLocation()); - - // If read barriers are enabled, generate a read barrier for a heap reference. - // If heap poisoning is enabled, also unpoison the reference in `out`. - void MaybeGenerateReadBarrier(HInstruction* instruction, - Location out, - Location ref, - Location obj, - uint32_t offset, - Location index = Location::NoLocation()); - - // Generate a read barrier for a GC root within `instruction`. 
+ void GenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // If read barriers are enabled, generate a read barrier for a heap + // reference using a slow path. If heap poisoning is enabled, also + // unpoison the reference in `out`. + void MaybeGenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // Generate a read barrier for a GC root within `instruction` using + // a slow path. // // A read barrier for an object reference GC root is implemented as // a call to the artReadBarrierForRootSlow runtime entry point, @@ -469,9 +532,20 @@ class CodeGeneratorARM64 : public CodeGenerator { // // The `out` location contains the value returned by // artReadBarrierForRootSlow. - void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root); + void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root); private: + // Factored implementation of GenerateFieldLoadWithBakerReadBarrier + // and GenerateArrayLoadWithBakerReadBarrier. + void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + vixl::Register obj, + uint32_t offset, + Location index, + vixl::Register temp, + bool needs_null_check, + bool use_load_acquire); + using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, vixl::Literal<uint64_t>*>; using MethodToLiteralMap = ArenaSafeMap<MethodReference, vixl::Literal<uint64_t>*, diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index e34767cecd..5bd136a3f0 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -1042,7 +1042,7 @@ void CodeGeneratorMIPS::MarkGCCard(Register object, Register value) { __ Bind(&done); } -void CodeGeneratorMIPS::SetupBlockedRegisters(bool is_baseline) const { +void CodeGeneratorMIPS::SetupBlockedRegisters() const { // Don't allocate the dalvik style register pair passing. 
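For orientation, the two runtime entry points these slow paths bottom out in are declared roughly as follows; the signatures are inferred from the header comments above and should be treated as a sketch rather than the authoritative prototypes:

    extern "C" mirror::Object* artReadBarrierSlow(mirror::Object* ref,
                                                  mirror::Object* obj,
                                                  uint32_t offset);
    extern "C" mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root);

For array accesses the compiler folds `index` into the `offset` argument before the call, as the comment on GenerateReadBarrierSlow states.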
blocked_register_pairs_[A1_A2] = true; @@ -1072,16 +1072,6 @@ void CodeGeneratorMIPS::SetupBlockedRegisters(bool is_baseline) const { blocked_fpu_registers_[i] = true; } - if (is_baseline) { - for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) { - blocked_core_registers_[kCoreCalleeSaves[i]] = true; - } - - for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) { - blocked_fpu_registers_[kFpuCalleeSaves[i]] = true; - } - } - UpdateBlockedPairRegisters(); } @@ -1096,52 +1086,6 @@ void CodeGeneratorMIPS::UpdateBlockedPairRegisters() const { } } -Location CodeGeneratorMIPS::AllocateFreeRegister(Primitive::Type type) const { - switch (type) { - case Primitive::kPrimLong: { - size_t reg = FindFreeEntry(blocked_register_pairs_, kNumberOfRegisterPairs); - MipsManagedRegister pair = - MipsManagedRegister::FromRegisterPair(static_cast<RegisterPair>(reg)); - DCHECK(!blocked_core_registers_[pair.AsRegisterPairLow()]); - DCHECK(!blocked_core_registers_[pair.AsRegisterPairHigh()]); - - blocked_core_registers_[pair.AsRegisterPairLow()] = true; - blocked_core_registers_[pair.AsRegisterPairHigh()] = true; - UpdateBlockedPairRegisters(); - return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh()); - } - - case Primitive::kPrimByte: - case Primitive::kPrimBoolean: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimNot: { - int reg = FindFreeEntry(blocked_core_registers_, kNumberOfCoreRegisters); - // Block all register pairs that contain `reg`. - for (int i = 0; i < kNumberOfRegisterPairs; i++) { - MipsManagedRegister current = - MipsManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i)); - if (current.AsRegisterPairLow() == reg || current.AsRegisterPairHigh() == reg) { - blocked_register_pairs_[i] = true; - } - } - return Location::RegisterLocation(reg); - } - - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { - int reg = FindFreeEntry(blocked_fpu_registers_, kNumberOfFRegisters); - return Location::FpuRegisterLocation(reg); - } - - case Primitive::kPrimVoid: - LOG(FATAL) << "Unreachable type " << type; - } - - UNREACHABLE(); -} - size_t CodeGeneratorMIPS::SaveCoreRegister(size_t stack_index, uint32_t reg_id) { __ StoreToOffset(kStoreWord, Register(reg_id), SP, stack_index); return kMipsWordSize; @@ -3835,9 +3779,9 @@ void LocationsBuilderMIPS::VisitInvokeVirtual(HInvokeVirtual* invoke) { } void LocationsBuilderMIPS::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { - // When we do not run baseline, explicit clinit checks triggered by static - // invokes must have been pruned by art::PrepareForRegisterAllocation. - DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck()); + // Explicit clinit checks triggered by static invokes must have been pruned by + // art::PrepareForRegisterAllocation. + DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); IntrinsicLocationsBuilderMIPS intrinsic(codegen_); if (intrinsic.TryDispatch(invoke)) { @@ -3973,9 +3917,9 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke } void InstructionCodeGeneratorMIPS::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { - // When we do not run baseline, explicit clinit checks triggered by static - // invokes must have been pruned by art::PrepareForRegisterAllocation. 
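The AllocateFreeRegister bodies deleted in this and the later x86/x86-64 hunks all leaned on the same FindFreeEntry helper, which lives outside this diff; its assumed shape, for readers following the removal:

    // Sketch only: linear scan over a blocked-register table.
    static size_t FindFreeEntry(bool* blocked, size_t count) {
      for (size_t i = 0; i < count; ++i) {
        if (!blocked[i]) {
          blocked[i] = true;  // claim the entry
          return i;
        }
      }
      LOG(FATAL) << "No free register";
      UNREACHABLE();
    }

With the baseline compiler gone, these backends no longer need it for local register allocation, so each one keeps only SetupBlockedRegisters.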
- DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck()); + // Explicit clinit checks triggered by static invokes must have been pruned by + // art::PrepareForRegisterAllocation. + DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); if (TryGenerateIntrinsicCode(invoke, codegen_)) { return; diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h index c3d4851ee9..2cde0ed90b 100644 --- a/compiler/optimizing/code_generator_mips.h +++ b/compiler/optimizing/code_generator_mips.h @@ -290,10 +290,7 @@ class CodeGeneratorMIPS : public CodeGenerator { // Register allocation. - void SetupBlockedRegisters(bool is_baseline) const OVERRIDE; - // AllocateFreeRegister() is only used when allocating registers locally - // during CompileBaseline(). - Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE; + void SetupBlockedRegisters() const OVERRIDE; Location GetStackLocation(HLoadLocal* load) const OVERRIDE; diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 79cd56d698..05054867fe 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -979,7 +979,7 @@ void CodeGeneratorMIPS64::MarkGCCard(GpuRegister object, GpuRegister value) { __ Bind(&done); } -void CodeGeneratorMIPS64::SetupBlockedRegisters(bool is_baseline ATTRIBUTE_UNUSED) const { +void CodeGeneratorMIPS64::SetupBlockedRegisters() const { // ZERO, K0, K1, GP, SP, RA are always reserved and can't be allocated. blocked_core_registers_[ZERO] = true; blocked_core_registers_[K0] = true; @@ -1003,8 +1003,7 @@ void CodeGeneratorMIPS64::SetupBlockedRegisters(bool is_baseline ATTRIBUTE_UNUSE // TODO: review; anything else? - // TODO: make these two for's conditional on is_baseline once - // all the issues with register saving/restoring are sorted out. + // TODO: remove once all the issues with register saving/restoring are sorted out. for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) { blocked_core_registers_[kCoreCalleeSaves[i]] = true; } @@ -1014,20 +1013,6 @@ void CodeGeneratorMIPS64::SetupBlockedRegisters(bool is_baseline ATTRIBUTE_UNUSE } } -Location CodeGeneratorMIPS64::AllocateFreeRegister(Primitive::Type type) const { - if (type == Primitive::kPrimVoid) { - LOG(FATAL) << "Unreachable type " << type; - } - - if (Primitive::IsFloatingPointType(type)) { - size_t reg = FindFreeEntry(blocked_fpu_registers_, kNumberOfFpuRegisters); - return Location::FpuRegisterLocation(reg); - } else { - size_t reg = FindFreeEntry(blocked_core_registers_, kNumberOfGpuRegisters); - return Location::RegisterLocation(reg); - } -} - size_t CodeGeneratorMIPS64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) { __ StoreToOffset(kStoreDoubleword, GpuRegister(reg_id), SP, stack_index); return kMips64WordSize; @@ -3031,9 +3016,9 @@ void LocationsBuilderMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke) { } void LocationsBuilderMIPS64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { - // When we do not run baseline, explicit clinit checks triggered by static - // invokes must have been pruned by art::PrepareForRegisterAllocation. - DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck()); + // Explicit clinit checks triggered by static invokes must have been pruned by + // art::PrepareForRegisterAllocation. 
+ DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); IntrinsicLocationsBuilderMIPS64 intrinsic(codegen_); if (intrinsic.TryDispatch(invoke)) { @@ -3182,9 +3167,9 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo } void InstructionCodeGeneratorMIPS64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { - // When we do not run baseline, explicit clinit checks triggered by static - // invokes must have been pruned by art::PrepareForRegisterAllocation. - DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck()); + // Explicit clinit checks triggered by static invokes must have been pruned by + // art::PrepareForRegisterAllocation. + DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); if (TryGenerateIntrinsicCode(invoke, codegen_)) { return; diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h index 7182e8e987..140ff95f14 100644 --- a/compiler/optimizing/code_generator_mips64.h +++ b/compiler/optimizing/code_generator_mips64.h @@ -289,10 +289,7 @@ class CodeGeneratorMIPS64 : public CodeGenerator { // Register allocation. - void SetupBlockedRegisters(bool is_baseline) const OVERRIDE; - // AllocateFreeRegister() is only used when allocating registers locally - // during CompileBaseline(). - Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE; + void SetupBlockedRegisters() const OVERRIDE; Location GetStackLocation(HLoadLocal* load) const OVERRIDE; diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 6259acded3..f7ccdd8b8f 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -817,65 +817,13 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister)); } -Location CodeGeneratorX86::AllocateFreeRegister(Primitive::Type type) const { - switch (type) { - case Primitive::kPrimLong: { - size_t reg = FindFreeEntry(blocked_register_pairs_, kNumberOfRegisterPairs); - X86ManagedRegister pair = - X86ManagedRegister::FromRegisterPair(static_cast<RegisterPair>(reg)); - DCHECK(!blocked_core_registers_[pair.AsRegisterPairLow()]); - DCHECK(!blocked_core_registers_[pair.AsRegisterPairHigh()]); - blocked_core_registers_[pair.AsRegisterPairLow()] = true; - blocked_core_registers_[pair.AsRegisterPairHigh()] = true; - UpdateBlockedPairRegisters(); - return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh()); - } - - case Primitive::kPrimByte: - case Primitive::kPrimBoolean: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimNot: { - Register reg = static_cast<Register>( - FindFreeEntry(blocked_core_registers_, kNumberOfCpuRegisters)); - // Block all register pairs that contain `reg`. 
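The DCHECK rewritten in every invoke visitor in this change leans on a single pass invariant: art::PrepareForRegisterAllocation folds an explicit clinit check back into the static invoke before code generation runs. A hedged sketch of that folding, with method names assumed from the pass rather than quoted from it:

    // Inside PrepareForRegisterAllocation (sketch):
    if (invoke->IsStaticWithExplicitClinitCheck()) {
      // Detach the HClinitCheck input; the invoke now performs the
      // initialization check implicitly when it executes.
      invoke->RemoveExplicitClinitCheck(
          HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit);
    }

Since the baseline compiler, which skipped that pass, is being removed, the IsBaseline() escape hatch in the assertion can go.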
- for (int i = 0; i < kNumberOfRegisterPairs; i++) { - X86ManagedRegister current = - X86ManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i)); - if (current.AsRegisterPairLow() == reg || current.AsRegisterPairHigh() == reg) { - blocked_register_pairs_[i] = true; - } - } - return Location::RegisterLocation(reg); - } - - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { - return Location::FpuRegisterLocation( - FindFreeEntry(blocked_fpu_registers_, kNumberOfXmmRegisters)); - } - - case Primitive::kPrimVoid: - LOG(FATAL) << "Unreachable type " << type; - } - - return Location::NoLocation(); -} - -void CodeGeneratorX86::SetupBlockedRegisters(bool is_baseline) const { +void CodeGeneratorX86::SetupBlockedRegisters() const { // Don't allocate the dalvik style register pair passing. blocked_register_pairs_[ECX_EDX] = true; // Stack register is always reserved. blocked_core_registers_[ESP] = true; - if (is_baseline) { - blocked_core_registers_[EBP] = true; - blocked_core_registers_[ESI] = true; - blocked_core_registers_[EDI] = true; - } - UpdateBlockedPairRegisters(); } @@ -1981,9 +1929,9 @@ void InstructionCodeGeneratorX86::VisitInvokeUnresolved(HInvokeUnresolved* invok } void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { - // When we do not run baseline, explicit clinit checks triggered by static - // invokes must have been pruned by art::PrepareForRegisterAllocation. - DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck()); + // Explicit clinit checks triggered by static invokes must have been pruned by + // art::PrepareForRegisterAllocation. + DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); IntrinsicLocationsBuilderX86 intrinsic(codegen_); if (intrinsic.TryDispatch(invoke)) { @@ -1999,17 +1947,6 @@ void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invok if (invoke->HasPcRelativeDexCache()) { invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister()); } - - if (codegen_->IsBaseline()) { - // Baseline does not have enough registers if the current method also - // needs a register. We therefore do not require a register for it, and let - // the code generation of the invoke handle it. - LocationSummary* locations = invoke->GetLocations(); - Location location = locations->InAt(invoke->GetSpecialInputIndex()); - if (location.IsUnallocated() && location.GetPolicy() == Location::kRequiresRegister) { - locations->SetInAt(invoke->GetSpecialInputIndex(), Location::NoLocation()); - } - } } static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86* codegen) { @@ -2022,9 +1959,9 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86* codegen) } void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { - // When we do not run baseline, explicit clinit checks triggered by static - // invokes must have been pruned by art::PrepareForRegisterAllocation. - DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck()); + // Explicit clinit checks triggered by static invokes must have been pruned by + // art::PrepareForRegisterAllocation. 
+ DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); if (TryGenerateIntrinsicCode(invoke, codegen_)) { return; @@ -4286,7 +4223,7 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, if (current_method.IsRegister()) { method_reg = current_method.AsRegister<Register>(); } else { - DCHECK(IsBaseline() || invoke->GetLocations()->Intrinsified()); + DCHECK(invoke->GetLocations()->Intrinsified()); DCHECK(!current_method.IsValid()); method_reg = reg; __ movl(reg, Address(ESP, kCurrentMethodStackOffset)); @@ -5076,11 +5013,6 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { } void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) { - // This location builder might end up asking to up to four registers, which is - // not currently possible for baseline. The situation in which we need four - // registers cannot be met by baseline though, because it has not run any - // optimization. - Primitive::Type value_type = instruction->GetComponentType(); bool needs_write_barrier = @@ -6077,7 +6009,7 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: { // Note that we indeed only call on slow path, but we always go - // into the slow path for the unresolved & interface check + // into the slow path for the unresolved and interface check // cases. // // We cannot directly call the InstanceofNonTrivial runtime @@ -6308,8 +6240,8 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: - // We always go into the type check slow path for the unresolved & - // interface check cases. + // We always go into the type check slow path for the unresolved + // and interface check cases. // // We cannot directly call the CheckCast runtime entry point // without resorting to a type checking slow path here (i.e. by @@ -6588,6 +6520,8 @@ void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(HInstruction* instruct // Plain GC root load with no read barrier. // /* GcRoot<mirror::Object> */ root = *(obj + offset) __ movl(root_reg, Address(obj, offset)); + // Note that GC roots are not affected by heap poisoning, thus we + // do not have to unpoison `root_reg` here. } } @@ -6650,7 +6584,9 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i // Note: the original implementation in ReadBarrier::Barrier is // slightly more complex as: // - it implements the load-load fence using a data dependency on - // the high-bits of rb_state, which are expected to be all zeroes; + // the high-bits of rb_state, which are expected to be all zeroes + // (we use CodeGeneratorX86::GenerateMemoryBarrier instead here, + // which is a no-op thanks to the x86 memory model); // - it performs additional checks that we do not do here for // performance reasons. 
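The new parenthetical about CodeGeneratorX86::GenerateMemoryBarrier deserves one concrete illustration: on x86 only a full any-any (StoreLoad) barrier needs an instruction, so the load-load fence the read barrier protocol asks for costs nothing. A sketch under that assumption, not a quote of the actual implementation:

    void CodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) {
      if (kind == MemBarrierKind::kAnyAny) {
        __ mfence();  // the one ordering x86 does not give for free
      }
      // Load-load, load-store and store-store are already guaranteed
      // by the x86 memory model: emit nothing.
    }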
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index c65c423eae..43e9543e41 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -359,9 +359,7 @@ class CodeGeneratorX86 : public CodeGenerator { return GetLabelOf(block)->Position(); } - void SetupBlockedRegisters(bool is_baseline) const OVERRIDE; - - Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE; + void SetupBlockedRegisters() const OVERRIDE; Location GetStackLocation(HLoadLocal* load) const OVERRIDE; @@ -453,7 +451,7 @@ class CodeGeneratorX86 : public CodeGenerator { // Fast path implementation of ReadBarrier::Barrier for a heap // reference field load when Baker's read barriers are used. void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, - Location out, + Location ref, Register obj, uint32_t offset, Location temp, @@ -461,7 +459,7 @@ class CodeGeneratorX86 : public CodeGenerator { // Fast path implementation of ReadBarrier::Barrier for a heap // reference array load when Baker's read barriers are used. void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, - Location out, + Location ref, Register obj, uint32_t data_offset, Location index, diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index e024ce2b6c..2ce2d91502 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -1002,47 +1002,12 @@ InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph, assembler_(codegen->GetAssembler()), codegen_(codegen) {} -Location CodeGeneratorX86_64::AllocateFreeRegister(Primitive::Type type) const { - switch (type) { - case Primitive::kPrimLong: - case Primitive::kPrimByte: - case Primitive::kPrimBoolean: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimNot: { - size_t reg = FindFreeEntry(blocked_core_registers_, kNumberOfCpuRegisters); - return Location::RegisterLocation(reg); - } - - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { - size_t reg = FindFreeEntry(blocked_fpu_registers_, kNumberOfFloatRegisters); - return Location::FpuRegisterLocation(reg); - } - - case Primitive::kPrimVoid: - LOG(FATAL) << "Unreachable type " << type; - } - - return Location::NoLocation(); -} - -void CodeGeneratorX86_64::SetupBlockedRegisters(bool is_baseline) const { +void CodeGeneratorX86_64::SetupBlockedRegisters() const { // Stack register is always reserved. blocked_core_registers_[RSP] = true; // Block the register used as TMP. blocked_core_registers_[TMP] = true; - - if (is_baseline) { - for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) { - blocked_core_registers_[kCoreCalleeSaves[i]] = true; - } - for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) { - blocked_fpu_registers_[kFpuCalleeSaves[i]] = true; - } - } } static dwarf::Reg DWARFReg(Register reg) { @@ -2161,9 +2126,9 @@ void InstructionCodeGeneratorX86_64::VisitInvokeUnresolved(HInvokeUnresolved* in } void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { - // When we do not run baseline, explicit clinit checks triggered by static - // invokes must have been pruned by art::PrepareForRegisterAllocation. - DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck()); + // Explicit clinit checks triggered by static invokes must have been pruned by + // art::PrepareForRegisterAllocation. 
+ DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); IntrinsicLocationsBuilderX86_64 intrinsic(codegen_); if (intrinsic.TryDispatch(invoke)) { @@ -2183,9 +2148,9 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86_64* codeg } void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { - // When we do not run baseline, explicit clinit checks triggered by static - // invokes must have been pruned by art::PrepareForRegisterAllocation. - DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck()); + // Explicit clinit checks triggered by static invokes must have been pruned by + // art::PrepareForRegisterAllocation. + DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); if (TryGenerateIntrinsicCode(invoke, codegen_)) { return; @@ -4698,13 +4663,13 @@ void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) { bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); - bool may_need_runtime_call = instruction->NeedsTypeCheck(); + bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); bool object_array_set_with_read_barrier = kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( instruction, - (may_need_runtime_call || object_array_set_with_read_barrier) ? + (may_need_runtime_call_for_type_check || object_array_set_with_read_barrier) ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); @@ -4733,7 +4698,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { Location index = locations->InAt(1); Location value = locations->InAt(2); Primitive::Type value_type = instruction->GetComponentType(); - bool may_need_runtime_call = instruction->NeedsTypeCheck(); + bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); @@ -4785,7 +4750,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { __ movl(address, Immediate(0)); codegen_->MaybeRecordImplicitNullCheck(instruction); DCHECK(!needs_write_barrier); - DCHECK(!may_need_runtime_call); + DCHECK(!may_need_runtime_call_for_type_check); break; } @@ -4794,7 +4759,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { NearLabel done, not_null, do_put; SlowPathCode* slow_path = nullptr; CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); - if (may_need_runtime_call) { + if (may_need_runtime_call_for_type_check) { slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathX86_64(instruction); codegen_->AddSlowPath(slow_path); if (instruction->GetValueCanBeNull()) { @@ -4872,7 +4837,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { } else { __ movl(address, register_value); } - if (!may_need_runtime_call) { + if (!may_need_runtime_call_for_type_check) { codegen_->MaybeRecordImplicitNullCheck(instruction); } @@ -5661,7 +5626,7 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: { // Note that we indeed only call on slow path, but we always go - // into the slow path for the unresolved & interface check + // into the slow path for the unresolved and interface check // cases. 
// // We cannot directly call the InstanceofNonTrivial runtime @@ -5892,8 +5857,8 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: - // We always go into the type check slow path for the unresolved & - // interface check cases. + // We always go into the type check slow path for the unresolved + // and interface check cases. // // We cannot directly call the CheckCast runtime entry point // without resorting to a type checking slow path here (i.e. by @@ -6155,6 +6120,8 @@ void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(HInstruction* instr // Plain GC root load with no read barrier. // /* GcRoot<mirror::Object> */ root = *(obj + offset) __ movl(root_reg, Address(obj, offset)); + // Note that GC roots are not affected by heap poisoning, thus we + // do not have to unpoison `root_reg` here. } } @@ -6217,7 +6184,9 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction // Note: the original implementation in ReadBarrier::Barrier is // slightly more complex as: // - it implements the load-load fence using a data dependency on - // the high-bits of rb_state, which are expected to be all zeroes; + // the high-bits of rb_state, which are expected to be all zeroes + // (we use CodeGeneratorX86_64::GenerateMemoryBarrier instead + // here, which is a no-op thanks to the x86-64 memory model); // - it performs additional checks that we do not do here for // performance reasons. diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 505c9dcdad..82aabb04d3 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -347,8 +347,7 @@ class CodeGeneratorX86_64 : public CodeGenerator { Location GetStackLocation(HLoadLocal* load) const OVERRIDE; - void SetupBlockedRegisters(bool is_baseline) const OVERRIDE; - Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE; + void SetupBlockedRegisters() const OVERRIDE; void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE; void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE; void Finalize(CodeAllocator* allocator) OVERRIDE; @@ -401,7 +400,7 @@ class CodeGeneratorX86_64 : public CodeGenerator { // Fast path implementation of ReadBarrier::Barrier for a heap // reference field load when Baker's read barriers are used. void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, - Location out, + Location ref, CpuRegister obj, uint32_t offset, Location temp, @@ -409,7 +408,7 @@ class CodeGeneratorX86_64 : public CodeGenerator { // Fast path implementation of ReadBarrier::Barrier for a heap // reference array load when Baker's read barriers are used. 
void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, - Location out, + Location ref, CpuRegister obj, uint32_t data_offset, Location index, diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc index d970704368..19d63de499 100644 --- a/compiler/optimizing/codegen_test.cc +++ b/compiler/optimizing/codegen_test.cc @@ -40,6 +40,7 @@ #include "dex_file.h" #include "dex_instruction.h" #include "driver/compiler_options.h" +#include "graph_checker.h" #include "nodes.h" #include "optimizing_unit_test.h" #include "prepare_for_register_allocation.h" @@ -70,8 +71,8 @@ class TestCodeGeneratorARM : public arm::CodeGeneratorARM { AddAllocatedRegister(Location::RegisterLocation(arm::R7)); } - void SetupBlockedRegisters(bool is_baseline) const OVERRIDE { - arm::CodeGeneratorARM::SetupBlockedRegisters(is_baseline); + void SetupBlockedRegisters() const OVERRIDE { + arm::CodeGeneratorARM::SetupBlockedRegisters(); blocked_core_registers_[arm::R4] = true; blocked_core_registers_[arm::R6] = false; blocked_core_registers_[arm::R7] = false; @@ -90,8 +91,8 @@ class TestCodeGeneratorX86 : public x86::CodeGeneratorX86 { AddAllocatedRegister(Location::RegisterLocation(x86::EDI)); } - void SetupBlockedRegisters(bool is_baseline) const OVERRIDE { - x86::CodeGeneratorX86::SetupBlockedRegisters(is_baseline); + void SetupBlockedRegisters() const OVERRIDE { + x86::CodeGeneratorX86::SetupBlockedRegisters(); // ebx is a callee-save register in C, but caller-save for ART. blocked_core_registers_[x86::EBX] = true; blocked_register_pairs_[x86::EAX_EBX] = true; @@ -200,259 +201,228 @@ static void Run(const InternalCodeAllocator& allocator, } template <typename Expected> -static void RunCodeBaseline(InstructionSet target_isa, - HGraph* graph, - bool has_result, - Expected expected) { - InternalCodeAllocator allocator; - - CompilerOptions compiler_options; - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - TestCodeGeneratorX86 codegenX86(graph, *features_x86.get(), compiler_options); - // We avoid doing a stack overflow check that requires the runtime being setup, - // by making sure the compiler knows the methods we are running are leaf methods. 
- codegenX86.CompileBaseline(&allocator, true); - if (target_isa == kX86) { - Run(allocator, codegenX86, has_result, expected); - } +static void RunCode(CodeGenerator* codegen, + HGraph* graph, + std::function<void(HGraph*)> hook_before_codegen, + bool has_result, + Expected expected) { + ASSERT_TRUE(graph->IsInSsaForm()); - std::unique_ptr<const ArmInstructionSetFeatures> features_arm( - ArmInstructionSetFeatures::FromCppDefines()); - TestCodeGeneratorARM codegenARM(graph, *features_arm.get(), compiler_options); - codegenARM.CompileBaseline(&allocator, true); - if (target_isa == kArm || target_isa == kThumb2) { - Run(allocator, codegenARM, has_result, expected); - } - - std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64( - X86_64InstructionSetFeatures::FromCppDefines()); - x86_64::CodeGeneratorX86_64 codegenX86_64(graph, *features_x86_64.get(), compiler_options); - codegenX86_64.CompileBaseline(&allocator, true); - if (target_isa == kX86_64) { - Run(allocator, codegenX86_64, has_result, expected); - } - - std::unique_ptr<const Arm64InstructionSetFeatures> features_arm64( - Arm64InstructionSetFeatures::FromCppDefines()); - arm64::CodeGeneratorARM64 codegenARM64(graph, *features_arm64.get(), compiler_options); - codegenARM64.CompileBaseline(&allocator, true); - if (target_isa == kArm64) { - Run(allocator, codegenARM64, has_result, expected); - } - - std::unique_ptr<const MipsInstructionSetFeatures> features_mips( - MipsInstructionSetFeatures::FromCppDefines()); - mips::CodeGeneratorMIPS codegenMIPS(graph, *features_mips.get(), compiler_options); - codegenMIPS.CompileBaseline(&allocator, true); - if (kRuntimeISA == kMips) { - Run(allocator, codegenMIPS, has_result, expected); - } - - std::unique_ptr<const Mips64InstructionSetFeatures> features_mips64( - Mips64InstructionSetFeatures::FromCppDefines()); - mips64::CodeGeneratorMIPS64 codegenMIPS64(graph, *features_mips64.get(), compiler_options); - codegenMIPS64.CompileBaseline(&allocator, true); - if (target_isa == kMips64) { - Run(allocator, codegenMIPS64, has_result, expected); - } -} + SSAChecker graph_checker(graph); + graph_checker.Run(); + ASSERT_TRUE(graph_checker.IsValid()); -template <typename Expected> -static void RunCodeOptimized(CodeGenerator* codegen, - HGraph* graph, - std::function<void(HGraph*)> hook_before_codegen, - bool has_result, - Expected expected) { - // Tests may have already computed it. 
- if (graph->GetReversePostOrder().empty()) { - graph->BuildDominatorTree(); - } SsaLivenessAnalysis liveness(graph, codegen); - liveness.Analyze(); - RegisterAllocator register_allocator(graph->GetArena(), codegen, liveness); - register_allocator.AllocateRegisters(); + PrepareForRegisterAllocation(graph).Run(); + liveness.Analyze(); + RegisterAllocator(graph->GetArena(), codegen, liveness).AllocateRegisters(); hook_before_codegen(graph); InternalCodeAllocator allocator; - codegen->CompileOptimized(&allocator); + codegen->Compile(&allocator); Run(allocator, *codegen, has_result, expected); } template <typename Expected> -static void RunCodeOptimized(InstructionSet target_isa, - HGraph* graph, - std::function<void(HGraph*)> hook_before_codegen, - bool has_result, - Expected expected) { +static void RunCode(InstructionSet target_isa, + HGraph* graph, + std::function<void(HGraph*)> hook_before_codegen, + bool has_result, + Expected expected) { CompilerOptions compiler_options; if (target_isa == kArm || target_isa == kThumb2) { std::unique_ptr<const ArmInstructionSetFeatures> features_arm( ArmInstructionSetFeatures::FromCppDefines()); TestCodeGeneratorARM codegenARM(graph, *features_arm.get(), compiler_options); - RunCodeOptimized(&codegenARM, graph, hook_before_codegen, has_result, expected); + RunCode(&codegenARM, graph, hook_before_codegen, has_result, expected); } else if (target_isa == kArm64) { std::unique_ptr<const Arm64InstructionSetFeatures> features_arm64( Arm64InstructionSetFeatures::FromCppDefines()); arm64::CodeGeneratorARM64 codegenARM64(graph, *features_arm64.get(), compiler_options); - RunCodeOptimized(&codegenARM64, graph, hook_before_codegen, has_result, expected); + RunCode(&codegenARM64, graph, hook_before_codegen, has_result, expected); } else if (target_isa == kX86) { std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), compiler_options); - RunCodeOptimized(&codegenX86, graph, hook_before_codegen, has_result, expected); + RunCode(&codegenX86, graph, hook_before_codegen, has_result, expected); } else if (target_isa == kX86_64) { std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64( X86_64InstructionSetFeatures::FromCppDefines()); x86_64::CodeGeneratorX86_64 codegenX86_64(graph, *features_x86_64.get(), compiler_options); - RunCodeOptimized(&codegenX86_64, graph, hook_before_codegen, has_result, expected); + RunCode(&codegenX86_64, graph, hook_before_codegen, has_result, expected); } else if (target_isa == kMips) { std::unique_ptr<const MipsInstructionSetFeatures> features_mips( MipsInstructionSetFeatures::FromCppDefines()); mips::CodeGeneratorMIPS codegenMIPS(graph, *features_mips.get(), compiler_options); - RunCodeOptimized(&codegenMIPS, graph, hook_before_codegen, has_result, expected); + RunCode(&codegenMIPS, graph, hook_before_codegen, has_result, expected); } else if (target_isa == kMips64) { std::unique_ptr<const Mips64InstructionSetFeatures> features_mips64( Mips64InstructionSetFeatures::FromCppDefines()); mips64::CodeGeneratorMIPS64 codegenMIPS64(graph, *features_mips64.get(), compiler_options); - RunCodeOptimized(&codegenMIPS64, graph, hook_before_codegen, has_result, expected); + RunCode(&codegenMIPS64, graph, hook_before_codegen, has_result, expected); } } -static void TestCode(InstructionSet target_isa, - const uint16_t* data, +static ::std::vector<InstructionSet> GetTargetISAs() { + ::std::vector<InstructionSet> v; + // Add all 
ISAs that are executable on hardware or on simulator. + const ::std::vector<InstructionSet> executable_isa_candidates = { + kArm, + kArm64, + kThumb2, + kX86, + kX86_64, + kMips, + kMips64 + }; + + for (auto target_isa : executable_isa_candidates) { + if (CanExecute(target_isa)) { + v.push_back(target_isa); + } + } + + return v; +} + +static void TestCode(const uint16_t* data, bool has_result = false, int32_t expected = 0) { - ArenaPool pool; - ArenaAllocator arena(&pool); - HGraph* graph = CreateGraph(&arena); - HGraphBuilder builder(graph); - const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); - bool graph_built = builder.BuildGraph(*item); - ASSERT_TRUE(graph_built); - // Remove suspend checks, they cannot be executed in this context. - RemoveSuspendChecks(graph); - RunCodeBaseline(target_isa, graph, has_result, expected); -} - -static void TestCodeLong(InstructionSet target_isa, - const uint16_t* data, + for (InstructionSet target_isa : GetTargetISAs()) { + ArenaPool pool; + ArenaAllocator arena(&pool); + HGraph* graph = CreateGraph(&arena); + HGraphBuilder builder(graph); + const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); + bool graph_built = builder.BuildGraph(*item); + ASSERT_TRUE(graph_built); + // Remove suspend checks, they cannot be executed in this context. + RemoveSuspendChecks(graph); + TransformToSsa(graph); + RunCode(target_isa, graph, [](HGraph*) {}, has_result, expected); + } +} + +static void TestCodeLong(const uint16_t* data, bool has_result, int64_t expected) { - ArenaPool pool; - ArenaAllocator arena(&pool); - HGraph* graph = CreateGraph(&arena); - HGraphBuilder builder(graph, Primitive::kPrimLong); - const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); - bool graph_built = builder.BuildGraph(*item); - ASSERT_TRUE(graph_built); - // Remove suspend checks, they cannot be executed in this context. - RemoveSuspendChecks(graph); - RunCodeBaseline(target_isa, graph, has_result, expected); + for (InstructionSet target_isa : GetTargetISAs()) { + ArenaPool pool; + ArenaAllocator arena(&pool); + HGraph* graph = CreateGraph(&arena); + HGraphBuilder builder(graph, Primitive::kPrimLong); + const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); + bool graph_built = builder.BuildGraph(*item); + ASSERT_TRUE(graph_built); + // Remove suspend checks, they cannot be executed in this context. 
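With the TEST_P to TEST_F conversion, a test no longer receives an ISA parameter; TestCode itself loops over GetTargetISAs and runs wherever execution is possible. A hypothetical new test in the converted style, mirroring the existing ones (the constant 5 is arbitrary):

    TEST_F(CodegenTest, ReturnConst5) {
      const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
          Instruction::CONST_4 | 5 << 12 | 0 << 8,  // v0 = 5
          Instruction::RETURN | 0);                 // return v0
      TestCode(data, /* has_result */ true, /* expected */ 5);
    }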
+ RemoveSuspendChecks(graph); + TransformToSsa(graph); + RunCode(target_isa, graph, [](HGraph*) {}, has_result, expected); + } } -class CodegenTest: public ::testing::TestWithParam<InstructionSet> {}; +class CodegenTest : public CommonCompilerTest {}; -TEST_P(CodegenTest, ReturnVoid) { +TEST_F(CodegenTest, ReturnVoid) { const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(Instruction::RETURN_VOID); - TestCode(GetParam(), data); + TestCode(data); } -TEST_P(CodegenTest, CFG1) { +TEST_F(CodegenTest, CFG1) { const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO | 0x100, Instruction::RETURN_VOID); - TestCode(GetParam(), data); + TestCode(data); } -TEST_P(CodegenTest, CFG2) { +TEST_F(CodegenTest, CFG2) { const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO | 0x100, Instruction::GOTO | 0x100, Instruction::RETURN_VOID); - TestCode(GetParam(), data); + TestCode(data); } -TEST_P(CodegenTest, CFG3) { +TEST_F(CodegenTest, CFG3) { const uint16_t data1[] = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO | 0x200, Instruction::RETURN_VOID, Instruction::GOTO | 0xFF00); - TestCode(GetParam(), data1); + TestCode(data1); const uint16_t data2[] = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO_16, 3, Instruction::RETURN_VOID, Instruction::GOTO_16, 0xFFFF); - TestCode(GetParam(), data2); + TestCode(data2); const uint16_t data3[] = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO_32, 4, 0, Instruction::RETURN_VOID, Instruction::GOTO_32, 0xFFFF, 0xFFFF); - TestCode(GetParam(), data3); + TestCode(data3); } -TEST_P(CodegenTest, CFG4) { +TEST_F(CodegenTest, CFG4) { const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( Instruction::RETURN_VOID, Instruction::GOTO | 0x100, Instruction::GOTO | 0xFE00); - TestCode(GetParam(), data); + TestCode(data); } -TEST_P(CodegenTest, CFG5) { +TEST_F(CodegenTest, CFG5) { const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::GOTO | 0x100, Instruction::RETURN_VOID); - TestCode(GetParam(), data); + TestCode(data); } -TEST_P(CodegenTest, IntConstant) { +TEST_F(CodegenTest, IntConstant) { const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::RETURN_VOID); - TestCode(GetParam(), data); + TestCode(data); } -TEST_P(CodegenTest, Return1) { +TEST_F(CodegenTest, Return1) { const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::RETURN | 0); - TestCode(GetParam(), data, true, 0); + TestCode(data, true, 0); } -TEST_P(CodegenTest, Return2) { +TEST_F(CodegenTest, Return2) { const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::CONST_4 | 0 | 1 << 8, Instruction::RETURN | 1 << 8); - TestCode(GetParam(), data, true, 0); + TestCode(data, true, 0); } -TEST_P(CodegenTest, Return3) { +TEST_F(CodegenTest, Return3) { const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::CONST_4 | 1 << 8 | 1 << 12, Instruction::RETURN | 1 << 8); - TestCode(GetParam(), data, true, 1); + TestCode(data, true, 1); } -TEST_P(CodegenTest, ReturnIf1) { +TEST_F(CodegenTest, ReturnIf1) { const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::CONST_4 | 1 << 8 | 1 << 12, @@ -460,10 +430,10 @@ TEST_P(CodegenTest, ReturnIf1) { Instruction::RETURN | 0 << 8, Instruction::RETURN | 1 << 8); - TestCode(GetParam(), data, true, 1); + TestCode(data, true, 1); } -TEST_P(CodegenTest, ReturnIf2) { +TEST_F(CodegenTest, ReturnIf2) { const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 
0, Instruction::CONST_4 | 1 << 8 | 1 << 12, @@ -471,12 +441,12 @@ TEST_P(CodegenTest, ReturnIf2) { Instruction::RETURN | 0 << 8, Instruction::RETURN | 1 << 8); - TestCode(GetParam(), data, true, 0); + TestCode(data, true, 0); } // Exercise bit-wise (one's complement) not-int instruction. #define NOT_INT_TEST(TEST_NAME, INPUT, EXPECTED_OUTPUT) \ -TEST_P(CodegenTest, TEST_NAME) { \ +TEST_F(CodegenTest, TEST_NAME) { \ const int32_t input = INPUT; \ const uint16_t input_lo = Low16Bits(input); \ const uint16_t input_hi = High16Bits(input); \ @@ -485,7 +455,7 @@ TEST_P(CodegenTest, TEST_NAME) { \ Instruction::NOT_INT | 1 << 8 | 0 << 12 , \ Instruction::RETURN | 1 << 8); \ \ - TestCode(GetParam(), data, true, EXPECTED_OUTPUT); \ + TestCode(data, true, EXPECTED_OUTPUT); \ } NOT_INT_TEST(ReturnNotIntMinus2, -2, 1) @@ -501,7 +471,7 @@ NOT_INT_TEST(ReturnNotIntINT32_MAX, 2147483647, -2147483648) // -(2^31) // Exercise bit-wise (one's complement) not-long instruction. #define NOT_LONG_TEST(TEST_NAME, INPUT, EXPECTED_OUTPUT) \ -TEST_P(CodegenTest, TEST_NAME) { \ +TEST_F(CodegenTest, TEST_NAME) { \ const int64_t input = INPUT; \ const uint16_t word0 = Low16Bits(Low32Bits(input)); /* LSW. */ \ const uint16_t word1 = High16Bits(Low32Bits(input)); \ @@ -512,7 +482,7 @@ TEST_P(CodegenTest, TEST_NAME) { \ Instruction::NOT_LONG | 2 << 8 | 0 << 12, \ Instruction::RETURN_WIDE | 2 << 8); \ \ - TestCodeLong(GetParam(), data, true, EXPECTED_OUTPUT); \ + TestCodeLong(data, true, EXPECTED_OUTPUT); \ } NOT_LONG_TEST(ReturnNotLongMinus2, INT64_C(-2), INT64_C(1)) @@ -551,7 +521,7 @@ NOT_LONG_TEST(ReturnNotLongINT64_MAX, #undef NOT_LONG_TEST -TEST_P(CodegenTest, IntToLongOfLongToInt) { +TEST_F(CodegenTest, IntToLongOfLongToInt) { const int64_t input = INT64_C(4294967296); // 2^32 const uint16_t word0 = Low16Bits(Low32Bits(input)); // LSW. 
const uint16_t word1 = High16Bits(Low32Bits(input)); @@ -565,192 +535,146 @@ TEST_P(CodegenTest, IntToLongOfLongToInt) { Instruction::INT_TO_LONG | 2 << 8 | 4 << 12, Instruction::RETURN_WIDE | 2 << 8); - TestCodeLong(GetParam(), data, true, 1); + TestCodeLong(data, true, 1); } -TEST_P(CodegenTest, ReturnAdd1) { +TEST_F(CodegenTest, ReturnAdd1) { const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 3 << 12 | 0, Instruction::CONST_4 | 4 << 12 | 1 << 8, Instruction::ADD_INT, 1 << 8 | 0, Instruction::RETURN); - TestCode(GetParam(), data, true, 7); + TestCode(data, true, 7); } -TEST_P(CodegenTest, ReturnAdd2) { +TEST_F(CodegenTest, ReturnAdd2) { const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 3 << 12 | 0, Instruction::CONST_4 | 4 << 12 | 1 << 8, Instruction::ADD_INT_2ADDR | 1 << 12, Instruction::RETURN); - TestCode(GetParam(), data, true, 7); + TestCode(data, true, 7); } -TEST_P(CodegenTest, ReturnAdd3) { +TEST_F(CodegenTest, ReturnAdd3) { const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 4 << 12 | 0 << 8, Instruction::ADD_INT_LIT8, 3 << 8 | 0, Instruction::RETURN); - TestCode(GetParam(), data, true, 7); + TestCode(data, true, 7); } -TEST_P(CodegenTest, ReturnAdd4) { +TEST_F(CodegenTest, ReturnAdd4) { const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 4 << 12 | 0 << 8, Instruction::ADD_INT_LIT16, 3, Instruction::RETURN); - TestCode(GetParam(), data, true, 7); -} - -TEST_P(CodegenTest, NonMaterializedCondition) { - ArenaPool pool; - ArenaAllocator allocator(&pool); - - HGraph* graph = CreateGraph(&allocator); - HBasicBlock* entry = new (&allocator) HBasicBlock(graph); - graph->AddBlock(entry); - graph->SetEntryBlock(entry); - entry->AddInstruction(new (&allocator) HGoto()); - - HBasicBlock* first_block = new (&allocator) HBasicBlock(graph); - graph->AddBlock(first_block); - entry->AddSuccessor(first_block); - HIntConstant* constant0 = graph->GetIntConstant(0); - HIntConstant* constant1 = graph->GetIntConstant(1); - HEqual* equal = new (&allocator) HEqual(constant0, constant0); - first_block->AddInstruction(equal); - first_block->AddInstruction(new (&allocator) HIf(equal)); - - HBasicBlock* then = new (&allocator) HBasicBlock(graph); - HBasicBlock* else_ = new (&allocator) HBasicBlock(graph); - HBasicBlock* exit = new (&allocator) HBasicBlock(graph); - - graph->AddBlock(then); - graph->AddBlock(else_); - graph->AddBlock(exit); - first_block->AddSuccessor(then); - first_block->AddSuccessor(else_); - then->AddSuccessor(exit); - else_->AddSuccessor(exit); - - exit->AddInstruction(new (&allocator) HExit()); - then->AddInstruction(new (&allocator) HReturn(constant0)); - else_->AddInstruction(new (&allocator) HReturn(constant1)); - - ASSERT_TRUE(equal->NeedsMaterialization()); - graph->BuildDominatorTree(); - PrepareForRegisterAllocation(graph).Run(); - ASSERT_FALSE(equal->NeedsMaterialization()); - - auto hook_before_codegen = [](HGraph* graph_in) { - HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0]; - HParallelMove* move = new (graph_in->GetArena()) HParallelMove(graph_in->GetArena()); - block->InsertInstructionBefore(move, block->GetLastInstruction()); - }; - - RunCodeOptimized(GetParam(), graph, hook_before_codegen, true, 0); + TestCode(data, true, 7); } -TEST_P(CodegenTest, ReturnMulInt) { +TEST_F(CodegenTest, ReturnMulInt) { const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 3 << 12 | 0, Instruction::CONST_4 | 4 << 12 | 1 << 8, Instruction::MUL_INT, 1 << 8 | 0, 
Instruction::RETURN); - TestCode(GetParam(), data, true, 12); + TestCode(data, true, 12); } -TEST_P(CodegenTest, ReturnMulInt2addr) { +TEST_F(CodegenTest, ReturnMulInt2addr) { const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 3 << 12 | 0, Instruction::CONST_4 | 4 << 12 | 1 << 8, Instruction::MUL_INT_2ADDR | 1 << 12, Instruction::RETURN); - TestCode(GetParam(), data, true, 12); + TestCode(data, true, 12); } -TEST_P(CodegenTest, ReturnMulLong) { +TEST_F(CodegenTest, ReturnMulLong) { const uint16_t data[] = FOUR_REGISTERS_CODE_ITEM( - Instruction::CONST_4 | 3 << 12 | 0, - Instruction::CONST_4 | 0 << 12 | 1 << 8, - Instruction::CONST_4 | 4 << 12 | 2 << 8, - Instruction::CONST_4 | 0 << 12 | 3 << 8, + Instruction::CONST_WIDE | 0 << 8, 3, 0, 0, 0, + Instruction::CONST_WIDE | 2 << 8, 4, 0, 0, 0, Instruction::MUL_LONG, 2 << 8 | 0, Instruction::RETURN_WIDE); - TestCodeLong(GetParam(), data, true, 12); + TestCodeLong(data, true, 12); } -TEST_P(CodegenTest, ReturnMulLong2addr) { +TEST_F(CodegenTest, ReturnMulLong2addr) { const uint16_t data[] = FOUR_REGISTERS_CODE_ITEM( - Instruction::CONST_4 | 3 << 12 | 0 << 8, - Instruction::CONST_4 | 0 << 12 | 1 << 8, - Instruction::CONST_4 | 4 << 12 | 2 << 8, - Instruction::CONST_4 | 0 << 12 | 3 << 8, + Instruction::CONST_WIDE | 0 << 8, 3, 0, 0, 0, + Instruction::CONST_WIDE | 2 << 8, 4, 0, 0, 0, Instruction::MUL_LONG_2ADDR | 2 << 12, Instruction::RETURN_WIDE); - TestCodeLong(GetParam(), data, true, 12); + TestCodeLong(data, true, 12); } -TEST_P(CodegenTest, ReturnMulIntLit8) { +TEST_F(CodegenTest, ReturnMulIntLit8) { const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 4 << 12 | 0 << 8, Instruction::MUL_INT_LIT8, 3 << 8 | 0, Instruction::RETURN); - TestCode(GetParam(), data, true, 12); + TestCode(data, true, 12); } -TEST_P(CodegenTest, ReturnMulIntLit16) { +TEST_F(CodegenTest, ReturnMulIntLit16) { const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 4 << 12 | 0 << 8, Instruction::MUL_INT_LIT16, 3, Instruction::RETURN); - TestCode(GetParam(), data, true, 12); + TestCode(data, true, 12); } -TEST_P(CodegenTest, MaterializedCondition1) { - // Check that condition are materialized correctly. A materialized condition - // should yield `1` if it evaluated to true, and `0` otherwise. - // We force the materialization of comparisons for different combinations of - // inputs and check the results. 
- - int lhs[] = {1, 2, -1, 2, 0xabc}; - int rhs[] = {2, 1, 2, -1, 0xabc}; - - for (size_t i = 0; i < arraysize(lhs); i++) { +TEST_F(CodegenTest, NonMaterializedCondition) { + for (InstructionSet target_isa : GetTargetISAs()) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = CreateGraph(&allocator); - HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph); - graph->AddBlock(entry_block); - graph->SetEntryBlock(entry_block); - entry_block->AddInstruction(new (&allocator) HGoto()); - HBasicBlock* code_block = new (&allocator) HBasicBlock(graph); - graph->AddBlock(code_block); + HGraph* graph = CreateGraph(&allocator); + HBasicBlock* entry = new (&allocator) HBasicBlock(graph); + graph->AddBlock(entry); + graph->SetEntryBlock(entry); + entry->AddInstruction(new (&allocator) HGoto()); + + HBasicBlock* first_block = new (&allocator) HBasicBlock(graph); + graph->AddBlock(first_block); + entry->AddSuccessor(first_block); + HIntConstant* constant0 = graph->GetIntConstant(0); + HIntConstant* constant1 = graph->GetIntConstant(1); + HEqual* equal = new (&allocator) HEqual(constant0, constant0); + first_block->AddInstruction(equal); + first_block->AddInstruction(new (&allocator) HIf(equal)); + + HBasicBlock* then_block = new (&allocator) HBasicBlock(graph); + HBasicBlock* else_block = new (&allocator) HBasicBlock(graph); HBasicBlock* exit_block = new (&allocator) HBasicBlock(graph); + graph->SetExitBlock(exit_block); + + graph->AddBlock(then_block); + graph->AddBlock(else_block); graph->AddBlock(exit_block); - exit_block->AddInstruction(new (&allocator) HExit()); + first_block->AddSuccessor(then_block); + first_block->AddSuccessor(else_block); + then_block->AddSuccessor(exit_block); + else_block->AddSuccessor(exit_block); - entry_block->AddSuccessor(code_block); - code_block->AddSuccessor(exit_block); - graph->SetExitBlock(exit_block); + exit_block->AddInstruction(new (&allocator) HExit()); + then_block->AddInstruction(new (&allocator) HReturn(constant0)); + else_block->AddInstruction(new (&allocator) HReturn(constant1)); - HIntConstant* cst_lhs = graph->GetIntConstant(lhs[i]); - HIntConstant* cst_rhs = graph->GetIntConstant(rhs[i]); - HLessThan cmp_lt(cst_lhs, cst_rhs); - code_block->AddInstruction(&cmp_lt); - HReturn ret(&cmp_lt); - code_block->AddInstruction(&ret); + ASSERT_TRUE(equal->NeedsMaterialization()); + TransformToSsa(graph); + PrepareForRegisterAllocation(graph).Run(); + ASSERT_FALSE(equal->NeedsMaterialization()); auto hook_before_codegen = [](HGraph* graph_in) { HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0]; @@ -758,93 +682,143 @@ TEST_P(CodegenTest, MaterializedCondition1) { block->InsertInstructionBefore(move, block->GetLastInstruction()); }; - RunCodeOptimized(GetParam(), graph, hook_before_codegen, true, lhs[i] < rhs[i]); + RunCode(target_isa, graph, hook_before_codegen, true, 0); } } -TEST_P(CodegenTest, MaterializedCondition2) { - // Check that HIf correctly interprets a materialized condition. - // We force the materialization of comparisons for different combinations of - // inputs. An HIf takes the materialized combination as input and returns a - // value that we verify. 
- - int lhs[] = {1, 2, -1, 2, 0xabc}; - int rhs[] = {2, 1, 2, -1, 0xabc}; - - - for (size_t i = 0; i < arraysize(lhs); i++) { - ArenaPool pool; - ArenaAllocator allocator(&pool); - HGraph* graph = CreateGraph(&allocator); - - HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph); - graph->AddBlock(entry_block); - graph->SetEntryBlock(entry_block); - entry_block->AddInstruction(new (&allocator) HGoto()); - - HBasicBlock* if_block = new (&allocator) HBasicBlock(graph); - graph->AddBlock(if_block); - HBasicBlock* if_true_block = new (&allocator) HBasicBlock(graph); - graph->AddBlock(if_true_block); - HBasicBlock* if_false_block = new (&allocator) HBasicBlock(graph); - graph->AddBlock(if_false_block); - HBasicBlock* exit_block = new (&allocator) HBasicBlock(graph); - graph->AddBlock(exit_block); - exit_block->AddInstruction(new (&allocator) HExit()); - - graph->SetEntryBlock(entry_block); - entry_block->AddSuccessor(if_block); - if_block->AddSuccessor(if_true_block); - if_block->AddSuccessor(if_false_block); - if_true_block->AddSuccessor(exit_block); - if_false_block->AddSuccessor(exit_block); - graph->SetExitBlock(exit_block); - - HIntConstant* cst_lhs = graph->GetIntConstant(lhs[i]); - HIntConstant* cst_rhs = graph->GetIntConstant(rhs[i]); - HLessThan cmp_lt(cst_lhs, cst_rhs); - if_block->AddInstruction(&cmp_lt); - // We insert a temporary to separate the HIf from the HLessThan and force - // the materialization of the condition. - HTemporary force_materialization(0); - if_block->AddInstruction(&force_materialization); - HIf if_lt(&cmp_lt); - if_block->AddInstruction(&if_lt); - - HIntConstant* cst_lt = graph->GetIntConstant(1); - HReturn ret_lt(cst_lt); - if_true_block->AddInstruction(&ret_lt); - HIntConstant* cst_ge = graph->GetIntConstant(0); - HReturn ret_ge(cst_ge); - if_false_block->AddInstruction(&ret_ge); - - auto hook_before_codegen = [](HGraph* graph_in) { - HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0]; - HParallelMove* move = new (graph_in->GetArena()) HParallelMove(graph_in->GetArena()); - block->InsertInstructionBefore(move, block->GetLastInstruction()); - }; +TEST_F(CodegenTest, MaterializedCondition1) { + for (InstructionSet target_isa : GetTargetISAs()) { + // Check that conditions are materialized correctly. A materialized condition + // should yield `1` if it evaluates to true, and `0` otherwise. + // We force the materialization of comparisons for different combinations of + + // inputs and check the results.
+ + int lhs[] = {1, 2, -1, 2, 0xabc}; + int rhs[] = {2, 1, 2, -1, 0xabc}; + + for (size_t i = 0; i < arraysize(lhs); i++) { + ArenaPool pool; + ArenaAllocator allocator(&pool); + HGraph* graph = CreateGraph(&allocator); + + HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph); + graph->AddBlock(entry_block); + graph->SetEntryBlock(entry_block); + entry_block->AddInstruction(new (&allocator) HGoto()); + HBasicBlock* code_block = new (&allocator) HBasicBlock(graph); + graph->AddBlock(code_block); + HBasicBlock* exit_block = new (&allocator) HBasicBlock(graph); + graph->AddBlock(exit_block); + exit_block->AddInstruction(new (&allocator) HExit()); + + entry_block->AddSuccessor(code_block); + code_block->AddSuccessor(exit_block); + graph->SetExitBlock(exit_block); + + HIntConstant* cst_lhs = graph->GetIntConstant(lhs[i]); + HIntConstant* cst_rhs = graph->GetIntConstant(rhs[i]); + HLessThan cmp_lt(cst_lhs, cst_rhs); + code_block->AddInstruction(&cmp_lt); + HReturn ret(&cmp_lt); + code_block->AddInstruction(&ret); + + TransformToSsa(graph); + auto hook_before_codegen = [](HGraph* graph_in) { + HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0]; + HParallelMove* move = new (graph_in->GetArena()) HParallelMove(graph_in->GetArena()); + block->InsertInstructionBefore(move, block->GetLastInstruction()); + }; + RunCode(target_isa, graph, hook_before_codegen, true, lhs[i] < rhs[i]); + } + } +} - RunCodeOptimized(GetParam(), graph, hook_before_codegen, true, lhs[i] < rhs[i]); +TEST_F(CodegenTest, MaterializedCondition2) { + for (InstructionSet target_isa : GetTargetISAs()) { + // Check that HIf correctly interprets a materialized condition. + // We force the materialization of comparisons for different combinations of + // inputs. An HIf takes the materialized combination as input and returns a + // value that we verify. + + int lhs[] = {1, 2, -1, 2, 0xabc}; + int rhs[] = {2, 1, 2, -1, 0xabc}; + + + for (size_t i = 0; i < arraysize(lhs); i++) { + ArenaPool pool; + ArenaAllocator allocator(&pool); + HGraph* graph = CreateGraph(&allocator); + + HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph); + graph->AddBlock(entry_block); + graph->SetEntryBlock(entry_block); + entry_block->AddInstruction(new (&allocator) HGoto()); + + HBasicBlock* if_block = new (&allocator) HBasicBlock(graph); + graph->AddBlock(if_block); + HBasicBlock* if_true_block = new (&allocator) HBasicBlock(graph); + graph->AddBlock(if_true_block); + HBasicBlock* if_false_block = new (&allocator) HBasicBlock(graph); + graph->AddBlock(if_false_block); + HBasicBlock* exit_block = new (&allocator) HBasicBlock(graph); + graph->AddBlock(exit_block); + exit_block->AddInstruction(new (&allocator) HExit()); + + graph->SetEntryBlock(entry_block); + entry_block->AddSuccessor(if_block); + if_block->AddSuccessor(if_true_block); + if_block->AddSuccessor(if_false_block); + if_true_block->AddSuccessor(exit_block); + if_false_block->AddSuccessor(exit_block); + graph->SetExitBlock(exit_block); + + HIntConstant* cst_lhs = graph->GetIntConstant(lhs[i]); + HIntConstant* cst_rhs = graph->GetIntConstant(rhs[i]); + HLessThan cmp_lt(cst_lhs, cst_rhs); + if_block->AddInstruction(&cmp_lt); + // We insert a temporary to separate the HIf from the HLessThan and force + // the materialization of the condition. 
+ HTemporary force_materialization(0); + if_block->AddInstruction(&force_materialization); + HIf if_lt(&cmp_lt); + if_block->AddInstruction(&if_lt); + + HIntConstant* cst_lt = graph->GetIntConstant(1); + HReturn ret_lt(cst_lt); + if_true_block->AddInstruction(&ret_lt); + HIntConstant* cst_ge = graph->GetIntConstant(0); + HReturn ret_ge(cst_ge); + if_false_block->AddInstruction(&ret_ge); + + TransformToSsa(graph); + auto hook_before_codegen = [](HGraph* graph_in) { + HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0]; + HParallelMove* move = new (graph_in->GetArena()) HParallelMove(graph_in->GetArena()); + block->InsertInstructionBefore(move, block->GetLastInstruction()); + }; + RunCode(target_isa, graph, hook_before_codegen, true, lhs[i] < rhs[i]); + } } } -TEST_P(CodegenTest, ReturnDivIntLit8) { +TEST_F(CodegenTest, ReturnDivIntLit8) { const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 4 << 12 | 0 << 8, Instruction::DIV_INT_LIT8, 3 << 8 | 0, Instruction::RETURN); - TestCode(GetParam(), data, true, 1); + TestCode(data, true, 1); } -TEST_P(CodegenTest, ReturnDivInt2Addr) { +TEST_F(CodegenTest, ReturnDivInt2Addr) { const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 4 << 12 | 0, Instruction::CONST_4 | 2 << 12 | 1 << 8, Instruction::DIV_INT_2ADDR | 1 << 12, Instruction::RETURN); - TestCode(GetParam(), data, true, 2); + TestCode(data, true, 2); } // Helper method. @@ -933,80 +907,55 @@ static void TestComparison(IfCondition condition, block->AddInstruction(comparison); block->AddInstruction(new (&allocator) HReturn(comparison)); - auto hook_before_codegen = [](HGraph*) { - }; - RunCodeOptimized(target_isa, graph, hook_before_codegen, true, expected_result); -} - -TEST_P(CodegenTest, ComparisonsInt) { - const InstructionSet target_isa = GetParam(); - for (int64_t i = -1; i <= 1; i++) { - for (int64_t j = -1; j <= 1; j++) { - TestComparison(kCondEQ, i, j, Primitive::kPrimInt, target_isa); - TestComparison(kCondNE, i, j, Primitive::kPrimInt, target_isa); - TestComparison(kCondLT, i, j, Primitive::kPrimInt, target_isa); - TestComparison(kCondLE, i, j, Primitive::kPrimInt, target_isa); - TestComparison(kCondGT, i, j, Primitive::kPrimInt, target_isa); - TestComparison(kCondGE, i, j, Primitive::kPrimInt, target_isa); - TestComparison(kCondB, i, j, Primitive::kPrimInt, target_isa); - TestComparison(kCondBE, i, j, Primitive::kPrimInt, target_isa); - TestComparison(kCondA, i, j, Primitive::kPrimInt, target_isa); - TestComparison(kCondAE, i, j, Primitive::kPrimInt, target_isa); + TransformToSsa(graph); + RunCode(target_isa, graph, [](HGraph*) {}, true, expected_result); +} + +TEST_F(CodegenTest, ComparisonsInt) { + for (InstructionSet target_isa : GetTargetISAs()) { + for (int64_t i = -1; i <= 1; i++) { + for (int64_t j = -1; j <= 1; j++) { + TestComparison(kCondEQ, i, j, Primitive::kPrimInt, target_isa); + TestComparison(kCondNE, i, j, Primitive::kPrimInt, target_isa); + TestComparison(kCondLT, i, j, Primitive::kPrimInt, target_isa); + TestComparison(kCondLE, i, j, Primitive::kPrimInt, target_isa); + TestComparison(kCondGT, i, j, Primitive::kPrimInt, target_isa); + TestComparison(kCondGE, i, j, Primitive::kPrimInt, target_isa); + TestComparison(kCondB, i, j, Primitive::kPrimInt, target_isa); + TestComparison(kCondBE, i, j, Primitive::kPrimInt, target_isa); + TestComparison(kCondA, i, j, Primitive::kPrimInt, target_isa); + TestComparison(kCondAE, i, j, Primitive::kPrimInt, target_isa); + } } } } -TEST_P(CodegenTest, ComparisonsLong) { 
+TEST_F(CodegenTest, ComparisonsLong) { // TODO: make MIPS work for long if (kRuntimeISA == kMips || kRuntimeISA == kMips64) { return; } - const InstructionSet target_isa = GetParam(); - if (target_isa == kMips || target_isa == kMips64) { - return; - } - - for (int64_t i = -1; i <= 1; i++) { - for (int64_t j = -1; j <= 1; j++) { - TestComparison(kCondEQ, i, j, Primitive::kPrimLong, target_isa); - TestComparison(kCondNE, i, j, Primitive::kPrimLong, target_isa); - TestComparison(kCondLT, i, j, Primitive::kPrimLong, target_isa); - TestComparison(kCondLE, i, j, Primitive::kPrimLong, target_isa); - TestComparison(kCondGT, i, j, Primitive::kPrimLong, target_isa); - TestComparison(kCondGE, i, j, Primitive::kPrimLong, target_isa); - TestComparison(kCondB, i, j, Primitive::kPrimLong, target_isa); - TestComparison(kCondBE, i, j, Primitive::kPrimLong, target_isa); - TestComparison(kCondA, i, j, Primitive::kPrimLong, target_isa); - TestComparison(kCondAE, i, j, Primitive::kPrimLong, target_isa); + for (InstructionSet target_isa : GetTargetISAs()) { + if (target_isa == kMips || target_isa == kMips64) { + continue; } - } -} -static ::std::vector<InstructionSet> GetTargetISAs() { - ::std::vector<InstructionSet> v; - // Add all ISAs that are executable on hardware or on simulator. - const ::std::vector<InstructionSet> executable_isa_candidates = { - kArm, - kArm64, - kThumb2, - kX86, - kX86_64, - kMips, - kMips64 - }; - - for (auto target_isa : executable_isa_candidates) { - if (CanExecute(target_isa)) { - v.push_back(target_isa); + for (int64_t i = -1; i <= 1; i++) { + for (int64_t j = -1; j <= 1; j++) { + TestComparison(kCondEQ, i, j, Primitive::kPrimLong, target_isa); + TestComparison(kCondNE, i, j, Primitive::kPrimLong, target_isa); + TestComparison(kCondLT, i, j, Primitive::kPrimLong, target_isa); + TestComparison(kCondLE, i, j, Primitive::kPrimLong, target_isa); + TestComparison(kCondGT, i, j, Primitive::kPrimLong, target_isa); + TestComparison(kCondGE, i, j, Primitive::kPrimLong, target_isa); + TestComparison(kCondB, i, j, Primitive::kPrimLong, target_isa); + TestComparison(kCondBE, i, j, Primitive::kPrimLong, target_isa); + TestComparison(kCondA, i, j, Primitive::kPrimLong, target_isa); + TestComparison(kCondAE, i, j, Primitive::kPrimLong, target_isa); + } } } - - return v; } -INSTANTIATE_TEST_CASE_P(MultipleTargets, - CodegenTest, - ::testing::ValuesIn(GetTargetISAs())); - } // namespace art diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc index 86a695b152..e170e37bdd 100644 --- a/compiler/optimizing/dead_code_elimination.cc +++ b/compiler/optimizing/dead_code_elimination.cc @@ -89,15 +89,18 @@ void HDeadCodeElimination::MaybeRecordDeadBlock(HBasicBlock* block) { } void HDeadCodeElimination::RemoveDeadBlocks() { + if (graph_->HasIrreducibleLoops()) { + // Do not eliminate dead blocks if the graph has irreducible loops. We could + // support it, but that would require changes in our loop representation to handle + // multiple entry points. We decided it was not worth the complexity. + return; + } // Classify blocks as reachable/unreachable. ArenaAllocator* allocator = graph_->GetArena(); ArenaBitVector live_blocks(allocator, graph_->GetBlocks().size(), false); MarkReachableBlocks(graph_, &live_blocks); bool removed_one_or_more_blocks = false; - // If the graph has irreducible loops we need to reset all graph analysis we have done - // before: the irreducible loop can be turned into a reducible one. 
- // For simplicity, we do the full computation regardless of the type of the loops. bool rerun_dominance_and_loop_analysis = false; // Remove all dead blocks. Iterate in post order because removal needs the @@ -105,9 +108,6 @@ void HDeadCodeElimination::RemoveDeadBlocks() { // inside out. for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); - if (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible()) { - rerun_dominance_and_loop_analysis = true; - } int id = block->GetBlockId(); if (!live_blocks.IsBitSet(id)) { MaybeRecordDeadBlock(block); diff --git a/compiler/optimizing/dominator_test.cc b/compiler/optimizing/dominator_test.cc index 91e4a997fd..feb8b2092a 100644 --- a/compiler/optimizing/dominator_test.cc +++ b/compiler/optimizing/dominator_test.cc @@ -133,8 +133,9 @@ TEST(OptimizerTest, CFG4) { const uint32_t dominators[] = { kInvalidBlockId, - 0, - kInvalidBlockId + 3, + kInvalidBlockId, + 0 }; TestCode(data1, dominators, sizeof(dominators) / sizeof(int)); diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index 9439ba0c8d..31136772c7 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -484,6 +484,18 @@ void SSAChecker::CheckLoop(HBasicBlock* loop_header) { loop_information->GetPreHeader()->GetSuccessors().size())); } + if (loop_information->GetSuspendCheck() == nullptr) { + AddError(StringPrintf( + "Loop with header %d does not have a suspend check.", + loop_header->GetBlockId())); + } + + if (loop_information->GetSuspendCheck() != loop_header->GetFirstInstructionDisregardMoves()) { + AddError(StringPrintf( + "Loop header %d does not have the loop suspend check as the first instruction.", + loop_header->GetBlockId())); + } + // Ensure the loop header has only one incoming branch and the remaining // predecessors are back edges. size_t num_preds = loop_header->GetPredecessors().size(); @@ -589,6 +601,14 @@ void SSAChecker::VisitInstruction(HInstruction* instruction) { } } + if (instruction->NeedsEnvironment() && !instruction->HasEnvironment()) { + AddError(StringPrintf("Instruction %s:%d in block %d requires an environment " + "but does not have one.", + instruction->DebugName(), + instruction->GetId(), + current_block_->GetBlockId())); + } + // Ensure an instruction having an environment is dominated by the // instructions contained in the environment. for (HEnvironment* environment = instruction->GetEnvironment(); diff --git a/compiler/optimizing/graph_test.cc b/compiler/optimizing/graph_test.cc index d4b9b71952..d5305646a8 100644 --- a/compiler/optimizing/graph_test.cc +++ b/compiler/optimizing/graph_test.cc @@ -164,7 +164,7 @@ TEST(GraphTest, IfSuccessorMultipleBackEdges1) { // Ensure there is only one back edge. ASSERT_EQ(if_block->GetPredecessors().size(), 2u); - ASSERT_EQ(if_block->GetPredecessors()[0], entry_block); + ASSERT_EQ(if_block->GetPredecessors()[0], entry_block->GetSingleSuccessor()); ASSERT_NE(if_block->GetPredecessors()[1], if_block); // Ensure the new block is the back edge. @@ -199,7 +199,7 @@ TEST(GraphTest, IfSuccessorMultipleBackEdges2) { // Ensure there is only one back edge. ASSERT_EQ(if_block->GetPredecessors().size(), 2u); - ASSERT_EQ(if_block->GetPredecessors()[0], entry_block); + ASSERT_EQ(if_block->GetPredecessors()[0], entry_block->GetSingleSuccessor()); ASSERT_NE(if_block->GetPredecessors()[1], if_block); // Ensure the new block is the back edge. 
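Aside on the dead_code_elimination.cc change above: RemoveDeadBlocks() first marks every block reachable from the entry block (via MarkReachableBlocks into an ArenaBitVector, as the hunk shows) and then removes the unmarked ones. A minimal, self-contained C++ sketch of that marking step — the names and the plain adjacency-list representation here are hypothetical stand-ins for ART's HGraph and bit vector:

#include <vector>

// Depth-first marking from the entry block. Every block we can reach gets
// its flag set; blocks still unmarked afterwards are dead. Block ids are
// assumed to be dense indices, as in the bit-vector-based pass above.
static void MarkReachable(size_t block,
                          const std::vector<std::vector<size_t>>& successors,
                          std::vector<bool>* live) {
  if ((*live)[block]) {
    return;  // Already visited.
  }
  (*live)[block] = true;
  for (size_t successor : successors[block]) {
    MarkReachable(successor, successors, live);
  }
}

The new early return for irreducible loops fits this picture: as the deleted comment noted, removing one of the multiple entry edges of an irreducible loop can turn it into a reducible one, invalidating the loop information computed before the pass, so such graphs are now left untouched.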
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 293282edbb..2e79df1b84 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -356,12 +356,12 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction, compare, invoke_instruction->GetDexPc()); // TODO: Extend reference type propagation to understand the guard. if (cursor != nullptr) { - bb_cursor->InsertInstructionAfter(load_class, cursor); + bb_cursor->InsertInstructionAfter(field_get, cursor); } else { - bb_cursor->InsertInstructionBefore(load_class, bb_cursor->GetFirstInstruction()); + bb_cursor->InsertInstructionBefore(field_get, bb_cursor->GetFirstInstruction()); } - bb_cursor->InsertInstructionAfter(field_get, load_class); - bb_cursor->InsertInstructionAfter(compare, field_get); + bb_cursor->InsertInstructionAfter(load_class, field_get); + bb_cursor->InsertInstructionAfter(compare, load_class); bb_cursor->InsertInstructionAfter(deoptimize, compare); deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment()); @@ -419,7 +419,10 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, ArtMethod* method, bool do size_t inline_max_code_units = compiler_driver_->GetCompilerOptions().GetInlineMaxCodeUnits(); if (code_item->insns_size_in_code_units_ > inline_max_code_units) { VLOG(compiler) << "Method " << PrettyMethod(method) - << " is too big to inline"; + << " is too big to inline: " + << code_item->insns_size_in_code_units_ + << " > " + << inline_max_code_units; return false; } @@ -639,9 +642,12 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, for (; !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); - if (block->IsLoopHeader()) { + + if (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible()) { + // Don't inline methods with irreducible loops; they could prevent some + // optimizations from running. VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) - << " could not be inlined because it contains a loop"; + << " could not be inlined because it contains an irreducible loop"; return false; } diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc index c6da9a3f5e..5caf077858 100644 --- a/compiler/optimizing/intrinsics.cc +++ b/compiler/optimizing/intrinsics.cc @@ -176,6 +176,16 @@ static Intrinsics GetIntrinsic(InlineMethod method) { } // Misc data processing. + case kIntrinsicBitCount: + switch (GetType(method.d.data, true)) { + case Primitive::kPrimInt: + return Intrinsics::kIntegerBitCount; + case Primitive::kPrimLong: + return Intrinsics::kLongBitCount; + default: + LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; + UNREACHABLE(); + } case kIntrinsicNumberOfLeadingZeros: switch (GetType(method.d.data, true)) { case Primitive::kPrimInt: diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h index 9f50d1814e..3bf3f7ffae 100644 --- a/compiler/optimizing/intrinsics.h +++ b/compiler/optimizing/intrinsics.h @@ -85,9 +85,9 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) InvokeDexCallingConventionVisitor* calling_convention_visitor) { if (kIsDebugBuild && invoke->IsInvokeStaticOrDirect()) { HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect(); - // When we do not run baseline, explicit clinit checks triggered by static - // invokes must have been pruned by art::PrepareForRegisterAllocation.
- DCHECK(codegen->IsBaseline() || !invoke_static_or_direct->IsStaticWithExplicitClinitCheck()); + // Explicit clinit checks triggered by static invokes must have been + // pruned by art::PrepareForRegisterAllocation. + DCHECK(!invoke_static_or_direct->IsStaticWithExplicitClinitCheck()); } if (invoke->GetNumberOfArguments() == 0) { diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index b1fbf28204..e72f927e44 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -1577,10 +1577,12 @@ void IntrinsicLocationsBuilderARM::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSE void IntrinsicCodeGeneratorARM::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ } +UNIMPLEMENTED_INTRINSIC(IntegerBitCount) UNIMPLEMENTED_INTRINSIC(IntegerReverse) UNIMPLEMENTED_INTRINSIC(IntegerReverseBytes) UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft) UNIMPLEMENTED_INTRINSIC(IntegerRotateRight) +UNIMPLEMENTED_INTRINSIC(LongBitCount) UNIMPLEMENTED_INTRINSIC(LongReverse) UNIMPLEMENTED_INTRINSIC(LongReverseBytes) UNIMPLEMENTED_INTRINSIC(LongRotateLeft) diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 81cab86c83..8cf2d4f393 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -752,21 +752,33 @@ static void GenUnsafeGet(HInvoke* invoke, Register trg = RegisterFrom(trg_loc, type); bool use_acquire_release = codegen->GetInstructionSetFeatures().PreferAcquireRelease(); - MemOperand mem_op(base.X(), offset); - if (is_volatile) { - if (use_acquire_release) { - codegen->LoadAcquire(invoke, trg, mem_op); - } else { - codegen->Load(type, trg, mem_op); + if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // UnsafeGetObject/UnsafeGetObjectVolatile with Baker's read barrier case. + UseScratchRegisterScope temps(masm); + Register temp = temps.AcquireW(); + codegen->GenerateArrayLoadWithBakerReadBarrier( + invoke, trg_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false); + if (is_volatile && !use_acquire_release) { __ Dmb(InnerShareable, BarrierReads); } } else { - codegen->Load(type, trg, mem_op); - } + // Other cases. + MemOperand mem_op(base.X(), offset); + if (is_volatile) { + if (use_acquire_release) { + codegen->LoadAcquire(invoke, trg, mem_op, /* needs_null_check */ true); + } else { + codegen->Load(type, trg, mem_op); + __ Dmb(InnerShareable, BarrierReads); + } + } else { + codegen->Load(type, trg, mem_op); + } - if (type == Primitive::kPrimNot) { - DCHECK(trg.IsW()); - codegen->MaybeGenerateReadBarrier(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc); + if (type == Primitive::kPrimNot) { + DCHECK(trg.IsW()); + codegen->MaybeGenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc); + } } } @@ -1026,10 +1038,15 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat vixl::Label loop_head, exit_loop; if (use_acquire_release) { __ Bind(&loop_head); - __ Ldaxr(tmp_value, MemOperand(tmp_ptr)); - // TODO: Do we need a read barrier here when `type == Primitive::kPrimNot`? + // TODO: When `type == Primitive::kPrimNot`, add a read barrier for + // the reference stored in the object before attempting the CAS, + // similar to the one in the art::Unsafe_compareAndSwapObject JNI + // implementation. + // // Note that this code is not (yet) used when read barriers are // enabled (see IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject). 
+ DCHECK(!(type == Primitive::kPrimNot && kEmitCompilerReadBarrier)); + __ Ldaxr(tmp_value, MemOperand(tmp_ptr)); __ Cmp(tmp_value, expected); __ B(&exit_loop, ne); __ Stlxr(tmp_32, value, MemOperand(tmp_ptr)); @@ -1447,8 +1464,10 @@ void IntrinsicLocationsBuilderARM64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNU void IntrinsicCodeGeneratorARM64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ } +UNIMPLEMENTED_INTRINSIC(IntegerBitCount) UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft) UNIMPLEMENTED_INTRINSIC(IntegerRotateRight) +UNIMPLEMENTED_INTRINSIC(LongBitCount) UNIMPLEMENTED_INTRINSIC(LongRotateLeft) UNIMPLEMENTED_INTRINSIC(LongRotateRight) UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) diff --git a/compiler/optimizing/intrinsics_list.h b/compiler/optimizing/intrinsics_list.h index 2e87546282..ea380347da 100644 --- a/compiler/optimizing/intrinsics_list.h +++ b/compiler/optimizing/intrinsics_list.h @@ -28,12 +28,14 @@ V(FloatIntBitsToFloat, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ V(IntegerReverse, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ V(IntegerReverseBytes, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(IntegerBitCount, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ V(IntegerNumberOfLeadingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ V(IntegerNumberOfTrailingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ V(IntegerRotateRight, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ V(IntegerRotateLeft, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ V(LongReverse, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ V(LongReverseBytes, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(LongBitCount, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ V(LongNumberOfLeadingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ V(LongNumberOfTrailingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ V(LongRotateRight, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc index bc126a2716..81112b1a34 100644 --- a/compiler/optimizing/intrinsics_mips.cc +++ b/compiler/optimizing/intrinsics_mips.cc @@ -935,6 +935,9 @@ void IntrinsicLocationsBuilderMIPS::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUS void IntrinsicCodeGeneratorMIPS::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ } +UNIMPLEMENTED_INTRINSIC(IntegerBitCount) +UNIMPLEMENTED_INTRINSIC(LongBitCount) + UNIMPLEMENTED_INTRINSIC(MathAbsDouble) UNIMPLEMENTED_INTRINSIC(MathAbsFloat) UNIMPLEMENTED_INTRINSIC(MathAbsInt) diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index 8b45ea7c4f..ac969e39fa 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -1724,6 +1724,9 @@ void IntrinsicLocationsBuilderMIPS64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UN void IntrinsicCodeGeneratorMIPS64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ } +UNIMPLEMENTED_INTRINSIC(IntegerBitCount) +UNIMPLEMENTED_INTRINSIC(LongBitCount) + UNIMPLEMENTED_INTRINSIC(MathRoundDouble) UNIMPLEMENTED_INTRINSIC(MathRoundFloat) diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index 677f2e9c81..e48bed59d7 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -2303,6 +2303,81 
@@ void IntrinsicCodeGeneratorX86::VisitLongReverse(HInvoke* invoke) { SwapBits(reg_high, temp, 4, 0x0f0f0f0f, assembler); } +static void CreateBitCountLocations( + ArenaAllocator* arena, CodeGeneratorX86* codegen, HInvoke* invoke, bool is_long) { + if (!codegen->GetInstructionSetFeatures().HasPopCnt()) { + // Do nothing if there is no popcnt support. This results in generating + // a call for the intrinsic rather than direct code. + return; + } + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + if (is_long) { + locations->SetInAt(0, Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + } else { + locations->SetInAt(0, Location::Any()); + } + locations->SetOut(Location::RequiresRegister()); +} + +static void GenBitCount(X86Assembler* assembler, HInvoke* invoke, bool is_long) { + LocationSummary* locations = invoke->GetLocations(); + Location src = locations->InAt(0); + Register out = locations->Out().AsRegister<Register>(); + + if (invoke->InputAt(0)->IsConstant()) { + // Evaluate this at compile time. + int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant()); + value = is_long + ? POPCOUNT(static_cast<uint64_t>(value)) + : POPCOUNT(static_cast<uint32_t>(value)); + if (value == 0) { + __ xorl(out, out); + } else { + __ movl(out, Immediate(value)); + } + return; + } + + // Handle the non-constant cases. + if (!is_long) { + if (src.IsRegister()) { + __ popcntl(out, src.AsRegister<Register>()); + } else { + DCHECK(src.IsStackSlot()); + __ popcntl(out, Address(ESP, src.GetStackIndex())); + } + return; + } + + // The 64-bit case needs to worry about both parts of the register. + DCHECK(src.IsRegisterPair()); + Register src_lo = src.AsRegisterPairLow<Register>(); + Register src_hi = src.AsRegisterPairHigh<Register>(); + Register temp = locations->GetTemp(0).AsRegister<Register>(); + __ popcntl(temp, src_lo); + __ popcntl(out, src_hi); + __ addl(out, temp); +} + +void IntrinsicLocationsBuilderX86::VisitIntegerBitCount(HInvoke* invoke) { + CreateBitCountLocations(arena_, codegen_, invoke, /* is_long */ false); +} + +void IntrinsicCodeGeneratorX86::VisitIntegerBitCount(HInvoke* invoke) { + GenBitCount(GetAssembler(), invoke, /* is_long */ false); +} + +void IntrinsicLocationsBuilderX86::VisitLongBitCount(HInvoke* invoke) { + CreateBitCountLocations(arena_, codegen_, invoke, /* is_long */ true); +} + +void IntrinsicCodeGeneratorX86::VisitLongBitCount(HInvoke* invoke) { + GenBitCount(GetAssembler(), invoke, /* is_long */ true); +} + static void CreateLeadingZeroLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_long) { LocationSummary* locations = new (arena) LocationSummary(invoke, LocationSummary::kNoCall, diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 690cf3d413..23a628f243 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -2368,6 +2368,70 @@ void IntrinsicCodeGeneratorX86_64::VisitLongReverse(HInvoke* invoke) { SwapBits64(reg, temp1, temp2, 4, INT64_C(0x0f0f0f0f0f0f0f0f), assembler); } +static void CreateBitCountLocations( + ArenaAllocator* arena, CodeGeneratorX86_64* codegen, HInvoke* invoke) { + if (!codegen->GetInstructionSetFeatures().HasPopCnt()) { + // Do nothing if there is no popcnt support. This results in generating + // a call for the intrinsic rather than direct code. 
+ return; + } + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::Any()); + locations->SetOut(Location::RequiresRegister()); +} + +static void GenBitCount(X86_64Assembler* assembler, HInvoke* invoke, bool is_long) { + LocationSummary* locations = invoke->GetLocations(); + Location src = locations->InAt(0); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + + if (invoke->InputAt(0)->IsConstant()) { + // Evaluate this at compile time. + int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant()); + value = is_long + ? POPCOUNT(static_cast<uint64_t>(value)) + : POPCOUNT(static_cast<uint32_t>(value)); + if (value == 0) { + __ xorl(out, out); + } else { + __ movl(out, Immediate(value)); + } + return; + } + + if (src.IsRegister()) { + if (is_long) { + __ popcntq(out, src.AsRegister<CpuRegister>()); + } else { + __ popcntl(out, src.AsRegister<CpuRegister>()); + } + } else if (is_long) { + DCHECK(src.IsDoubleStackSlot()); + __ popcntq(out, Address(CpuRegister(RSP), src.GetStackIndex())); + } else { + DCHECK(src.IsStackSlot()); + __ popcntl(out, Address(CpuRegister(RSP), src.GetStackIndex())); + } +} + +void IntrinsicLocationsBuilderX86_64::VisitIntegerBitCount(HInvoke* invoke) { + CreateBitCountLocations(arena_, codegen_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitIntegerBitCount(HInvoke* invoke) { + GenBitCount(GetAssembler(), invoke, /* is_long */ false); +} + +void IntrinsicLocationsBuilderX86_64::VisitLongBitCount(HInvoke* invoke) { + CreateBitCountLocations(arena_, codegen_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitLongBitCount(HInvoke* invoke) { + GenBitCount(GetAssembler(), invoke, /* is_long */ true); +} + static void CreateLeadingZeroLocations(ArenaAllocator* arena, HInvoke* invoke) { LocationSummary* locations = new (arena) LocationSummary(invoke, LocationSummary::kNoCall, diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 854d92a409..adf8734214 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -167,11 +167,7 @@ void HGraph::ClearDominanceInformation() { void HGraph::ClearLoopInformation() { SetHasIrreducibleLoops(false); for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) { - HBasicBlock* current = it.Current(); - if (current->IsLoopHeader()) { - current->RemoveInstruction(current->GetLoopInformation()->GetSuspendCheck()); - } - current->SetLoopInformation(nullptr); + it.Current()->SetLoopInformation(nullptr); } } @@ -180,6 +176,14 @@ void HBasicBlock::ClearDominanceInformation() { dominator_ = nullptr; } +HInstruction* HBasicBlock::GetFirstInstructionDisregardMoves() const { + HInstruction* instruction = GetFirstInstruction(); + while (instruction->IsParallelMove()) { + instruction = instruction->GetNext(); + } + return instruction; +} + void HGraph::ComputeDominanceInformation() { DCHECK(reverse_post_order_.empty()); reverse_post_order_.reserve(blocks_.size()); @@ -284,9 +288,10 @@ void HGraph::SimplifyLoop(HBasicBlock* header) { // Make sure the loop has only one pre header. This simplifies SSA building by having // to just look at the pre header to know which locals are initialized at entry of the - // loop. + // loop. Also, don't allow the entry block to be a pre header: this simplifies inlining + // this graph. 
size_t number_of_incomings = header->GetPredecessors().size() - info->NumberOfBackEdges(); - if (number_of_incomings != 1) { + if (number_of_incomings != 1 || (GetEntryBlock()->GetSingleSuccessor() == header)) { HBasicBlock* pre_header = new (arena_) HBasicBlock(this, header->GetDexPc()); AddBlock(pre_header); pre_header->AddInstruction(new (arena_) HGoto(header->GetDexPc())); @@ -457,6 +462,10 @@ void HGraph::SimplifyCFG() { } if (block->IsLoopHeader()) { SimplifyLoop(block); + } else if (!block->IsEntryBlock() && block->GetFirstInstruction()->IsSuspendCheck()) { + // We are being called by the dead code elimination pass, and what used to be + // a loop got dismantled. Just remove the suspend check. + block->RemoveInstruction(block->GetFirstInstruction()); } } } @@ -1829,6 +1838,7 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { DCHECK(GetBlocks()[0]->IsEntryBlock()); DCHECK(GetBlocks()[2]->IsExitBlock()); DCHECK(!body->IsExitBlock()); + DCHECK(!body->IsInLoop()); HInstruction* last = body->GetLastInstruction(); invoke->GetBlock()->instructions_.AddAfter(invoke, body->GetInstructions()); @@ -1887,7 +1897,7 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { // Update the meta information surrounding blocks: // (1) the graph they are now in, // (2) the reverse post order of that graph, - // (3) the potential loop information they are now in, + // (3) their potential loop information, inner and outer, // (4) try block membership. // Note that we do not need to update catch phi inputs because they // correspond to the register file of the outer method which the inlinee @@ -1916,15 +1926,24 @@ for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) { HBasicBlock* current = it.Current(); if (current != exit_block_ && current != entry_block_ && current != first) { - DCHECK(!current->IsInLoop()); DCHECK(current->GetTryCatchInformation() == nullptr); DCHECK(current->GetGraph() == this); current->SetGraph(outer_graph); outer_graph->AddBlock(current); outer_graph->reverse_post_order_[++index_of_at] = current; - if (loop_info != nullptr) { + if (!current->IsInLoop()) { current->SetLoopInformation(loop_info); - for (HLoopInformationOutwardIterator loop_it(*at); !loop_it.Done(); loop_it.Advance()) { + } else if (current->IsLoopHeader()) { + // Clear the information of which blocks are contained in that loop. Since the + // information is stored as a bit vector based on block ids, we have to update + // it, as those block ids were specific to the callee graph and we are now adding + // these blocks to the caller graph.
+ current->GetLoopInformation()->ClearAllBlocks(); + } + if (current->IsInLoop()) { + for (HLoopInformationOutwardIterator loop_it(*current); + !loop_it.Done(); + loop_it.Advance()) { loop_it.Current()->Add(current); } } @@ -1937,7 +1956,9 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { outer_graph->AddBlock(to); outer_graph->reverse_post_order_[++index_of_at] = to; if (loop_info != nullptr) { - to->SetLoopInformation(loop_info); + if (!to->IsInLoop()) { + to->SetLoopInformation(loop_info); + } for (HLoopInformationOutwardIterator loop_it(*at); !loop_it.Done(); loop_it.Advance()) { loop_it.Current()->Add(to); } diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 859d570b29..5246fd1f05 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -689,6 +689,10 @@ class HLoopInformation : public ArenaObject<kArenaAllocLoopInfo> { void Add(HBasicBlock* block); void Remove(HBasicBlock* block); + void ClearAllBlocks() { + blocks_.ClearAllBits(); + } + private: // Internal recursive implementation of `Populate`. void PopulateRecursive(HBasicBlock* block); @@ -860,6 +864,8 @@ class HBasicBlock : public ArenaObject<kArenaAllocBasicBlock> { HInstruction* GetLastPhi() const { return phis_.last_instruction_; } const HInstructionList& GetPhis() const { return phis_; } + HInstruction* GetFirstInstructionDisregardMoves() const; + void AddSuccessor(HBasicBlock* block) { successors_.push_back(block); block->predecessors_.push_back(this); @@ -3687,19 +3693,13 @@ class HInvokeStaticOrDirect : public HInvoke { DCHECK(!IsStaticWithExplicitClinitCheck()); } - HNewInstance* GetThisArgumentOfStringInit() const { - DCHECK(IsStringInit()); - size_t index = InputCount() - 1; - DCHECK(InputAt(index)->IsNewInstance()); - return InputAt(index)->AsNewInstance(); - } - - void RemoveThisArgumentOfStringInit() { + HInstruction* GetAndRemoveThisArgumentOfStringInit() { DCHECK(IsStringInit()); size_t index = InputCount() - 1; - DCHECK(InputAt(index)->IsNewInstance()); + HInstruction* input = InputAt(index); RemoveAsUserOfInput(index); inputs_.pop_back(); + return input; } // Is this a call to a static method whose declaring class has an diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index bb840eabdd..fffd00535c 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -127,7 +127,7 @@ class PassObserver : public ValueObject { timing_logger_enabled_(compiler_driver->GetDumpPasses()), timing_logger_(timing_logger_enabled_ ? GetMethodName() : "", true, true), disasm_info_(graph->GetArena()), - visualizer_enabled_(!compiler_driver->GetDumpCfgFileName().empty()), + visualizer_enabled_(!compiler_driver->GetCompilerOptions().GetDumpCfgFileName().empty()), visualizer_(visualizer_output, graph, *codegen), graph_in_bad_state_(false) { if (timing_logger_enabled_ || visualizer_enabled_) { @@ -305,30 +305,19 @@ class OptimizingCompiler FINAL : public Compiler { SHARED_REQUIRES(Locks::mutator_lock_); private: - // Whether we should run any optimization or register allocation. If false, will - // just run the code generation after the graph was built. - const bool run_optimizations_; - // Create a 'CompiledMethod' for an optimized graph. - CompiledMethod* EmitOptimized(ArenaAllocator* arena, - CodeVectorAllocator* code_allocator, - CodeGenerator* codegen, - CompilerDriver* driver) const; - - // Create a 'CompiledMethod' for a non-optimized graph. 
- CompiledMethod* EmitBaseline(ArenaAllocator* arena, - CodeVectorAllocator* code_allocator, - CodeGenerator* codegen, - CompilerDriver* driver) const; + CompiledMethod* Emit(ArenaAllocator* arena, + CodeVectorAllocator* code_allocator, + CodeGenerator* codegen, + CompilerDriver* driver) const; // Try compiling a method and return the code generator used for // compiling it. // This method: // 1) Builds the graph. Returns null if it failed to build it. - // 2) If `run_optimizations_` is set: - // 2.1) Transform the graph to SSA. Returns null if it failed. - // 2.2) Run optimizations on the graph, including register allocator. - // 3) Generate code with the `code_allocator` provided. + // 2) Transforms the graph to SSA. Returns null if it failed. + // 3) Runs optimizations on the graph, including register allocator. + // 4) Generates code with the `code_allocator` provided. CodeGenerator* TryCompile(ArenaAllocator* arena, CodeVectorAllocator* code_allocator, const DexFile::CodeItem* code_item, @@ -350,21 +339,19 @@ class OptimizingCompiler FINAL : public Compiler { static const int kMaximumCompilationTimeBeforeWarning = 100; /* ms */ OptimizingCompiler::OptimizingCompiler(CompilerDriver* driver) - : Compiler(driver, kMaximumCompilationTimeBeforeWarning), - run_optimizations_( - driver->GetCompilerOptions().GetCompilerFilter() != CompilerOptions::kTime) {} + : Compiler(driver, kMaximumCompilationTimeBeforeWarning) {} void OptimizingCompiler::Init() { // Enable C1visualizer output. Must be done in Init() because the compiler // driver is not fully initialized when passed to the compiler's constructor. CompilerDriver* driver = GetCompilerDriver(); - const std::string cfg_file_name = driver->GetDumpCfgFileName(); + const std::string cfg_file_name = driver->GetCompilerOptions().GetDumpCfgFileName(); if (!cfg_file_name.empty()) { CHECK_EQ(driver->GetThreadCount(), 1U) << "Graph visualizer requires the compiler to run single-threaded. " << "Invoke the compiler with '-j1'."; std::ios_base::openmode cfg_file_mode = - driver->GetDumpCfgAppend() ? std::ofstream::app : std::ofstream::out; + driver->GetCompilerOptions().GetDumpCfgAppend() ? std::ofstream::app : std::ofstream::out; visualizer_output_.reset(new std::ofstream(cfg_file_name, cfg_file_mode)); } if (driver->GetDumpStats()) { @@ -577,17 +564,6 @@ static void RunOptimizations(HGraph* graph, AllocateRegisters(graph, codegen, pass_observer); } -// The stack map we generate must be 4-byte aligned on ARM. Since existing -// maps are generated alongside these stack maps, we must also align them. 
-static ArrayRef<const uint8_t> AlignVectorSize(ArenaVector<uint8_t>& vector) { - size_t size = vector.size(); - size_t aligned_size = RoundUp(size, 4); - for (; size < aligned_size; ++size) { - vector.push_back(0); - } - return ArrayRef<const uint8_t>(vector); -} - static ArenaVector<LinkerPatch> EmitAndSortLinkerPatches(CodeGenerator* codegen) { ArenaVector<LinkerPatch> linker_patches(codegen->GetGraph()->GetArena()->Adapter()); codegen->EmitLinkerPatches(&linker_patches); @@ -601,10 +577,10 @@ static ArenaVector<LinkerPatch> EmitAndSortLinkerPatches(CodeGenerator* codegen) return linker_patches; } -CompiledMethod* OptimizingCompiler::EmitOptimized(ArenaAllocator* arena, - CodeVectorAllocator* code_allocator, - CodeGenerator* codegen, - CompilerDriver* compiler_driver) const { +CompiledMethod* OptimizingCompiler::Emit(ArenaAllocator* arena, + CodeVectorAllocator* code_allocator, + CodeGenerator* codegen, + CompilerDriver* compiler_driver) const { ArenaVector<LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen); ArenaVector<uint8_t> stack_map(arena->Adapter(kArenaAllocStackMaps)); stack_map.resize(codegen->ComputeStackMapsSize()); @@ -630,39 +606,6 @@ CompiledMethod* OptimizingCompiler::EmitOptimized(ArenaAllocator* arena, return compiled_method; } -CompiledMethod* OptimizingCompiler::EmitBaseline( - ArenaAllocator* arena, - CodeVectorAllocator* code_allocator, - CodeGenerator* codegen, - CompilerDriver* compiler_driver) const { - ArenaVector<LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen); - - ArenaVector<uint8_t> mapping_table(arena->Adapter(kArenaAllocBaselineMaps)); - codegen->BuildMappingTable(&mapping_table); - ArenaVector<uint8_t> vmap_table(arena->Adapter(kArenaAllocBaselineMaps)); - codegen->BuildVMapTable(&vmap_table); - ArenaVector<uint8_t> gc_map(arena->Adapter(kArenaAllocBaselineMaps)); - codegen->BuildNativeGCMap(&gc_map, *compiler_driver); - - CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod( - compiler_driver, - codegen->GetInstructionSet(), - ArrayRef<const uint8_t>(code_allocator->GetMemory()), - // Follow Quick's behavior and set the frame size to zero if it is - // considered "empty" (see the definition of - // art::CodeGenerator::HasEmptyFrame). - codegen->HasEmptyFrame() ? 
0 : codegen->GetFrameSize(), - codegen->GetCoreSpillMask(), - codegen->GetFpuSpillMask(), - ArrayRef<const SrcMapElem>(), - AlignVectorSize(mapping_table), - AlignVectorSize(vmap_table), - AlignVectorSize(gc_map), - ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()), - ArrayRef<const LinkerPatch>(linker_patches)); - return compiled_method; -} - CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, CodeVectorAllocator* code_allocator, const DexFile::CodeItem* code_item, @@ -775,41 +718,37 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, VLOG(compiler) << "Optimizing " << pass_observer.GetMethodName(); - if (run_optimizations_) { - ScopedObjectAccess soa(Thread::Current()); - StackHandleScopeCollection handles(soa.Self()); - ScopedThreadSuspension sts(soa.Self(), kNative); - - { - PassScope scope(SsaBuilder::kSsaBuilderPassName, &pass_observer); - GraphAnalysisResult result = graph->TryBuildingSsa(&handles); - if (result != kAnalysisSuccess) { - switch (result) { - case kAnalysisFailThrowCatchLoop: - MaybeRecordStat(MethodCompilationStat::kNotCompiledThrowCatchLoop); - break; - case kAnalysisFailAmbiguousArrayOp: - MaybeRecordStat(MethodCompilationStat::kNotCompiledAmbiguousArrayOp); - break; - case kAnalysisSuccess: - UNREACHABLE(); - } - pass_observer.SetGraphInBadState(); - return nullptr; + ScopedObjectAccess soa(Thread::Current()); + StackHandleScopeCollection handles(soa.Self()); + ScopedThreadSuspension sts(soa.Self(), kNative); + + { + PassScope scope(SsaBuilder::kSsaBuilderPassName, &pass_observer); + GraphAnalysisResult result = graph->TryBuildingSsa(&handles); + if (result != kAnalysisSuccess) { + switch (result) { + case kAnalysisFailThrowCatchLoop: + MaybeRecordStat(MethodCompilationStat::kNotCompiledThrowCatchLoop); + break; + case kAnalysisFailAmbiguousArrayOp: + MaybeRecordStat(MethodCompilationStat::kNotCompiledAmbiguousArrayOp); + break; + case kAnalysisSuccess: + UNREACHABLE(); } + pass_observer.SetGraphInBadState(); + return nullptr; } - - RunOptimizations(graph, - codegen.get(), - compiler_driver, - compilation_stats_.get(), - dex_compilation_unit, - &pass_observer, - &handles); - codegen->CompileOptimized(code_allocator); - } else { - codegen->CompileBaseline(code_allocator); } + + RunOptimizations(graph, + codegen.get(), + compiler_driver, + compilation_stats_.get(), + dex_compilation_unit, + &pass_observer, + &handles); + codegen->Compile(code_allocator); pass_observer.DumpDisassembly(); if (kArenaAllocatorCountAllocations) { @@ -861,11 +800,7 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, dex_cache)); if (codegen.get() != nullptr) { MaybeRecordStat(MethodCompilationStat::kCompiled); - if (run_optimizations_) { - method = EmitOptimized(&arena, &code_allocator, codegen.get(), compiler_driver); - } else { - method = EmitBaseline(&arena, &code_allocator, codegen.get(), compiler_driver); - } + method = Emit(&arena, &code_allocator, codegen.get(), compiler_driver); } } else { if (compiler_driver->GetCompilerOptions().VerifyAtRuntime()) { @@ -928,8 +863,6 @@ bool OptimizingCompiler::JitCompile(Thread* self, { // Go to native so that we don't block GC during compilation. 
ScopedThreadSuspension sts(self, kNative); - - DCHECK(run_optimizations_); codegen.reset( TryCompile(&arena, &code_allocator, diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc index 9d136f3ae6..be470ccb7d 100644 --- a/compiler/optimizing/parallel_move_resolver.cc +++ b/compiler/optimizing/parallel_move_resolver.cc @@ -504,7 +504,7 @@ void ParallelMoveResolverNoSwap::PerformMove(size_t index) { void ParallelMoveResolverNoSwap::UpdateMoveSource(Location from, Location to) { // This function is used to reduce the dependencies in the graph after // (from -> to) has been performed. Since we ensure there is no move with the same - // destination, (to -> X) can not be blocked while (from -> X) might still be + // destination, (to -> X) cannot be blocked while (from -> X) might still be // blocked. Consider for example the moves (0 -> 1) (1 -> 2) (1 -> 3). After // (1 -> 2) has been performed, the moves left are (0 -> 1) and (1 -> 3). There is // a dependency between the two. If we update the source location from 1 to 2, we diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index 2bae4bc5c8..d77639d608 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -72,8 +72,7 @@ RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator, float_spill_slots_.reserve(kDefaultNumberOfSpillSlots); double_spill_slots_.reserve(kDefaultNumberOfSpillSlots); - static constexpr bool kIsBaseline = false; - codegen->SetupBlockedRegisters(kIsBaseline); + codegen->SetupBlockedRegisters(); physical_core_register_intervals_.resize(codegen->GetNumberOfCoreRegisters(), nullptr); physical_fp_register_intervals_.resize(codegen->GetNumberOfFloatingPointRegisters(), nullptr); // Always reserve for the current method and the graph's max out registers. @@ -1735,6 +1734,12 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { } } +static bool IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop( + HInstruction* instruction) { + return instruction->GetBlock()->GetGraph()->HasIrreducibleLoops() && + (instruction->IsConstant() || instruction->IsCurrentMethod()); +} + void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval, HBasicBlock* from, HBasicBlock* to) const { @@ -1751,7 +1756,19 @@ void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval, // Interval was not split. return; } - DCHECK(destination != nullptr && source != nullptr); + + LiveInterval* parent = interval->GetParent(); + HInstruction* defined_by = parent->GetDefinedBy(); + if (destination == nullptr) { + // Our live_in fixed point calculation has found that the instruction is live + // in the `to` block because it will eventually enter an irreducible loop. Our + // live interval computation however does not compute a fixed point, and + // therefore will not have a location for that instruction for `to`. + // Because the instruction is a constant or the ArtMethod, we don't need to + // do anything: it will be materialized in the irreducible loop. + DCHECK(IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop(defined_by)); + return; + } if (!destination->HasRegister()) { // Values are eagerly spilled. Spill slot already contains appropriate value. @@ -1762,13 +1779,13 @@ void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval, // we need to put the moves at the entry of `to`. 
if (from->GetNormalSuccessors().size() == 1) { InsertParallelMoveAtExitOf(from, - interval->GetParent()->GetDefinedBy(), + defined_by, source->ToLocation(), destination->ToLocation()); } else { DCHECK_EQ(to->GetPredecessors().size(), 1u); InsertParallelMoveAtEntryOf(to, - interval->GetParent()->GetDefinedBy(), + defined_by, source->ToLocation(), destination->ToLocation()); } diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index 7494e336b1..165d09d1a5 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -422,6 +422,34 @@ bool SsaBuilder::FixAmbiguousArrayOps() { return true; } +void SsaBuilder::RemoveRedundantUninitializedStrings() { + if (GetGraph()->IsDebuggable()) { + // Do not perform the optimization for consistency with the interpreter, + // which always allocates an object for new-instance of String. + return; + } + + for (HNewInstance* new_instance : uninitialized_strings_) { + DCHECK(new_instance->IsStringAlloc()); + + // Replace NewInstance of String with NullConstant if not used prior to + // calling StringFactory. In case of deoptimization, the interpreter is + // expected to skip the null check on the `this` argument of the StringFactory call. + if (!new_instance->HasNonEnvironmentUses()) { + new_instance->ReplaceWith(GetGraph()->GetNullConstant()); + new_instance->GetBlock()->RemoveInstruction(new_instance); + + // Remove LoadClass if not needed any more. + HLoadClass* load_class = new_instance->InputAt(0)->AsLoadClass(); + DCHECK(load_class != nullptr); + DCHECK(!load_class->NeedsAccessCheck()) << "String class is always accessible"; + if (!load_class->HasUses()) { + load_class->GetBlock()->RemoveInstruction(load_class); + } + } + } +} + GraphAnalysisResult SsaBuilder::BuildSsa() { // 1) Visit in reverse post order. We need to have all predecessors of a block // visited (with the exception of loops) in order to create the right environment @@ -487,7 +515,15 @@ GraphAnalysisResult SsaBuilder::BuildSsa() { // input types. dead_phi_elimimation.EliminateDeadPhis(); - // 11) Clear locals. + // 11) Step 1) replaced uses of NewInstances of String with the results of + // their corresponding StringFactory calls. Unless the String objects are used + // before they are initialized, they can be replaced with NullConstant. + // Note that this optimization is valid only if unsimplified code does not use + // the uninitialized value because we assume execution can be deoptimized at + // any safepoint. We must therefore perform it before any other optimizations. + RemoveRedundantUninitializedStrings(); + + // 12) Clear locals. for (HInstructionIterator it(GetGraph()->GetEntryBlock()->GetInstructions()); !it.Done(); it.Advance()) { @@ -891,12 +927,21 @@ void SsaBuilder::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { if (invoke->IsStringInit()) { // This is a StringFactory call which acts as a String constructor. Its // result replaces the empty String pre-allocated by NewInstance. - HNewInstance* new_instance = invoke->GetThisArgumentOfStringInit(); - invoke->RemoveThisArgumentOfStringInit(); + HInstruction* arg_this = invoke->GetAndRemoveThisArgumentOfStringInit(); + + // Replacing the NewInstance might render it redundant. Keep a list of these + // to be visited once it is clear whether it has remaining uses.
+ if (arg_this->IsNewInstance()) { + uninitialized_strings_.push_back(arg_this->AsNewInstance()); + } else { + DCHECK(arg_this->IsPhi()); + // NewInstance is not the direct input of the StringFactory call. It might + // be redundant but optimizing this case is not worth the effort. + } - // Walk over all vregs and replace any occurrence of `new_instance` with `invoke`. + // Walk over all vregs and replace any occurrence of `arg_this` with `invoke`. for (size_t vreg = 0, e = current_locals_->size(); vreg < e; ++vreg) { - if ((*current_locals_)[vreg] == new_instance) { + if ((*current_locals_)[vreg] == arg_this) { (*current_locals_)[vreg] = invoke; } } diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h index 28eef6a40c..ccef8ea380 100644 --- a/compiler/optimizing/ssa_builder.h +++ b/compiler/optimizing/ssa_builder.h @@ -57,6 +57,7 @@ class SsaBuilder : public HGraphVisitor { loop_headers_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)), ambiguous_agets_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)), ambiguous_asets_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)), + uninitialized_strings_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)), locals_for_(graph->GetBlocks().size(), ArenaVector<HInstruction*>(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)), graph->GetArena()->Adapter(kArenaAllocSsaBuilder)) { @@ -105,6 +106,8 @@ class SsaBuilder : public HGraphVisitor { HPhi* GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive::Type type); HArrayGet* GetFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget); + void RemoveRedundantUninitializedStrings(); + StackHandleScopeCollection* const handles_; // True if types of ambiguous ArrayGets have been resolved. @@ -119,6 +122,7 @@ class SsaBuilder : public HGraphVisitor { ArenaVector<HArrayGet*> ambiguous_agets_; ArenaVector<HArraySet*> ambiguous_asets_; + ArenaVector<HNewInstance*> uninitialized_strings_; // HEnvironment for each block. ArenaVector<ArenaVector<HInstruction*>> locals_for_; diff --git a/compiler/profile_assistant.cc b/compiler/profile_assistant.cc index 81f2a5692d..85335efcc4 100644 --- a/compiler/profile_assistant.cc +++ b/compiler/profile_assistant.cc @@ -16,54 +16,154 @@ #include "profile_assistant.h" +#include "base/unix_file/fd_file.h" +#include "os.h" + namespace art { // Minimum number of new methods that profiles must contain to enable recompilation. static constexpr const uint32_t kMinNewMethodsForCompilation = 10; -bool ProfileAssistant::ProcessProfiles( - const std::vector<std::string>& profile_files, - const std::vector<std::string>& reference_profile_files, - /*out*/ ProfileCompilationInfo** profile_compilation_info) { +bool ProfileAssistant::ProcessProfilesInternal( + const std::vector<ScopedFlock>& profile_files, + const std::vector<ScopedFlock>& reference_profile_files, + /*out*/ ProfileCompilationInfo** profile_compilation_info) { DCHECK(!profile_files.empty()); - DCHECK(reference_profile_files.empty() || + DCHECK(!reference_profile_files.empty() || (profile_files.size() == reference_profile_files.size())); std::vector<ProfileCompilationInfo> new_info(profile_files.size()); bool should_compile = false; // Read the main profile files. 
- for (size_t i = 0; i < profile_files.size(); i++) { - if (!new_info[i].Load(profile_files[i])) { - LOG(WARNING) << "Could not load profile file: " << profile_files[i]; + for (size_t i = 0; i < new_info.size(); i++) { + if (!new_info[i].Load(profile_files[i].GetFile()->Fd())) { + LOG(WARNING) << "Could not load profile file at index " << i; return false; } // Do we have enough new profiled methods that will make the compilation worthwhile? should_compile |= (new_info[i].GetNumberOfMethods() > kMinNewMethodsForCompilation); } + if (!should_compile) { - *profile_compilation_info = nullptr; return true; } std::unique_ptr<ProfileCompilationInfo> result(new ProfileCompilationInfo()); + // Merge information. for (size_t i = 0; i < new_info.size(); i++) { + if (!reference_profile_files.empty()) { + if (!new_info[i].Load(reference_profile_files[i].GetFile()->Fd())) { + LOG(WARNING) << "Could not load reference profile file at index " << i; + return false; + } + } // Merge all data into a single object. - result->Load(new_info[i]); - // If we have any reference profile information merge their information with - // the current profiles and save them back to disk. + if (!result->Load(new_info[i])) { + LOG(WARNING) << "Could not merge profile data at index " << i; + return false; + } + } + // We were successful in merging all profile information. Update the files. + for (size_t i = 0; i < new_info.size(); i++) { if (!reference_profile_files.empty()) { - if (!new_info[i].Load(reference_profile_files[i])) { - LOG(WARNING) << "Could not load reference profile file: " << reference_profile_files[i]; + if (!reference_profile_files[i].GetFile()->ClearContent()) { + PLOG(WARNING) << "Could not clear reference profile file at index " << i; + return false; + } + if (!new_info[i].Save(reference_profile_files[i].GetFile()->Fd())) { + LOG(WARNING) << "Could not save reference profile file at index " << i; return false; } - if (!new_info[i].Save(reference_profile_files[i])) { - LOG(WARNING) << "Could not save reference profile file: " << reference_profile_files[i]; + if (!profile_files[i].GetFile()->ClearContent()) { + PLOG(WARNING) << "Could not clear profile file at index " << i; return false; } } } + *profile_compilation_info = result.release(); return true; } +class ScopedCollectionFlock { + public: + explicit ScopedCollectionFlock(size_t size) : flocks_(size) {} + + // Will block until all the locks are acquired. + bool Init(const std::vector<std::string>& filenames, /* out */ std::string* error) { + for (size_t i = 0; i < filenames.size(); i++) { + if (!flocks_[i].Init(filenames[i].c_str(), O_RDWR, /* block */ true, error)) { + *error += " (index=" + std::to_string(i) + ")"; + return false; + } + } + return true; + } + + // Will block until all the locks are acquired. + bool Init(const std::vector<uint32_t>& fds, /* out */ std::string* error) { + for (size_t i = 0; i < fds.size(); i++) { + // We do not own the descriptor, so disable auto-close and don't check usage. 
+ File file(fds[i], false); + file.DisableAutoClose(); + if (!flocks_[i].Init(&file, error)) { + *error += " (index=" + std::to_string(i) + ")"; + return false; + } + } + return true; + } + + const std::vector<ScopedFlock>& Get() const { return flocks_; } + + private: + std::vector<ScopedFlock> flocks_; +}; + +bool ProfileAssistant::ProcessProfiles( + const std::vector<uint32_t>& profile_files_fd, + const std::vector<uint32_t>& reference_profile_files_fd, + /*out*/ ProfileCompilationInfo** profile_compilation_info) { + *profile_compilation_info = nullptr; + + std::string error; + ScopedCollectionFlock profile_files_flocks(profile_files_fd.size()); + if (!profile_files_flocks.Init(profile_files_fd, &error)) { + LOG(WARNING) << "Could not lock profile files: " << error; + return false; + } + ScopedCollectionFlock reference_profile_files_flocks(reference_profile_files_fd.size()); + if (!reference_profile_files_flocks.Init(reference_profile_files_fd, &error)) { + LOG(WARNING) << "Could not lock reference profile files: " << error; + return false; + } + + return ProcessProfilesInternal(profile_files_flocks.Get(), + reference_profile_files_flocks.Get(), + profile_compilation_info); +} + +bool ProfileAssistant::ProcessProfiles( + const std::vector<std::string>& profile_files, + const std::vector<std::string>& reference_profile_files, + /*out*/ ProfileCompilationInfo** profile_compilation_info) { + *profile_compilation_info = nullptr; + + std::string error; + ScopedCollectionFlock profile_files_flocks(profile_files.size()); + if (!profile_files_flocks.Init(profile_files, &error)) { + LOG(WARNING) << "Could not lock profile files: " << error; + return false; + } + ScopedCollectionFlock reference_profile_files_flocks(reference_profile_files.size()); + if (!reference_profile_files_flocks.Init(reference_profile_files, &error)) { + LOG(WARNING) << "Could not lock reference profile files: " << error; + return false; + } + + return ProcessProfilesInternal(profile_files_flocks.Get(), + reference_profile_files_flocks.Get(), + profile_compilation_info); +} + } // namespace art diff --git a/compiler/profile_assistant.h b/compiler/profile_assistant.h index 088c8bd1c7..ad5e2163cf 100644 --- a/compiler/profile_assistant.h +++ b/compiler/profile_assistant.h @@ -20,6 +20,7 @@ #include <string> #include <vector> +#include "base/scoped_flock.h" #include "jit/offline_profiling_info.h" namespace art { @@ -52,7 +53,17 @@ class ProfileAssistant { const std::vector<std::string>& reference_profile_files, /*out*/ ProfileCompilationInfo** profile_compilation_info); + static bool ProcessProfiles( + const std::vector<uint32_t>& profile_files_fd_, + const std::vector<uint32_t>& reference_profile_files_fd_, + /*out*/ ProfileCompilationInfo** profile_compilation_info); + private: + static bool ProcessProfilesInternal( + const std::vector<ScopedFlock>& profile_files, + const std::vector<ScopedFlock>& reference_profile_files, + /*out*/ ProfileCompilationInfo** profile_compilation_info); + DISALLOW_COPY_AND_ASSIGN(ProfileAssistant); }; diff --git a/compiler/profile_assistant_test.cc b/compiler/profile_assistant_test.cc new file mode 100644 index 0000000000..58b7513377 --- /dev/null +++ b/compiler/profile_assistant_test.cc @@ -0,0 +1,279 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <gtest/gtest.h> + +#include "base/unix_file/fd_file.h" +#include "common_runtime_test.h" +#include "compiler/profile_assistant.h" +#include "jit/offline_profiling_info.h" + +namespace art { + +class ProfileAssistantTest : public CommonRuntimeTest { + protected: + void SetupProfile(const std::string& id, + uint32_t checksum, + uint16_t number_of_methods, + const ScratchFile& profile, + ProfileCompilationInfo* info, + uint16_t start_method_index = 0) { + std::string dex_location1 = "location1" + id; + uint32_t dex_location_checksum1 = checksum; + std::string dex_location2 = "location2" + id; + uint32_t dex_location_checksum2 = 10 * checksum; + for (uint16_t i = start_method_index; i < start_method_index + number_of_methods; i++) { + ASSERT_TRUE(info->AddData(dex_location1, dex_location_checksum1, i)); + ASSERT_TRUE(info->AddData(dex_location2, dex_location_checksum2, i)); + } + ASSERT_TRUE(info->Save(GetFd(profile))); + ASSERT_EQ(0, profile.GetFile()->Flush()); + ASSERT_TRUE(profile.GetFile()->ResetOffset()); + } + + uint32_t GetFd(const ScratchFile& file) const { + return static_cast<uint32_t>(file.GetFd()); + } +}; + +TEST_F(ProfileAssistantTest, AdviseCompilationEmptyReferences) { + ScratchFile profile1; + ScratchFile profile2; + ScratchFile reference_profile1; + ScratchFile reference_profile2; + + std::vector<uint32_t> profile_fds({ + GetFd(profile1), + GetFd(profile2)}); + std::vector<uint32_t> reference_profile_fds({ + GetFd(reference_profile1), + GetFd(reference_profile2)}); + + const uint16_t kNumberOfMethodsToEnableCompilation = 100; + ProfileCompilationInfo info1; + SetupProfile("p1", 1, kNumberOfMethodsToEnableCompilation, profile1, &info1); + ProfileCompilationInfo info2; + SetupProfile("p2", 2, kNumberOfMethodsToEnableCompilation, profile2, &info2); + + // We should advise compilation. + ProfileCompilationInfo* result; + ASSERT_TRUE(ProfileAssistant::ProcessProfiles(profile_fds, reference_profile_fds, &result)); + ASSERT_TRUE(result != nullptr); + + // The resulting compilation info must be equal to the merge of the inputs. + ProfileCompilationInfo expected; + ASSERT_TRUE(expected.Load(info1)); + ASSERT_TRUE(expected.Load(info2)); + ASSERT_TRUE(expected.Equals(*result)); + + // The information from profiles must be transferred to the reference profiles. + ProfileCompilationInfo file_info1; + ASSERT_TRUE(reference_profile1.GetFile()->ResetOffset()); + ASSERT_TRUE(file_info1.Load(GetFd(reference_profile1))); + ASSERT_TRUE(file_info1.Equals(info1)); + + ProfileCompilationInfo file_info2; + ASSERT_TRUE(reference_profile2.GetFile()->ResetOffset()); + ASSERT_TRUE(file_info2.Load(GetFd(reference_profile2))); + ASSERT_TRUE(file_info2.Equals(info2)); + + // Initial profiles must be cleared.
+ ASSERT_EQ(0, profile1.GetFile()->GetLength()); + ASSERT_EQ(0, profile2.GetFile()->GetLength()); +} + +TEST_F(ProfileAssistantTest, AdviseCompilationNonEmptyReferences) { + ScratchFile profile1; + ScratchFile profile2; + ScratchFile reference_profile1; + ScratchFile reference_profile2; + + std::vector<uint32_t> profile_fds({ + GetFd(profile1), + GetFd(profile2)}); + std::vector<uint32_t> reference_profile_fds({ + GetFd(reference_profile1), + GetFd(reference_profile2)}); + + // The new profile info will contain the methods with indices 0-100. + const uint16_t kNumberOfMethodsToEnableCompilation = 100; + ProfileCompilationInfo info1; + SetupProfile("p1", 1, kNumberOfMethodsToEnableCompilation, profile1, &info1); + ProfileCompilationInfo info2; + SetupProfile("p2", 2, kNumberOfMethodsToEnableCompilation, profile2, &info2); + + + // The reference profile info will contain the methods with indices 50-150. + const uint16_t kNumberOfMethodsAlreadyCompiled = 100; + ProfileCompilationInfo reference_info1; + SetupProfile("p1", 1, kNumberOfMethodsAlreadyCompiled, reference_profile1, + &reference_info1, kNumberOfMethodsToEnableCompilation / 2); + ProfileCompilationInfo reference_info2; + SetupProfile("p2", 2, kNumberOfMethodsAlreadyCompiled, reference_profile2, + &reference_info2, kNumberOfMethodsToEnableCompilation / 2); + + // We should advise compilation. + ProfileCompilationInfo* result; + ASSERT_TRUE(ProfileAssistant::ProcessProfiles(profile_fds, reference_profile_fds, &result)); + ASSERT_TRUE(result != nullptr); + + // The resulting compilation info must be equal to the merge of the inputs. + ProfileCompilationInfo expected; + ASSERT_TRUE(expected.Load(info1)); + ASSERT_TRUE(expected.Load(info2)); + ASSERT_TRUE(expected.Load(reference_info1)); + ASSERT_TRUE(expected.Load(reference_info2)); + ASSERT_TRUE(expected.Equals(*result)); + + // The information from profiles must be transferred to the reference profiles. + ProfileCompilationInfo file_info1; + ProfileCompilationInfo merge1; + ASSERT_TRUE(merge1.Load(info1)); + ASSERT_TRUE(merge1.Load(reference_info1)); + ASSERT_TRUE(reference_profile1.GetFile()->ResetOffset()); + ASSERT_TRUE(file_info1.Load(GetFd(reference_profile1))); + ASSERT_TRUE(file_info1.Equals(merge1)); + + ProfileCompilationInfo file_info2; + ProfileCompilationInfo merge2; + ASSERT_TRUE(merge2.Load(info2)); + ASSERT_TRUE(merge2.Load(reference_info2)); + ASSERT_TRUE(reference_profile2.GetFile()->ResetOffset()); + ASSERT_TRUE(file_info2.Load(GetFd(reference_profile2))); + ASSERT_TRUE(file_info2.Equals(merge2)); + + // Initial profiles must be cleared. + ASSERT_EQ(0, profile1.GetFile()->GetLength()); + ASSERT_EQ(0, profile2.GetFile()->GetLength()); +} + +TEST_F(ProfileAssistantTest, DoNotAdviseCompilation) { + ScratchFile profile1; + ScratchFile profile2; + ScratchFile reference_profile1; + ScratchFile reference_profile2; + + std::vector<uint32_t> profile_fds({ + GetFd(profile1), + GetFd(profile2)}); + std::vector<uint32_t> reference_profile_fds({ + GetFd(reference_profile1), + GetFd(reference_profile2)}); + + const uint16_t kNumberOfMethodsToSkipCompilation = 1; + ProfileCompilationInfo info1; + SetupProfile("p1", 1, kNumberOfMethodsToSkipCompilation, profile1, &info1); + ProfileCompilationInfo info2; + SetupProfile("p2", 2, kNumberOfMethodsToSkipCompilation, profile2, &info2); + + // We should not advise compilation.
+ ProfileCompilationInfo* result = nullptr; + ASSERT_TRUE(ProfileAssistant::ProcessProfiles(profile_fds, reference_profile_fds, &result)); + ASSERT_TRUE(result == nullptr); + + // The information from profiles must remain the same. + ProfileCompilationInfo file_info1; + ASSERT_TRUE(profile1.GetFile()->ResetOffset()); + ASSERT_TRUE(file_info1.Load(GetFd(profile1))); + ASSERT_TRUE(file_info1.Equals(info1)); + + ProfileCompilationInfo file_info2; + ASSERT_TRUE(profile2.GetFile()->ResetOffset()); + ASSERT_TRUE(file_info2.Load(GetFd(profile2))); + ASSERT_TRUE(file_info2.Equals(info2)); + + // Reference profile files must remain empty. + ASSERT_EQ(0, reference_profile1.GetFile()->GetLength()); + ASSERT_EQ(0, reference_profile2.GetFile()->GetLength()); +} + +TEST_F(ProfileAssistantTest, FailProcessingBecauseOfProfiles) { + ScratchFile profile1; + ScratchFile profile2; + ScratchFile reference_profile1; + ScratchFile reference_profile2; + + std::vector<uint32_t> profile_fds({ + GetFd(profile1), + GetFd(profile2)}); + std::vector<uint32_t> reference_profile_fds({ + GetFd(reference_profile1), + GetFd(reference_profile2)}); + + const uint16_t kNumberOfMethodsToEnableCompilation = 100; + // Assign different hashes for the same dex file. This will make merging of the information fail. + ProfileCompilationInfo info1; + SetupProfile("p1", 1, kNumberOfMethodsToEnableCompilation, profile1, &info1); + ProfileCompilationInfo info2; + SetupProfile("p1", 2, kNumberOfMethodsToEnableCompilation, profile2, &info2); + + // We should fail processing. + ProfileCompilationInfo* result = nullptr; + ASSERT_FALSE(ProfileAssistant::ProcessProfiles(profile_fds, reference_profile_fds, &result)); + ASSERT_TRUE(result == nullptr); + + // The information from profiles must still remain the same. + ProfileCompilationInfo file_info1; + ASSERT_TRUE(profile1.GetFile()->ResetOffset()); + ASSERT_TRUE(file_info1.Load(GetFd(profile1))); + ASSERT_TRUE(file_info1.Equals(info1)); + + ProfileCompilationInfo file_info2; + ASSERT_TRUE(profile2.GetFile()->ResetOffset()); + ASSERT_TRUE(file_info2.Load(GetFd(profile2))); + ASSERT_TRUE(file_info2.Equals(info2)); + + // Reference profile files must still remain empty. + ASSERT_EQ(0, reference_profile1.GetFile()->GetLength()); + ASSERT_EQ(0, reference_profile2.GetFile()->GetLength()); +} + +TEST_F(ProfileAssistantTest, FailProcessingBecauseOfReferenceProfiles) { + ScratchFile profile1; + ScratchFile reference_profile; + + std::vector<uint32_t> profile_fds({ + GetFd(profile1)}); + std::vector<uint32_t> reference_profile_fds({ + GetFd(reference_profile)}); + + const uint16_t kNumberOfMethodsToEnableCompilation = 100; + // Assign different hashes for the same dex file. This will make merging of the information fail. + ProfileCompilationInfo info1; + SetupProfile("p1", 1, kNumberOfMethodsToEnableCompilation, profile1, &info1); + ProfileCompilationInfo reference_info; + SetupProfile("p1", 2, kNumberOfMethodsToEnableCompilation, reference_profile, &reference_info); + + // We should fail processing. + ProfileCompilationInfo* result = nullptr; + ASSERT_TRUE(profile1.GetFile()->ResetOffset()); + ASSERT_TRUE(reference_profile.GetFile()->ResetOffset()); + ASSERT_FALSE(ProfileAssistant::ProcessProfiles(profile_fds, reference_profile_fds, &result)); + ASSERT_TRUE(result == nullptr); + + // The information from profiles must still remain the same.
+ ProfileCompilationInfo file_info1; + ASSERT_TRUE(profile1.GetFile()->ResetOffset()); + ASSERT_TRUE(file_info1.Load(GetFd(profile1))); + ASSERT_TRUE(file_info1.Equals(info1)); + + ProfileCompilationInfo file_info2; + ASSERT_TRUE(reference_profile.GetFile()->ResetOffset()); + ASSERT_TRUE(file_info2.Load(GetFd(reference_profile))); + ASSERT_TRUE(file_info2.Equals(reference_info)); +} + +} // namespace art diff --git a/compiler/utils/test_dex_file_builder.h b/compiler/utils/test_dex_file_builder.h index b6a228c13c..e57a540669 100644 --- a/compiler/utils/test_dex_file_builder.h +++ b/compiler/utils/test_dex_file_builder.h @@ -21,6 +21,7 @@ #include <set> #include <map> #include <vector> +#include <zlib.h> #include "base/bit_utils.h" #include "base/logging.h" @@ -161,7 +162,6 @@ class TestDexFileBuilder { uint32_t total_size = data_section_offset + data_section_size; dex_file_data_.resize(total_size); - std::memcpy(&dex_file_data_[0], header_data.data, sizeof(DexFile::Header)); for (const auto& entry : strings_) { CHECK_LT(entry.first.size(), 128u); @@ -210,7 +210,12 @@ class TestDexFileBuilder { Write32(raw_offset + 4u, GetStringIdx(entry.first.name)); } - // Leave checksum and signature as zeros. + // Leave signature as zeros. + + header->file_size_ = dex_file_data_.size(); + size_t skip = sizeof(header->magic_) + sizeof(header->checksum_); + header->checksum_ = adler32(0u, dex_file_data_.data() + skip, dex_file_data_.size() - skip); + std::memcpy(&dex_file_data_[0], header_data.data, sizeof(DexFile::Header)); std::string error_msg; std::unique_ptr<const DexFile> dex_file(DexFile::Open( diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index d6caa3c338..7138a46890 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -186,6 +186,22 @@ void X86Assembler::bsrl(Register dst, const Address& src) { EmitOperand(dst, src); } +void X86Assembler::popcntl(Register dst, Register src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0xF3); + EmitUint8(0x0F); + EmitUint8(0xB8); + EmitRegisterOperand(dst, src); +} + +void X86Assembler::popcntl(Register dst, const Address& src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0xF3); + EmitUint8(0x0F); + EmitUint8(0xB8); + EmitOperand(dst, src); +} + void X86Assembler::movzxb(Register dst, ByteRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x0F); diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h index 655af9c184..759a41e80e 100644 --- a/compiler/utils/x86/assembler_x86.h +++ b/compiler/utils/x86/assembler_x86.h @@ -330,11 +330,15 @@ class X86Assembler FINAL : public Assembler { void movntl(const Address& dst, Register src); void bswapl(Register dst); + void bsfl(Register dst, Register src); void bsfl(Register dst, const Address& src); void bsrl(Register dst, Register src); void bsrl(Register dst, const Address& src); + void popcntl(Register dst, Register src); + void popcntl(Register dst, const Address& src); + void rorl(Register reg, const Immediate& imm); void rorl(Register operand, Register shifter); void roll(Register reg, const Immediate& imm); diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc index a9b991c7a0..0fd098227a 100644 --- a/compiler/utils/x86/assembler_x86_test.cc +++ b/compiler/utils/x86/assembler_x86_test.cc @@ -260,6 +260,19 @@ TEST_F(AssemblerX86Test, BsrlAddress) { DriverStr(expected, "bsrl_address"); } 
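The popcntl(Register, Register) emitter added above encodes POPCNT r32, r/m32 as F3 0F B8 /r. A minimal register-form sketch in the style of the surrounding tests, assuming the same AssemblerX86Test fixture and DriverStr helper; the PopcntlEncoding test name and the byte-level commentary are illustrative, not part of the change:

TEST_F(AssemblerX86Test, PopcntlEncoding) {
  // popcntl(ECX, EAX) should emit F3 0F B8 C8: the ModRM byte 0xC8 packs
  // mod=11 (register direct), reg=001 (ECX, the destination), rm=000 (EAX).
  GetAssembler()->popcntl(x86::Register(x86::ECX), x86::Register(x86::EAX));
  const char* expected = "popcntl %eax, %ecx\n";
  DriverStr(expected, "popcntl_encoding");
}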
+TEST_F(AssemblerX86Test, Popcntl) { + DriverStr(RepeatRR(&x86::X86Assembler::popcntl, "popcntl %{reg2}, %{reg1}"), "popcntl"); +} + +TEST_F(AssemblerX86Test, PopcntlAddress) { + GetAssembler()->popcntl(x86::Register(x86::EDI), x86::Address( + x86::Register(x86::EDI), x86::Register(x86::EBX), x86::TIMES_4, 12)); + const char* expected = + "popcntl 0xc(%EDI,%EBX,4), %EDI\n"; + + DriverStr(expected, "popcntl_address"); +} + // Rorl only allows CL as the shift count. std::string rorl_fn(AssemblerX86Test::Base* assembler_test, x86::X86Assembler* assembler) { std::ostringstream str; diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index db072678ef..10f5a005e1 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -2247,6 +2247,42 @@ void X86_64Assembler::bsrq(CpuRegister dst, const Address& src) { EmitOperand(dst.LowBits(), src); } +void X86_64Assembler::popcntl(CpuRegister dst, CpuRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0xF3); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xB8); + EmitRegisterOperand(dst.LowBits(), src.LowBits()); +} + +void X86_64Assembler::popcntl(CpuRegister dst, const Address& src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0xF3); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xB8); + EmitOperand(dst.LowBits(), src); +} + +void X86_64Assembler::popcntq(CpuRegister dst, CpuRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0xF3); + EmitRex64(dst, src); + EmitUint8(0x0F); + EmitUint8(0xB8); + EmitRegisterOperand(dst.LowBits(), src.LowBits()); +} + +void X86_64Assembler::popcntq(CpuRegister dst, const Address& src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0xF3); + EmitRex64(dst, src); + EmitUint8(0x0F); + EmitUint8(0xB8); + EmitOperand(dst.LowBits(), src); +} + void X86_64Assembler::repne_scasw() { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x66); diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index 01d28e305d..6f0847eb61 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -647,6 +647,11 @@ class X86_64Assembler FINAL : public Assembler { void bsrq(CpuRegister dst, CpuRegister src); void bsrq(CpuRegister dst, const Address& src); + void popcntl(CpuRegister dst, CpuRegister src); + void popcntl(CpuRegister dst, const Address& src); + void popcntq(CpuRegister dst, CpuRegister src); + void popcntq(CpuRegister dst, const Address& src); + void rorl(CpuRegister reg, const Immediate& imm); void rorl(CpuRegister operand, CpuRegister shifter); void roll(CpuRegister reg, const Immediate& imm); diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc index 00bb5ca36b..8a87fca96a 100644 --- a/compiler/utils/x86_64/assembler_x86_64_test.cc +++ b/compiler/utils/x86_64/assembler_x86_64_test.cc @@ -1333,6 +1333,44 @@ TEST_F(AssemblerX86_64Test, BsrqAddress) { DriverStr(expected, "bsrq_address"); } +TEST_F(AssemblerX86_64Test, Popcntl) { + DriverStr(Repeatrr(&x86_64::X86_64Assembler::popcntl, "popcntl %{reg2}, %{reg1}"), "popcntl"); +} + +TEST_F(AssemblerX86_64Test, PopcntlAddress) { + GetAssembler()->popcntl(x86_64::CpuRegister(x86_64::R10), x86_64::Address( + x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12)); + 
GetAssembler()->popcntl(x86_64::CpuRegister(x86_64::RDI), x86_64::Address( + x86_64::CpuRegister(x86_64::R10), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12)); + GetAssembler()->popcntl(x86_64::CpuRegister(x86_64::RDI), x86_64::Address( + x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12)); + const char* expected = + "popcntl 0xc(%RDI,%RBX,4), %R10d\n" + "popcntl 0xc(%R10,%RBX,4), %edi\n" + "popcntl 0xc(%RDI,%R9,4), %edi\n"; + + DriverStr(expected, "popcntl_address"); +} + +TEST_F(AssemblerX86_64Test, Popcntq) { + DriverStr(RepeatRR(&x86_64::X86_64Assembler::popcntq, "popcntq %{reg2}, %{reg1}"), "popcntq"); +} + +TEST_F(AssemblerX86_64Test, PopcntqAddress) { + GetAssembler()->popcntq(x86_64::CpuRegister(x86_64::R10), x86_64::Address( + x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12)); + GetAssembler()->popcntq(x86_64::CpuRegister(x86_64::RDI), x86_64::Address( + x86_64::CpuRegister(x86_64::R10), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12)); + GetAssembler()->popcntq(x86_64::CpuRegister(x86_64::RDI), x86_64::Address( + x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12)); + const char* expected = + "popcntq 0xc(%RDI,%RBX,4), %R10\n" + "popcntq 0xc(%R10,%RBX,4), %RDI\n" + "popcntq 0xc(%RDI,%R9,4), %RDI\n"; + + DriverStr(expected, "popcntq_address"); +} + ///////////////// // Near labels // /////////////////
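The popcntq forms exercised above differ from popcntl only in the REX prefix: EmitRex64 is emitted between the mandatory F3 prefix and the 0F B8 opcode, setting REX.W to select 64-bit operands (plus REX.R/REX.B when R8-R15 are involved, as the expected strings check). For reference, a self-contained sketch of the branch-free population count these instructions compute in a single cycle; the helper names are illustrative, not taken from the change:

#include <cstdint>

// Classic branch-free popcount; popcntl produces the same value in one instruction.
static int32_t PopCount32(uint32_t x) {
  x = x - ((x >> 1) & 0x55555555u);                      // sums of adjacent bit pairs
  x = (x & 0x33333333u) + ((x >> 2) & 0x33333333u);      // 4-bit partial sums
  x = (x + (x >> 4)) & 0x0F0F0F0Fu;                      // 8-bit partial sums
  return static_cast<int32_t>((x * 0x01010101u) >> 24);  // add the four byte sums
}

// 64-bit counterpart of popcntq: count each 32-bit half and add.
static int32_t PopCount64(uint64_t x) {
  return PopCount32(static_cast<uint32_t>(x)) + PopCount32(static_cast<uint32_t>(x >> 32));
}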