Diffstat (limited to 'compiler')
117 files changed, 3744 insertions, 2054 deletions
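The core refactoring in this change moves per-compilation state (the instruction set, its features, the image-class set, and the dex files for the oat file) from CompilerDriver into CompilerOptions. A minimal sketch of the resulting call pattern, using only accessors introduced in the diff below; the helper function itself and its logging are illustrative, not part of the commit:

// Illustration only (not in the commit): how callers reach the moved state
// after this change, via the CompilerOptions accessors added below.
static void DumpCompilationSetup(const CompilerDriver* driver) {
  const CompilerOptions& options = driver->GetCompilerOptions();
  // The ISA and its features now live on CompilerOptions, not the driver.
  InstructionSet isa = options.GetInstructionSet();
  const InstructionSetFeatures* features = options.GetInstructionSetFeatures();
  // The dex files being compiled are also held by the options (empty for JIT).
  size_t dex_file_count = options.GetDexFilesForOatFile().size();
  // Image-class membership replaces the old CompilerDriver::IsImageClass().
  bool object_in_image = options.IsImageClass("Ljava/lang/Object;");
  VLOG(compiler) << isa << " ("
                 << (features != nullptr ? features->GetFeatureString() : "no features")
                 << "), " << dex_file_count << " dex file(s), Object in image: "
                 << object_in_image;
}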
diff --git a/compiler/Android.bp b/compiler/Android.bp index be963fbbdb..e1d382f6f4 100644 --- a/compiler/Android.bp +++ b/compiler/Android.bp @@ -63,6 +63,7 @@ art_cc_defaults { "optimizing/inliner.cc", "optimizing/instruction_builder.cc", "optimizing/instruction_simplifier.cc", + "optimizing/intrinsic_objects.cc", "optimizing/intrinsics.cc", "optimizing/licm.cc", "optimizing/linear_order.cc", @@ -160,6 +161,7 @@ art_cc_defaults { "utils/x86/assembler_x86.cc", "utils/x86/jni_macro_assembler_x86.cc", "utils/x86/managed_register_x86.cc", + "optimizing/instruction_simplifier_x86.cc", ], }, x86_64: { @@ -345,6 +347,7 @@ art_cc_test { "optimizing/parallel_move_test.cc", "optimizing/pretty_printer_test.cc", "optimizing/reference_type_propagation_test.cc", + "optimizing/select_generator_test.cc", "optimizing/side_effects_test.cc", "optimizing/ssa_liveness_analysis_test.cc", "optimizing/ssa_test.cc", diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc index a7f16d394e..e8e1d408ef 100644 --- a/compiler/common_compiler_test.cc +++ b/compiler/common_compiler_test.cc @@ -16,6 +16,8 @@ #include "common_compiler_test.h" +#include <type_traits> + #include "arch/instruction_set_features.h" #include "art_field-inl.h" #include "art_method-inl.h" @@ -29,6 +31,7 @@ #include "dex/verification_results.h" #include "driver/compiler_driver.h" #include "driver/compiler_options.h" +#include "jni/java_vm_ext.h" #include "interpreter/interpreter.h" #include "mirror/class-inl.h" #include "mirror/class_loader.h" @@ -37,6 +40,7 @@ #include "oat_quick_method_header.h" #include "scoped_thread_state_change-inl.h" #include "thread-current-inl.h" +#include "utils/atomic_dex_ref_map-inl.h" namespace art { @@ -79,6 +83,7 @@ void CommonCompilerTest::MakeExecutable(ArtMethod* method) { const size_t size = method_info.size() + vmap_table.size() + sizeof(method_header) + code_size; chunk->reserve(size + max_padding); chunk->resize(sizeof(method_header)); + static_assert(std::is_trivially_copyable<OatQuickMethodHeader>::value, "Cannot use memcpy"); memcpy(&(*chunk)[0], &method_header, sizeof(method_header)); chunk->insert(chunk->begin(), vmap_table.begin(), vmap_table.end()); chunk->insert(chunk->begin(), method_info.begin(), method_info.end()); @@ -131,11 +136,10 @@ void CommonCompilerTest::MakeExecutable(ObjPtr<mirror::ClassLoader> class_loader } } -// Get the set of image classes given to the compiler-driver in SetUp. Note: the compiler -// driver assumes ownership of the set, so the test should properly release the set. -std::unordered_set<std::string>* CommonCompilerTest::GetImageClasses() { +// Get the set of image classes given to the compiler options in SetUp. +std::unique_ptr<HashSet<std::string>> CommonCompilerTest::GetImageClasses() { // Empty set: by default no classes are retained in the image. - return new std::unordered_set<std::string>(); + return std::make_unique<HashSet<std::string>>(); } // Get ProfileCompilationInfo that should be passed to the driver. @@ -149,11 +153,7 @@ void CommonCompilerTest::SetUp() { { ScopedObjectAccess soa(Thread::Current()); - const InstructionSet instruction_set = kRuntimeISA; - // Take the default set of instruction features from the build. 
- instruction_set_features_ = InstructionSetFeatures::FromCppDefines(); - - runtime_->SetInstructionSet(instruction_set); + runtime_->SetInstructionSet(instruction_set_); for (uint32_t i = 0; i < static_cast<uint32_t>(CalleeSaveType::kLastCalleeSaveType); ++i) { CalleeSaveType type = CalleeSaveType(i); if (!runtime_->HasCalleeSaveMethod(type)) { @@ -161,26 +161,51 @@ void CommonCompilerTest::SetUp() { } } - CreateCompilerDriver(compiler_kind_, instruction_set); + CreateCompilerDriver(); + } +} + +void CommonCompilerTest::ApplyInstructionSet() { + // Copy local instruction_set_ and instruction_set_features_ to *compiler_options_; + CHECK(instruction_set_features_ != nullptr); + if (instruction_set_ == InstructionSet::kThumb2) { + CHECK_EQ(InstructionSet::kArm, instruction_set_features_->GetInstructionSet()); + } else { + CHECK_EQ(instruction_set_, instruction_set_features_->GetInstructionSet()); + } + compiler_options_->instruction_set_ = instruction_set_; + compiler_options_->instruction_set_features_ = + InstructionSetFeatures::FromBitmap(instruction_set_, instruction_set_features_->AsBitmap()); + CHECK(compiler_options_->instruction_set_features_->Equals(instruction_set_features_.get())); +} + +void CommonCompilerTest::OverrideInstructionSetFeatures(InstructionSet instruction_set, + const std::string& variant) { + instruction_set_ = instruction_set; + std::string error_msg; + instruction_set_features_ = + InstructionSetFeatures::FromVariant(instruction_set, variant, &error_msg); + CHECK(instruction_set_features_ != nullptr) << error_msg; + + if (compiler_options_ != nullptr) { + ApplyInstructionSet(); } } -void CommonCompilerTest::CreateCompilerDriver(Compiler::Kind kind, - InstructionSet isa, - size_t number_of_threads) { +void CommonCompilerTest::CreateCompilerDriver() { + ApplyInstructionSet(); + compiler_options_->boot_image_ = true; + compiler_options_->compile_pic_ = false; // Non-PIC boot image is a test configuration. compiler_options_->SetCompilerFilter(GetCompilerFilter()); + compiler_options_->image_classes_.swap(*GetImageClasses()); compiler_driver_.reset(new CompilerDriver(compiler_options_.get(), verification_results_.get(), - kind, - isa, - instruction_set_features_.get(), - GetImageClasses(), - number_of_threads, + compiler_kind_, + &compiler_options_->image_classes_, + number_of_threads_, /* swap_fd */ -1, GetProfileCompilationInfo())); - // We typically don't generate an image in unit tests, disable this optimization by default. 
- compiler_driver_->SetSupportBootImageFixup(false); } void CommonCompilerTest::SetUpRuntimeOptions(RuntimeOptions* options) { @@ -202,11 +227,6 @@ void CommonCompilerTest::SetCompilerKind(Compiler::Kind compiler_kind) { compiler_kind_ = compiler_kind; } -InstructionSet CommonCompilerTest::GetInstructionSet() const { - DCHECK(compiler_driver_.get() != nullptr); - return compiler_driver_->GetInstructionSet(); -} - void CommonCompilerTest::TearDown() { compiler_driver_.reset(); callbacks_.reset(); @@ -232,9 +252,49 @@ void CommonCompilerTest::CompileClass(mirror::ClassLoader* class_loader, const c void CommonCompilerTest::CompileMethod(ArtMethod* method) { CHECK(method != nullptr); - TimingLogger timings("CommonTest::CompileMethod", false, false); + TimingLogger timings("CommonCompilerTest::CompileMethod", false, false); TimingLogger::ScopedTiming t(__FUNCTION__, &timings); - compiler_driver_->CompileOne(Thread::Current(), method, &timings); + { + Thread* self = Thread::Current(); + jobject class_loader = self->GetJniEnv()->GetVm()->AddGlobalRef(self, method->GetClassLoader()); + + DCHECK(!Runtime::Current()->IsStarted()); + const DexFile* dex_file = method->GetDexFile(); + uint16_t class_def_idx = method->GetClassDefIndex(); + uint32_t method_idx = method->GetDexMethodIndex(); + uint32_t access_flags = method->GetAccessFlags(); + InvokeType invoke_type = method->GetInvokeType(); + StackHandleScope<2> hs(self); + Handle<mirror::DexCache> dex_cache(hs.NewHandle(method->GetDexCache())); + Handle<mirror::ClassLoader> h_class_loader = hs.NewHandle( + self->DecodeJObject(class_loader)->AsClassLoader()); + const DexFile::CodeItem* code_item = dex_file->GetCodeItem(method->GetCodeItemOffset()); + + std::vector<const DexFile*> dex_files; + dex_files.push_back(dex_file); + + // Go to native so that we don't block GC during compilation. 
+ ScopedThreadSuspension sts(self, kNative); + + compiler_driver_->InitializeThreadPools(); + + compiler_driver_->PreCompile(class_loader, dex_files, &timings); + + compiler_driver_->CompileOne(self, + class_loader, + *dex_file, + class_def_idx, + method_idx, + access_flags, + invoke_type, + code_item, + dex_cache, + h_class_loader); + + compiler_driver_->FreeThreadPools(); + + self->GetJniEnv()->DeleteGlobalRef(class_loader); + } TimingLogger::ScopedTiming t2("MakeExecutable", &timings); MakeExecutable(method); } @@ -288,4 +348,14 @@ void CommonCompilerTest::UnreserveImageSpace() { image_reservation_.reset(); } +void CommonCompilerTest::SetDexFilesForOatFile(const std::vector<const DexFile*>& dex_files) { + compiler_options_->dex_files_for_oat_file_ = dex_files; + compiler_driver_->compiled_classes_.AddDexFiles(dex_files); + compiler_driver_->dex_to_dex_compiler_.SetDexFiles(dex_files); +} + +void CommonCompilerTest::ClearBootImageOption() { + compiler_options_->boot_image_ = false; +} + } // namespace art diff --git a/compiler/common_compiler_test.h b/compiler/common_compiler_test.h index 39c8bd817b..db38110400 100644 --- a/compiler/common_compiler_test.h +++ b/compiler/common_compiler_test.h @@ -18,9 +18,11 @@ #define ART_COMPILER_COMMON_COMPILER_TEST_H_ #include <list> -#include <unordered_set> #include <vector> +#include "arch/instruction_set.h" +#include "arch/instruction_set_features.h" +#include "base/hash_set.h" #include "common_runtime_test.h" #include "compiler.h" #include "oat_file.h" @@ -33,6 +35,7 @@ class ClassLoader; class CompilerDriver; class CompilerOptions; class CumulativeLogger; +class DexFile; class ProfileCompilationInfo; class VerificationResults; @@ -54,18 +57,15 @@ class CommonCompilerTest : public CommonRuntimeTest { REQUIRES_SHARED(Locks::mutator_lock_); protected: - virtual void SetUp(); + void SetUp() OVERRIDE; - virtual void SetUpRuntimeOptions(RuntimeOptions* options); + void SetUpRuntimeOptions(RuntimeOptions* options) OVERRIDE; Compiler::Kind GetCompilerKind() const; void SetCompilerKind(Compiler::Kind compiler_kind); - InstructionSet GetInstructionSet() const; - - // Get the set of image classes given to the compiler-driver in SetUp. Note: the compiler - // driver assumes ownership of the set, so the test should properly release the set. - virtual std::unordered_set<std::string>* GetImageClasses(); + // Get the set of image classes given to the compiler-driver in SetUp. 
+ virtual std::unique_ptr<HashSet<std::string>> GetImageClasses(); virtual ProfileCompilationInfo* GetProfileCompilationInfo(); @@ -73,7 +73,7 @@ class CommonCompilerTest : public CommonRuntimeTest { return CompilerFilter::kDefaultCompilerFilter; } - virtual void TearDown(); + void TearDown() OVERRIDE; void CompileClass(mirror::ClassLoader* class_loader, const char* class_name) REQUIRES_SHARED(Locks::mutator_lock_); @@ -88,18 +88,31 @@ class CommonCompilerTest : public CommonRuntimeTest { const char* method_name, const char* signature) REQUIRES_SHARED(Locks::mutator_lock_); - void CreateCompilerDriver(Compiler::Kind kind, InstructionSet isa, size_t number_of_threads = 2U); + void ApplyInstructionSet(); + void OverrideInstructionSetFeatures(InstructionSet instruction_set, const std::string& variant); + + void CreateCompilerDriver(); void ReserveImageSpace(); void UnreserveImageSpace(); + void SetDexFilesForOatFile(const std::vector<const DexFile*>& dex_files); + + void ClearBootImageOption(); + Compiler::Kind compiler_kind_ = Compiler::kOptimizing; + size_t number_of_threads_ = 2u; + + InstructionSet instruction_set_ = + (kRuntimeISA == InstructionSet::kArm) ? InstructionSet::kThumb2 : kRuntimeISA; + // Take the default set of instruction features from the build. + std::unique_ptr<const InstructionSetFeatures> instruction_set_features_ + = InstructionSetFeatures::FromCppDefines(); + std::unique_ptr<CompilerOptions> compiler_options_; std::unique_ptr<VerificationResults> verification_results_; std::unique_ptr<CompilerDriver> compiler_driver_; - std::unique_ptr<const InstructionSetFeatures> instruction_set_features_; - private: std::unique_ptr<MemMap> image_reservation_; diff --git a/compiler/compiler.h b/compiler/compiler.h index f2ec3a9fa3..ef3d87f02b 100644 --- a/compiler/compiler.h +++ b/compiler/compiler.h @@ -39,12 +39,6 @@ template<class T> class Handle; class OatWriter; class Thread; -enum class CopyOption { - kNever, - kAlways, - kOnlyIfCompressed -}; - class Compiler { public: enum Kind { diff --git a/compiler/debug/dwarf/dwarf_test.h b/compiler/debug/dwarf/dwarf_test.h index 9a7c604ca1..6b039a7b5f 100644 --- a/compiler/debug/dwarf/dwarf_test.h +++ b/compiler/debug/dwarf/dwarf_test.h @@ -28,7 +28,7 @@ #include "base/os.h" #include "base/unix_file/fd_file.h" -#include "common_runtime_test.h" +#include "common_compiler_test.h" #include "gtest/gtest.h" #include "linker/elf_builder.h" #include "linker/file_output_stream.h" @@ -39,7 +39,7 @@ namespace dwarf { #define DW_CHECK(substring) Check(substring, false, __FILE__, __LINE__) #define DW_CHECK_NEXT(substring) Check(substring, true, __FILE__, __LINE__) -class DwarfTest : public CommonRuntimeTest { +class DwarfTest : public CommonCompilerTest { public: static constexpr bool kPrintObjdumpOutput = false; // debugging. 
diff --git a/compiler/debug/elf_debug_info_writer.h b/compiler/debug/elf_debug_info_writer.h index f2002a0af6..bda7108c74 100644 --- a/compiler/debug/elf_debug_info_writer.h +++ b/compiler/debug/elf_debug_info_writer.h @@ -208,10 +208,8 @@ class ElfCompilationUnitWriter { std::vector<DexRegisterMap> dex_reg_maps; if (accessor.HasCodeItem() && mi->code_info != nullptr) { code_info.reset(new CodeInfo(mi->code_info)); - for (size_t s = 0; s < code_info->GetNumberOfStackMaps(); ++s) { - const StackMap stack_map = code_info->GetStackMapAt(s); - dex_reg_maps.push_back(code_info->GetDexRegisterMapOf( - stack_map, accessor.RegistersSize())); + for (StackMap stack_map : code_info->GetStackMaps()) { + dex_reg_maps.push_back(code_info->GetDexRegisterMapOf(stack_map)); } } diff --git a/compiler/debug/elf_debug_line_writer.h b/compiler/debug/elf_debug_line_writer.h index a7adab5506..3d78943cd0 100644 --- a/compiler/debug/elf_debug_line_writer.h +++ b/compiler/debug/elf_debug_line_writer.h @@ -101,9 +101,7 @@ class ElfDebugLineWriter { // Use stack maps to create mapping table from pc to dex. const CodeInfo code_info(mi->code_info); pc2dex_map.reserve(code_info.GetNumberOfStackMaps()); - for (uint32_t s = 0; s < code_info.GetNumberOfStackMaps(); s++) { - StackMap stack_map = code_info.GetStackMapAt(s); - DCHECK(stack_map.IsValid()); + for (StackMap stack_map : code_info.GetStackMaps()) { const uint32_t pc = stack_map.GetNativePcOffset(isa); const int32_t dex = stack_map.GetDexPc(); pc2dex_map.push_back({pc, dex}); diff --git a/compiler/debug/elf_debug_loc_writer.h b/compiler/debug/elf_debug_loc_writer.h index 8cb4e55bbc..b663291b4d 100644 --- a/compiler/debug/elf_debug_loc_writer.h +++ b/compiler/debug/elf_debug_loc_writer.h @@ -147,11 +147,11 @@ static std::vector<VariableLocation> GetVariableLocations( DexRegisterLocation reg_hi = DexRegisterLocation::None(); DCHECK_LT(stack_map_index, dex_register_maps.size()); DexRegisterMap dex_register_map = dex_register_maps[stack_map_index]; - DCHECK(dex_register_map.IsValid()); + DCHECK(!dex_register_map.empty()); CodeItemDataAccessor accessor(*method_info->dex_file, method_info->code_item); - reg_lo = dex_register_map.GetDexRegisterLocation(vreg); + reg_lo = dex_register_map[vreg]; if (is64bitValue) { - reg_hi = dex_register_map.GetDexRegisterLocation(vreg + 1); + reg_hi = dex_register_map[vreg + 1]; } // Add location entry for this address range. diff --git a/compiler/dex/dex_to_dex_compiler.cc b/compiler/dex/dex_to_dex_compiler.cc index fb6a72b1c5..fcaa0cdd07 100644 --- a/compiler/dex/dex_to_dex_compiler.cc +++ b/compiler/dex/dex_to_dex_compiler.cc @@ -31,6 +31,7 @@ #include "dex/dex_instruction-inl.h" #include "dex_to_dex_decompiler.h" #include "driver/compiler_driver.h" +#include "driver/compiler_options.h" #include "driver/dex_compilation_unit.h" #include "mirror/dex_cache.h" #include "quicken_info.h" @@ -609,7 +610,7 @@ CompiledMethod* DexToDexCompiler::CompileMethod( } // Create a `CompiledMethod`, with the quickened information in the vmap table. - InstructionSet instruction_set = driver_->GetInstructionSet(); + InstructionSet instruction_set = driver_->GetCompilerOptions().GetInstructionSet(); if (instruction_set == InstructionSet::kThumb2) { // Don't use the thumb2 instruction set to avoid the one off code delta. 
instruction_set = InstructionSet::kArm; diff --git a/compiler/dex/dex_to_dex_decompiler_test.cc b/compiler/dex/dex_to_dex_decompiler_test.cc index 1fe42ad531..4f83d605a3 100644 --- a/compiler/dex/dex_to_dex_decompiler_test.cc +++ b/compiler/dex/dex_to_dex_decompiler_test.cc @@ -16,6 +16,7 @@ #include "dex_to_dex_decompiler.h" +#include "base/casts.h" #include "class_linker.h" #include "common_compiler_test.h" #include "compiled_method-inl.h" @@ -26,6 +27,7 @@ #include "driver/compiler_options.h" #include "handle_scope-inl.h" #include "mirror/class_loader.h" +#include "quick_compiler_callbacks.h" #include "runtime.h" #include "scoped_thread_state_change-inl.h" #include "thread.h" @@ -43,9 +45,9 @@ class DexToDexDecompilerTest : public CommonCompilerTest { compiler_options_->SetCompilerFilter(CompilerFilter::kQuicken); // Create the main VerifierDeps, here instead of in the compiler since we want to aggregate // the results for all the dex files, not just the results for the current dex file. - Runtime::Current()->GetCompilerCallbacks()->SetVerifierDeps( + down_cast<QuickCompilerCallbacks*>(Runtime::Current()->GetCompilerCallbacks())->SetVerifierDeps( new verifier::VerifierDeps(GetDexFiles(class_loader))); - compiler_driver_->SetDexFilesForOatFile(GetDexFiles(class_loader)); + SetDexFilesForOatFile(GetDexFiles(class_loader)); compiler_driver_->CompileAll(class_loader, GetDexFiles(class_loader), &timings); } @@ -82,9 +84,8 @@ class DexToDexDecompilerTest : public CommonCompilerTest { ASSERT_NE(0, cmp); // Unquicken the dex file. - for (uint32_t i = 0; i < updated_dex_file->NumClassDefs(); ++i) { + for (ClassAccessor accessor : updated_dex_file->GetClasses()) { // Unquicken each method. - ClassAccessor accessor(*updated_dex_file, updated_dex_file->GetClassDef(i)); for (const ClassAccessor::Method& method : accessor.GetMethods()) { CompiledMethod* compiled_method = compiler_driver_->GetCompiledMethod( method.GetReference()); diff --git a/compiler/dex/inline_method_analyser.cc b/compiler/dex/inline_method_analyser.cc index dc044c1210..fe8b766d0f 100644 --- a/compiler/dex/inline_method_analyser.cc +++ b/compiler/dex/inline_method_analyser.cc @@ -724,7 +724,8 @@ bool InlineMethodAnalyser::ComputeSpecialAccessorInfo(ArtMethod* method, return false; } DCHECK_GE(field->GetOffset().Int32Value(), 0); - // Do not interleave function calls with bit field writes to placate valgrind. Bug: 27552451. + // Historical note: We made sure not to interleave function calls with bit field writes to + // placate Valgrind. Bug: 27552451. uint32_t field_offset = field->GetOffset().Uint32Value(); bool is_volatile = field->IsVolatile(); result->field_idx = field_idx; diff --git a/compiler/driver/compiled_method_storage.cc b/compiler/driver/compiled_method_storage.cc index aa8277edb4..d56b135aca 100644 --- a/compiler/driver/compiled_method_storage.cc +++ b/compiler/driver/compiled_method_storage.cc @@ -21,6 +21,7 @@ #include <android-base/logging.h> +#include "base/data_hash.h" #include "base/utils.h" #include "compiled_method.h" #include "linker/linker_patch.h" @@ -80,65 +81,7 @@ class CompiledMethodStorage::DedupeHashFunc { public: size_t operator()(const ArrayRef<ContentType>& array) const { - const uint8_t* data = reinterpret_cast<const uint8_t*>(array.data()); - // TODO: More reasonable assertion. 
- // static_assert(IsPowerOfTwo(sizeof(ContentType)), - // "ContentType is not power of two, don't know whether array layout is as assumed"); - uint32_t len = sizeof(ContentType) * array.size(); - if (kUseMurmur3Hash) { - static constexpr uint32_t c1 = 0xcc9e2d51; - static constexpr uint32_t c2 = 0x1b873593; - static constexpr uint32_t r1 = 15; - static constexpr uint32_t r2 = 13; - static constexpr uint32_t m = 5; - static constexpr uint32_t n = 0xe6546b64; - - uint32_t hash = 0; - - const int nblocks = len / 4; - typedef __attribute__((__aligned__(1))) uint32_t unaligned_uint32_t; - const unaligned_uint32_t *blocks = reinterpret_cast<const uint32_t*>(data); - int i; - for (i = 0; i < nblocks; i++) { - uint32_t k = blocks[i]; - k *= c1; - k = (k << r1) | (k >> (32 - r1)); - k *= c2; - - hash ^= k; - hash = ((hash << r2) | (hash >> (32 - r2))) * m + n; - } - - const uint8_t *tail = reinterpret_cast<const uint8_t*>(data + nblocks * 4); - uint32_t k1 = 0; - - switch (len & 3) { - case 3: - k1 ^= tail[2] << 16; - FALLTHROUGH_INTENDED; - case 2: - k1 ^= tail[1] << 8; - FALLTHROUGH_INTENDED; - case 1: - k1 ^= tail[0]; - - k1 *= c1; - k1 = (k1 << r1) | (k1 >> (32 - r1)); - k1 *= c2; - hash ^= k1; - } - - hash ^= len; - hash ^= (hash >> 16); - hash *= 0x85ebca6b; - hash ^= (hash >> 13); - hash *= 0xc2b2ae35; - hash ^= (hash >> 16); - - return hash; - } else { - return HashBytes(data, len); - } + return DataHash()(array); } }; diff --git a/compiler/driver/compiled_method_storage_test.cc b/compiler/driver/compiled_method_storage_test.cc index 42fbba5109..aed04f9c75 100644 --- a/compiler/driver/compiled_method_storage_test.cc +++ b/compiler/driver/compiled_method_storage_test.cc @@ -31,8 +31,6 @@ TEST(CompiledMethodStorage, Deduplicate) { CompilerDriver driver(&compiler_options, &verification_results, Compiler::kOptimizing, - /* instruction_set_ */ InstructionSet::kNone, - /* instruction_set_features */ nullptr, /* image_classes */ nullptr, /* thread_count */ 1u, /* swap_fd */ -1, diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index 653e9edb45..7e6fdaf633 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -67,7 +67,6 @@ #include "mirror/object-refvisitor-inl.h" #include "mirror/object_array-inl.h" #include "mirror/throwable.h" -#include "nativehelper/ScopedLocalRef.h" #include "object_lock.h" #include "profile/profile_compilation_info.h" #include "runtime.h" @@ -262,9 +261,7 @@ CompilerDriver::CompilerDriver( const CompilerOptions* compiler_options, VerificationResults* verification_results, Compiler::Kind compiler_kind, - InstructionSet instruction_set, - const InstructionSetFeatures* instruction_set_features, - std::unordered_set<std::string>* image_classes, + HashSet<std::string>* image_classes, size_t thread_count, int swap_fd, const ProfileCompilationInfo* profile_compilation_info) @@ -272,18 +269,14 @@ CompilerDriver::CompilerDriver( verification_results_(verification_results), compiler_(Compiler::Create(this, compiler_kind)), compiler_kind_(compiler_kind), - instruction_set_( - instruction_set == InstructionSet::kArm ? 
InstructionSet::kThumb2 : instruction_set), - instruction_set_features_(instruction_set_features), requires_constructor_barrier_lock_("constructor barrier lock"), non_relative_linker_patch_count_(0u), - image_classes_(image_classes), + image_classes_(std::move(image_classes)), number_of_soft_verifier_failures_(0), had_hard_verifier_failure_(false), parallel_thread_count_(thread_count), stats_(new AOTCompilationStats), compiler_context_(nullptr), - support_boot_image_fixup_(true), compiled_method_storage_(swap_fd), profile_compilation_info_(profile_compilation_info), max_arena_alloc_(0), @@ -293,7 +286,7 @@ CompilerDriver::CompilerDriver( compiler_->Init(); if (GetCompilerOptions().IsBootImage()) { - CHECK(image_classes_.get() != nullptr) << "Expected image classes for boot image"; + CHECK(image_classes_ != nullptr) << "Expected image classes for boot image"; } compiled_method_storage_.SetDedupeEnabled(compiler_options_->DeduplicateCode()); @@ -310,13 +303,15 @@ CompilerDriver::~CompilerDriver() { } -#define CREATE_TRAMPOLINE(type, abi, offset) \ - if (Is64BitInstructionSet(instruction_set_)) { \ - return CreateTrampoline64(instruction_set_, abi, \ - type ## _ENTRYPOINT_OFFSET(PointerSize::k64, offset)); \ - } else { \ - return CreateTrampoline32(instruction_set_, abi, \ - type ## _ENTRYPOINT_OFFSET(PointerSize::k32, offset)); \ +#define CREATE_TRAMPOLINE(type, abi, offset) \ + if (Is64BitInstructionSet(GetCompilerOptions().GetInstructionSet())) { \ + return CreateTrampoline64(GetCompilerOptions().GetInstructionSet(), \ + abi, \ + type ## _ENTRYPOINT_OFFSET(PointerSize::k64, offset)); \ + } else { \ + return CreateTrampoline32(GetCompilerOptions().GetInstructionSet(), \ + abi, \ + type ## _ENTRYPOINT_OFFSET(PointerSize::k32, offset)); \ } std::unique_ptr<const std::vector<uint8_t>> CompilerDriver::CreateJniDlsymLookup() const { @@ -351,12 +346,6 @@ void CompilerDriver::CompileAll(jobject class_loader, InitializeThreadPools(); - VLOG(compiler) << "Before precompile " << GetMemoryUsageString(false); - // Precompile: - // 1) Load image classes - // 2) Resolve all classes - // 3) Attempt to verify all classes - // 4) Attempt to initialize image classes, and trivially initialized classes PreCompile(class_loader, dex_files, timings); if (GetCompilerOptions().IsBootImage()) { // We don't need to setup the intrinsics for non boot image compilation, as @@ -608,7 +597,7 @@ static void CompileMethodQuick( if ((access_flags & kAccNative) != 0) { // Are we extracting only and have support for generic JNI down calls? if (!driver->GetCompilerOptions().IsJniCompilationEnabled() && - InstructionSetHasGenericJniStub(driver->GetInstructionSet())) { + InstructionSetHasGenericJniStub(driver->GetCompilerOptions().GetInstructionSet())) { // Leaving this empty will trigger the generic JNI version } else { // Query any JNI optimization annotations such as @FastNative or @CriticalNative. 
@@ -673,46 +662,24 @@ static void CompileMethodQuick( quick_fn); } -void CompilerDriver::CompileOne(Thread* self, ArtMethod* method, TimingLogger* timings) { - DCHECK(!Runtime::Current()->IsStarted()); - jobject jclass_loader; - const DexFile* dex_file; - uint16_t class_def_idx; - uint32_t method_idx = method->GetDexMethodIndex(); - uint32_t access_flags = method->GetAccessFlags(); - InvokeType invoke_type = method->GetInvokeType(); - StackHandleScope<2> hs(self); - Handle<mirror::DexCache> dex_cache(hs.NewHandle(method->GetDexCache())); - Handle<mirror::ClassLoader> class_loader( - hs.NewHandle(method->GetDeclaringClass()->GetClassLoader())); - { - ScopedObjectAccessUnchecked soa(self); - ScopedLocalRef<jobject> local_class_loader( - soa.Env(), soa.AddLocalReference<jobject>(class_loader.Get())); - jclass_loader = soa.Env()->NewGlobalRef(local_class_loader.get()); - // Find the dex_file - dex_file = method->GetDexFile(); - class_def_idx = method->GetClassDefIndex(); - } - const DexFile::CodeItem* code_item = dex_file->GetCodeItem(method->GetCodeItemOffset()); - - // Go to native so that we don't block GC during compilation. - ScopedThreadSuspension sts(self, kNative); - - std::vector<const DexFile*> dex_files; - dex_files.push_back(dex_file); - - InitializeThreadPools(); - - PreCompile(jclass_loader, dex_files, timings); - +// Compile a single Method. (For testing only.) +void CompilerDriver::CompileOne(Thread* self, + jobject class_loader, + const DexFile& dex_file, + uint16_t class_def_idx, + uint32_t method_idx, + uint32_t access_flags, + InvokeType invoke_type, + const DexFile::CodeItem* code_item, + Handle<mirror::DexCache> dex_cache, + Handle<mirror::ClassLoader> h_class_loader) { // Can we run DEX-to-DEX compiler on this class ? optimizer::DexToDexCompiler::CompilationLevel dex_to_dex_compilation_level = GetDexToDexCompilationLevel(self, *this, - jclass_loader, - *dex_file, - dex_file->GetClassDef(class_def_idx)); + class_loader, + dex_file, + dex_file.GetClassDef(class_def_idx)); CompileMethodQuick(self, this, @@ -721,8 +688,8 @@ void CompilerDriver::CompileOne(Thread* self, ArtMethod* method, TimingLogger* t invoke_type, class_def_idx, method_idx, - class_loader, - *dex_file, + h_class_loader, + dex_file, dex_to_dex_compilation_level, true, dex_cache); @@ -737,17 +704,13 @@ void CompilerDriver::CompileOne(Thread* self, ArtMethod* method, TimingLogger* t invoke_type, class_def_idx, method_idx, - class_loader, - *dex_file, + h_class_loader, + dex_file, dex_to_dex_compilation_level, true, dex_cache); dex_to_dex_compiler_.ClearState(); } - - FreeThreadPools(); - - self->GetJniEnv()->DeleteGlobalRef(jclass_loader); } void CompilerDriver::Resolve(jobject class_loader, @@ -838,7 +801,7 @@ static void InitializeTypeCheckBitstrings(CompilerDriver* driver, // primitive) classes. We may reconsider this in future if it's deemed to be beneficial. // And we cannot use it for classes outside the boot image as we do not know the runtime // value of their bitstring when compiling (it may not even get assigned at runtime). 
- if (descriptor[0] == 'L' && driver->IsImageClass(descriptor)) { + if (descriptor[0] == 'L' && driver->GetCompilerOptions().IsImageClass(descriptor)) { ObjPtr<mirror::Class> klass = class_linker->LookupResolvedType(type_index, dex_cache.Get(), @@ -919,6 +882,20 @@ void CompilerDriver::PreCompile(jobject class_loader, TimingLogger* timings) { CheckThreadPools(); + VLOG(compiler) << "Before precompile " << GetMemoryUsageString(false); + + compiled_classes_.AddDexFiles(GetCompilerOptions().GetDexFilesForOatFile()); + dex_to_dex_compiler_.SetDexFiles(GetCompilerOptions().GetDexFilesForOatFile()); + + // Precompile: + // 1) Load image classes. + // 2) Resolve all classes. + // 3) For deterministic boot image, resolve strings for const-string instructions. + // 4) Attempt to verify all classes. + // 5) Attempt to initialize image classes, and trivially initialized classes. + // 6) Update the set of image classes. + // 7) For deterministic boot image, initialize bitstrings for type checking. + LoadImageClasses(timings); VLOG(compiler) << "LoadImageClasses: " << GetMemoryUsageString(false); @@ -988,21 +965,11 @@ void CompilerDriver::PreCompile(jobject class_loader, } } -bool CompilerDriver::IsImageClass(const char* descriptor) const { - if (image_classes_ != nullptr) { - // If we have a set of image classes, use those. - return image_classes_->find(descriptor) != image_classes_->end(); - } - // No set of image classes, assume we include all the classes. - // NOTE: Currently only reachable from InitImageMethodVisitor for the app image case. - return !GetCompilerOptions().IsBootImage(); -} - bool CompilerDriver::IsClassToCompile(const char* descriptor) const { if (classes_to_compile_ == nullptr) { return true; } - return classes_to_compile_->find(descriptor) != classes_to_compile_->end(); + return classes_to_compile_->find(StringPiece(descriptor)) != classes_to_compile_->end(); } bool CompilerDriver::ShouldCompileBasedOnProfile(const MethodReference& method_ref) const { @@ -1091,7 +1058,7 @@ class ResolveCatchBlockExceptionsClassVisitor : public ClassVisitor { class RecordImageClassesVisitor : public ClassVisitor { public: - explicit RecordImageClassesVisitor(std::unordered_set<std::string>* image_classes) + explicit RecordImageClassesVisitor(HashSet<std::string>* image_classes) : image_classes_(image_classes) {} bool operator()(ObjPtr<mirror::Class> klass) OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) { @@ -1101,7 +1068,7 @@ class RecordImageClassesVisitor : public ClassVisitor { } private: - std::unordered_set<std::string>* const image_classes_; + HashSet<std::string>* const image_classes_; }; // Make a list of descriptors for classes to include in the image @@ -1116,7 +1083,7 @@ void CompilerDriver::LoadImageClasses(TimingLogger* timings) { Thread* self = Thread::Current(); ScopedObjectAccess soa(self); ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); - CHECK(image_classes_.get() != nullptr); + CHECK(image_classes_ != nullptr); for (auto it = image_classes_->begin(), end = image_classes_->end(); it != end;) { const std::string& descriptor(*it); StackHandleScope<1> hs(self); @@ -1124,7 +1091,7 @@ void CompilerDriver::LoadImageClasses(TimingLogger* timings) { hs.NewHandle(class_linker->FindSystemClass(self, descriptor.c_str()))); if (klass == nullptr) { VLOG(compiler) << "Failed to find class " << descriptor; - image_classes_->erase(it++); + it = image_classes_->erase(it); self->ClearException(); } else { ++it; @@ -1174,15 +1141,15 @@ void 
CompilerDriver::LoadImageClasses(TimingLogger* timings) { // We walk the roots looking for classes so that we'll pick up the // above classes plus any classes them depend on such super // classes, interfaces, and the required ClassLinker roots. - RecordImageClassesVisitor visitor(image_classes_.get()); + RecordImageClassesVisitor visitor(image_classes_); class_linker->VisitClasses(&visitor); - CHECK_NE(image_classes_->size(), 0U); + CHECK(!image_classes_->empty()); } static void MaybeAddToImageClasses(Thread* self, ObjPtr<mirror::Class> klass, - std::unordered_set<std::string>* image_classes) + HashSet<std::string>* image_classes) REQUIRES_SHARED(Locks::mutator_lock_) { DCHECK_EQ(self, Thread::Current()); StackHandleScope<1> hs(self); @@ -1190,11 +1157,10 @@ static void MaybeAddToImageClasses(Thread* self, const PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize(); while (!klass->IsObjectClass()) { const char* descriptor = klass->GetDescriptor(&temp); - std::pair<std::unordered_set<std::string>::iterator, bool> result = - image_classes->insert(descriptor); - if (!result.second) { // Previously inserted. - break; + if (image_classes->find(StringPiece(descriptor)) != image_classes->end()) { + break; // Previously inserted. } + image_classes->insert(descriptor); VLOG(compiler) << "Adding " << descriptor << " to image classes"; for (size_t i = 0, num_interfaces = klass->NumDirectInterfaces(); i != num_interfaces; ++i) { ObjPtr<mirror::Class> interface = mirror::Class::GetDirectInterface(self, klass, i); @@ -1216,7 +1182,7 @@ static void MaybeAddToImageClasses(Thread* self, class ClinitImageUpdate { public: static ClinitImageUpdate* Create(VariableSizedHandleScope& hs, - std::unordered_set<std::string>* image_class_descriptors, + HashSet<std::string>* image_class_descriptors, Thread* self, ClassLinker* linker) { std::unique_ptr<ClinitImageUpdate> res(new ClinitImageUpdate(hs, @@ -1273,7 +1239,7 @@ class ClinitImageUpdate { bool operator()(ObjPtr<mirror::Class> klass) OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) { std::string temp; - const char* name = klass->GetDescriptor(&temp); + StringPiece name(klass->GetDescriptor(&temp)); if (data_->image_class_descriptors_->find(name) != data_->image_class_descriptors_->end()) { data_->image_classes_.push_back(hs_.NewHandle(klass)); } else { @@ -1292,7 +1258,7 @@ class ClinitImageUpdate { }; ClinitImageUpdate(VariableSizedHandleScope& hs, - std::unordered_set<std::string>* image_class_descriptors, + HashSet<std::string>* image_class_descriptors, Thread* self, ClassLinker* linker) REQUIRES_SHARED(Locks::mutator_lock_) : hs_(hs), @@ -1339,7 +1305,7 @@ class ClinitImageUpdate { VariableSizedHandleScope& hs_; mutable std::vector<Handle<mirror::Class>> to_insert_; mutable std::unordered_set<mirror::Object*> marked_objects_; - std::unordered_set<std::string>* const image_class_descriptors_; + HashSet<std::string>* const image_class_descriptors_; std::vector<Handle<mirror::Class>> image_classes_; Thread* const self_; const char* old_cause_; @@ -1359,7 +1325,7 @@ void CompilerDriver::UpdateImageClasses(TimingLogger* timings) { VariableSizedHandleScope hs(Thread::Current()); std::string error_msg; std::unique_ptr<ClinitImageUpdate> update(ClinitImageUpdate::Create(hs, - image_classes_.get(), + image_classes_, Thread::Current(), runtime->GetClassLinker())); @@ -1383,7 +1349,7 @@ bool CompilerDriver::CanAssumeClassIsLoaded(mirror::Class* klass) { } std::string temp; const char* descriptor = klass->GetDescriptor(&temp); - return 
IsImageClass(descriptor); + return GetCompilerOptions().IsImageClass(descriptor); } bool CompilerDriver::CanAccessTypeWithoutChecks(ObjPtr<mirror::Class> referrer_class, @@ -1685,16 +1651,14 @@ static void CheckAndClearResolveException(Thread* self) bool CompilerDriver::RequiresConstructorBarrier(const DexFile& dex_file, uint16_t class_def_idx) const { - ClassAccessor accessor(dex_file, dex_file.GetClassDef(class_def_idx)); - bool has_is_final = false; + ClassAccessor accessor(dex_file, class_def_idx); // We require a constructor barrier if there are final instance fields. - accessor.VisitFields(/*static*/ VoidFunctor(), - [&](const ClassAccessor::Field& field) { + for (const ClassAccessor::Field& field : accessor.GetInstanceFields()) { if (field.IsFinal()) { - has_is_final = true; + return true; } - }); - return has_is_final; + } + return false; } class ResolveClassFieldsAndMethodsVisitor : public CompilationVisitor { @@ -1744,7 +1708,7 @@ class ResolveClassFieldsAndMethodsVisitor : public CompilationVisitor { // fields are assigned within the lock held for class initialization. bool requires_constructor_barrier = false; - ClassAccessor accessor(dex_file, class_def); + ClassAccessor accessor(dex_file, class_def_index); // Optionally resolve fields and methods and figure out if we need a constructor barrier. auto method_visitor = [&](const ClassAccessor::Method& method) REQUIRES_SHARED(Locks::mutator_lock_) { @@ -1926,13 +1890,12 @@ bool CompilerDriver::FastVerify(jobject jclass_loader, // Fetch the list of unverified classes. const std::set<dex::TypeIndex>& unverified_classes = verifier_deps->GetUnverifiedClasses(*dex_file); - uint32_t class_def_idx = 0u; for (ClassAccessor accessor : dex_file->GetClasses()) { if (unverified_classes.find(accessor.GetClassIdx()) == unverified_classes.end()) { if (compiler_only_verifies) { // Just update the compiled_classes_ map. The compiler doesn't need to resolve // the type. - ClassReference ref(dex_file, class_def_idx); + ClassReference ref(dex_file, accessor.GetClassDefIndex()); const ClassStatus existing = ClassStatus::kNotReady; ClassStateTable::InsertResult result = compiled_classes_.Insert(ref, existing, ClassStatus::kVerified); @@ -1959,7 +1922,6 @@ bool CompilerDriver::FastVerify(jobject jclass_loader, class_loader, soa.Self()); } - ++class_def_idx; } } return true; @@ -1986,7 +1948,8 @@ void CompilerDriver::Verify(jobject jclass_loader, // Create per-thread VerifierDeps to avoid contention on the main one. // We will merge them after verification. for (ThreadPoolWorker* worker : parallel_thread_pool_->GetWorkers()) { - worker->GetThread()->SetVerifierDeps(new verifier::VerifierDeps(dex_files_for_oat_file_)); + worker->GetThread()->SetVerifierDeps( + new verifier::VerifierDeps(GetCompilerOptions().GetDexFilesForOatFile())); } } @@ -2011,7 +1974,7 @@ void CompilerDriver::Verify(jobject jclass_loader, for (ThreadPoolWorker* worker : parallel_thread_pool_->GetWorkers()) { verifier::VerifierDeps* thread_deps = worker->GetThread()->GetVerifierDeps(); worker->GetThread()->SetVerifierDeps(nullptr); - verifier_deps->MergeWith(*thread_deps, dex_files_for_oat_file_); + verifier_deps->MergeWith(*thread_deps, GetCompilerOptions().GetDexFilesForOatFile()); delete thread_deps; } Thread::Current()->SetVerifierDeps(nullptr); @@ -2179,8 +2142,9 @@ class SetVerifiedClassVisitor : public CompilationVisitor { mirror::Class::SetStatus(klass, ClassStatus::kVerified, soa.Self()); // Mark methods as pre-verified. 
If we don't do this, the interpreter will run with // access checks. - klass->SetSkipAccessChecksFlagOnAllMethods( - GetInstructionSetPointerSize(manager_->GetCompiler()->GetInstructionSet())); + InstructionSet instruction_set = + manager_->GetCompiler()->GetCompilerOptions().GetInstructionSet(); + klass->SetSkipAccessChecksFlagOnAllMethods(GetInstructionSetPointerSize(instruction_set)); klass->SetVerificationAttempted(); } // Record the final class status if necessary. @@ -2297,7 +2261,7 @@ class InitializeClassVisitor : public CompilationVisitor { (is_app_image || is_boot_image) && is_superclass_initialized && !too_many_encoded_fields && - manager_->GetCompiler()->IsImageClass(descriptor)) { + manager_->GetCompiler()->GetCompilerOptions().IsImageClass(descriptor)) { bool can_init_static_fields = false; if (is_boot_image) { // We need to initialize static fields, we only do this for image classes that aren't @@ -2700,7 +2664,7 @@ static void CompileDexFile(CompilerDriver* driver, jobject jclass_loader = context.GetClassLoader(); ClassReference ref(&dex_file, class_def_index); const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_index); - ClassAccessor accessor(dex_file, class_def); + ClassAccessor accessor(dex_file, class_def_index); // Skip compiling classes with generic verifier failures since they will still fail at runtime if (context.GetCompiler()->GetVerificationResults()->IsClassRejected(ref)) { return; @@ -2884,7 +2848,7 @@ void CompilerDriver::RecordClassStatus(const ClassReference& ref, ClassStatus st if (kIsDebugBuild) { // Check to make sure it's not a dex file for an oat file we are compiling since these // should always succeed. These do not include classes in for used libraries. - for (const DexFile* dex_file : GetDexFilesForOatFile()) { + for (const DexFile* dex_file : GetCompilerOptions().GetDexFilesForOatFile()) { CHECK_NE(ref.dex_file, dex_file) << ref.dex_file->GetLocation(); } } @@ -2983,18 +2947,6 @@ std::string CompilerDriver::GetMemoryUsageString(bool extended) const { return oss.str(); } -bool CompilerDriver::MayInlineInternal(const DexFile* inlined_from, - const DexFile* inlined_into) const { - // We're not allowed to inline across dex files if we're the no-inline-from dex file. - if (inlined_from != inlined_into && - compiler_options_->GetNoInlineFromDexFile() != nullptr && - ContainsElement(*compiler_options_->GetNoInlineFromDexFile(), inlined_from)) { - return false; - } - - return true; -} - void CompilerDriver::InitializeThreadPools() { size_t parallel_count = parallel_thread_count_ > 0 ? 
parallel_thread_count_ - 1 : 0; parallel_thread_pool_.reset( @@ -3007,12 +2959,6 @@ void CompilerDriver::FreeThreadPools() { single_thread_pool_.reset(); } -void CompilerDriver::SetDexFilesForOatFile(const std::vector<const DexFile*>& dex_files) { - dex_files_for_oat_file_ = dex_files; - compiled_classes_.AddDexFiles(dex_files); - dex_to_dex_compiler_.SetDexFiles(dex_files); -} - void CompilerDriver::SetClasspathDexFiles(const std::vector<const DexFile*>& dex_files) { classpath_classes_.AddDexFiles(dex_files); } diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h index 55f3561e3a..0a8754a6a6 100644 --- a/compiler/driver/compiler_driver.h +++ b/compiler/driver/compiler_driver.h @@ -20,7 +20,6 @@ #include <atomic> #include <set> #include <string> -#include <unordered_set> #include <vector> #include "android-base/strings.h" @@ -28,6 +27,7 @@ #include "arch/instruction_set.h" #include "base/array_ref.h" #include "base/bit_utils.h" +#include "base/hash_set.h" #include "base/mutex.h" #include "base/os.h" #include "base/quasi_atomic.h" @@ -97,45 +97,36 @@ class CompilerDriver { CompilerDriver(const CompilerOptions* compiler_options, VerificationResults* verification_results, Compiler::Kind compiler_kind, - InstructionSet instruction_set, - const InstructionSetFeatures* instruction_set_features, - std::unordered_set<std::string>* image_classes, + HashSet<std::string>* image_classes, size_t thread_count, int swap_fd, const ProfileCompilationInfo* profile_compilation_info); ~CompilerDriver(); - // Set dex files associated with the oat file being compiled. - void SetDexFilesForOatFile(const std::vector<const DexFile*>& dex_files); - // Set dex files classpath. void SetClasspathDexFiles(const std::vector<const DexFile*>& dex_files); - // Get dex files associated with the the oat file being compiled. - ArrayRef<const DexFile* const> GetDexFilesForOatFile() const { - return ArrayRef<const DexFile* const>(dex_files_for_oat_file_); - } - void CompileAll(jobject class_loader, const std::vector<const DexFile*>& dex_files, TimingLogger* timings) REQUIRES(!Locks::mutator_lock_); - // Compile a single Method. - void CompileOne(Thread* self, ArtMethod* method, TimingLogger* timings) - REQUIRES_SHARED(Locks::mutator_lock_); + // Compile a single Method. (For testing only.) + void CompileOne(Thread* self, + jobject class_loader, + const DexFile& dex_file, + uint16_t class_def_idx, + uint32_t method_idx, + uint32_t access_flags, + InvokeType invoke_type, + const DexFile::CodeItem* code_item, + Handle<mirror::DexCache> dex_cache, + Handle<mirror::ClassLoader> h_class_loader) + REQUIRES(!Locks::mutator_lock_); VerificationResults* GetVerificationResults() const; - InstructionSet GetInstructionSet() const { - return instruction_set_; - } - - const InstructionSetFeatures* GetInstructionSetFeatures() const { - return instruction_set_features_; - } - const CompilerOptions& GetCompilerOptions() const { return *compiler_options_; } @@ -144,10 +135,6 @@ class CompilerDriver { return compiler_.get(); } - const std::unordered_set<std::string>* GetImageClasses() const { - return image_classes_.get(); - } - // Generate the trampolines that are invoked by unresolved direct methods. 
std::unique_ptr<const std::vector<uint8_t>> CreateJniDlsymLookup() const; std::unique_ptr<const std::vector<uint8_t>> CreateQuickGenericJniTrampoline() const; @@ -280,14 +267,6 @@ class CompilerDriver { const VerifiedMethod* GetVerifiedMethod(const DexFile* dex_file, uint32_t method_idx) const; bool IsSafeCast(const DexCompilationUnit* mUnit, uint32_t dex_pc); - bool GetSupportBootImageFixup() const { - return support_boot_image_fixup_; - } - - void SetSupportBootImageFixup(bool support_boot_image_fixup) { - support_boot_image_fixup_ = support_boot_image_fixup; - } - void SetCompilerContext(void* compiler_context) { compiler_context_ = compiler_context; } @@ -308,9 +287,6 @@ class CompilerDriver { return compiled_method_storage_.DedupeEnabled(); } - // Checks if class specified by type_idx is one of the image_classes_ - bool IsImageClass(const char* descriptor) const; - // Checks whether the provided class should be compiled, i.e., is in classes_to_compile_. bool IsClassToCompile(const char* descriptor) const; @@ -352,13 +328,6 @@ class CompilerDriver { bool CanAssumeClassIsLoaded(mirror::Class* klass) REQUIRES_SHARED(Locks::mutator_lock_); - bool MayInline(const DexFile* inlined_from, const DexFile* inlined_into) const { - if (!kIsTargetBuild) { - return MayInlineInternal(inlined_from, inlined_into); - } - return true; - } - const ProfileCompilationInfo* GetProfileCompilationInfo() const { return profile_compilation_info_; } @@ -452,8 +421,6 @@ class CompilerDriver { const std::vector<const DexFile*>& dex_files, TimingLogger* timings); - bool MayInlineInternal(const DexFile* inlined_from, const DexFile* inlined_into) const; - void InitializeThreadPools(); void FreeThreadPools(); void CheckThreadPools(); @@ -466,9 +433,6 @@ class CompilerDriver { std::unique_ptr<Compiler> compiler_; Compiler::Kind compiler_kind_; - const InstructionSet instruction_set_; - const InstructionSetFeatures* const instruction_set_features_; - // All class references that require constructor barriers. If the class reference is not in the // set then the result has not yet been computed. mutable ReaderWriterMutex requires_constructor_barrier_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; @@ -491,22 +455,24 @@ class CompilerDriver { // in the .oat_patches ELF section if requested in the compiler options. Atomic<size_t> non_relative_linker_patch_count_; - // If image_ is true, specifies the classes that will be included in the image. - // Note if image_classes_ is null, all classes are included in the image. - std::unique_ptr<std::unordered_set<std::string>> image_classes_; + // Image classes to be updated by PreCompile(). + // TODO: Remove this member which is a non-const pointer to the CompilerOptions' data. + // Pass this explicitly to the PreCompile() which should be called directly from + // Dex2Oat rather than implicitly by CompileAll(). + HashSet<std::string>* image_classes_; // Specifies the classes that will be compiled. Note that if classes_to_compile_ is null, // all classes are eligible for compilation (duplication filters etc. will still apply). // This option may be restricted to the boot image, depending on a flag in the implementation. - std::unique_ptr<std::unordered_set<std::string>> classes_to_compile_; + std::unique_ptr<HashSet<std::string>> classes_to_compile_; std::atomic<uint32_t> number_of_soft_verifier_failures_; bool had_hard_verifier_failure_; // A thread pool that can (potentially) run tasks in parallel. 
- std::unique_ptr<ThreadPool> parallel_thread_pool_; size_t parallel_thread_count_; + std::unique_ptr<ThreadPool> parallel_thread_pool_; // A thread pool that guarantees running single-threaded on the main thread. std::unique_ptr<ThreadPool> single_thread_pool_; @@ -519,11 +485,6 @@ class CompilerDriver { void* compiler_context_; - bool support_boot_image_fixup_; - - // List of dex files associates with the oat file. - std::vector<const DexFile*> dex_files_for_oat_file_; - CompiledMethodStorage compiled_method_storage_; // Info for profile guided compilation. @@ -534,6 +495,7 @@ class CompilerDriver { // Compiler for dex to dex (quickening). optimizer::DexToDexCompiler dex_to_dex_compiler_; + friend class CommonCompilerTest; friend class CompileClassVisitor; friend class DexToDexDecompilerTest; friend class verifier::VerifierDepsTest; diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc index 491e61f9b5..2eeb4399db 100644 --- a/compiler/driver/compiler_driver_test.cc +++ b/compiler/driver/compiler_driver_test.cc @@ -46,7 +46,7 @@ class CompilerDriverTest : public CommonCompilerTest { TimingLogger timings("CompilerDriverTest::CompileAll", false, false); TimingLogger::ScopedTiming t(__FUNCTION__, &timings); dex_files_ = GetDexFiles(class_loader); - compiler_driver_->SetDexFilesForOatFile(dex_files_);; + SetDexFilesForOatFile(dex_files_); compiler_driver_->CompileAll(class_loader, dex_files_, &timings); t.NewTiming("MakeAllExecutable"); MakeAllExecutable(class_loader); @@ -331,7 +331,7 @@ TEST_F(CompilerDriverVerifyTest, RetryVerifcationStatusCheckVerified) { ASSERT_GT(dex_files.size(), 0u); dex_file = dex_files.front(); } - compiler_driver_->SetDexFilesForOatFile(dex_files); + SetDexFilesForOatFile(dex_files); callbacks_->SetDoesClassUnloading(true, compiler_driver_.get()); ClassReference ref(dex_file, 0u); // Test that the status is read from the compiler driver as expected. diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc index 933be4f004..62d547de44 100644 --- a/compiler/driver/compiler_options.cc +++ b/compiler/driver/compiler_options.cc @@ -20,6 +20,8 @@ #include "android-base/stringprintf.h" +#include "arch/instruction_set.h" +#include "arch/instruction_set_features.h" #include "base/runtime_debug.h" #include "base/variant_map.h" #include "cmdline_parser.h" @@ -37,11 +39,14 @@ CompilerOptions::CompilerOptions() tiny_method_threshold_(kDefaultTinyMethodThreshold), num_dex_methods_threshold_(kDefaultNumDexMethodsThreshold), inline_max_code_units_(kUnsetInlineMaxCodeUnits), - no_inline_from_(nullptr), + instruction_set_(kRuntimeISA == InstructionSet::kArm ? InstructionSet::kThumb2 : kRuntimeISA), + instruction_set_features_(nullptr), + no_inline_from_(), + dex_files_for_oat_file_(), + image_classes_(), boot_image_(false), core_image_(false), app_image_(false), - top_k_profile_threshold_(kDefaultTopKProfileThreshold), debuggable_(false), generate_debug_info_(kDefaultGenerateDebugInfo), generate_mini_debug_info_(kDefaultGenerateMiniDebugInfo), @@ -53,6 +58,7 @@ CompilerOptions::CompilerOptions() dump_timings_(false), dump_pass_timings_(false), dump_stats_(false), + top_k_profile_threshold_(kDefaultTopKProfileThreshold), verbose_methods_(), abort_on_hard_verifier_failure_(false), abort_on_soft_verifier_failure_(false), @@ -67,8 +73,8 @@ CompilerOptions::CompilerOptions() } CompilerOptions::~CompilerOptions() { - // The destructor looks empty but it destroys a PassManagerOptions object. 
We keep it here - because we don't want to include the PassManagerOptions definition from the header file. + // Everything done by member destructors. + // The definitions of classes forward-declared in the header have now been #included. } namespace { @@ -129,4 +135,11 @@ bool CompilerOptions::ParseCompilerOptions(const std::vector<std::string>& optio #pragma GCC diagnostic pop +bool CompilerOptions::IsImageClass(const char* descriptor) const { + // Historical note: We used to hold the set indirectly and there was a distinction between an + // empty set and a null, null meaning to include all classes. However, the distinction has been + // removed; if we don't have a profile, we treat it as an empty set of classes. b/77340429 + return image_classes_.find(StringPiece(descriptor)) != image_classes_.end(); +} + } // namespace art diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h index cee989b315..601c9140dd 100644 --- a/compiler/driver/compiler_options.h +++ b/compiler/driver/compiler_options.h @@ -17,11 +17,13 @@ #ifndef ART_COMPILER_DRIVER_COMPILER_OPTIONS_H_ #define ART_COMPILER_DRIVER_COMPILER_OPTIONS_H_ +#include <memory> #include <ostream> #include <string> #include <vector> #include "base/globals.h" +#include "base/hash_set.h" #include "base/macros.h" #include "base/utils.h" #include "compiler_filter.h" @@ -29,11 +31,17 @@ namespace art { +namespace jit { +class JitCompiler; +} // namespace jit + namespace verifier { class VerifierDepsTest; } // namespace verifier class DexFile; +enum class InstructionSet; +class InstructionSetFeatures; class CompilerOptions FINAL { public: @@ -230,10 +238,29 @@ class CompilerOptions FINAL { return abort_on_soft_verifier_failure_; } - const std::vector<const DexFile*>* GetNoInlineFromDexFile() const { + InstructionSet GetInstructionSet() const { + return instruction_set_; + } + + const InstructionSetFeatures* GetInstructionSetFeatures() const { + return instruction_set_features_.get(); + } + + + const std::vector<const DexFile*>& GetNoInlineFromDexFile() const { return no_inline_from_; } + const std::vector<const DexFile*>& GetDexFilesForOatFile() const { + return dex_files_for_oat_file_; + } + + const HashSet<std::string>& GetImageClasses() const { + return image_classes_; + } + + bool IsImageClass(const char* descriptor) const; + bool ParseCompilerOptions(const std::vector<std::string>& options, bool ignore_unrecognized, std::string* error_msg); @@ -301,16 +328,24 @@ class CompilerOptions FINAL { size_t num_dex_methods_threshold_; size_t inline_max_code_units_; - // Dex files from which we should not inline code. + InstructionSet instruction_set_; + std::unique_ptr<const InstructionSetFeatures> instruction_set_features_; + + // Dex files from which we should not inline code. Does not own the dex files. // This is usually a very short list (i.e. a single dex file), so we // prefer vector<> over a lookup-oriented container, such as set<>. - const std::vector<const DexFile*>* no_inline_from_; + std::vector<const DexFile*> no_inline_from_; + + // List of dex files associated with the oat file, empty for JIT. + std::vector<const DexFile*> dex_files_for_oat_file_; + + // Image classes, specifies the classes that will be included in the image if creating an image. + // Must not be empty for real boot image, only for tests pretending to compile boot image. 
+ HashSet<std::string> image_classes_; bool boot_image_; bool core_image_; bool app_image_; - // When using a profile file only the top K% of the profiled samples will be compiled. - double top_k_profile_threshold_; bool debuggable_; bool generate_debug_info_; bool generate_mini_debug_info_; @@ -323,6 +358,9 @@ class CompilerOptions FINAL { bool dump_pass_timings_; bool dump_stats_; + // When using a profile file only the top K% of the profiled samples will be compiled. + double top_k_profile_threshold_; + // Vector of methods to have verbose output enabled for. std::vector<std::string> verbose_methods_; @@ -362,6 +400,7 @@ class CompilerOptions FINAL { friend class Dex2Oat; friend class DexToDexDecompilerTest; friend class CommonCompilerTest; + friend class jit::JitCompiler; friend class verifier::VerifierDepsTest; template <class Base> diff --git a/compiler/exception_test.cc b/compiler/exception_test.cc index 15c07870a1..b56a991e74 100644 --- a/compiler/exception_test.cc +++ b/compiler/exception_test.cc @@ -15,6 +15,7 @@ */ #include <memory> +#include <type_traits> #include "base/arena_allocator.h" #include "base/callee_save_type.h" @@ -76,13 +77,10 @@ class ExceptionTest : public CommonRuntimeTest { ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stack_maps(&allocator, kRuntimeISA); - stack_maps.BeginStackMapEntry(kDexPc, - native_pc_offset, - /* register_mask */ 0u, - /* sp_mask */ nullptr, - /* num_dex_registers */ 0u, - /* inlining_depth */ 0u); + stack_maps.BeginMethod(4 * sizeof(void*), 0u, 0u, 0u); + stack_maps.BeginStackMapEntry(kDexPc, native_pc_offset); stack_maps.EndStackMapEntry(); + stack_maps.EndMethod(); const size_t stack_maps_size = stack_maps.PrepareForFillIn(); const size_t header_size = sizeof(OatQuickMethodHeader); const size_t code_alignment = GetInstructionSetAlignment(kRuntimeISA); @@ -92,6 +90,7 @@ class ExceptionTest : public CommonRuntimeTest { MemoryRegion stack_maps_region(&fake_header_code_and_maps_[0], stack_maps_size); stack_maps.FillInCodeInfo(stack_maps_region); OatQuickMethodHeader method_header(code_offset, 0u, 4 * sizeof(void*), 0u, 0u, code_size); + static_assert(std::is_trivially_copyable<OatQuickMethodHeader>::value, "Cannot use memcpy"); memcpy(&fake_header_code_and_maps_[code_offset - header_size], &method_header, header_size); std::copy(fake_code_.begin(), fake_code_.end(), diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc index 0de00a82fa..a881c5ec98 100644 --- a/compiler/jit/jit_compiler.cc +++ b/compiler/jit/jit_compiler.cc @@ -33,6 +33,7 @@ #include "jit/debugger_interface.h" #include "jit/jit.h" #include "jit/jit_code_cache.h" +#include "jit/jit_logger.h" #include "oat_file-inl.h" #include "oat_quick_method_header.h" #include "object_lock.h" @@ -50,7 +51,7 @@ extern "C" void* jit_load(bool* generate_debug_info) { VLOG(jit) << "loading jit compiler"; auto* const jit_compiler = JitCompiler::Create(); CHECK(jit_compiler != nullptr); - *generate_debug_info = jit_compiler->GetCompilerOptions()->GetGenerateDebugInfo(); + *generate_debug_info = jit_compiler->GetCompilerOptions().GetGenerateDebugInfo(); VLOG(jit) << "Done loading jit compiler"; return jit_compiler; } @@ -72,10 +73,11 @@ extern "C" void jit_types_loaded(void* handle, mirror::Class** types, size_t cou REQUIRES_SHARED(Locks::mutator_lock_) { auto* jit_compiler = reinterpret_cast<JitCompiler*>(handle); DCHECK(jit_compiler != nullptr); - if (jit_compiler->GetCompilerOptions()->GetGenerateDebugInfo()) { + const 
CompilerOptions& compiler_options = jit_compiler->GetCompilerOptions(); + if (compiler_options.GetGenerateDebugInfo()) { const ArrayRef<mirror::Class*> types_array(types, count); std::vector<uint8_t> elf_file = debug::WriteDebugElfFileForClasses( - kRuntimeISA, jit_compiler->GetCompilerDriver()->GetInstructionSetFeatures(), types_array); + kRuntimeISA, compiler_options.GetInstructionSetFeatures(), types_array); MutexLock mu(Thread::Current(), *Locks::native_debug_interface_lock_); // We never free debug info for types, so we don't need to provide a handle // (which would have been otherwise used as identifier to remove it later). @@ -103,51 +105,56 @@ JitCompiler::JitCompiler() { // Set debuggability based on the runtime value. compiler_options_->SetDebuggable(Runtime::Current()->IsJavaDebuggable()); - const InstructionSet instruction_set = kRuntimeISA; + const InstructionSet instruction_set = compiler_options_->GetInstructionSet(); + if (kRuntimeISA == InstructionSet::kArm) { + DCHECK_EQ(instruction_set, InstructionSet::kThumb2); + } else { + DCHECK_EQ(instruction_set, kRuntimeISA); + } + std::unique_ptr<const InstructionSetFeatures> instruction_set_features; for (const StringPiece option : Runtime::Current()->GetCompilerOptions()) { VLOG(compiler) << "JIT compiler option " << option; std::string error_msg; if (option.starts_with("--instruction-set-variant=")) { StringPiece str = option.substr(strlen("--instruction-set-variant=")).data(); VLOG(compiler) << "JIT instruction set variant " << str; - instruction_set_features_ = InstructionSetFeatures::FromVariant( + instruction_set_features = InstructionSetFeatures::FromVariant( instruction_set, str.as_string(), &error_msg); - if (instruction_set_features_ == nullptr) { + if (instruction_set_features == nullptr) { LOG(WARNING) << "Error parsing " << option << " message=" << error_msg; } } else if (option.starts_with("--instruction-set-features=")) { StringPiece str = option.substr(strlen("--instruction-set-features=")).data(); VLOG(compiler) << "JIT instruction set features " << str; - if (instruction_set_features_ == nullptr) { - instruction_set_features_ = InstructionSetFeatures::FromVariant( + if (instruction_set_features == nullptr) { + instruction_set_features = InstructionSetFeatures::FromVariant( instruction_set, "default", &error_msg); - if (instruction_set_features_ == nullptr) { + if (instruction_set_features == nullptr) { LOG(WARNING) << "Error parsing " << option << " message=" << error_msg; } } - instruction_set_features_ = - instruction_set_features_->AddFeaturesFromString(str.as_string(), &error_msg); - if (instruction_set_features_ == nullptr) { + instruction_set_features = + instruction_set_features->AddFeaturesFromString(str.as_string(), &error_msg); + if (instruction_set_features == nullptr) { LOG(WARNING) << "Error parsing " << option << " message=" << error_msg; } } } - if (instruction_set_features_ == nullptr) { - instruction_set_features_ = InstructionSetFeatures::FromCppDefines(); + if (instruction_set_features == nullptr) { + instruction_set_features = InstructionSetFeatures::FromCppDefines(); } + compiler_options_->instruction_set_features_ = std::move(instruction_set_features); + compiler_driver_.reset(new CompilerDriver( compiler_options_.get(), /* verification_results */ nullptr, Compiler::kOptimizing, - instruction_set, - instruction_set_features_.get(), /* image_classes */ nullptr, /* thread_count */ 1, /* swap_fd */ -1, /* profile_compilation_info */ nullptr)); // Disable dedupe so we can remove compiled 
methods. compiler_driver_->SetDedupeEnabled(false); - compiler_driver_->SetSupportBootImageFixup(false); size_t thread_count = compiler_driver_->GetThreadCount(); if (compiler_options_->GetGenerateDebugInfo()) { diff --git a/compiler/jit/jit_compiler.h b/compiler/jit/jit_compiler.h index 31dc9e2fe5..5840fece2e 100644 --- a/compiler/jit/jit_compiler.h +++ b/compiler/jit/jit_compiler.h @@ -18,18 +18,19 @@ #define ART_COMPILER_JIT_JIT_COMPILER_H_ #include "base/mutex.h" -#include "compiled_method.h" -#include "driver/compiler_driver.h" -#include "driver/compiler_options.h" -#include "jit_logger.h" namespace art { class ArtMethod; -class InstructionSetFeatures; +class CompiledMethod; +class CompilerDriver; +class CompilerOptions; +class Thread; namespace jit { +class JitLogger; + class JitCompiler { public: static JitCompiler* Create(); @@ -39,8 +40,8 @@ class JitCompiler { bool CompileMethod(Thread* self, ArtMethod* method, bool osr) REQUIRES_SHARED(Locks::mutator_lock_); - CompilerOptions* GetCompilerOptions() const { - return compiler_options_.get(); + const CompilerOptions& GetCompilerOptions() const { + return *compiler_options_.get(); } CompilerDriver* GetCompilerDriver() const { return compiler_driver_.get(); @@ -49,7 +50,6 @@ class JitCompiler { private: std::unique_ptr<CompilerOptions> compiler_options_; std::unique_ptr<CompilerDriver> compiler_driver_; - std::unique_ptr<const InstructionSetFeatures> instruction_set_features_; std::unique_ptr<JitLogger> jit_logger_; JitCompiler(); diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc index 0902bf2bce..62e8e0264f 100644 --- a/compiler/jni/quick/jni_compiler.cc +++ b/compiler/jni/quick/jni_compiler.cc @@ -34,7 +34,6 @@ #include "class_linker.h" #include "debug/dwarf/debug_frame_opcode_writer.h" #include "dex/dex_file-inl.h" -#include "driver/compiler_driver.h" #include "driver/compiler_options.h" #include "entrypoints/quick/quick_entrypoints.h" #include "jni/jni_env_ext.h" @@ -115,7 +114,7 @@ static ThreadOffset<kPointerSize> GetJniEntrypointThreadOffset(JniEntrypoint whi // convention. // template <PointerSize kPointerSize> -static JniCompiledMethod ArtJniCompileMethodInternal(CompilerDriver* driver, +static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& compiler_options, uint32_t access_flags, uint32_t method_idx, const DexFile& dex_file) { @@ -124,8 +123,9 @@ static JniCompiledMethod ArtJniCompileMethodInternal(CompilerDriver* driver, const bool is_static = (access_flags & kAccStatic) != 0; const bool is_synchronized = (access_flags & kAccSynchronized) != 0; const char* shorty = dex_file.GetMethodShorty(dex_file.GetMethodId(method_idx)); - InstructionSet instruction_set = driver->GetInstructionSet(); - const InstructionSetFeatures* instruction_set_features = driver->GetInstructionSetFeatures(); + InstructionSet instruction_set = compiler_options.GetInstructionSet(); + const InstructionSetFeatures* instruction_set_features = + compiler_options.GetInstructionSetFeatures(); // i.e. 
if the method was annotated with @FastNative const bool is_fast_native = (access_flags & kAccFastNative) != 0u; @@ -216,7 +216,6 @@ static JniCompiledMethod ArtJniCompileMethodInternal(CompilerDriver* driver, // Assembler that holds generated instructions std::unique_ptr<JNIMacroAssembler<kPointerSize>> jni_asm = GetMacroAssembler<kPointerSize>(&allocator, instruction_set, instruction_set_features); - const CompilerOptions& compiler_options = driver->GetCompilerOptions(); jni_asm->cfi().SetEnabled(compiler_options.GenerateAnyDebugInfo()); jni_asm->SetEmitRunTimeChecksInDebugMode(compiler_options.EmitRunTimeChecksInDebugMode()); @@ -771,16 +770,16 @@ static void SetNativeParameter(JNIMacroAssembler<kPointerSize>* jni_asm, } } -JniCompiledMethod ArtQuickJniCompileMethod(CompilerDriver* compiler, +JniCompiledMethod ArtQuickJniCompileMethod(const CompilerOptions& compiler_options, uint32_t access_flags, uint32_t method_idx, const DexFile& dex_file) { - if (Is64BitInstructionSet(compiler->GetInstructionSet())) { + if (Is64BitInstructionSet(compiler_options.GetInstructionSet())) { return ArtJniCompileMethodInternal<PointerSize::k64>( - compiler, access_flags, method_idx, dex_file); + compiler_options, access_flags, method_idx, dex_file); } else { return ArtJniCompileMethodInternal<PointerSize::k32>( - compiler, access_flags, method_idx, dex_file); + compiler_options, access_flags, method_idx, dex_file); } } diff --git a/compiler/jni/quick/jni_compiler.h b/compiler/jni/quick/jni_compiler.h index 11419947a0..313fcd361e 100644 --- a/compiler/jni/quick/jni_compiler.h +++ b/compiler/jni/quick/jni_compiler.h @@ -25,7 +25,7 @@ namespace art { class ArtMethod; -class CompilerDriver; +class CompilerOptions; class DexFile; class JniCompiledMethod { @@ -62,7 +62,7 @@ class JniCompiledMethod { std::vector<uint8_t> cfi_; }; -JniCompiledMethod ArtQuickJniCompileMethod(CompilerDriver* compiler, +JniCompiledMethod ArtQuickJniCompileMethod(const CompilerOptions& compiler_options, uint32_t access_flags, uint32_t method_idx, const DexFile& dex_file); diff --git a/compiler/linker/linker_patch.h b/compiler/linker/linker_patch.h index 7b35fd9b0c..b7beb7bdb4 100644 --- a/compiler/linker/linker_patch.h +++ b/compiler/linker/linker_patch.h @@ -40,19 +40,31 @@ class LinkerPatch { // which is ridiculous given we have only a handful of values here. If we // choose to squeeze the Type into fewer than 8 bits, we'll have to declare // patch_type_ as an uintN_t and do explicit static_cast<>s. + // + // Note: Actual patching is instruction_set-dependent. enum class Type : uint8_t { - kDataBimgRelRo, // NOTE: Actual patching is instruction_set-dependent. - kMethodRelative, // NOTE: Actual patching is instruction_set-dependent. - kMethodBssEntry, // NOTE: Actual patching is instruction_set-dependent. - kCall, - kCallRelative, // NOTE: Actual patching is instruction_set-dependent. - kTypeRelative, // NOTE: Actual patching is instruction_set-dependent. - kTypeBssEntry, // NOTE: Actual patching is instruction_set-dependent. - kStringRelative, // NOTE: Actual patching is instruction_set-dependent. - kStringBssEntry, // NOTE: Actual patching is instruction_set-dependent. - kBakerReadBarrierBranch, // NOTE: Actual patching is instruction_set-dependent. + kIntrinsicReference, // Boot image reference for an intrinsic, see IntrinsicObjects. + kDataBimgRelRo, + kMethodRelative, + kMethodBssEntry, + kCall, // TODO: Remove. (Deprecated, non-PIC.) 
+ kCallRelative, + kTypeRelative, + kTypeBssEntry, + kStringRelative, + kStringBssEntry, + kBakerReadBarrierBranch, }; + static LinkerPatch IntrinsicReferencePatch(size_t literal_offset, + uint32_t pc_insn_offset, + uint32_t intrinsic_data) { + LinkerPatch patch(literal_offset, Type::kIntrinsicReference, /* target_dex_file */ nullptr); + patch.intrinsic_data_ = intrinsic_data; + patch.pc_insn_offset_ = pc_insn_offset; + return patch; + } + static LinkerPatch DataBimgRelRoPatch(size_t literal_offset, uint32_t pc_insn_offset, uint32_t boot_image_offset) { @@ -160,6 +172,7 @@ class LinkerPatch { bool IsPcRelative() const { switch (GetType()) { + case Type::kIntrinsicReference: case Type::kDataBimgRelRo: case Type::kMethodRelative: case Type::kMethodBssEntry: @@ -175,6 +188,11 @@ class LinkerPatch { } } + uint32_t IntrinsicData() const { + DCHECK(patch_type_ == Type::kIntrinsicReference); + return intrinsic_data_; + } + uint32_t BootImageOffset() const { DCHECK(patch_type_ == Type::kDataBimgRelRo); return boot_image_offset_; @@ -213,7 +231,8 @@ class LinkerPatch { } uint32_t PcInsnOffset() const { - DCHECK(patch_type_ == Type::kDataBimgRelRo || + DCHECK(patch_type_ == Type::kIntrinsicReference || + patch_type_ == Type::kDataBimgRelRo || patch_type_ == Type::kMethodRelative || patch_type_ == Type::kMethodBssEntry || patch_type_ == Type::kTypeRelative || @@ -255,10 +274,12 @@ class LinkerPatch { uint32_t method_idx_; // Method index for Call/Method patches. uint32_t type_idx_; // Type index for Type patches. uint32_t string_idx_; // String index for String patches. + uint32_t intrinsic_data_; // Data for IntrinsicObjects. uint32_t baker_custom_value1_; static_assert(sizeof(method_idx_) == sizeof(cmp1_), "needed by relational operators"); static_assert(sizeof(type_idx_) == sizeof(cmp1_), "needed by relational operators"); static_assert(sizeof(string_idx_) == sizeof(cmp1_), "needed by relational operators"); + static_assert(sizeof(intrinsic_data_) == sizeof(cmp1_), "needed by relational operators"); static_assert(sizeof(baker_custom_value1_) == sizeof(cmp1_), "needed by relational operators"); }; union { diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc index 1523478613..7c29df877a 100644 --- a/compiler/optimizing/bounds_check_elimination_test.cc +++ b/compiler/optimizing/bounds_check_elimination_test.cc @@ -43,7 +43,7 @@ class BoundsCheckEliminationTest : public OptimizingUnitTest { void RunBCE() { graph_->BuildDominatorTree(); - InstructionSimplifier(graph_, /* codegen */ nullptr, /* driver */ nullptr).Run(); + InstructionSimplifier(graph_, /* codegen */ nullptr).Run(); SideEffectsAnalysis side_effects(graph_); side_effects.Run(); diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 4791fa3fba..074f249fe1 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -393,6 +393,11 @@ void CodeGenerator::Compile(CodeAllocator* allocator) { HGraphVisitor* instruction_visitor = GetInstructionVisitor(); DCHECK_EQ(current_block_index_, 0u); + GetStackMapStream()->BeginMethod(HasEmptyFrame() ? 
0 : frame_size_, + core_spill_mask_, + fpu_spill_mask_, + GetGraph()->GetNumberOfVRegs()); + size_t frame_start = GetAssembler()->CodeSize(); GenerateFrameEntry(); DCHECK_EQ(GetAssembler()->cfi().GetCurrentCFAOffset(), static_cast<int>(frame_size_)); @@ -435,6 +440,8 @@ void CodeGenerator::Compile(CodeAllocator* allocator) { // Finalize instructions in assembler. Finalize(allocator); + + GetStackMapStream()->EndMethod(); } void CodeGenerator::Finalize(CodeAllocator* allocator) { @@ -516,7 +523,7 @@ void CodeGenerator::CreateCommonInvokeLocationSummary( locations->AddTemp(visitor->GetMethodLocation()); break; } - } else { + } else if (!invoke->IsInvokePolymorphic()) { locations->AddTemp(visitor->GetMethodLocation()); } } @@ -544,6 +551,7 @@ void CodeGenerator::GenerateInvokeStaticOrDirectRuntimeCall( case kVirtual: case kInterface: case kPolymorphic: + case kCustom: LOG(FATAL) << "Unexpected invoke type: " << invoke->GetInvokeType(); UNREACHABLE(); } @@ -572,6 +580,7 @@ void CodeGenerator::GenerateInvokeUnresolvedRuntimeCall(HInvokeUnresolved* invok entrypoint = kQuickInvokeInterfaceTrampolineWithAccessCheck; break; case kPolymorphic: + case kCustom: LOG(FATAL) << "Unexpected invoke type: " << invoke->GetInvokeType(); UNREACHABLE(); } @@ -579,11 +588,19 @@ } void CodeGenerator::GenerateInvokePolymorphicCall(HInvokePolymorphic* invoke) { - MoveConstant(invoke->GetLocations()->GetTemp(0), static_cast<int32_t>(invoke->GetType())); + // invoke-polymorphic does not use a temporary to convey any additional information (e.g. a + // method index) since it requires multiple pieces of information from the instruction + // (registers A, B, H). Not using the reservation has no effect on the registers used in the runtime call.
QuickEntrypointEnum entrypoint = kQuickInvokePolymorphic; InvokeRuntime(entrypoint, invoke, invoke->GetDexPc(), nullptr); } +void CodeGenerator::GenerateInvokeCustomCall(HInvokeCustom* invoke) { + MoveConstant(invoke->GetLocations()->GetTemp(0), invoke->GetCallSiteIndex()); + QuickEntrypointEnum entrypoint = kQuickInvokeCustom; + InvokeRuntime(entrypoint, invoke, invoke->GetDexPc(), nullptr); +} + void CodeGenerator::CreateUnresolvedFieldLocationSummary( HInstruction* field_access, DataType::Type field_type, @@ -867,53 +884,45 @@ void CodeGenerator::AllocateLocations(HInstruction* instruction) { } std::unique_ptr<CodeGenerator> CodeGenerator::Create(HGraph* graph, - InstructionSet instruction_set, - const InstructionSetFeatures& isa_features, const CompilerOptions& compiler_options, OptimizingCompilerStats* stats) { ArenaAllocator* allocator = graph->GetAllocator(); - switch (instruction_set) { + switch (compiler_options.GetInstructionSet()) { #ifdef ART_ENABLE_CODEGEN_arm case InstructionSet::kArm: case InstructionSet::kThumb2: { return std::unique_ptr<CodeGenerator>( - new (allocator) arm::CodeGeneratorARMVIXL( - graph, *isa_features.AsArmInstructionSetFeatures(), compiler_options, stats)); + new (allocator) arm::CodeGeneratorARMVIXL(graph, compiler_options, stats)); } #endif #ifdef ART_ENABLE_CODEGEN_arm64 case InstructionSet::kArm64: { return std::unique_ptr<CodeGenerator>( - new (allocator) arm64::CodeGeneratorARM64( - graph, *isa_features.AsArm64InstructionSetFeatures(), compiler_options, stats)); + new (allocator) arm64::CodeGeneratorARM64(graph, compiler_options, stats)); } #endif #ifdef ART_ENABLE_CODEGEN_mips case InstructionSet::kMips: { return std::unique_ptr<CodeGenerator>( - new (allocator) mips::CodeGeneratorMIPS( - graph, *isa_features.AsMipsInstructionSetFeatures(), compiler_options, stats)); + new (allocator) mips::CodeGeneratorMIPS(graph, compiler_options, stats)); } #endif #ifdef ART_ENABLE_CODEGEN_mips64 case InstructionSet::kMips64: { return std::unique_ptr<CodeGenerator>( - new (allocator) mips64::CodeGeneratorMIPS64( - graph, *isa_features.AsMips64InstructionSetFeatures(), compiler_options, stats)); + new (allocator) mips64::CodeGeneratorMIPS64(graph, compiler_options, stats)); } #endif #ifdef ART_ENABLE_CODEGEN_x86 case InstructionSet::kX86: { return std::unique_ptr<CodeGenerator>( - new (allocator) x86::CodeGeneratorX86( - graph, *isa_features.AsX86InstructionSetFeatures(), compiler_options, stats)); + new (allocator) x86::CodeGeneratorX86(graph, compiler_options, stats)); } #endif #ifdef ART_ENABLE_CODEGEN_x86_64 case InstructionSet::kX86_64: { return std::unique_ptr<CodeGenerator>( - new (allocator) x86_64::CodeGeneratorX86_64( - graph, *isa_features.AsX86_64InstructionSetFeatures(), compiler_options, stats)); + new (allocator) x86_64::CodeGeneratorX86_64(graph, compiler_options, stats)); } #endif default: @@ -1045,7 +1054,8 @@ void CodeGenerator::BuildStackMaps(MemoryRegion stack_map_region, void CodeGenerator::RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, - SlowPathCode* slow_path) { + SlowPathCode* slow_path, + bool native_debug_info) { if (instruction != nullptr) { // The code generated for some type conversions // may call the runtime, thus normally requiring a subsequent @@ -1076,7 +1086,7 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, if (instruction == nullptr) { // For stack overflow checks and native-debug-info entries without dex register // mapping (i.e. start of basic block or start of slow path). 
- stack_map_stream->BeginStackMapEntry(dex_pc, native_pc, 0, 0, 0, 0); + stack_map_stream->BeginStackMapEntry(dex_pc, native_pc); stack_map_stream->EndStackMapEntry(); return; } @@ -1110,12 +1120,21 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, outer_dex_pc = outer_environment->GetDexPc(); outer_environment_size = outer_environment->Size(); } + + HLoopInformation* info = instruction->GetBlock()->GetLoopInformation(); + bool osr = + instruction->IsSuspendCheck() && + (info != nullptr) && + graph_->IsCompilingOsr() && + (inlining_depth == 0); + StackMap::Kind kind = native_debug_info + ? StackMap::Kind::Debug + : (osr ? StackMap::Kind::OSR : StackMap::Kind::Default); stack_map_stream->BeginStackMapEntry(outer_dex_pc, native_pc, register_mask, locations->GetStackMask(), - outer_environment_size, - inlining_depth); + kind); EmitEnvironment(environment, slow_path); // Record invoke info; the common case for the trampoline is super and static invokes. Only // record these to reduce oat file size. @@ -1128,19 +1147,9 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, } stack_map_stream->EndStackMapEntry(); - HLoopInformation* info = instruction->GetBlock()->GetLoopInformation(); - if (instruction->IsSuspendCheck() && - (info != nullptr) && - graph_->IsCompilingOsr() && - (inlining_depth == 0)) { + if (osr) { DCHECK_EQ(info->GetSuspendCheck(), instruction); - // We duplicate the stack map as a marker that this stack map can be an OSR entry. - // Duplicating it avoids having the runtime recognize and skip an OSR stack map. DCHECK(info->IsIrreducible()); - stack_map_stream->BeginStackMapEntry( - dex_pc, native_pc, register_mask, locations->GetStackMask(), outer_environment_size, 0); - EmitEnvironment(instruction->GetEnvironment(), slow_path); - stack_map_stream->EndStackMapEntry(); if (kIsDebugBuild) { for (size_t i = 0, environment_size = environment->Size(); i < environment_size; ++i) { HInstruction* in_environment = environment->GetInstructionAt(i); @@ -1157,14 +1166,6 @@ } } } - } else if (kIsDebugBuild) { - // Ensure stack maps are unique, by checking that the native pc in the stack map - // last emitted is different than the native pc of the stack map just emitted. - size_t number_of_stack_maps = stack_map_stream->GetNumberOfStackMaps(); - if (number_of_stack_maps > 1) { - DCHECK_NE(stack_map_stream->GetStackMapNativePcOffset(number_of_stack_maps - 1), - stack_map_stream->GetStackMapNativePcOffset(number_of_stack_maps - 2)); - } } } @@ -1186,12 +1187,11 @@ void CodeGenerator::MaybeRecordNativeDebugInfo(HInstruction* instruction, // Ensure that we do not collide with the stack map of the previous instruction. GenerateNop(); } - RecordPcInfo(instruction, dex_pc, slow_path); + RecordPcInfo(instruction, dex_pc, slow_path, /* native_debug_info */ true); } } void CodeGenerator::RecordCatchBlockInfo() { - ArenaAllocator* allocator = graph_->GetAllocator(); StackMapStream* stack_map_stream = GetStackMapStream(); for (HBasicBlock* block : *block_order_) { @@ -1201,30 +1201,23 @@ uint32_t dex_pc = block->GetDexPc(); uint32_t num_vregs = graph_->GetNumberOfVRegs(); - uint32_t inlining_depth = 0; // Inlining of catch blocks is not supported at the moment. uint32_t native_pc = GetAddressOf(block); - uint32_t register_mask = 0; // Not used. - - // The stack mask is not used, so we leave it empty.
- ArenaBitVector* stack_mask = - ArenaBitVector::Create(allocator, 0, /* expandable */ true, kArenaAllocCodeGenerator); stack_map_stream->BeginStackMapEntry(dex_pc, native_pc, - register_mask, - stack_mask, - num_vregs, - inlining_depth); + /* register_mask */ 0, + /* stack_mask */ nullptr, + StackMap::Kind::Catch); HInstruction* current_phi = block->GetFirstPhi(); for (size_t vreg = 0; vreg < num_vregs; ++vreg) { - while (current_phi != nullptr && current_phi->AsPhi()->GetRegNumber() < vreg) { - HInstruction* next_phi = current_phi->GetNext(); - DCHECK(next_phi == nullptr || - current_phi->AsPhi()->GetRegNumber() <= next_phi->AsPhi()->GetRegNumber()) - << "Phis need to be sorted by vreg number to keep this a linear-time loop."; - current_phi = next_phi; - } + while (current_phi != nullptr && current_phi->AsPhi()->GetRegNumber() < vreg) { + HInstruction* next_phi = current_phi->GetNext(); + DCHECK(next_phi == nullptr || + current_phi->AsPhi()->GetRegNumber() <= next_phi->AsPhi()->GetRegNumber()) + << "Phis need to be sorted by vreg number to keep this a linear-time loop."; + current_phi = next_phi; + } if (current_phi == nullptr || current_phi->AsPhi()->GetRegNumber() != vreg) { stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kNone, 0); @@ -1284,50 +1277,45 @@ void CodeGenerator::EmitEnvironment(HEnvironment* environment, SlowPathCode* slo continue; } + using Kind = DexRegisterLocation::Kind; Location location = environment->GetLocationAt(i); switch (location.GetKind()) { case Location::kConstant: { DCHECK_EQ(current, location.GetConstant()); if (current->IsLongConstant()) { int64_t value = current->AsLongConstant()->GetValue(); - stack_map_stream->AddDexRegisterEntry( - DexRegisterLocation::Kind::kConstant, Low32Bits(value)); - stack_map_stream->AddDexRegisterEntry( - DexRegisterLocation::Kind::kConstant, High32Bits(value)); + stack_map_stream->AddDexRegisterEntry(Kind::kConstant, Low32Bits(value)); + stack_map_stream->AddDexRegisterEntry(Kind::kConstant, High32Bits(value)); ++i; DCHECK_LT(i, environment_size); } else if (current->IsDoubleConstant()) { int64_t value = bit_cast<int64_t, double>(current->AsDoubleConstant()->GetValue()); - stack_map_stream->AddDexRegisterEntry( - DexRegisterLocation::Kind::kConstant, Low32Bits(value)); - stack_map_stream->AddDexRegisterEntry( - DexRegisterLocation::Kind::kConstant, High32Bits(value)); + stack_map_stream->AddDexRegisterEntry(Kind::kConstant, Low32Bits(value)); + stack_map_stream->AddDexRegisterEntry(Kind::kConstant, High32Bits(value)); ++i; DCHECK_LT(i, environment_size); } else if (current->IsIntConstant()) { int32_t value = current->AsIntConstant()->GetValue(); - stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kConstant, value); + stack_map_stream->AddDexRegisterEntry(Kind::kConstant, value); } else if (current->IsNullConstant()) { - stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kConstant, 0); + stack_map_stream->AddDexRegisterEntry(Kind::kConstant, 0); } else { DCHECK(current->IsFloatConstant()) << current->DebugName(); int32_t value = bit_cast<int32_t, float>(current->AsFloatConstant()->GetValue()); - stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kConstant, value); + stack_map_stream->AddDexRegisterEntry(Kind::kConstant, value); } break; } case Location::kStackSlot: { - stack_map_stream->AddDexRegisterEntry( - DexRegisterLocation::Kind::kInStack, location.GetStackIndex()); + stack_map_stream->AddDexRegisterEntry(Kind::kInStack, location.GetStackIndex()); break; } case 
Location::kDoubleStackSlot: { + stack_map_stream->AddDexRegisterEntry(Kind::kInStack, location.GetStackIndex()); stack_map_stream->AddDexRegisterEntry( - DexRegisterLocation::Kind::kInStack, location.GetStackIndex()); - stack_map_stream->AddDexRegisterEntry( - DexRegisterLocation::Kind::kInStack, location.GetHighStackIndex(kVRegSize)); + Kind::kInStack, location.GetHighStackIndex(kVRegSize)); ++i; DCHECK_LT(i, environment_size); break; @@ -1337,17 +1325,16 @@ void CodeGenerator::EmitEnvironment(HEnvironment* environment, SlowPathCode* slo int id = location.reg(); if (slow_path != nullptr && slow_path->IsCoreRegisterSaved(id)) { uint32_t offset = slow_path->GetStackOffsetOfCoreRegister(id); - stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, offset); + stack_map_stream->AddDexRegisterEntry(Kind::kInStack, offset); if (current->GetType() == DataType::Type::kInt64) { - stack_map_stream->AddDexRegisterEntry( - DexRegisterLocation::Kind::kInStack, offset + kVRegSize); + stack_map_stream->AddDexRegisterEntry(Kind::kInStack, offset + kVRegSize); ++i; DCHECK_LT(i, environment_size); } } else { - stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInRegister, id); + stack_map_stream->AddDexRegisterEntry(Kind::kInRegister, id); if (current->GetType() == DataType::Type::kInt64) { - stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInRegisterHigh, id); + stack_map_stream->AddDexRegisterEntry(Kind::kInRegisterHigh, id); ++i; DCHECK_LT(i, environment_size); } @@ -1359,18 +1346,16 @@ void CodeGenerator::EmitEnvironment(HEnvironment* environment, SlowPathCode* slo int id = location.reg(); if (slow_path != nullptr && slow_path->IsFpuRegisterSaved(id)) { uint32_t offset = slow_path->GetStackOffsetOfFpuRegister(id); - stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, offset); + stack_map_stream->AddDexRegisterEntry(Kind::kInStack, offset); if (current->GetType() == DataType::Type::kFloat64) { - stack_map_stream->AddDexRegisterEntry( - DexRegisterLocation::Kind::kInStack, offset + kVRegSize); + stack_map_stream->AddDexRegisterEntry(Kind::kInStack, offset + kVRegSize); ++i; DCHECK_LT(i, environment_size); } } else { - stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInFpuRegister, id); + stack_map_stream->AddDexRegisterEntry(Kind::kInFpuRegister, id); if (current->GetType() == DataType::Type::kFloat64) { - stack_map_stream->AddDexRegisterEntry( - DexRegisterLocation::Kind::kInFpuRegisterHigh, id); + stack_map_stream->AddDexRegisterEntry(Kind::kInFpuRegisterHigh, id); ++i; DCHECK_LT(i, environment_size); } @@ -1383,16 +1368,16 @@ void CodeGenerator::EmitEnvironment(HEnvironment* environment, SlowPathCode* slo int high = location.high(); if (slow_path != nullptr && slow_path->IsFpuRegisterSaved(low)) { uint32_t offset = slow_path->GetStackOffsetOfFpuRegister(low); - stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, offset); + stack_map_stream->AddDexRegisterEntry(Kind::kInStack, offset); } else { - stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInFpuRegister, low); + stack_map_stream->AddDexRegisterEntry(Kind::kInFpuRegister, low); } if (slow_path != nullptr && slow_path->IsFpuRegisterSaved(high)) { uint32_t offset = slow_path->GetStackOffsetOfFpuRegister(high); - stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, offset); + stack_map_stream->AddDexRegisterEntry(Kind::kInStack, offset); ++i; } else { - 
stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInFpuRegister, high); + stack_map_stream->AddDexRegisterEntry(Kind::kInFpuRegister, high); ++i; } DCHECK_LT(i, environment_size); @@ -1404,15 +1389,15 @@ void CodeGenerator::EmitEnvironment(HEnvironment* environment, SlowPathCode* slo int high = location.high(); if (slow_path != nullptr && slow_path->IsCoreRegisterSaved(low)) { uint32_t offset = slow_path->GetStackOffsetOfCoreRegister(low); - stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, offset); + stack_map_stream->AddDexRegisterEntry(Kind::kInStack, offset); } else { - stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInRegister, low); + stack_map_stream->AddDexRegisterEntry(Kind::kInRegister, low); } if (slow_path != nullptr && slow_path->IsCoreRegisterSaved(high)) { uint32_t offset = slow_path->GetStackOffsetOfCoreRegister(high); - stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, offset); + stack_map_stream->AddDexRegisterEntry(Kind::kInStack, offset); } else { - stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInRegister, high); + stack_map_stream->AddDexRegisterEntry(Kind::kInRegister, high); } ++i; DCHECK_LT(i, environment_size); @@ -1420,7 +1405,7 @@ void CodeGenerator::EmitEnvironment(HEnvironment* environment, SlowPathCode* slo } case Location::kInvalid: { - stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kNone, 0); + stack_map_stream->AddDexRegisterEntry(Kind::kNone, 0); break; } diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index a340446ac3..59f858ea52 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -188,8 +188,6 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { // Compiles the graph to executable instructions. void Compile(CodeAllocator* allocator); static std::unique_ptr<CodeGenerator> Create(HGraph* graph, - InstructionSet instruction_set, - const InstructionSetFeatures& isa_features, const CompilerOptions& compiler_options, OptimizingCompilerStats* stats = nullptr); virtual ~CodeGenerator(); @@ -323,7 +321,10 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { } // Record native to dex mapping for a suspend point. Required by runtime. - void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr); + void RecordPcInfo(HInstruction* instruction, + uint32_t dex_pc, + SlowPathCode* slow_path = nullptr, + bool native_debug_info = false); // Check whether we have already recorded mapping at this PC. bool HasStackMapAtCurrentPc(); // Record extra stack maps if we support native debugging. 
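A condensed sketch of the reworked StackMapStream protocol may help at this point; it is assembled from the call sites visible in this patch (exception_test.cc, CodeGenerator::Compile(), RecordPcInfo() and RecordCatchBlockInfo()), and the argument names are illustrative placeholders rather than the declared parameter names:

stack_map_stream->BeginMethod(frame_size_in_bytes,  // Per-method data now brackets all entries.
                              core_spill_mask,
                              fpu_spill_mask,
                              num_dex_registers);
// The plain form takes only the dex pc and native pc; the old register-mask,
// stack-mask and size arguments are gone from entries that do not need them.
stack_map_stream->BeginStackMapEntry(dex_pc, native_pc_offset);
stack_map_stream->EndStackMapEntry();
// Special entries pass an explicit StackMap::Kind (Catch, OSR, Debug) instead of
// being duplicated (the old OSR marker) or left implicit (catch, native debug info).
stack_map_stream->BeginStackMapEntry(dex_pc,
                                     native_pc_offset,
                                     /* register_mask */ 0,
                                     /* stack_mask */ nullptr,
                                     StackMap::Kind::Catch);
stack_map_stream->EndStackMapEntry();
stack_map_stream->EndMethod();  // Pairs with BeginMethod() once all entries are emitted.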
@@ -542,10 +543,13 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { void GenerateInvokeStaticOrDirectRuntimeCall( HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path); + void GenerateInvokeUnresolvedRuntimeCall(HInvokeUnresolved* invoke); void GenerateInvokePolymorphicCall(HInvokePolymorphic* invoke); + void GenerateInvokeCustomCall(HInvokeCustom* invoke); + void CreateUnresolvedFieldLocationSummary( HInstruction* field_access, DataType::Type field_type, diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 6f173e19f5..26c9e9fa2b 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -27,6 +27,7 @@ #include "entrypoints/quick/quick_entrypoints.h" #include "entrypoints/quick/quick_entrypoints_enum.h" #include "gc/accounting/card_table.h" +#include "gc/space/image_space.h" #include "heap_poisoning.h" #include "intrinsics.h" #include "intrinsics_arm64.h" @@ -68,7 +69,7 @@ using helpers::InputCPURegisterOrZeroRegAt; using helpers::InputFPRegisterAt; using helpers::InputOperandAt; using helpers::InputRegisterAt; -using helpers::Int64ConstantFrom; +using helpers::Int64FromLocation; using helpers::IsConstantZeroBitPattern; using helpers::LocationFrom; using helpers::OperandFromMemOperand; @@ -1373,7 +1374,6 @@ Location InvokeDexCallingConventionVisitorARM64::GetMethodLocation() const { } CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, - const Arm64InstructionSetFeatures& isa_features, const CompilerOptions& compiler_options, OptimizingCompilerStats* stats) : CodeGenerator(graph, @@ -1390,7 +1390,6 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, instruction_visitor_(graph, this), move_resolver_(graph->GetAllocator(), this), assembler_(graph->GetAllocator()), - isa_features_(isa_features), uint32_literals_(std::less<uint32_t>(), graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), uint64_literals_(std::less<uint64_t>(), @@ -1401,6 +1400,7 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_intrinsic_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), baker_read_barrier_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(StringReferenceValueComparator(), graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), @@ -1728,6 +1728,10 @@ void CodeGeneratorARM64::DumpFloatingPointRegister(std::ostream& stream, int reg stream << DRegister(reg); } +const Arm64InstructionSetFeatures& CodeGeneratorARM64::GetInstructionSetFeatures() const { + return *GetCompilerOptions().GetInstructionSetFeatures()->AsArm64InstructionSetFeatures(); +} + void CodeGeneratorARM64::MoveConstant(CPURegister destination, HConstant* constant) { if (constant->IsIntConstant()) { __ Mov(Register(destination), constant->AsIntConstant()->GetValue()); @@ -2459,6 +2463,9 @@ void InstructionCodeGeneratorARM64::HandleBinaryOp(HBinaryOperation* instr) { // all & reg_bits - 1. __ Ror(dst, lhs, RegisterFrom(instr->GetLocations()->InAt(1), type)); } + } else if (instr->IsMin() || instr->IsMax()) { + __ Cmp(lhs, rhs); + __ Csel(dst, lhs, rhs, instr->IsMin() ? 
lt : gt); } else { DCHECK(instr->IsXor()); __ Eor(dst, lhs, rhs); @@ -2474,6 +2481,10 @@ void InstructionCodeGeneratorARM64::HandleBinaryOp(HBinaryOperation* instr) { __ Fadd(dst, lhs, rhs); } else if (instr->IsSub()) { __ Fsub(dst, lhs, rhs); + } else if (instr->IsMin()) { + __ Fmin(dst, lhs, rhs); + } else if (instr->IsMax()) { + __ Fmax(dst, lhs, rhs); } else { LOG(FATAL) << "Unexpected floating-point binary operation"; } @@ -2694,7 +2705,7 @@ void LocationsBuilderARM64::VisitIntermediateAddressIndex(HIntermediateAddressIn void InstructionCodeGeneratorARM64::VisitIntermediateAddressIndex( HIntermediateAddressIndex* instruction) { Register index_reg = InputRegisterAt(instruction, 0); - uint32_t shift = Int64ConstantFrom(instruction->GetLocations()->InAt(2)); + uint32_t shift = Int64FromLocation(instruction->GetLocations()->InAt(2)); uint32_t offset = instruction->GetOffset()->AsIntConstant()->GetValue(); if (shift == 0) { @@ -2824,7 +2835,7 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0))); if (index.IsConstant()) { // Array load with a constant index can be treated as a field load. - offset += Int64ConstantFrom(index) << DataType::SizeShift(type); + offset += Int64FromLocation(index) << DataType::SizeShift(type); Location maybe_temp = (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location::NoLocation(); codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, @@ -2869,14 +2880,14 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { "Expecting 0=compressed, 1=uncompressed"); __ Tbnz(length.W(), 0, &uncompressed_load); __ Ldrb(Register(OutputCPURegister(instruction)), - HeapOperand(obj, offset + Int64ConstantFrom(index))); + HeapOperand(obj, offset + Int64FromLocation(index))); __ B(&done); __ Bind(&uncompressed_load); __ Ldrh(Register(OutputCPURegister(instruction)), - HeapOperand(obj, offset + (Int64ConstantFrom(index) << 1))); + HeapOperand(obj, offset + (Int64FromLocation(index) << 1))); __ Bind(&done); } else { - offset += Int64ConstantFrom(index) << DataType::SizeShift(type); + offset += Int64FromLocation(index) << DataType::SizeShift(type); source = HeapOperand(obj, offset); } } else { @@ -2989,7 +3000,7 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { if (!needs_write_barrier) { DCHECK(!may_need_runtime_call_for_type_check); if (index.IsConstant()) { - offset += Int64ConstantFrom(index) << DataType::SizeShift(value_type); + offset += Int64FromLocation(index) << DataType::SizeShift(value_type); destination = HeapOperand(array, offset); } else { UseScratchRegisterScope temps(masm); @@ -3027,7 +3038,7 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { UseScratchRegisterScope temps(masm); Register temp = temps.AcquireSameSizeAs(array); if (index.IsConstant()) { - offset += Int64ConstantFrom(index) << DataType::SizeShift(value_type); + offset += Int64FromLocation(index) << DataType::SizeShift(value_type); destination = HeapOperand(array, offset); } else { destination = HeapOperand(temp, @@ -3336,61 +3347,30 @@ FOR_EACH_CONDITION_INSTRUCTION(DEFINE_CONDITION_VISITORS) #undef DEFINE_CONDITION_VISITORS #undef FOR_EACH_CONDITION_INSTRUCTION -void InstructionCodeGeneratorARM64::DivRemOneOrMinusOne(HBinaryOperation* instruction) { - DCHECK(instruction->IsDiv() || instruction->IsRem()); - - LocationSummary* locations = instruction->GetLocations(); - Location second = locations->InAt(1); - 
DCHECK(second.IsConstant()); +void InstructionCodeGeneratorARM64::GenerateIntDivForPower2Denom(HDiv* instruction) { + int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1)); + uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm)); + DCHECK(IsPowerOfTwo(abs_imm)) << abs_imm; Register out = OutputRegister(instruction); Register dividend = InputRegisterAt(instruction, 0); - int64_t imm = Int64FromConstant(second.GetConstant()); - DCHECK(imm == 1 || imm == -1); - if (instruction->IsRem()) { - __ Mov(out, 0); + if (abs_imm == 2) { + int bits = DataType::Size(instruction->GetResultType()) * kBitsPerByte; + __ Add(out, dividend, Operand(dividend, LSR, bits - 1)); } else { - if (imm == 1) { - __ Mov(out, dividend); - } else { - __ Neg(out, dividend); - } - } -} - -void InstructionCodeGeneratorARM64::DivRemByPowerOfTwo(HBinaryOperation* instruction) { - DCHECK(instruction->IsDiv() || instruction->IsRem()); - - LocationSummary* locations = instruction->GetLocations(); - Location second = locations->InAt(1); - DCHECK(second.IsConstant()); - - Register out = OutputRegister(instruction); - Register dividend = InputRegisterAt(instruction, 0); - int64_t imm = Int64FromConstant(second.GetConstant()); - uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm)); - int ctz_imm = CTZ(abs_imm); - - UseScratchRegisterScope temps(GetVIXLAssembler()); - Register temp = temps.AcquireSameSizeAs(out); - - if (instruction->IsDiv()) { + UseScratchRegisterScope temps(GetVIXLAssembler()); + Register temp = temps.AcquireSameSizeAs(out); __ Add(temp, dividend, abs_imm - 1); __ Cmp(dividend, 0); __ Csel(out, temp, dividend, lt); - if (imm > 0) { - __ Asr(out, out, ctz_imm); - } else { - __ Neg(out, Operand(out, ASR, ctz_imm)); - } + } + + int ctz_imm = CTZ(abs_imm); + if (imm > 0) { + __ Asr(out, out, ctz_imm); } else { - int bits = instruction->GetResultType() == DataType::Type::kInt32 ? 32 : 64; - __ Asr(temp, dividend, bits - 1); - __ Lsr(temp, temp, bits - ctz_imm); - __ Add(out, dividend, temp); - __ And(out, out, abs_imm - 1); - __ Sub(out, out, temp); + __ Neg(out, Operand(out, ASR, ctz_imm)); } } @@ -3446,39 +3426,34 @@ void InstructionCodeGeneratorARM64::GenerateDivRemWithAnyConstant(HBinaryOperati } } -void InstructionCodeGeneratorARM64::GenerateDivRemIntegral(HBinaryOperation* instruction) { - DCHECK(instruction->IsDiv() || instruction->IsRem()); - DataType::Type type = instruction->GetResultType(); - DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64); +void InstructionCodeGeneratorARM64::GenerateIntDivForConstDenom(HDiv *instruction) { + int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1)); - LocationSummary* locations = instruction->GetLocations(); - Register out = OutputRegister(instruction); - Location second = locations->InAt(1); + if (imm == 0) { + // Do not generate anything. DivZeroCheck would prevent any code from being executed. + return; + } - if (second.IsConstant()) { - int64_t imm = Int64FromConstant(second.GetConstant()); + if (IsPowerOfTwo(AbsOrMin(imm))) { + GenerateIntDivForPower2Denom(instruction); + } else { + // Cases imm == -1 or imm == 1 are handled by InstructionSimplifier. + DCHECK(imm < -2 || imm > 2) << imm; + GenerateDivRemWithAnyConstant(instruction); + } +} - if (imm == 0) { - // Do not generate anything. DivZeroCheck would prevent any code to be executed.
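The rewritten power-of-two paths above all build on the classic round-toward-zero correction. A minimal host-side C++ sketch of the arithmetic behind GenerateIntDivForPower2Denom() (plain C++ standing in for the emitted ADD/CMP/CSEL, ASR and NEG instructions; 32-bit case only, and it assumes an arithmetic right shift for signed values, as on ARM):

#include <cassert>
#include <cstdint>

int32_t DivByPowerOfTwo(int32_t dividend, int32_t imm) {
  uint32_t abs_imm = (imm < 0) ? -static_cast<uint32_t>(imm) : static_cast<uint32_t>(imm);
  assert(abs_imm >= 2 && (abs_imm & (abs_imm - 1)) == 0);  // +/-1 is simplified away earlier.
  int32_t rounded;
  if (abs_imm == 2) {
    // Add(out, dividend, Operand(dividend, LSR, 31)): add 1 only when dividend is negative.
    rounded = dividend + static_cast<int32_t>(static_cast<uint32_t>(dividend) >> 31);
  } else {
    // Add(temp, dividend, abs_imm - 1); Cmp(dividend, 0); Csel(out, temp, dividend, lt):
    // bias negative dividends so the arithmetic shift rounds toward zero, not toward -infinity.
    rounded = (dividend < 0) ? dividend + static_cast<int32_t>(abs_imm) - 1 : dividend;
  }
  int32_t quotient = rounded >> __builtin_ctz(abs_imm);  // Asr(out, out, ctz_imm).
  return (imm > 0) ? quotient : -quotient;  // Neg(out, Operand(out, ASR, ctz_imm)) fuses both steps.
}

int main() {
  assert(DivByPowerOfTwo(-7, 4) == -1);  // A bare arithmetic shift would give -2.
  assert(DivByPowerOfTwo(7, -4) == -1);
  assert(DivByPowerOfTwo(-8, 2) == -4);
  return 0;
}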
- } else if (imm == 1 || imm == -1) { - DivRemOneOrMinusOne(instruction); - } else if (IsPowerOfTwo(AbsOrMin(imm))) { - DivRemByPowerOfTwo(instruction); - } else { - DCHECK(imm <= -2 || imm >= 2); - GenerateDivRemWithAnyConstant(instruction); - } +void InstructionCodeGeneratorARM64::GenerateIntDiv(HDiv *instruction) { + DCHECK(DataType::IsIntOrLongType(instruction->GetResultType())) + << instruction->GetResultType(); + + if (instruction->GetLocations()->InAt(1).IsConstant()) { + GenerateIntDivForConstDenom(instruction); } else { + Register out = OutputRegister(instruction); Register dividend = InputRegisterAt(instruction, 0); Register divisor = InputRegisterAt(instruction, 1); - if (instruction->IsDiv()) { - __ Sdiv(out, dividend, divisor); - } else { - UseScratchRegisterScope temps(GetVIXLAssembler()); - Register temp = temps.AcquireSameSizeAs(out); - __ Sdiv(temp, dividend, divisor); - __ Msub(out, temp, divisor, dividend); - } + __ Sdiv(out, dividend, divisor); } } @@ -3510,7 +3485,7 @@ void InstructionCodeGeneratorARM64::VisitDiv(HDiv* div) { switch (type) { case DataType::Type::kInt32: case DataType::Type::kInt64: - GenerateDivRemIntegral(div); + GenerateIntDiv(div); break; case DataType::Type::kFloat32: @@ -3542,7 +3517,7 @@ void InstructionCodeGeneratorARM64::VisitDivZeroCheck(HDivZeroCheck* instruction } if (value.IsConstant()) { - int64_t divisor = Int64ConstantFrom(value); + int64_t divisor = Int64FromLocation(value); if (divisor == 0) { __ B(slow_path->GetEntryLabel()); } else { @@ -4695,6 +4670,22 @@ void InstructionCodeGeneratorARM64::VisitInvokePolymorphic(HInvokePolymorphic* i codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); } +void LocationsBuilderARM64::VisitInvokeCustom(HInvokeCustom* invoke) { + HandleInvoke(invoke); +} + +void InstructionCodeGeneratorARM64::VisitInvokeCustom(HInvokeCustom* invoke) { + codegen_->GenerateInvokeCustomCall(invoke); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); +} + +vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageIntrinsicPatch( + uint32_t intrinsic_data, + vixl::aarch64::Label* adrp_label) { + return NewPcRelativePatch( + /* dex_file */ nullptr, intrinsic_data, adrp_label, &boot_image_intrinsic_patches_); +} + vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageRelRoPatch( uint32_t boot_image_offset, vixl::aarch64::Label* adrp_label) { @@ -4812,6 +4803,55 @@ void CodeGeneratorARM64::EmitLdrOffsetPlaceholder(vixl::aarch64::Label* fixup_la __ ldr(out, MemOperand(base, /* offset placeholder */ 0)); } +void CodeGeneratorARM64::LoadBootImageAddress(vixl::aarch64::Register reg, + uint32_t boot_image_reference) { + if (GetCompilerOptions().IsBootImage()) { + // Add ADRP with its PC-relative type patch. + vixl::aarch64::Label* adrp_label = NewBootImageIntrinsicPatch(boot_image_reference); + EmitAdrpPlaceholder(adrp_label, reg.X()); + // Add ADD with its PC-relative type patch. + vixl::aarch64::Label* add_label = NewBootImageIntrinsicPatch(boot_image_reference, adrp_label); + EmitAddPlaceholder(add_label, reg.X(), reg.X()); + } else if (GetCompilerOptions().GetCompilePic()) { + DCHECK(Runtime::Current()->IsAotCompiler()); + // Add ADRP with its PC-relative .data.bimg.rel.ro patch. + vixl::aarch64::Label* adrp_label = NewBootImageRelRoPatch(boot_image_reference); + EmitAdrpPlaceholder(adrp_label, reg.X()); + // Add LDR with its PC-relative .data.bimg.rel.ro patch. 
+ vixl::aarch64::Label* ldr_label = NewBootImageRelRoPatch(boot_image_reference, adrp_label); + EmitLdrOffsetPlaceholder(ldr_label, reg.W(), reg.X()); + } else { + gc::Heap* heap = Runtime::Current()->GetHeap(); + DCHECK(!heap->GetBootImageSpaces().empty()); + const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference; + __ Ldr(reg.W(), DeduplicateBootImageAddressLiteral(reinterpret_cast<uintptr_t>(address))); + } +} + +void CodeGeneratorARM64::AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke, + uint32_t boot_image_offset) { + DCHECK(invoke->IsStatic()); + InvokeRuntimeCallingConvention calling_convention; + Register argument = calling_convention.GetRegisterAt(0); + if (GetCompilerOptions().IsBootImage()) { + DCHECK_EQ(boot_image_offset, IntrinsicVisitor::IntegerValueOfInfo::kInvalidReference); + // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative. + MethodReference target_method = invoke->GetTargetMethod(); + dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_; + // Add ADRP with its PC-relative type patch. + vixl::aarch64::Label* adrp_label = NewBootImageTypePatch(*target_method.dex_file, type_idx); + EmitAdrpPlaceholder(adrp_label, argument.X()); + // Add ADD with its PC-relative type patch. + vixl::aarch64::Label* add_label = + NewBootImageTypePatch(*target_method.dex_file, type_idx, adrp_label); + EmitAddPlaceholder(add_label, argument.X(), argument.X()); + } else { + LoadBootImageAddress(argument, boot_image_offset); + } + InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); +} + template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> inline void CodeGeneratorARM64::EmitPcRelativeLinkerPatches( const ArenaDeque<PcRelativePatchInfo>& infos, @@ -4824,12 +4864,13 @@ inline void CodeGeneratorARM64::EmitPcRelativeLinkerPatches( } } -linker::LinkerPatch DataBimgRelRoPatchAdapter(size_t literal_offset, - const DexFile* target_dex_file, - uint32_t pc_insn_offset, - uint32_t boot_image_offset) { - DCHECK(target_dex_file == nullptr); // Unused for DataBimgRelRoPatch(), should be null. - return linker::LinkerPatch::DataBimgRelRoPatch(literal_offset, pc_insn_offset, boot_image_offset); +template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)> +linker::LinkerPatch NoDexFileAdapter(size_t literal_offset, + const DexFile* target_dex_file, + uint32_t pc_insn_offset, + uint32_t boot_image_offset) { + DCHECK(target_dex_file == nullptr); // Unused for these patches, should be null. 
+ return Factory(literal_offset, pc_insn_offset, boot_image_offset); } void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) { @@ -4841,6 +4882,7 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* lin type_bss_entry_patches_.size() + boot_image_string_patches_.size() + string_bss_entry_patches_.size() + + boot_image_intrinsic_patches_.size() + baker_read_barrier_patches_.size(); linker_patches->reserve(size); if (GetCompilerOptions().IsBootImage()) { @@ -4850,11 +4892,14 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* lin boot_image_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( boot_image_string_patches_, linker_patches); + EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>( + boot_image_intrinsic_patches_, linker_patches); } else { - EmitPcRelativeLinkerPatches<DataBimgRelRoPatchAdapter>( + EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>( boot_image_method_patches_, linker_patches); DCHECK(boot_image_type_patches_.empty()); DCHECK(boot_image_string_patches_.empty()); + DCHECK(boot_image_intrinsic_patches_.empty()); } EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( method_bss_entry_patches_, linker_patches); @@ -5633,13 +5678,81 @@ void LocationsBuilderARM64::VisitRem(HRem* rem) { } } +void InstructionCodeGeneratorARM64::GenerateIntRemForPower2Denom(HRem *instruction) { + int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1)); + uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm)); + DCHECK(IsPowerOfTwo(abs_imm)) << abs_imm; + + Register out = OutputRegister(instruction); + Register dividend = InputRegisterAt(instruction, 0); + + if (abs_imm == 2) { + __ Cmp(dividend, 0); + __ And(out, dividend, 1); + __ Csneg(out, out, out, ge); + } else { + UseScratchRegisterScope temps(GetVIXLAssembler()); + Register temp = temps.AcquireSameSizeAs(out); + + __ Negs(temp, dividend); + __ And(out, dividend, abs_imm - 1); + __ And(temp, temp, abs_imm - 1); + __ Csneg(out, out, temp, mi); + } +} + +void InstructionCodeGeneratorARM64::GenerateIntRemForOneOrMinusOneDenom(HRem *instruction) { + int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1)); + DCHECK(imm == 1 || imm == -1) << imm; + + Register out = OutputRegister(instruction); + __ Mov(out, 0); +} + +void InstructionCodeGeneratorARM64::GenerateIntRemForConstDenom(HRem *instruction) { + int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1)); + + if (imm == 0) { + // Do not generate anything. + // DivZeroCheck would prevent any code from being executed.
+ return; + } + + if (imm == 1 || imm == -1) { + // TODO: These cases need to be optimized in InstructionSimplifier + GenerateIntRemForOneOrMinusOneDenom(instruction); + } else if (IsPowerOfTwo(AbsOrMin(imm))) { + GenerateIntRemForPower2Denom(instruction); + } else { + DCHECK(imm < -2 || imm > 2) << imm; + GenerateDivRemWithAnyConstant(instruction); + } +} + +void InstructionCodeGeneratorARM64::GenerateIntRem(HRem* instruction) { + DCHECK(DataType::IsIntOrLongType(instruction->GetResultType())) + << instruction->GetResultType(); + + if (instruction->GetLocations()->InAt(1).IsConstant()) { + GenerateIntRemForConstDenom(instruction); + } else { + Register out = OutputRegister(instruction); + Register dividend = InputRegisterAt(instruction, 0); + Register divisor = InputRegisterAt(instruction, 1); + UseScratchRegisterScope temps(GetVIXLAssembler()); + Register temp = temps.AcquireSameSizeAs(out); + __ Sdiv(temp, dividend, divisor); + __ Msub(out, temp, divisor, dividend); + } +} + void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) { DataType::Type type = rem->GetResultType(); switch (type) { case DataType::Type::kInt32: case DataType::Type::kInt64: { - GenerateDivRemIntegral(rem); + GenerateIntRem(rem); break; } @@ -5662,111 +5775,20 @@ void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) { } } -// TODO: integrate with HandleBinaryOp? -static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) { - LocationSummary* locations = new (allocator) LocationSummary(minmax); - switch (minmax->GetResultType()) { - case DataType::Type::kInt32: - case DataType::Type::kInt64: - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - break; - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); - break; - default: - LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType(); - } -} - -void InstructionCodeGeneratorARM64::GenerateMinMaxInt(LocationSummary* locations, - bool is_min, - DataType::Type type) { - Location op1 = locations->InAt(0); - Location op2 = locations->InAt(1); - Location out = locations->Out(); - - Register op1_reg; - Register op2_reg; - Register out_reg; - if (type == DataType::Type::kInt64) { - op1_reg = XRegisterFrom(op1); - op2_reg = XRegisterFrom(op2); - out_reg = XRegisterFrom(out); - } else { - DCHECK_EQ(type, DataType::Type::kInt32); - op1_reg = WRegisterFrom(op1); - op2_reg = WRegisterFrom(op2); - out_reg = WRegisterFrom(out); - } - - __ Cmp(op1_reg, op2_reg); - __ Csel(out_reg, op1_reg, op2_reg, is_min ? 
lt : gt); -} - -void InstructionCodeGeneratorARM64::GenerateMinMaxFP(LocationSummary* locations, - bool is_min, - DataType::Type type) { - Location op1 = locations->InAt(0); - Location op2 = locations->InAt(1); - Location out = locations->Out(); - - FPRegister op1_reg; - FPRegister op2_reg; - FPRegister out_reg; - if (type == DataType::Type::kFloat64) { - op1_reg = DRegisterFrom(op1); - op2_reg = DRegisterFrom(op2); - out_reg = DRegisterFrom(out); - } else { - DCHECK_EQ(type, DataType::Type::kFloat32); - op1_reg = SRegisterFrom(op1); - op2_reg = SRegisterFrom(op2); - out_reg = SRegisterFrom(out); - } - - if (is_min) { - __ Fmin(out_reg, op1_reg, op2_reg); - } else { - __ Fmax(out_reg, op1_reg, op2_reg); - } -} - -// TODO: integrate with HandleBinaryOp? -void InstructionCodeGeneratorARM64::GenerateMinMax(HBinaryOperation* minmax, bool is_min) { - DataType::Type type = minmax->GetResultType(); - switch (type) { - case DataType::Type::kInt32: - case DataType::Type::kInt64: - GenerateMinMaxInt(minmax->GetLocations(), is_min, type); - break; - case DataType::Type::kFloat32: - case DataType::Type::kFloat64: - GenerateMinMaxFP(minmax->GetLocations(), is_min, type); - break; - default: - LOG(FATAL) << "Unexpected type for HMinMax " << type; - } -} - void LocationsBuilderARM64::VisitMin(HMin* min) { - CreateMinMaxLocations(GetGraph()->GetAllocator(), min); + HandleBinaryOp(min); } void InstructionCodeGeneratorARM64::VisitMin(HMin* min) { - GenerateMinMax(min, /*is_min*/ true); + HandleBinaryOp(min); } void LocationsBuilderARM64::VisitMax(HMax* max) { - CreateMinMaxLocations(GetGraph()->GetAllocator(), max); + HandleBinaryOp(max); } void InstructionCodeGeneratorARM64::VisitMax(HMax* max) { - GenerateMinMax(max, /*is_min*/ false); + HandleBinaryOp(max); } void LocationsBuilderARM64::VisitAbs(HAbs* abs) { @@ -6687,7 +6709,7 @@ void CodeGeneratorARM64::GenerateRawReferenceLoad(HInstruction* instruction, // ArrayGet and UnsafeGetObject and UnsafeCASObject intrinsics cases. 
// /* HeapReference<mirror::Object> */ ref = *(obj + offset + (index << scale_factor)) if (index.IsConstant()) { - uint32_t computed_offset = offset + (Int64ConstantFrom(index) << scale_factor); + uint32_t computed_offset = offset + (Int64FromLocation(index) << scale_factor); EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); Load(type, ref_reg, HeapOperand(obj, computed_offset)); if (needs_null_check) { diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index e7fe5b71b7..c44fa48066 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -280,10 +280,6 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator { void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); void HandleCondition(HCondition* instruction); - void GenerateMinMaxInt(LocationSummary* locations, bool is_min, DataType::Type type); - void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type); - void GenerateMinMax(HBinaryOperation* minmax, bool is_min); - // Generate a heap reference load using one register `out`: // // out <- *(out + offset) @@ -326,7 +322,13 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator { void DivRemOneOrMinusOne(HBinaryOperation* instruction); void DivRemByPowerOfTwo(HBinaryOperation* instruction); void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); - void GenerateDivRemIntegral(HBinaryOperation* instruction); + void GenerateIntDiv(HDiv* instruction); + void GenerateIntDivForConstDenom(HDiv *instruction); + void GenerateIntDivForPower2Denom(HDiv *instruction); + void GenerateIntRem(HRem* instruction); + void GenerateIntRemForConstDenom(HRem *instruction); + void GenerateIntRemForOneOrMinusOneDenom(HRem *instruction); + void GenerateIntRemForPower2Denom(HRem *instruction); void HandleGoto(HInstruction* got, HBasicBlock* successor); vixl::aarch64::MemOperand VecAddress( @@ -403,7 +405,6 @@ class ParallelMoveResolverARM64 : public ParallelMoveResolverNoSwap { class CodeGeneratorARM64 : public CodeGenerator { public: CodeGeneratorARM64(HGraph* graph, - const Arm64InstructionSetFeatures& isa_features, const CompilerOptions& compiler_options, OptimizingCompilerStats* stats = nullptr); virtual ~CodeGeneratorARM64() {} @@ -476,9 +477,7 @@ class CodeGeneratorARM64 : public CodeGenerator { return InstructionSet::kArm64; } - const Arm64InstructionSetFeatures& GetInstructionSetFeatures() const { - return isa_features_; - } + const Arm64InstructionSetFeatures& GetInstructionSetFeatures() const; void Initialize() OVERRIDE { block_labels_.resize(GetGraph()->GetBlocks().size()); @@ -561,6 +560,13 @@ class CodeGeneratorARM64 : public CodeGenerator { UNIMPLEMENTED(FATAL); } + // Add a new boot image intrinsic patch for an instruction and return the label + // to be bound before the instruction. The instruction will be either the + // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing + // to the associated ADRP patch label). + vixl::aarch64::Label* NewBootImageIntrinsicPatch(uint32_t intrinsic_data, + vixl::aarch64::Label* adrp_label = nullptr); + // Add a new boot image relocation patch for an instruction and return the label // to be bound before the instruction. 
@@ -403,7 +405,6 @@ class ParallelMoveResolverARM64 : public ParallelMoveResolverNoSwap {
 class CodeGeneratorARM64 : public CodeGenerator {
  public:
   CodeGeneratorARM64(HGraph* graph,
-                     const Arm64InstructionSetFeatures& isa_features,
                      const CompilerOptions& compiler_options,
                      OptimizingCompilerStats* stats = nullptr);
   virtual ~CodeGeneratorARM64() {}
@@ -476,9 +477,7 @@ class CodeGeneratorARM64 : public CodeGenerator {
     return InstructionSet::kArm64;
   }
 
-  const Arm64InstructionSetFeatures& GetInstructionSetFeatures() const {
-    return isa_features_;
-  }
+  const Arm64InstructionSetFeatures& GetInstructionSetFeatures() const;
 
   void Initialize() OVERRIDE {
     block_labels_.resize(GetGraph()->GetBlocks().size());
@@ -561,6 +560,13 @@ class CodeGeneratorARM64 : public CodeGenerator {
     UNIMPLEMENTED(FATAL);
   }
 
+  // Add a new boot image intrinsic patch for an instruction and return the label
+  // to be bound before the instruction. The instruction will be either the
+  // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing
+  // to the associated ADRP patch label).
+  vixl::aarch64::Label* NewBootImageIntrinsicPatch(uint32_t intrinsic_data,
+                                                   vixl::aarch64::Label* adrp_label = nullptr);
+
   // Add a new boot image relocation patch for an instruction and return the label
   // to be bound before the instruction. The instruction will be either the
   // ADRP (pass `adrp_label = null`) or the LDR (pass `adrp_label` pointing
@@ -634,6 +640,9 @@ class CodeGeneratorARM64 : public CodeGenerator {
                              vixl::aarch64::Register out,
                              vixl::aarch64::Register base);
 
+  void LoadBootImageAddress(vixl::aarch64::Register reg, uint32_t boot_image_reference);
+  void AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke, uint32_t boot_image_offset);
+
   void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) OVERRIDE;
   bool NeedsThunkCode(const linker::LinkerPatch& patch) const OVERRIDE;
   void EmitThunkCode(const linker::LinkerPatch& patch,
@@ -892,7 +901,6 @@ class CodeGeneratorARM64 : public CodeGenerator {
   InstructionCodeGeneratorARM64 instruction_visitor_;
   ParallelMoveResolverARM64 move_resolver_;
   Arm64Assembler assembler_;
-  const Arm64InstructionSetFeatures& isa_features_;
 
   // Deduplication map for 32-bit literals, used for non-patchable boot image addresses.
   Uint32ToLiteralMap uint32_literals_;
@@ -911,6 +919,8 @@ class CodeGeneratorARM64 : public CodeGenerator {
   ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_;
   // PC-relative String patch info for kBssEntry.
   ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_;
+  // PC-relative patch info for IntrinsicObjects.
+  ArenaDeque<PcRelativePatchInfo> boot_image_intrinsic_patches_;
 
   // Baker read barrier patch info.
   ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_;
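NewBootImageIntrinsicPatch follows the same two-instruction PC-relative scheme as the other boot image patches: ADRP materializes the target's 4KiB page and ADD (or LDR) supplies the low 12 bits, with both labels recorded for the linker. A hedged sketch of the address arithmetic the linker performs when filling in such a pair (illustrative helper, not ART code):

#include <cstdint>

// Sketch: what an ADRP+ADD pair computes once patched. ADRP yields the
// target's 4KiB page relative to the PC's page; the ADD immediate holds
// the low 12 bits of the target address (lo12 < 0x1000).
uint64_t AdrpAddTarget(uint64_t pc, int64_t page_delta, uint32_t lo12) {
  uint64_t page = (pc & ~UINT64_C(0xfff)) + (page_delta << 12);
  return page + lo12;
}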
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 859e1597c6..9e1ef4002e 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -27,7 +27,9 @@
 #include "compiled_method.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "gc/accounting/card_table.h"
+#include "gc/space/image_space.h"
 #include "heap_poisoning.h"
+#include "intrinsics.h"
 #include "intrinsics_arm_vixl.h"
 #include "linker/linker_patch.h"
 #include "mirror/array-inl.h"
@@ -1501,6 +1503,10 @@ void CodeGeneratorARMVIXL::DumpFloatingPointRegister(std::ostream& stream, int r
   stream << vixl32::SRegister(reg);
 }
 
+const ArmInstructionSetFeatures& CodeGeneratorARMVIXL::GetInstructionSetFeatures() const {
+  return *GetCompilerOptions().GetInstructionSetFeatures()->AsArmInstructionSetFeatures();
+}
+
 static uint32_t ComputeSRegisterListMask(const SRegisterList& regs) {
   uint32_t mask = 0;
   for (uint32_t i = regs.GetFirstSRegister().GetCode();
@@ -2318,7 +2324,6 @@ vixl32::Label* CodeGeneratorARMVIXL::GetFinalLabel(HInstruction* instruction,
 }
 
 CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph,
-                                           const ArmInstructionSetFeatures& isa_features,
                                            const CompilerOptions& compiler_options,
                                            OptimizingCompilerStats* stats)
     : CodeGenerator(graph,
@@ -2335,7 +2340,6 @@ CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph,
       instruction_visitor_(graph, this),
       move_resolver_(graph->GetAllocator(), this),
       assembler_(graph->GetAllocator()),
-      isa_features_(isa_features),
       uint32_literals_(std::less<uint32_t>(),
                        graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
       boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
@@ -2344,6 +2348,7 @@ CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph,
       type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
       boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
       string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+      boot_image_intrinsic_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
       baker_read_barrier_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
       jit_string_patches_(StringReferenceValueComparator(),
                           graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
@@ -3742,6 +3747,15 @@ void InstructionCodeGeneratorARMVIXL::VisitInvokePolymorphic(HInvokePolymorphic*
   codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 9);
 }
 
+void LocationsBuilderARMVIXL::VisitInvokeCustom(HInvokeCustom* invoke) {
+  HandleInvoke(invoke);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitInvokeCustom(HInvokeCustom* invoke) {
+  codegen_->GenerateInvokeCustomCall(invoke);
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 10);
+}
+
 void LocationsBuilderARMVIXL::VisitNeg(HNeg* neg) {
   LocationSummary* locations =
       new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
@@ -5493,7 +5507,7 @@ void InstructionCodeGeneratorARMVIXL::VisitNewInstance(HNewInstance* instruction
     codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
     CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
   }
-  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 10);
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 11);
 }
 
 void LocationsBuilderARMVIXL::VisitNewArray(HNewArray* instruction) {
@@ -5513,7 +5527,7 @@ void InstructionCodeGeneratorARMVIXL::VisitNewArray(HNewArray* instruction) {
   codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
   CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
   DCHECK(!codegen_->IsLeafMethod());
-  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 11);
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 12);
 }
 
 void LocationsBuilderARMVIXL::VisitParameterValue(HParameterValue* instruction) {
@@ -7084,7 +7098,7 @@ void InstructionCodeGeneratorARMVIXL::VisitSuspendCheck(HSuspendCheck* instructi
     return;
   }
   GenerateSuspendCheck(instruction, nullptr);
-  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 12);
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 13);
 }
 
 void InstructionCodeGeneratorARMVIXL::GenerateSuspendCheck(HSuspendCheck* instruction,
@@ -7437,7 +7451,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_
   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
     codegen_->GenerateLoadClassRuntimeCall(cls);
-    codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 13);
+    codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 14);
     return;
   }
   DCHECK(!cls->NeedsAccessCheck());
@@ -7523,7 +7537,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_
     } else {
       __ Bind(slow_path->GetExitLabel());
     }
-    codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 14);
+    codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 15);
   }
 }
 
@@ -7732,7 +7746,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE
       codegen_->AddSlowPath(slow_path);
       __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
       __ Bind(slow_path->GetExitLabel());
-      codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 15);
+      codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 16);
       return;
     }
     case HLoadString::LoadKind::kJitTableAddress: {
@@ -7754,7 +7768,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE
   __ Mov(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_);
   codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
   CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
-  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 16);
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 17);
 }
 
 static int32_t GetExceptionTlsOffset() {
@@ -8384,7 +8398,7 @@ void InstructionCodeGeneratorARMVIXL::VisitMonitorOperation(HMonitorOperation* i
   } else {
     CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
   }
-  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 17);
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 18);
 }
 
 void LocationsBuilderARMVIXL::VisitAnd(HAnd* instruction) {
@@ -8883,7 +8897,7 @@ void CodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
     // Note that GC roots are not affected by heap poisoning, thus we
     // do not have to unpoison `root_reg` here.
   }
-  MaybeGenerateMarkingRegisterCheck(/* code */ 18);
+  MaybeGenerateMarkingRegisterCheck(/* code */ 19);
 }
 
 void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
@@ -8963,7 +8977,7 @@ void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* i
                      narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET
                             : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET);
     }
-    MaybeGenerateMarkingRegisterCheck(/* code */ 19, /* temp_loc */ LocationFrom(ip));
+    MaybeGenerateMarkingRegisterCheck(/* code */ 20, /* temp_loc */ LocationFrom(ip));
     return;
   }
 
@@ -9041,7 +9055,7 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(HInstruction* i
       DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
                 BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
     }
-    MaybeGenerateMarkingRegisterCheck(/* code */ 20, /* temp_loc */ LocationFrom(ip));
+    MaybeGenerateMarkingRegisterCheck(/* code */ 21, /* temp_loc */ LocationFrom(ip));
     return;
   }
 
@@ -9095,7 +9109,7 @@ void CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier(HInstructio
   // Fast path: the GC is not marking: just load the reference.
   GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check);
   __ Bind(slow_path->GetExitLabel());
-  MaybeGenerateMarkingRegisterCheck(/* code */ 21);
+  MaybeGenerateMarkingRegisterCheck(/* code */ 22);
 }
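The renumbered /* code */ arguments in this file are unique per emission site: when a marking-register check fires at run time, the embedded code identifies which site emitted it, so adding VisitInvokeCustom as code 10 shifts every later site up by one. Conceptually (a rough sketch of the idea, not the actual VIXL emission):

// Sketch: what MaybeGenerateMarkingRegisterCheck verifies in debug builds.
// The dedicated marking register (MR) must mirror the thread's
// is_gc_marking flag; on mismatch the emitted code traps, with the
// site-specific `code` baked into the trap so the site can be identified.
void CheckMarkingRegisterSketch(uintptr_t mr, uintptr_t is_gc_marking, int code) {
  if (mr != is_gc_marking) {
    (void)code;          // real code embeds `code` in the breakpoint immediate
    __builtin_trap();
  }
}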
 
 void CodeGeneratorARMVIXL::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction,
@@ -9150,7 +9164,7 @@ void CodeGeneratorARMVIXL::UpdateReferenceFieldWithBakerReadBarrier(HInstruction
   // Fast path: the GC is not marking: nothing to do (the field is
   // up-to-date, and we don't need to load the reference).
   __ Bind(slow_path->GetExitLabel());
-  MaybeGenerateMarkingRegisterCheck(/* code */ 22);
+  MaybeGenerateMarkingRegisterCheck(/* code */ 23);
 }
 
 void CodeGeneratorARMVIXL::GenerateRawReferenceLoad(HInstruction* instruction,
@@ -9450,6 +9464,11 @@ void CodeGeneratorARMVIXL::GenerateVirtualCall(
   }
 }
 
+CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageIntrinsicPatch(
+    uint32_t intrinsic_data) {
+  return NewPcRelativePatch(/* dex_file */ nullptr, intrinsic_data, &boot_image_intrinsic_patches_);
+}
+
 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageRelRoPatch(
     uint32_t boot_image_offset) {
   return NewPcRelativePatch(/* dex_file */ nullptr,
@@ -9527,6 +9546,46 @@ VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateJitClassLiteral(const DexFil
   });
 }
 
+void CodeGeneratorARMVIXL::LoadBootImageAddress(vixl32::Register reg,
+                                                uint32_t boot_image_reference) {
+  if (GetCompilerOptions().IsBootImage()) {
+    CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
+        NewBootImageIntrinsicPatch(boot_image_reference);
+    EmitMovwMovtPlaceholder(labels, reg);
+  } else if (GetCompilerOptions().GetCompilePic()) {
+    DCHECK(Runtime::Current()->IsAotCompiler());
+    CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
+        NewBootImageRelRoPatch(boot_image_reference);
+    EmitMovwMovtPlaceholder(labels, reg);
+    __ Ldr(reg, MemOperand(reg, /* offset */ 0));
+  } else {
+    gc::Heap* heap = Runtime::Current()->GetHeap();
+    DCHECK(!heap->GetBootImageSpaces().empty());
+    uintptr_t address =
+        reinterpret_cast<uintptr_t>(heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference);
+    __ Ldr(reg, DeduplicateBootImageAddressLiteral(dchecked_integral_cast<uint32_t>(address)));
+  }
+}
+
+void CodeGeneratorARMVIXL::AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke,
+                                                        uint32_t boot_image_offset) {
+  DCHECK(invoke->IsStatic());
+  InvokeRuntimeCallingConventionARMVIXL calling_convention;
+  vixl32::Register argument = calling_convention.GetRegisterAt(0);
+  if (GetCompilerOptions().IsBootImage()) {
+    DCHECK_EQ(boot_image_offset, IntrinsicVisitor::IntegerValueOfInfo::kInvalidReference);
+    // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
+    MethodReference target_method = invoke->GetTargetMethod();
+    dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
+    PcRelativePatchInfo* labels = NewBootImageTypePatch(*target_method.dex_file, type_idx);
+    EmitMovwMovtPlaceholder(labels, argument);
+  } else {
+    LoadBootImageAddress(argument, boot_image_offset);
+  }
+  InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+  CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+}
+
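LoadBootImageAddress chooses among three strategies, mirrored on every backend in this change: a link-time PC-relative patch when compiling the boot image itself, an indirection through the .data.bimg.rel.ro table for PIC AOT code, and a literal absolute address otherwise (the boot image is already mapped). A compact sketch of the selection, with a hypothetical enum standing in for the emitted code:

enum class BootImageLoadKind { kPcRelativePatch, kRelRoEntry, kLiteralAddress };

// Sketch: the strategy choice made by the branches above.
BootImageLoadKind SelectLoadKind(bool compiling_boot_image, bool compile_pic) {
  if (compiling_boot_image) {
    return BootImageLoadKind::kPcRelativePatch;  // resolved by the linker
  }
  if (compile_pic) {
    return BootImageLoadKind::kRelRoEntry;  // load via .data.bimg.rel.ro
  }
  return BootImageLoadKind::kLiteralAddress;  // absolute address of mapped image
}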
 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
 inline void CodeGeneratorARMVIXL::EmitPcRelativeLinkerPatches(
     const ArenaDeque<PcRelativePatchInfo>& infos,
@@ -9547,12 +9606,13 @@ inline void CodeGeneratorARMVIXL::EmitPcRelativeLinkerPatches(
   }
 }
 
-linker::LinkerPatch DataBimgRelRoPatchAdapter(size_t literal_offset,
-                                              const DexFile* target_dex_file,
-                                              uint32_t pc_insn_offset,
-                                              uint32_t boot_image_offset) {
-  DCHECK(target_dex_file == nullptr);  // Unused for DataBimgRelRoPatch(), should be null.
-  return linker::LinkerPatch::DataBimgRelRoPatch(literal_offset, pc_insn_offset, boot_image_offset);
+template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
+linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
+                                     const DexFile* target_dex_file,
+                                     uint32_t pc_insn_offset,
+                                     uint32_t boot_image_offset) {
+  DCHECK(target_dex_file == nullptr);  // Unused for these patches, should be null.
+  return Factory(literal_offset, pc_insn_offset, boot_image_offset);
 }
 
 void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
@@ -9564,6 +9624,7 @@ void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* l
       /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() +
       /* MOVW+MOVT for each entry */ 2u * boot_image_string_patches_.size() +
       /* MOVW+MOVT for each entry */ 2u * string_bss_entry_patches_.size() +
+      /* MOVW+MOVT for each entry */ 2u * boot_image_intrinsic_patches_.size() +
       baker_read_barrier_patches_.size();
   linker_patches->reserve(size);
   if (GetCompilerOptions().IsBootImage()) {
@@ -9573,11 +9634,14 @@ void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* l
         boot_image_type_patches_, linker_patches);
     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
         boot_image_string_patches_, linker_patches);
+    EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
+        boot_image_intrinsic_patches_, linker_patches);
   } else {
-    EmitPcRelativeLinkerPatches<DataBimgRelRoPatchAdapter>(
+    EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>(
         boot_image_method_patches_, linker_patches);
     DCHECK(boot_image_type_patches_.empty());
     DCHECK(boot_image_string_patches_.empty());
+    DCHECK(boot_image_intrinsic_patches_.empty());
   }
   EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
       method_bss_entry_patches_, linker_patches);
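NoDexFileAdapter generalizes the removed one-off adapter: any three-argument LinkerPatch factory can now be lifted to the four-argument shape EmitPcRelativeLinkerPatches expects, with the unused dex-file slot checked against null. The pattern in isolation, as a standalone sketch with toy types:

#include <cassert>
#include <cstddef>
#include <cstdint>

struct Patch { size_t offset; uint32_t pc_insn; uint32_t data; };
Patch MakeRelRo(size_t off, uint32_t pc, uint32_t data) { return {off, pc, data}; }

// Adapter: lift a 3-argument factory to the 4-argument signature a generic
// emitter expects; the extra pointer slot must be unused (null).
template <Patch (*Factory)(size_t, uint32_t, uint32_t)>
Patch NoExtraArgAdapter(size_t off, const void* unused, uint32_t pc, uint32_t data) {
  assert(unused == nullptr);
  return Factory(off, pc, data);
}

// Usage: instantiate with the 3-argument factory and call through the
// 4-argument shape, passing nullptr for the unused slot.
Patch Example() { return NoExtraArgAdapter<MakeRelRo>(8, nullptr, 4, 42); }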
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index d5b739bd7c..fc8cf98173 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -428,7 +428,6 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator {
 class CodeGeneratorARMVIXL : public CodeGenerator {
  public:
   CodeGeneratorARMVIXL(HGraph* graph,
-                       const ArmInstructionSetFeatures& isa_features,
                        const CompilerOptions& compiler_options,
                        OptimizingCompilerStats* stats = nullptr);
   virtual ~CodeGeneratorARMVIXL() {}
@@ -475,6 +474,9 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
   ParallelMoveResolver* GetMoveResolver() OVERRIDE { return &move_resolver_; }
   InstructionSet GetInstructionSet() const OVERRIDE { return InstructionSet::kThumb2; }
 
+  const ArmInstructionSetFeatures& GetInstructionSetFeatures() const;
+
   // Helper method to move a 32-bit value between two locations.
   void Move32(Location destination, Location source);
@@ -523,8 +525,6 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
 
   void Finalize(CodeAllocator* allocator) OVERRIDE;
 
-  const ArmInstructionSetFeatures& GetInstructionSetFeatures() const { return isa_features_; }
-
   bool NeedsTwoRegisters(DataType::Type type) const OVERRIDE {
     return type == DataType::Type::kFloat64 || type == DataType::Type::kInt64;
   }
@@ -578,6 +578,7 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
     vixl::aarch32::Label add_pc_label;
   };
 
+  PcRelativePatchInfo* NewBootImageIntrinsicPatch(uint32_t intrinsic_data);
   PcRelativePatchInfo* NewBootImageRelRoPatch(uint32_t boot_image_offset);
   PcRelativePatchInfo* NewBootImageMethodPatch(MethodReference target_method);
   PcRelativePatchInfo* NewMethodBssEntryPatch(MethodReference target_method);
@@ -600,6 +601,9 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
                                          dex::TypeIndex type_index,
                                          Handle<mirror::Class> handle);
 
+  void LoadBootImageAddress(vixl::aarch32::Register reg, uint32_t boot_image_reference);
+  void AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke, uint32_t boot_image_offset);
+
   void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) OVERRIDE;
   bool NeedsThunkCode(const linker::LinkerPatch& patch) const OVERRIDE;
   void EmitThunkCode(const linker::LinkerPatch& patch,
@@ -886,7 +890,6 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
   ParallelMoveResolverARMVIXL move_resolver_;
 
   ArmVIXLAssembler assembler_;
-  const ArmInstructionSetFeatures& isa_features_;
 
   // Deduplication map for 32-bit literals, used for non-patchable boot image addresses.
   Uint32ToLiteralMap uint32_literals_;
@@ -903,6 +906,8 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
   ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_;
   // PC-relative String patch info for kBssEntry.
   ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_;
+  // PC-relative patch info for IntrinsicObjects.
+  ArenaDeque<PcRelativePatchInfo> boot_image_intrinsic_patches_;
 
   // Baker read barrier patch info.
   ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_;
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 8be84a15bd..f0ef30ee37 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -26,6 +26,7 @@
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "entrypoints/quick/quick_entrypoints_enum.h"
 #include "gc/accounting/card_table.h"
+#include "gc/space/image_space.h"
 #include "heap_poisoning.h"
 #include "intrinsics.h"
 #include "intrinsics_mips.h"
@@ -996,7 +997,6 @@ class ReadBarrierForRootSlowPathMIPS : public SlowPathCodeMIPS {
 };
 
 CodeGeneratorMIPS::CodeGeneratorMIPS(HGraph* graph,
-                                     const MipsInstructionSetFeatures& isa_features,
                                      const CompilerOptions& compiler_options,
                                      OptimizingCompilerStats* stats)
     : CodeGenerator(graph,
@@ -1013,8 +1013,8 @@ CodeGeneratorMIPS::CodeGeneratorMIPS(HGraph* graph,
       location_builder_(graph, this),
       instruction_visitor_(graph, this),
       move_resolver_(graph->GetAllocator(), this),
-      assembler_(graph->GetAllocator(), &isa_features),
-      isa_features_(isa_features),
+      assembler_(graph->GetAllocator(),
+                 compiler_options.GetInstructionSetFeatures()->AsMipsInstructionSetFeatures()),
       uint32_literals_(std::less<uint32_t>(),
                        graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
       boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
@@ -1023,6 +1023,7 @@ CodeGeneratorMIPS::CodeGeneratorMIPS(HGraph* graph,
       type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
       boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
       string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+      boot_image_intrinsic_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
       jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
       jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
       clobbered_ra_(false) {
@@ -1596,12 +1597,13 @@ inline void CodeGeneratorMIPS::EmitPcRelativeLinkerPatches(
   }
 }
 
-linker::LinkerPatch DataBimgRelRoPatchAdapter(size_t literal_offset,
-                                              const DexFile* target_dex_file,
-                                              uint32_t pc_insn_offset,
-                                              uint32_t boot_image_offset) {
-  DCHECK(target_dex_file == nullptr);  // Unused for DataBimgRelRoPatch(), should be null.
-  return linker::LinkerPatch::DataBimgRelRoPatch(literal_offset, pc_insn_offset, boot_image_offset);
+template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
+linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
+                                     const DexFile* target_dex_file,
+                                     uint32_t pc_insn_offset,
+                                     uint32_t boot_image_offset) {
+  DCHECK(target_dex_file == nullptr);  // Unused for these patches, should be null.
+  return Factory(literal_offset, pc_insn_offset, boot_image_offset);
 }
 
 void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
@@ -1612,7 +1614,8 @@ void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* link
       boot_image_type_patches_.size() +
       type_bss_entry_patches_.size() +
       boot_image_string_patches_.size() +
-      string_bss_entry_patches_.size();
+      string_bss_entry_patches_.size() +
+      boot_image_intrinsic_patches_.size();
   linker_patches->reserve(size);
   if (GetCompilerOptions().IsBootImage()) {
     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
@@ -1621,11 +1624,14 @@ void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* link
         boot_image_type_patches_, linker_patches);
     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
         boot_image_string_patches_, linker_patches);
+    EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
+        boot_image_intrinsic_patches_, linker_patches);
   } else {
-    EmitPcRelativeLinkerPatches<DataBimgRelRoPatchAdapter>(
+    EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>(
        boot_image_method_patches_, linker_patches);
     DCHECK(boot_image_type_patches_.empty());
     DCHECK(boot_image_string_patches_.empty());
+    DCHECK(boot_image_intrinsic_patches_.empty());
   }
   EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
       method_bss_entry_patches_, linker_patches);
@@ -1636,6 +1642,13 @@ void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* link
   DCHECK_EQ(size, linker_patches->size());
 }
 
+CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewBootImageIntrinsicPatch(
+    uint32_t intrinsic_data,
+    const PcRelativePatchInfo* info_high) {
+  return NewPcRelativePatch(
+      /* dex_file */ nullptr, intrinsic_data, info_high, &boot_image_intrinsic_patches_);
+}
+
 CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewBootImageRelRoPatch(
     uint32_t boot_image_offset,
     const PcRelativePatchInfo* info_high) {
@@ -1739,6 +1752,48 @@ void CodeGeneratorMIPS::EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo
   // offset to `out` (e.g. lw, jialc, addiu).
 }
 
+void CodeGeneratorMIPS::LoadBootImageAddress(Register reg, uint32_t boot_image_reference) {
+  if (GetCompilerOptions().IsBootImage()) {
+    PcRelativePatchInfo* info_high = NewBootImageIntrinsicPatch(boot_image_reference);
+    PcRelativePatchInfo* info_low = NewBootImageIntrinsicPatch(boot_image_reference, info_high);
+    EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, /* base */ ZERO);
+    __ Addiu(reg, TMP, /* placeholder */ 0x5678, &info_low->label);
+  } else if (GetCompilerOptions().GetCompilePic()) {
+    DCHECK(Runtime::Current()->IsAotCompiler());
+    PcRelativePatchInfo* info_high = NewBootImageRelRoPatch(boot_image_reference);
+    PcRelativePatchInfo* info_low = NewBootImageRelRoPatch(boot_image_reference, info_high);
+    EmitPcRelativeAddressPlaceholderHigh(info_high, reg, /* base */ ZERO);
+    __ Lw(reg, reg, /* placeholder */ 0x5678, &info_low->label);
+  } else {
+    gc::Heap* heap = Runtime::Current()->GetHeap();
+    DCHECK(!heap->GetBootImageSpaces().empty());
+    const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference;
+    __ LoadConst32(reg, dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(address)));
+  }
+}
+
+void CodeGeneratorMIPS::AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke,
+                                                     uint32_t boot_image_offset) {
+  DCHECK(invoke->IsStatic());
+  InvokeRuntimeCallingConvention calling_convention;
+  Register argument = calling_convention.GetRegisterAt(0);
+  if (GetCompilerOptions().IsBootImage()) {
+    DCHECK_EQ(boot_image_offset, IntrinsicVisitor::IntegerValueOfInfo::kInvalidReference);
+    // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
+    MethodReference target_method = invoke->GetTargetMethod();
+    dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
+    PcRelativePatchInfo* info_high = NewBootImageTypePatch(*target_method.dex_file, type_idx);
+    PcRelativePatchInfo* info_low =
+        NewBootImageTypePatch(*target_method.dex_file, type_idx, info_high);
+    EmitPcRelativeAddressPlaceholderHigh(info_high, argument, /* base */ ZERO);
+    __ Addiu(argument, argument, /* placeholder */ 0x5678, &info_low->label);
+  } else {
+    LoadBootImageAddress(argument, boot_image_offset);
+  }
+  InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+  CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+}
+
 CodeGeneratorMIPS::JitPatchInfo* CodeGeneratorMIPS::NewJitRootStringPatch(
     const DexFile& dex_file,
     dex::StringIndex string_index,
@@ -1895,6 +1950,10 @@ void CodeGeneratorMIPS::DumpFloatingPointRegister(std::ostream& stream, int reg)
   stream << FRegister(reg);
 }
 
+const MipsInstructionSetFeatures& CodeGeneratorMIPS::GetInstructionSetFeatures() const {
+  return *GetCompilerOptions().GetInstructionSetFeatures()->AsMipsInstructionSetFeatures();
+}
+
 constexpr size_t kMipsDirectEntrypointRuntimeOffset = 16;
 
 void CodeGeneratorMIPS::InvokeRuntime(QuickEntrypointEnum entrypoint,
@@ -7795,6 +7854,14 @@ void InstructionCodeGeneratorMIPS::VisitInvokePolymorphic(HInvokePolymorphic* in
   codegen_->GenerateInvokePolymorphicCall(invoke);
 }
 
+void LocationsBuilderMIPS::VisitInvokeCustom(HInvokeCustom* invoke) {
+  HandleInvoke(invoke);
+}
+
+void InstructionCodeGeneratorMIPS::VisitInvokeCustom(HInvokeCustom* invoke) {
+  codegen_->GenerateInvokeCustomCall(invoke);
+}
+
 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorMIPS* codegen) {
   if (invoke->GetLocations()->Intrinsified()) {
     IntrinsicCodeGeneratorMIPS intrinsic(codegen);
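The 0x5678 immediates above are placeholders for the low 16 bits of a hi/lo pair that the linker rewrites. Because Addiu sign-extends its immediate, the high half must be pre-biased by 0x8000; a sketch of the standard split, assuming the usual MIPS %hi/%lo convention:

#include <cstdint>

// Sketch: split a 32-bit address into the %hi/%lo halves used by a
// lui/addiu pair. addiu sign-extends its 16-bit immediate, so %hi
// rounds up whenever bit 15 of the address is set.
uint16_t Hi16(uint32_t addr) { return static_cast<uint16_t>((addr + 0x8000u) >> 16); }
int16_t Lo16(uint32_t addr) { return static_cast<int16_t>(addr & 0xffffu); }
// Reassembly: (Hi16(a) << 16) + Lo16(a) == a, with Lo16 sign-extended.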
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index 2e7c736dbd..4830ac9bc6 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -370,7 +370,6 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator {
 class CodeGeneratorMIPS : public CodeGenerator {
  public:
   CodeGeneratorMIPS(HGraph* graph,
-                    const MipsInstructionSetFeatures& isa_features,
                     const CompilerOptions& compiler_options,
                     OptimizingCompilerStats* stats = nullptr);
   virtual ~CodeGeneratorMIPS() {}
@@ -509,9 +508,7 @@ class CodeGeneratorMIPS : public CodeGenerator {
 
   InstructionSet GetInstructionSet() const OVERRIDE { return InstructionSet::kMips; }
 
-  const MipsInstructionSetFeatures& GetInstructionSetFeatures() const {
-    return isa_features_;
-  }
+  const MipsInstructionSetFeatures& GetInstructionSetFeatures() const;
 
   MipsLabel* GetLabelOf(HBasicBlock* block) const {
     return CommonGetLabelOf<MipsLabel>(block_labels_, block);
@@ -621,6 +618,8 @@ class CodeGeneratorMIPS : public CodeGenerator {
     DISALLOW_COPY_AND_ASSIGN(PcRelativePatchInfo);
   };
 
+  PcRelativePatchInfo* NewBootImageIntrinsicPatch(uint32_t intrinsic_data,
+                                                  const PcRelativePatchInfo* info_high = nullptr);
   PcRelativePatchInfo* NewBootImageRelRoPatch(uint32_t boot_image_offset,
                                               const PcRelativePatchInfo* info_high = nullptr);
   PcRelativePatchInfo* NewBootImageMethodPatch(MethodReference target_method,
@@ -645,6 +644,9 @@ class CodeGeneratorMIPS : public CodeGenerator {
                                             Register out,
                                             Register base);
 
+  void LoadBootImageAddress(Register reg, uint32_t boot_image_reference);
+  void AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke, uint32_t boot_image_offset);
+
   // The JitPatchInfo is used for JIT string and class loads.
   struct JitPatchInfo {
     JitPatchInfo(const DexFile& dex_file, uint64_t idx)
@@ -693,7 +695,6 @@ class CodeGeneratorMIPS : public CodeGenerator {
   InstructionCodeGeneratorMIPS instruction_visitor_;
   ParallelMoveResolverMIPS move_resolver_;
   MipsAssembler assembler_;
-  const MipsInstructionSetFeatures& isa_features_;
 
   // Deduplication map for 32-bit literals, used for non-patchable boot image addresses.
   Uint32ToLiteralMap uint32_literals_;
@@ -710,6 +711,8 @@ class CodeGeneratorMIPS : public CodeGenerator {
   ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_;
   // PC-relative String patch info for kBssEntry.
   ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_;
+  // PC-relative patch info for IntrinsicObjects.
+  ArenaDeque<PcRelativePatchInfo> boot_image_intrinsic_patches_;
 
   // Patches for string root accesses in JIT compiled code.
   ArenaDeque<JitPatchInfo> jit_string_patches_;
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index cd9e0e521e..6e72727f59 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -24,6 +24,7 @@
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "entrypoints/quick/quick_entrypoints_enum.h"
 #include "gc/accounting/card_table.h"
+#include "gc/space/image_space.h"
 #include "heap_poisoning.h"
 #include "intrinsics.h"
 #include "intrinsics_mips64.h"
@@ -939,7 +940,6 @@ class ReadBarrierForRootSlowPathMIPS64 : public SlowPathCodeMIPS64 {
 };
 
 CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph,
-                                         const Mips64InstructionSetFeatures& isa_features,
                                          const CompilerOptions& compiler_options,
                                          OptimizingCompilerStats* stats)
     : CodeGenerator(graph,
@@ -956,8 +956,8 @@ CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph,
       location_builder_(graph, this),
       instruction_visitor_(graph, this),
       move_resolver_(graph->GetAllocator(), this),
-      assembler_(graph->GetAllocator(), &isa_features),
-      isa_features_(isa_features),
+      assembler_(graph->GetAllocator(),
+                 compiler_options.GetInstructionSetFeatures()->AsMips64InstructionSetFeatures()),
       uint32_literals_(std::less<uint32_t>(),
                        graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
       uint64_literals_(std::less<uint64_t>(),
@@ -968,6 +968,7 @@ CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph,
       type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
       boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
       string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+      boot_image_intrinsic_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
       jit_string_patches_(StringReferenceValueComparator(),
                           graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
       jit_class_patches_(TypeReferenceValueComparator(),
@@ -1508,12 +1509,13 @@ inline void CodeGeneratorMIPS64::EmitPcRelativeLinkerPatches(
   }
 }
 
-linker::LinkerPatch DataBimgRelRoPatchAdapter(size_t literal_offset,
-                                              const DexFile* target_dex_file,
-                                              uint32_t pc_insn_offset,
-                                              uint32_t boot_image_offset) {
-  DCHECK(target_dex_file == nullptr);  // Unused for DataBimgRelRoPatch(), should be null.
-  return linker::LinkerPatch::DataBimgRelRoPatch(literal_offset, pc_insn_offset, boot_image_offset);
+template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
+linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
+                                     const DexFile* target_dex_file,
+                                     uint32_t pc_insn_offset,
+                                     uint32_t boot_image_offset) {
+  DCHECK(target_dex_file == nullptr);  // Unused for these patches, should be null.
+  return Factory(literal_offset, pc_insn_offset, boot_image_offset);
 }
 
 void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
@@ -1524,7 +1526,8 @@ void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* li
       boot_image_type_patches_.size() +
       type_bss_entry_patches_.size() +
       boot_image_string_patches_.size() +
-      string_bss_entry_patches_.size();
+      string_bss_entry_patches_.size() +
+      boot_image_intrinsic_patches_.size();
   linker_patches->reserve(size);
   if (GetCompilerOptions().IsBootImage()) {
     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
@@ -1533,11 +1536,14 @@ void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* li
         boot_image_type_patches_, linker_patches);
     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
         boot_image_string_patches_, linker_patches);
+    EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
+        boot_image_intrinsic_patches_, linker_patches);
   } else {
-    EmitPcRelativeLinkerPatches<DataBimgRelRoPatchAdapter>(
+    EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>(
        boot_image_method_patches_, linker_patches);
     DCHECK(boot_image_type_patches_.empty());
     DCHECK(boot_image_string_patches_.empty());
+    DCHECK(boot_image_intrinsic_patches_.empty());
   }
   EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
       method_bss_entry_patches_, linker_patches);
@@ -1548,6 +1554,13 @@ void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* li
   DCHECK_EQ(size, linker_patches->size());
 }
 
+CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewBootImageIntrinsicPatch(
+    uint32_t intrinsic_data,
+    const PcRelativePatchInfo* info_high) {
+  return NewPcRelativePatch(
+      /* dex_file */ nullptr, intrinsic_data, info_high, &boot_image_intrinsic_patches_);
+}
+
 CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewBootImageRelRoPatch(
     uint32_t boot_image_offset,
     const PcRelativePatchInfo* info_high) {
@@ -1638,6 +1651,50 @@ void CodeGeneratorMIPS64::EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchIn
   }
 }
 
+void CodeGeneratorMIPS64::LoadBootImageAddress(GpuRegister reg, uint32_t boot_image_reference) {
+  if (GetCompilerOptions().IsBootImage()) {
+    PcRelativePatchInfo* info_high = NewBootImageIntrinsicPatch(boot_image_reference);
+    PcRelativePatchInfo* info_low = NewBootImageIntrinsicPatch(boot_image_reference, info_high);
+    EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low);
+    __ Daddiu(reg, AT, /* placeholder */ 0x5678);
+  } else if (GetCompilerOptions().GetCompilePic()) {
+    DCHECK(Runtime::Current()->IsAotCompiler());
+    PcRelativePatchInfo* info_high = NewBootImageRelRoPatch(boot_image_reference);
+    PcRelativePatchInfo* info_low = NewBootImageRelRoPatch(boot_image_reference, info_high);
+    EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low);
+    // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load.
+    __ Lwu(reg, AT, /* placeholder */ 0x5678);
+  } else {
+    gc::Heap* heap = Runtime::Current()->GetHeap();
+    DCHECK(!heap->GetBootImageSpaces().empty());
+    uintptr_t address =
+        reinterpret_cast<uintptr_t>(heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference);
+    __ LoadLiteral(reg, kLoadDoubleword, DeduplicateBootImageAddressLiteral(address));
+  }
+}
+
+void CodeGeneratorMIPS64::AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke,
+                                                       uint32_t boot_image_offset) {
+  DCHECK(invoke->IsStatic());
+  InvokeRuntimeCallingConvention calling_convention;
+  GpuRegister argument = calling_convention.GetRegisterAt(0);
+  if (GetCompilerOptions().IsBootImage()) {
+    DCHECK_EQ(boot_image_offset, IntrinsicVisitor::IntegerValueOfInfo::kInvalidReference);
+    // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
+    MethodReference target_method = invoke->GetTargetMethod();
+    dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
+    PcRelativePatchInfo* info_high = NewBootImageTypePatch(*target_method.dex_file, type_idx);
+    PcRelativePatchInfo* info_low =
+        NewBootImageTypePatch(*target_method.dex_file, type_idx, info_high);
+    EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low);
+    __ Daddiu(argument, AT, /* placeholder */ 0x5678);
+  } else {
+    LoadBootImageAddress(argument, boot_image_offset);
+  }
+  InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+  CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+}
+
 Literal* CodeGeneratorMIPS64::DeduplicateJitStringLiteral(const DexFile& dex_file,
                                                           dex::StringIndex string_index,
                                                           Handle<mirror::String> handle) {
@@ -1753,6 +1810,10 @@ void CodeGeneratorMIPS64::DumpFloatingPointRegister(std::ostream& stream, int re
   stream << FpuRegister(reg);
 }
 
+const Mips64InstructionSetFeatures& CodeGeneratorMIPS64::GetInstructionSetFeatures() const {
+  return *GetCompilerOptions().GetInstructionSetFeatures()->AsMips64InstructionSetFeatures();
+}
+
 void CodeGeneratorMIPS64::InvokeRuntime(QuickEntrypointEnum entrypoint,
                                         HInstruction* instruction,
                                         uint32_t dex_pc,
@@ -5908,6 +5969,14 @@ void InstructionCodeGeneratorMIPS64::VisitInvokePolymorphic(HInvokePolymorphic*
   codegen_->GenerateInvokePolymorphicCall(invoke);
 }
 
+void LocationsBuilderMIPS64::VisitInvokeCustom(HInvokeCustom* invoke) {
+  HandleInvoke(invoke);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitInvokeCustom(HInvokeCustom* invoke) {
+  codegen_->GenerateInvokeCustomCall(invoke);
+}
+
 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorMIPS64* codegen) {
   if (invoke->GetLocations()->Intrinsified()) {
     IntrinsicCodeGeneratorMIPS64 intrinsic(codegen);
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 6e69e4611a..fc0908b2cb 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -352,7 +352,6 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator {
 class CodeGeneratorMIPS64 : public CodeGenerator {
  public:
   CodeGeneratorMIPS64(HGraph* graph,
-                      const Mips64InstructionSetFeatures& isa_features,
                       const CompilerOptions& compiler_options,
                       OptimizingCompilerStats* stats = nullptr);
   virtual ~CodeGeneratorMIPS64() {}
@@ -484,9 +483,7 @@ class CodeGeneratorMIPS64 : public CodeGenerator {
 
   InstructionSet GetInstructionSet() const OVERRIDE { return InstructionSet::kMips64; }
 
-  const Mips64InstructionSetFeatures& GetInstructionSetFeatures() const {
-    return isa_features_;
-  }
+  const Mips64InstructionSetFeatures& GetInstructionSetFeatures() const;
 
   Mips64Label* GetLabelOf(HBasicBlock* block) const {
     return CommonGetLabelOf<Mips64Label>(block_labels_, block);
@@ -591,6 +588,8 @@ class CodeGeneratorMIPS64 : public CodeGenerator {
     DISALLOW_COPY_AND_ASSIGN(PcRelativePatchInfo);
   };
 
+  PcRelativePatchInfo* NewBootImageIntrinsicPatch(uint32_t intrinsic_data,
+                                                  const PcRelativePatchInfo* info_high = nullptr);
   PcRelativePatchInfo* NewBootImageRelRoPatch(uint32_t boot_image_offset,
                                               const PcRelativePatchInfo* info_high = nullptr);
   PcRelativePatchInfo* NewBootImageMethodPatch(MethodReference target_method,
@@ -615,6 +614,9 @@ class CodeGeneratorMIPS64 : public CodeGenerator {
                                             GpuRegister out,
                                             PcRelativePatchInfo* info_low = nullptr);
 
+  void LoadBootImageAddress(GpuRegister reg, uint32_t boot_image_reference);
+  void AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke, uint32_t boot_image_offset);
+
   void PatchJitRootUse(uint8_t* code,
                        const uint8_t* roots_data,
                        const Literal* literal,
@@ -655,7 +657,6 @@ class CodeGeneratorMIPS64 : public CodeGenerator {
   InstructionCodeGeneratorMIPS64 instruction_visitor_;
   ParallelMoveResolverMIPS64 move_resolver_;
   Mips64Assembler assembler_;
-  const Mips64InstructionSetFeatures& isa_features_;
 
   // Deduplication map for 32-bit literals, used for non-patchable boot image addresses.
   Uint32ToLiteralMap uint32_literals_;
@@ -675,6 +676,8 @@ class CodeGeneratorMIPS64 : public CodeGenerator {
   ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_;
   // PC-relative type patch info for kBssEntry.
   ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_;
+  // PC-relative patch info for IntrinsicObjects.
+  ArenaDeque<PcRelativePatchInfo> boot_image_intrinsic_patches_;
 
   // Patches for string root accesses in JIT compiled code.
   StringToLiteralMap jit_string_patches_;
diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc
index 6b0ec253e9..6d135a9bfb 100644
--- a/compiler/optimizing/code_generator_vector_arm64.cc
+++ b/compiler/optimizing/code_generator_vector_arm64.cc
@@ -29,7 +29,7 @@ using helpers::Arm64CanEncodeConstantAsImmediate;
 using helpers::DRegisterFrom;
 using helpers::HeapOperand;
 using helpers::InputRegisterAt;
-using helpers::Int64ConstantFrom;
+using helpers::Int64FromLocation;
 using helpers::OutputRegister;
 using helpers::VRegisterFrom;
 using helpers::WRegisterFrom;
@@ -78,7 +78,7 @@ void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar*
     case DataType::Type::kInt8:
       DCHECK_EQ(16u, instruction->GetVectorLength());
       if (src_loc.IsConstant()) {
-        __ Movi(dst.V16B(), Int64ConstantFrom(src_loc));
+        __ Movi(dst.V16B(), Int64FromLocation(src_loc));
       } else {
         __ Dup(dst.V16B(), InputRegisterAt(instruction, 0));
       }
@@ -87,7 +87,7 @@ void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar*
     case DataType::Type::kInt16:
       DCHECK_EQ(8u, instruction->GetVectorLength());
       if (src_loc.IsConstant()) {
-        __ Movi(dst.V8H(), Int64ConstantFrom(src_loc));
+        __ Movi(dst.V8H(), Int64FromLocation(src_loc));
       } else {
         __ Dup(dst.V8H(), InputRegisterAt(instruction, 0));
       }
@@ -95,7 +95,7 @@ void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar*
     case DataType::Type::kInt32:
       DCHECK_EQ(4u, instruction->GetVectorLength());
       if (src_loc.IsConstant()) {
-        __ Movi(dst.V4S(), Int64ConstantFrom(src_loc));
+        __ Movi(dst.V4S(), Int64FromLocation(src_loc));
       } else {
         __ Dup(dst.V4S(), InputRegisterAt(instruction, 0));
       }
@@ -103,7 +103,7 @@ void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar*
     case DataType::Type::kInt64:
       DCHECK_EQ(2u, instruction->GetVectorLength());
       if (src_loc.IsConstant()) {
-        __ Movi(dst.V2D(), Int64ConstantFrom(src_loc));
+        __ Movi(dst.V2D(), Int64FromLocation(src_loc));
       } else {
         __ Dup(dst.V2D(), XRegisterFrom(src_loc));
       }
@@ -1333,7 +1333,7 @@ MemOperand InstructionCodeGeneratorARM64::VecAddress(
   DCHECK(!instruction->InputAt(0)->IsIntermediateAddress());
 
   if (index.IsConstant()) {
-    offset += Int64ConstantFrom(index) << shift;
+    offset += Int64FromLocation(index) << shift;
     return HeapOperand(base, offset);
   } else {
     *scratch = temps_scope->AcquireSameSizeAs(base);
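VisitVecReplicateScalar encodes constant inputs directly in a Movi and broadcasts register inputs with Dup. For reference, the register case corresponds to a NEON splat; a hedged illustration using portable intrinsics (requires an AArch64 toolchain, not ART code):

#include <arm_neon.h>

// Sketch: what DUP Vd.4S, Wn computes - broadcast one 32-bit scalar
// into all four lanes of a 128-bit vector.
int32x4_t ReplicateScalar(int32_t value) {
  return vdupq_n_s32(value);
}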
diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc
index 086ae07a06..58808769e2 100644
--- a/compiler/optimizing/code_generator_vector_x86.cc
+++ b/compiler/optimizing/code_generator_vector_x86.cc
@@ -1125,13 +1125,59 @@ static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* in
   }
 }
 
-void LocationsBuilderX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
-  CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
+void LocationsBuilderX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
+  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr);
+  switch (instr->GetPackedType()) {
+    case DataType::Type::kFloat32:
+    case DataType::Type::kFloat64:
+      locations->SetInAt(
+          HVecMultiplyAccumulate::kInputAccumulatorIndex, Location::RequiresFpuRegister());
+      locations->SetInAt(
+          HVecMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresFpuRegister());
+      locations->SetInAt(
+          HVecMultiplyAccumulate::kInputMulRightIndex, Location::RequiresFpuRegister());
+      DCHECK_EQ(HVecMultiplyAccumulate::kInputAccumulatorIndex, 0);
+      locations->SetOut(Location::SameAsFirstInput());
+      break;
+    default:
+      // VecMultiplyAccumulate is supported only for single and
+      // double precision floating points. Hence integral types
+      // are still not converted.
+      LOG(FATAL) << "Unsupported SIMD Type";
+  }
 }
 
-void InstructionCodeGeneratorX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
-  // TODO: pmaddwd?
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+void InstructionCodeGeneratorX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
+  LocationSummary* locations = instr->GetLocations();
+  DCHECK(locations->InAt(0).Equals(locations->Out()));
+  XmmRegister accumulator = locations->InAt(
+      HVecMultiplyAccumulate::kInputAccumulatorIndex).AsFpuRegister<XmmRegister>();
+  XmmRegister mul_left = locations->InAt(
+      HVecMultiplyAccumulate::kInputMulLeftIndex).AsFpuRegister<XmmRegister>();
+  XmmRegister mul_right = locations->InAt(
+      HVecMultiplyAccumulate::kInputMulRightIndex).AsFpuRegister<XmmRegister>();
+  switch (instr->GetPackedType()) {
+    case DataType::Type::kFloat32:
+      DCHECK_EQ(4u, instr->GetVectorLength());
+      if (instr->GetOpKind() == HInstruction::InstructionKind::kAdd)
+        __ vfmadd231ps(accumulator, mul_left, mul_right);
+      else
+        __ vfmsub231ps(accumulator, mul_left, mul_right);
+      break;
+    case DataType::Type::kFloat64:
+      DCHECK_EQ(2u, instr->GetVectorLength());
+      if (instr->GetOpKind() == HInstruction::InstructionKind::kAdd)
+        __ vfmadd231pd(accumulator, mul_left, mul_right);
+      else
+        __ vfmsub231pd(accumulator, mul_left, mul_right);
+      break;
+    default:
+
+      // VecMultiplyAccumulate is supported only for single and
+      // double precision floating points. Hence integral types
+      // are still not converted.
+      LOG(FATAL) << "Unsupported SIMD Type";
+  }
 }
 
 void LocationsBuilderX86::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc
index 4d31ab68d1..4795e86933 100644
--- a/compiler/optimizing/code_generator_vector_x86_64.cc
+++ b/compiler/optimizing/code_generator_vector_x86_64.cc
@@ -1098,13 +1098,61 @@ static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* in
   }
 }
 
-void LocationsBuilderX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
-  CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
+void LocationsBuilderX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
+  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr);
+  switch (instr->GetPackedType()) {
+    case DataType::Type::kFloat32:
+    case DataType::Type::kFloat64:
+      locations->SetInAt(
+          HVecMultiplyAccumulate::kInputAccumulatorIndex, Location::RequiresFpuRegister());
+      locations->SetInAt(
+          HVecMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresFpuRegister());
+      locations->SetInAt(
+          HVecMultiplyAccumulate::kInputMulRightIndex, Location::RequiresFpuRegister());
+      DCHECK_EQ(HVecMultiplyAccumulate::kInputAccumulatorIndex, 0);
+      locations->SetOut(Location::SameAsFirstInput());
+      break;
+    default:
+      // VecMultiplyAccumulate is supported only for single and
+      // double precision floating points. Hence integral types
+      // are still not converted.
+      LOG(FATAL) << "Unsupported SIMD type";
+  }
 }
 
-void InstructionCodeGeneratorX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
-  // TODO: pmaddwd?
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+
+void InstructionCodeGeneratorX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
+  LocationSummary* locations = instr->GetLocations();
+  DCHECK(locations->InAt(0).Equals(locations->Out()));
+  XmmRegister accumulator = locations->InAt(
+      HVecMultiplyAccumulate::kInputAccumulatorIndex).AsFpuRegister<XmmRegister>();
+  XmmRegister mul_left = locations->InAt(
+      HVecMultiplyAccumulate::kInputMulLeftIndex).AsFpuRegister<XmmRegister>();
+  XmmRegister mul_right = locations->InAt(
+      HVecMultiplyAccumulate::kInputMulRightIndex).AsFpuRegister<XmmRegister>();
+
+  switch (instr->GetPackedType()) {
+    case DataType::Type::kFloat32:
+      DCHECK_EQ(4u, instr->GetVectorLength());
+      if (instr->GetOpKind() == HInstruction::InstructionKind::kAdd)
+        __ vfmadd231ps(accumulator, mul_left, mul_right);
+      else
+        __ vfmsub231ps(accumulator, mul_left, mul_right);
+      break;
+    case DataType::Type::kFloat64:
+      DCHECK_EQ(2u, instr->GetVectorLength());
+      if (instr->GetOpKind() == HInstruction::InstructionKind::kAdd)
+        __ vfmadd231pd(accumulator, mul_left, mul_right);
+      else
+        __ vfmsub231pd(accumulator, mul_left, mul_right);
+      break;
+    default:
+
+      // VecMultiplyAccumulate is supported only for single and
+      // double precision floating points. Hence integral types
+      // are still not converted.
+      LOG(FATAL) << "Unsupported SIMD Type";
+  }
 }
 
 void LocationsBuilderX86_64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
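Both new x86 lowerings map HVecMultiplyAccumulate onto the FMA3 "231" forms: vfmadd231 computes acc = mul_left * mul_right + acc and vfmsub231 computes acc = mul_left * mul_right - acc, each with a single rounding. The per-lane semantics, sketched with std::fma (illustrative C++, not ART code):

#include <cmath>

// Sketch: per-lane semantics of the FMA3 "231" forms used above.
// A hardware FMA rounds once, unlike a separate multiply then add.
float FmaddLane(float acc, float a, float b) { return std::fma(a, b, acc); }   // vfmadd231
float FmsubLane(float acc, float a, float b) { return std::fma(a, b, -acc); }  // vfmsub231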
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 9e315381b1..d189476a48 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -23,6 +23,7 @@
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "entrypoints/quick/quick_entrypoints_enum.h"
 #include "gc/accounting/card_table.h"
+#include "gc/space/image_space.h"
 #include "heap_poisoning.h"
 #include "intrinsics.h"
 #include "intrinsics_x86.h"
@@ -957,6 +958,10 @@ void CodeGeneratorX86::DumpFloatingPointRegister(std::ostream& stream, int reg)
   stream << XmmRegister(reg);
 }
 
+const X86InstructionSetFeatures& CodeGeneratorX86::GetInstructionSetFeatures() const {
+  return *GetCompilerOptions().GetInstructionSetFeatures()->AsX86InstructionSetFeatures();
+}
+
 size_t CodeGeneratorX86::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
   __ movl(Address(ESP, stack_index), static_cast<Register>(reg_id));
   return kX86WordSize;
@@ -1008,7 +1013,6 @@ void CodeGeneratorX86::GenerateInvokeRuntime(int32_t entry_point_offset) {
 }
 
 CodeGeneratorX86::CodeGeneratorX86(HGraph* graph,
-                                   const X86InstructionSetFeatures& isa_features,
                                    const CompilerOptions& compiler_options,
                                    OptimizingCompilerStats* stats)
     : CodeGenerator(graph,
@@ -1026,13 +1030,13 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph,
       instruction_visitor_(graph, this),
       move_resolver_(graph->GetAllocator(), this),
       assembler_(graph->GetAllocator()),
-      isa_features_(isa_features),
       boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
       method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
       boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
       type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
       string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+      boot_image_intrinsic_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
       jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
       jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
       constant_area_start_(-1),
@@ -2188,7 +2192,9 @@ void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invok
   IntrinsicLocationsBuilderX86 intrinsic(codegen_);
   if (intrinsic.TryDispatch(invoke)) {
-    if (invoke->GetLocations()->CanCall() && invoke->HasPcRelativeMethodLoadKind()) {
+    if (invoke->GetLocations()->CanCall() &&
+        invoke->HasPcRelativeMethodLoadKind() &&
+        invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).IsInvalid()) {
       invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any());
     }
     return;
@@ -2311,6 +2317,14 @@ void InstructionCodeGeneratorX86::VisitInvokePolymorphic(HInvokePolymorphic* inv
   codegen_->GenerateInvokePolymorphicCall(invoke);
 }
 
+void LocationsBuilderX86::VisitInvokeCustom(HInvokeCustom* invoke) {
+  HandleInvoke(invoke);
+}
+
+void InstructionCodeGeneratorX86::VisitInvokeCustom(HInvokeCustom* invoke) {
+  codegen_->GenerateInvokeCustomCall(invoke);
+}
+
 void LocationsBuilderX86::VisitNeg(HNeg* neg) {
   LocationSummary* locations =
       new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
@@ -4902,6 +4916,13 @@ void CodeGeneratorX86::GenerateVirtualCall(
   RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
 }
 
+void CodeGeneratorX86::RecordBootImageIntrinsicPatch(HX86ComputeBaseMethodAddress* method_address,
+                                                     uint32_t intrinsic_data) {
+  boot_image_intrinsic_patches_.emplace_back(
+      method_address, /* target_dex_file */ nullptr, intrinsic_data);
+  __ Bind(&boot_image_intrinsic_patches_.back().label);
+}
+
 void CodeGeneratorX86::RecordBootImageRelRoPatch(HX86ComputeBaseMethodAddress* method_address,
                                                  uint32_t boot_image_offset) {
   boot_image_method_patches_.emplace_back(
@@ -4961,6 +4982,62 @@ Label* CodeGeneratorX86::NewStringBssEntryPatch(HLoadString* load_string) {
   return &string_bss_entry_patches_.back().label;
 }
 
+void CodeGeneratorX86::LoadBootImageAddress(Register reg,
+                                            uint32_t boot_image_reference,
+                                            HInvokeStaticOrDirect* invoke) {
+  if (GetCompilerOptions().IsBootImage()) {
+    DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
+    HX86ComputeBaseMethodAddress* method_address =
+        invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
+    DCHECK(method_address != nullptr);
+    Register method_address_reg =
+        invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>();
+    __ leal(reg, Address(method_address_reg, CodeGeneratorX86::kDummy32BitOffset));
+    RecordBootImageIntrinsicPatch(method_address, boot_image_reference);
+  } else if (GetCompilerOptions().GetCompilePic()) {
+    DCHECK(Runtime::Current()->IsAotCompiler());
+    DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
+    HX86ComputeBaseMethodAddress* method_address =
+        invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
+    DCHECK(method_address != nullptr);
+    Register method_address_reg =
+        invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>();
+    __ movl(reg, Address(method_address_reg, CodeGeneratorX86::kDummy32BitOffset));
+    RecordBootImageRelRoPatch(method_address, boot_image_reference);
+  } else {
+    gc::Heap* heap = Runtime::Current()->GetHeap();
+    DCHECK(!heap->GetBootImageSpaces().empty());
+    const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference;
+    __ movl(reg, Immediate(dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(address))));
+  }
+}
+
+void CodeGeneratorX86::AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke,
+                                                    uint32_t boot_image_offset) {
+  DCHECK(invoke->IsStatic());
+  InvokeRuntimeCallingConvention calling_convention;
+  Register argument = calling_convention.GetRegisterAt(0);
+  if (GetCompilerOptions().IsBootImage()) {
+    DCHECK_EQ(boot_image_offset, IntrinsicVisitor::IntegerValueOfInfo::kInvalidReference);
+    // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
+    DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
+    HX86ComputeBaseMethodAddress* method_address =
+        invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
+    DCHECK(method_address != nullptr);
+    Register method_address_reg =
+        invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>();
+    __ leal(argument, Address(method_address_reg, CodeGeneratorX86::kDummy32BitOffset));
+    MethodReference target_method = invoke->GetTargetMethod();
+    dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
+    boot_image_type_patches_.emplace_back(method_address, target_method.dex_file, type_idx.index_);
+    __ Bind(&boot_image_type_patches_.back().label);
+  } else {
+    LoadBootImageAddress(argument, boot_image_offset, invoke);
+  }
+  InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+  CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+}
+
 // The label points to the end of the "movl" or another instruction but the literal offset
 // for method patch needs to point to the embedded constant which occupies the last 4 bytes.
 constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
@@ -4978,12 +5055,13 @@ inline void CodeGeneratorX86::EmitPcRelativeLinkerPatches(
   }
 }
 
-linker::LinkerPatch DataBimgRelRoPatchAdapter(size_t literal_offset,
-                                              const DexFile* target_dex_file,
-                                              uint32_t pc_insn_offset,
-                                              uint32_t boot_image_offset) {
-  DCHECK(target_dex_file == nullptr);  // Unused for DataBimgRelRoPatch(), should be null.
-  return linker::LinkerPatch::DataBimgRelRoPatch(literal_offset, pc_insn_offset, boot_image_offset);
+template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
+linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
+                                     const DexFile* target_dex_file,
+                                     uint32_t pc_insn_offset,
+                                     uint32_t boot_image_offset) {
+  DCHECK(target_dex_file == nullptr);  // Unused for these patches, should be null.
+ return Factory(literal_offset, pc_insn_offset, boot_image_offset); } void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) { @@ -4994,7 +5072,8 @@ void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linke boot_image_type_patches_.size() + type_bss_entry_patches_.size() + boot_image_string_patches_.size() + - string_bss_entry_patches_.size(); + string_bss_entry_patches_.size() + + boot_image_intrinsic_patches_.size(); linker_patches->reserve(size); if (GetCompilerOptions().IsBootImage()) { EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>( @@ -5003,11 +5082,14 @@ void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linke boot_image_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( boot_image_string_patches_, linker_patches); + EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>( + boot_image_intrinsic_patches_, linker_patches); } else { - EmitPcRelativeLinkerPatches<DataBimgRelRoPatchAdapter>( + EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>( boot_image_method_patches_, linker_patches); DCHECK(boot_image_type_patches_.empty()); DCHECK(boot_image_string_patches_.empty()); + DCHECK(boot_image_intrinsic_patches_.empty()); } EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( method_bss_entry_patches_, linker_patches); diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 6c76e27d35..cb58e920ea 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -316,7 +316,6 @@ class JumpTableRIPFixup; class CodeGeneratorX86 : public CodeGenerator { public: CodeGeneratorX86(HGraph* graph, - const X86InstructionSetFeatures& isa_features, const CompilerOptions& compiler_options, OptimizingCompilerStats* stats = nullptr); virtual ~CodeGeneratorX86() {} @@ -390,6 +389,8 @@ class CodeGeneratorX86 : public CodeGenerator { return InstructionSet::kX86; } + const X86InstructionSetFeatures& GetInstructionSetFeatures() const; + // Helper method to move a 32bits value between two locations. void Move32(Location destination, Location source); // Helper method to move a 64bits value between two locations. 
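The NoDexFileAdapter change above replaces a hand-written adapter with a function-pointer template, so every three-argument LinkerPatch factory can be plugged into the same four-argument emission loop. For illustration, a minimal self-contained sketch of that pattern, using simplified stand-in types rather than the actual linker::LinkerPatch API:

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    // Stand-in for linker::LinkerPatch, simplified for illustration.
    struct Patch { size_t literal_offset; uint32_t pc_insn_offset; uint32_t data; };

    // Two factories sharing the narrow three-argument signature.
    Patch MakeRelRoPatch(size_t off, uint32_t pc, uint32_t data) { return {off, pc, data}; }
    Patch MakeIntrinsicPatch(size_t off, uint32_t pc, uint32_t data) { return {off, pc, data}; }

    // One adapter template widens any such factory to the four-argument shape
    // the shared emission loop expects; the dex-file slot must stay null.
    template <Patch (*Factory)(size_t, uint32_t, uint32_t)>
    Patch NoDexFileAdapter(size_t off, const void* target_dex_file, uint32_t pc, uint32_t data) {
      assert(target_dex_file == nullptr);  // Unused for these patch kinds.
      return Factory(off, pc, data);
    }

    int main() {
      // Instantiate once per factory instead of writing one adapter function each.
      Patch p = NoDexFileAdapter<MakeRelRoPatch>(/*off=*/8, /*dex=*/nullptr, /*pc=*/4, /*data=*/42);
      return static_cast<int>(p.data) == 42 ? 0 : 1;
    }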
@@ -418,6 +419,8 @@ class CodeGeneratorX86 : public CodeGenerator { void GenerateVirtualCall( HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; + void RecordBootImageIntrinsicPatch(HX86ComputeBaseMethodAddress* method_address, + uint32_t intrinsic_data); void RecordBootImageRelRoPatch(HX86ComputeBaseMethodAddress* method_address, uint32_t boot_image_offset); void RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke); @@ -426,6 +429,12 @@ class CodeGeneratorX86 : public CodeGenerator { Label* NewTypeBssEntryPatch(HLoadClass* load_class); void RecordBootImageStringPatch(HLoadString* load_string); Label* NewStringBssEntryPatch(HLoadString* load_string); + + void LoadBootImageAddress(Register reg, + uint32_t boot_image_reference, + HInvokeStaticOrDirect* invoke); + void AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke, uint32_t boot_image_offset); + Label* NewJitRootStringPatch(const DexFile& dex_file, dex::StringIndex string_index, Handle<mirror::String> handle); @@ -469,10 +478,6 @@ class CodeGeneratorX86 : public CodeGenerator { Label* GetFrameEntryLabel() { return &frame_entry_label_; } - const X86InstructionSetFeatures& GetInstructionSetFeatures() const { - return isa_features_; - } - void AddMethodAddressOffset(HX86ComputeBaseMethodAddress* method_base, int32_t offset) { method_address_offset_.Put(method_base->GetId(), offset); } @@ -635,7 +640,6 @@ class CodeGeneratorX86 : public CodeGenerator { InstructionCodeGeneratorX86 instruction_visitor_; ParallelMoveResolverX86 move_resolver_; X86Assembler assembler_; - const X86InstructionSetFeatures& isa_features_; // PC-relative method patch info for kBootImageLinkTimePcRelative/kBootImageRelRo. // Also used for type/string patches for kBootImageRelRo (same linker patch as for methods). @@ -650,6 +654,8 @@ class CodeGeneratorX86 : public CodeGenerator { ArenaDeque<X86PcRelativePatchInfo> boot_image_string_patches_; // PC-relative String patch info for kBssEntry. ArenaDeque<X86PcRelativePatchInfo> string_bss_entry_patches_; + // PC-relative patch info for IntrinsicObjects. + ArenaDeque<X86PcRelativePatchInfo> boot_image_intrinsic_patches_; // Patches for string root accesses in JIT compiled code. 
ArenaDeque<PatchInfo<Label>> jit_string_patches_; diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index f7397046d7..bea3da070a 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -22,6 +22,7 @@ #include "compiled_method.h" #include "entrypoints/quick/quick_entrypoints.h" #include "gc/accounting/card_table.h" +#include "gc/space/image_space.h" #include "heap_poisoning.h" #include "intrinsics.h" #include "intrinsics_x86_64.h" @@ -1066,6 +1067,11 @@ void CodeGeneratorX86_64::GenerateVirtualCall( RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); } +void CodeGeneratorX86_64::RecordBootImageIntrinsicPatch(uint32_t intrinsic_data) { + boot_image_intrinsic_patches_.emplace_back(/* target_dex_file */ nullptr, intrinsic_data); + __ Bind(&boot_image_intrinsic_patches_.back().label); +} + void CodeGeneratorX86_64::RecordBootImageRelRoPatch(uint32_t boot_image_offset) { boot_image_method_patches_.emplace_back(/* target_dex_file */ nullptr, boot_image_offset); __ Bind(&boot_image_method_patches_.back().label); @@ -1107,6 +1113,43 @@ Label* CodeGeneratorX86_64::NewStringBssEntryPatch(HLoadString* load_string) { return &string_bss_entry_patches_.back().label; } +void CodeGeneratorX86_64::LoadBootImageAddress(CpuRegister reg, uint32_t boot_image_reference) { + if (GetCompilerOptions().IsBootImage()) { + __ leal(reg, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false)); + RecordBootImageIntrinsicPatch(boot_image_reference); + } else if (GetCompilerOptions().GetCompilePic()) { + DCHECK(Runtime::Current()->IsAotCompiler()); + __ movl(reg, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false)); + RecordBootImageRelRoPatch(boot_image_reference); + } else { + gc::Heap* heap = Runtime::Current()->GetHeap(); + DCHECK(!heap->GetBootImageSpaces().empty()); + const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference; + __ movl(reg, Immediate(dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(address)))); + } +} + +void CodeGeneratorX86_64::AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke, + uint32_t boot_image_offset) { + DCHECK(invoke->IsStatic()); + InvokeRuntimeCallingConvention calling_convention; + CpuRegister argument = CpuRegister(calling_convention.GetRegisterAt(0)); + if (GetCompilerOptions().IsBootImage()) { + DCHECK_EQ(boot_image_offset, IntrinsicVisitor::IntegerValueOfInfo::kInvalidReference); + // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative. + __ leal(argument, + Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false)); + MethodReference target_method = invoke->GetTargetMethod(); + dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_; + boot_image_type_patches_.emplace_back(target_method.dex_file, type_idx.index_); + __ Bind(&boot_image_type_patches_.back().label); + } else { + LoadBootImageAddress(argument, boot_image_offset); + } + InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); +} + // The label points to the end of the "movl" or another instruction but the literal offset // for method patch needs to point to the embedded constant which occupies the last 4 bytes. 
constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u; @@ -1122,12 +1165,13 @@ inline void CodeGeneratorX86_64::EmitPcRelativeLinkerPatches( } } -linker::LinkerPatch DataBimgRelRoPatchAdapter(size_t literal_offset, - const DexFile* target_dex_file, - uint32_t pc_insn_offset, - uint32_t boot_image_offset) { - DCHECK(target_dex_file == nullptr); // Unused for DataBimgRelRoPatch(), should be null. - return linker::LinkerPatch::DataBimgRelRoPatch(literal_offset, pc_insn_offset, boot_image_offset); +template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)> +linker::LinkerPatch NoDexFileAdapter(size_t literal_offset, + const DexFile* target_dex_file, + uint32_t pc_insn_offset, + uint32_t boot_image_offset) { + DCHECK(target_dex_file == nullptr); // Unused for these patches, should be null. + return Factory(literal_offset, pc_insn_offset, boot_image_offset); } void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) { @@ -1138,7 +1182,8 @@ void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* li boot_image_type_patches_.size() + type_bss_entry_patches_.size() + boot_image_string_patches_.size() + - string_bss_entry_patches_.size(); + string_bss_entry_patches_.size() + + boot_image_intrinsic_patches_.size(); linker_patches->reserve(size); if (GetCompilerOptions().IsBootImage()) { EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>( @@ -1147,11 +1192,14 @@ void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* li boot_image_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( boot_image_string_patches_, linker_patches); + EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>( + boot_image_intrinsic_patches_, linker_patches); } else { - EmitPcRelativeLinkerPatches<DataBimgRelRoPatchAdapter>( + EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>( boot_image_method_patches_, linker_patches); DCHECK(boot_image_type_patches_.empty()); DCHECK(boot_image_string_patches_.empty()); + DCHECK(boot_image_intrinsic_patches_.empty()); } EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( method_bss_entry_patches_, linker_patches); @@ -1170,6 +1218,10 @@ void CodeGeneratorX86_64::DumpFloatingPointRegister(std::ostream& stream, int re stream << FloatRegister(reg); } +const X86_64InstructionSetFeatures& CodeGeneratorX86_64::GetInstructionSetFeatures() const { + return *GetCompilerOptions().GetInstructionSetFeatures()->AsX86_64InstructionSetFeatures(); +} + size_t CodeGeneratorX86_64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) { __ movq(Address(CpuRegister(RSP), stack_index), CpuRegister(reg_id)); return kX86_64WordSize; @@ -1224,7 +1276,6 @@ static constexpr int kNumberOfCpuRegisterPairs = 0; // Use a fake return address register to mimic Quick. 
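To make the four-byte adjustment used by both back ends concrete: Bind() records the label at the end of an instruction such as "movl reg, imm32", and the imm32 the linker rewrites occupies that instruction's last four bytes. A worked example under an assumed code offset (the 0x20 is ours, chosen for illustration):

    #include <cstdint>

    constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;

    // Assume the movl ends at code offset 0x20; its imm32 then spans [0x1C, 0x20).
    constexpr uint32_t label_position = 0x20;
    constexpr uint32_t literal_offset = label_position - kLabelPositionToLiteralOffsetAdjustment;
    static_assert(literal_offset == 0x1C, "the literal starts 4 bytes before the bound label");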
static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1); CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, - const X86_64InstructionSetFeatures& isa_features, const CompilerOptions& compiler_options, OptimizingCompilerStats* stats) : CodeGenerator(graph, @@ -1243,7 +1294,6 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, instruction_visitor_(graph, this), move_resolver_(graph->GetAllocator(), this), assembler_(graph->GetAllocator()), - isa_features_(isa_features), constant_area_start_(0), boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), @@ -1251,6 +1301,7 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_intrinsic_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), fixups_to_jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) { @@ -2501,6 +2552,14 @@ void InstructionCodeGeneratorX86_64::VisitInvokePolymorphic(HInvokePolymorphic* codegen_->GenerateInvokePolymorphicCall(invoke); } +void LocationsBuilderX86_64::VisitInvokeCustom(HInvokeCustom* invoke) { + HandleInvoke(invoke); +} + +void InstructionCodeGeneratorX86_64::VisitInvokeCustom(HInvokeCustom* invoke) { + codegen_->GenerateInvokeCustomCall(invoke); +} + void LocationsBuilderX86_64::VisitNeg(HNeg* neg) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall); diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 9a4c53b524..5ba7f9cb71 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -296,7 +296,6 @@ class JumpTableRIPFixup; class CodeGeneratorX86_64 : public CodeGenerator { public: CodeGeneratorX86_64(HGraph* graph, - const X86_64InstructionSetFeatures& isa_features, const CompilerOptions& compiler_options, OptimizingCompilerStats* stats = nullptr); virtual ~CodeGeneratorX86_64() {} @@ -370,6 +369,8 @@ class CodeGeneratorX86_64 : public CodeGenerator { return InstructionSet::kX86_64; } + const X86_64InstructionSetFeatures& GetInstructionSetFeatures() const; + // Emit a write barrier. 
void MarkGCCard(CpuRegister temp, CpuRegister card, @@ -415,6 +416,7 @@ class CodeGeneratorX86_64 : public CodeGenerator { void GenerateVirtualCall( HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; + void RecordBootImageIntrinsicPatch(uint32_t intrinsic_data); void RecordBootImageRelRoPatch(uint32_t boot_image_offset); void RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke); void RecordMethodBssEntryPatch(HInvokeStaticOrDirect* invoke); @@ -429,7 +431,8 @@ class CodeGeneratorX86_64 : public CodeGenerator { dex::TypeIndex type_index, Handle<mirror::Class> handle); - void MoveFromReturnRegister(Location trg, DataType::Type type) OVERRIDE; + void LoadBootImageAddress(CpuRegister reg, uint32_t boot_image_reference); + void AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke, uint32_t boot_image_offset); void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) OVERRIDE; @@ -440,10 +443,6 @@ class CodeGeneratorX86_64 : public CodeGenerator { void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE; - const X86_64InstructionSetFeatures& GetInstructionSetFeatures() const { - return isa_features_; - } - // Fast path implementation of ReadBarrier::Barrier for a heap // reference field load when Baker's read barriers are used. void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, @@ -566,6 +565,8 @@ class CodeGeneratorX86_64 : public CodeGenerator { // Store a 64 bit value into a DoubleStackSlot in the most efficient manner. void Store64BitValueToStack(Location dest, int64_t value); + void MoveFromReturnRegister(Location trg, DataType::Type type) OVERRIDE; + // Assign a 64 bit constant to an address. void MoveInt64ToAddress(const Address& addr_low, const Address& addr_high, @@ -604,7 +605,6 @@ class CodeGeneratorX86_64 : public CodeGenerator { InstructionCodeGeneratorX86_64 instruction_visitor_; ParallelMoveResolverX86_64 move_resolver_; X86_64Assembler assembler_; - const X86_64InstructionSetFeatures& isa_features_; // Offset to the start of the constant area in the assembled code. // Used for fixups to the constant area. @@ -623,6 +623,8 @@ class CodeGeneratorX86_64 : public CodeGenerator { ArenaDeque<PatchInfo<Label>> boot_image_string_patches_; // PC-relative String patch info for kBssEntry. ArenaDeque<PatchInfo<Label>> string_bss_entry_patches_; + // PC-relative patch info for IntrinsicObjects. + ArenaDeque<PatchInfo<Label>> boot_image_intrinsic_patches_; // Patches for string literals in JIT compiled code. ArenaDeque<PatchInfo<Label>> jit_string_patches_; diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc index a0fd5ffcb1..86687e60a9 100644 --- a/compiler/optimizing/codegen_test.cc +++ b/compiler/optimizing/codegen_test.cc @@ -89,7 +89,8 @@ void CodegenTest::TestCode(const std::vector<uint16_t>& data, bool has_result, i HGraph* graph = CreateCFG(data); // Remove suspend checks, they cannot be executed in this context. RemoveSuspendChecks(graph); - RunCode(target_config, graph, [](HGraph*) {}, has_result, expected); + OverrideInstructionSetFeatures(target_config.GetInstructionSet(), "default"); + RunCode(target_config, *compiler_options_, graph, [](HGraph*) {}, has_result, expected); } } @@ -100,7 +101,8 @@ void CodegenTest::TestCodeLong(const std::vector<uint16_t>& data, HGraph* graph = CreateCFG(data, DataType::Type::kInt64); // Remove suspend checks, they cannot be executed in this context. 
RemoveSuspendChecks(graph); - RunCode(target_config, graph, [](HGraph*) {}, has_result, expected); + OverrideInstructionSetFeatures(target_config.GetInstructionSet(), "default"); + RunCode(target_config, *compiler_options_, graph, [](HGraph*) {}, has_result, expected); } } @@ -460,7 +462,8 @@ TEST_F(CodegenTest, NonMaterializedCondition) { block->InsertInstructionBefore(move, block->GetLastInstruction()); }; - RunCode(target_config, graph, hook_before_codegen, true, 0); + OverrideInstructionSetFeatures(target_config.GetInstructionSet(), "default"); + RunCode(target_config, *compiler_options_, graph, hook_before_codegen, true, 0); } } @@ -506,7 +509,8 @@ TEST_F(CodegenTest, MaterializedCondition1) { new (graph_in->GetAllocator()) HParallelMove(graph_in->GetAllocator()); block->InsertInstructionBefore(move, block->GetLastInstruction()); }; - RunCode(target_config, graph, hook_before_codegen, true, lhs[i] < rhs[i]); + OverrideInstructionSetFeatures(target_config.GetInstructionSet(), "default"); + RunCode(target_config, *compiler_options_, graph, hook_before_codegen, true, lhs[i] < rhs[i]); } } } @@ -573,7 +577,8 @@ TEST_F(CodegenTest, MaterializedCondition2) { new (graph_in->GetAllocator()) HParallelMove(graph_in->GetAllocator()); block->InsertInstructionBefore(move, block->GetLastInstruction()); }; - RunCode(target_config, graph, hook_before_codegen, true, lhs[i] < rhs[i]); + OverrideInstructionSetFeatures(target_config.GetInstructionSet(), "default"); + RunCode(target_config, *compiler_options_, graph, hook_before_codegen, true, lhs[i] < rhs[i]); } } } @@ -682,7 +687,8 @@ void CodegenTest::TestComparison(IfCondition condition, block->AddInstruction(new (GetAllocator()) HReturn(comparison)); graph->BuildDominatorTree(); - RunCode(target_config, graph, [](HGraph*) {}, true, expected_result); + OverrideInstructionSetFeatures(target_config.GetInstructionSet(), "default"); + RunCode(target_config, *compiler_options_, graph, [](HGraph*) {}, true, expected_result); } TEST_F(CodegenTest, ComparisonsInt) { @@ -713,10 +719,9 @@ TEST_F(CodegenTest, ComparisonsLong) { #ifdef ART_ENABLE_CODEGEN_arm TEST_F(CodegenTest, ARMVIXLParallelMoveResolver) { - std::unique_ptr<const ArmInstructionSetFeatures> features( - ArmInstructionSetFeatures::FromCppDefines()); + OverrideInstructionSetFeatures(InstructionSet::kThumb2, "default"); HGraph* graph = CreateGraph(); - arm::CodeGeneratorARMVIXL codegen(graph, *features.get(), CompilerOptions()); + arm::CodeGeneratorARMVIXL codegen(graph, *compiler_options_); codegen.Initialize(); @@ -737,10 +742,9 @@ TEST_F(CodegenTest, ARMVIXLParallelMoveResolver) { #ifdef ART_ENABLE_CODEGEN_arm64 // Regression test for b/34760542. TEST_F(CodegenTest, ARM64ParallelMoveResolverB34760542) { - std::unique_ptr<const Arm64InstructionSetFeatures> features( - Arm64InstructionSetFeatures::FromCppDefines()); + OverrideInstructionSetFeatures(InstructionSet::kArm64, "default"); HGraph* graph = CreateGraph(); - arm64::CodeGeneratorARM64 codegen(graph, *features.get(), CompilerOptions()); + arm64::CodeGeneratorARM64 codegen(graph, *compiler_options_); codegen.Initialize(); @@ -787,10 +791,9 @@ TEST_F(CodegenTest, ARM64ParallelMoveResolverB34760542) { // Check that ParallelMoveResolver works fine for ARM64 for both cases when SIMD is on and off. 
TEST_F(CodegenTest, ARM64ParallelMoveResolverSIMD) { - std::unique_ptr<const Arm64InstructionSetFeatures> features( - Arm64InstructionSetFeatures::FromCppDefines()); + OverrideInstructionSetFeatures(InstructionSet::kArm64, "default"); HGraph* graph = CreateGraph(); - arm64::CodeGeneratorARM64 codegen(graph, *features.get(), CompilerOptions()); + arm64::CodeGeneratorARM64 codegen(graph, *compiler_options_); codegen.Initialize(); @@ -824,9 +827,9 @@ TEST_F(CodegenTest, ARM64ParallelMoveResolverSIMD) { #ifdef ART_ENABLE_CODEGEN_mips TEST_F(CodegenTest, MipsClobberRA) { - std::unique_ptr<const MipsInstructionSetFeatures> features_mips( - MipsInstructionSetFeatures::FromCppDefines()); - if (!CanExecute(InstructionSet::kMips) || features_mips->IsR6()) { + OverrideInstructionSetFeatures(InstructionSet::kMips, "mips32r"); + CHECK(!instruction_set_features_->AsMipsInstructionSetFeatures()->IsR6()); + if (!CanExecute(InstructionSet::kMips)) { // HMipsComputeBaseMethodAddress and the NAL instruction behind it // should only be generated on non-R6. return; @@ -860,7 +863,7 @@ TEST_F(CodegenTest, MipsClobberRA) { graph->BuildDominatorTree(); - mips::CodeGeneratorMIPS codegenMIPS(graph, *features_mips.get(), CompilerOptions()); + mips::CodeGeneratorMIPS codegenMIPS(graph, *compiler_options_); // Since there isn't HLoadClass or HLoadString, we need to manually indicate // that RA is clobbered and the method entry code should generate a stack frame // and preserve RA in it. And this is what we're testing here. diff --git a/compiler/optimizing/codegen_test_utils.h b/compiler/optimizing/codegen_test_utils.h index 792cfb539a..91811262de 100644 --- a/compiler/optimizing/codegen_test_utils.h +++ b/compiler/optimizing/codegen_test_utils.h @@ -17,17 +17,11 @@ #ifndef ART_COMPILER_OPTIMIZING_CODEGEN_TEST_UTILS_H_ #define ART_COMPILER_OPTIMIZING_CODEGEN_TEST_UTILS_H_ -#include "arch/arm/instruction_set_features_arm.h" #include "arch/arm/registers_arm.h" -#include "arch/arm64/instruction_set_features_arm64.h" #include "arch/instruction_set.h" -#include "arch/mips/instruction_set_features_mips.h" #include "arch/mips/registers_mips.h" -#include "arch/mips64/instruction_set_features_mips64.h" #include "arch/mips64/registers_mips64.h" -#include "arch/x86/instruction_set_features_x86.h" #include "arch/x86/registers_x86.h" -#include "arch/x86_64/instruction_set_features_x86_64.h" #include "code_simulator.h" #include "code_simulator_container.h" #include "common_compiler_test.h" @@ -101,10 +95,8 @@ class CodegenTargetConfig { // to just overwrite the code generator. class TestCodeGeneratorARMVIXL : public arm::CodeGeneratorARMVIXL { public: - TestCodeGeneratorARMVIXL(HGraph* graph, - const ArmInstructionSetFeatures& isa_features, - const CompilerOptions& compiler_options) - : arm::CodeGeneratorARMVIXL(graph, isa_features, compiler_options) { + TestCodeGeneratorARMVIXL(HGraph* graph, const CompilerOptions& compiler_options) + : arm::CodeGeneratorARMVIXL(graph, compiler_options) { AddAllocatedRegister(Location::RegisterLocation(arm::R6)); AddAllocatedRegister(Location::RegisterLocation(arm::R7)); } @@ -145,10 +137,8 @@ class TestCodeGeneratorARMVIXL : public arm::CodeGeneratorARMVIXL { // function. 
class TestCodeGeneratorARM64 : public arm64::CodeGeneratorARM64 { public: - TestCodeGeneratorARM64(HGraph* graph, - const Arm64InstructionSetFeatures& isa_features, - const CompilerOptions& compiler_options) - : arm64::CodeGeneratorARM64(graph, isa_features, compiler_options) {} + TestCodeGeneratorARM64(HGraph* graph, const CompilerOptions& compiler_options) + : arm64::CodeGeneratorARM64(graph, compiler_options) {} void MaybeGenerateMarkingRegisterCheck(int code ATTRIBUTE_UNUSED, Location temp_loc ATTRIBUTE_UNUSED) OVERRIDE { @@ -165,10 +155,8 @@ class TestCodeGeneratorARM64 : public arm64::CodeGeneratorARM64 { #ifdef ART_ENABLE_CODEGEN_x86 class TestCodeGeneratorX86 : public x86::CodeGeneratorX86 { public: - TestCodeGeneratorX86(HGraph* graph, - const X86InstructionSetFeatures& isa_features, - const CompilerOptions& compiler_options) - : x86::CodeGeneratorX86(graph, isa_features, compiler_options) { + TestCodeGeneratorX86(HGraph* graph, const CompilerOptions& compiler_options) + : x86::CodeGeneratorX86(graph, compiler_options) { // Save edi, we need it for getting enough registers for long multiplication. AddAllocatedRegister(Location::RegisterLocation(x86::EDI)); } @@ -324,11 +312,11 @@ static void RunCode(CodeGenerator* codegen, template <typename Expected> static void RunCode(CodegenTargetConfig target_config, + const CompilerOptions& compiler_options, HGraph* graph, std::function<void(HGraph*)> hook_before_codegen, bool has_result, Expected expected) { - CompilerOptions compiler_options; std::unique_ptr<CodeGenerator> codegen(target_config.CreateCodeGenerator(graph, compiler_options)); RunCode(codegen.get(), graph, hook_before_codegen, has_result, expected); @@ -336,55 +324,37 @@ static void RunCode(CodegenTargetConfig target_config, #ifdef ART_ENABLE_CODEGEN_arm CodeGenerator* create_codegen_arm_vixl32(HGraph* graph, const CompilerOptions& compiler_options) { - std::unique_ptr<const ArmInstructionSetFeatures> features_arm( - ArmInstructionSetFeatures::FromCppDefines()); - return new (graph->GetAllocator()) - TestCodeGeneratorARMVIXL(graph, *features_arm.get(), compiler_options); + return new (graph->GetAllocator()) TestCodeGeneratorARMVIXL(graph, compiler_options); } #endif #ifdef ART_ENABLE_CODEGEN_arm64 CodeGenerator* create_codegen_arm64(HGraph* graph, const CompilerOptions& compiler_options) { - std::unique_ptr<const Arm64InstructionSetFeatures> features_arm64( - Arm64InstructionSetFeatures::FromCppDefines()); - return new (graph->GetAllocator()) - TestCodeGeneratorARM64(graph, *features_arm64.get(), compiler_options); + return new (graph->GetAllocator()) TestCodeGeneratorARM64(graph, compiler_options); } #endif #ifdef ART_ENABLE_CODEGEN_x86 CodeGenerator* create_codegen_x86(HGraph* graph, const CompilerOptions& compiler_options) { - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - return new (graph->GetAllocator()) TestCodeGeneratorX86( - graph, *features_x86.get(), compiler_options); + return new (graph->GetAllocator()) TestCodeGeneratorX86(graph, compiler_options); } #endif #ifdef ART_ENABLE_CODEGEN_x86_64 CodeGenerator* create_codegen_x86_64(HGraph* graph, const CompilerOptions& compiler_options) { - std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64( - X86_64InstructionSetFeatures::FromCppDefines()); - return new (graph->GetAllocator()) - x86_64::CodeGeneratorX86_64(graph, *features_x86_64.get(), compiler_options); + return new (graph->GetAllocator()) x86_64::CodeGeneratorX86_64(graph, 
compiler_options); } #endif #ifdef ART_ENABLE_CODEGEN_mips CodeGenerator* create_codegen_mips(HGraph* graph, const CompilerOptions& compiler_options) { - std::unique_ptr<const MipsInstructionSetFeatures> features_mips( - MipsInstructionSetFeatures::FromCppDefines()); - return new (graph->GetAllocator()) - mips::CodeGeneratorMIPS(graph, *features_mips.get(), compiler_options); + return new (graph->GetAllocator()) mips::CodeGeneratorMIPS(graph, compiler_options); } #endif #ifdef ART_ENABLE_CODEGEN_mips64 CodeGenerator* create_codegen_mips64(HGraph* graph, const CompilerOptions& compiler_options) { - std::unique_ptr<const Mips64InstructionSetFeatures> features_mips64( - Mips64InstructionSetFeatures::FromCppDefines()); - return new (graph->GetAllocator()) - mips64::CodeGeneratorMIPS64(graph, *features_mips64.get(), compiler_options); + return new (graph->GetAllocator()) mips64::CodeGeneratorMIPS64(graph, compiler_options); } #endif diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h index ed2f8e995d..5556f16740 100644 --- a/compiler/optimizing/common_arm64.h +++ b/compiler/optimizing/common_arm64.h @@ -151,23 +151,15 @@ inline vixl::aarch64::CPURegister InputCPURegisterOrZeroRegAt(HInstruction* inst return InputCPURegisterAt(instr, index); } -inline int64_t Int64ConstantFrom(Location location) { - HConstant* instr = location.GetConstant(); - if (instr->IsIntConstant()) { - return instr->AsIntConstant()->GetValue(); - } else if (instr->IsNullConstant()) { - return 0; - } else { - DCHECK(instr->IsLongConstant()) << instr->DebugName(); - return instr->AsLongConstant()->GetValue(); - } +inline int64_t Int64FromLocation(Location location) { + return Int64FromConstant(location.GetConstant()); } inline vixl::aarch64::Operand OperandFrom(Location location, DataType::Type type) { if (location.IsRegister()) { return vixl::aarch64::Operand(RegisterFrom(location, type)); } else { - return vixl::aarch64::Operand(Int64ConstantFrom(location)); + return vixl::aarch64::Operand(Int64FromLocation(location)); } } @@ -234,6 +226,13 @@ inline vixl::aarch64::Operand OperandFromMemOperand( } } +inline bool AddSubCanEncodeAsImmediate(int64_t value) { + // If `value` does not fit but `-value` does, VIXL will automatically use + // the 'opposite' instruction. + return vixl::aarch64::Assembler::IsImmAddSub(value) + || vixl::aarch64::Assembler::IsImmAddSub(-value); +} + inline bool Arm64CanEncodeConstantAsImmediate(HConstant* constant, HInstruction* instr) { int64_t value = CodeGenerator::GetInt64ValueOf(constant); @@ -249,6 +248,20 @@ inline bool Arm64CanEncodeConstantAsImmediate(HConstant* constant, HInstruction* return IsUint<8>(value); } + // Code generation for Min/Max: + // Cmp left_op, right_op + // Csel dst, left_op, right_op, cond + if (instr->IsMin() || instr->IsMax()) { + if (constant->GetUses().HasExactlyOneElement()) { + // If value can be encoded as immediate for the Cmp, then let VIXL handle + // the constant generation for the Csel. + return AddSubCanEncodeAsImmediate(value); + } + // These values are encodable as immediates for Cmp and VIXL will use csinc and csinv + // with the zr register as right_op, hence no constant generation is required. + return constant->IsZeroBitPattern() || constant->IsOne() || constant->IsMinusOne(); + } + // For single uses we let VIXL handle the constant generation since it will // use registers that are not managed by the register allocator (wip0, wip1). 
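The new Min/Max branch above can be restated in plain C++. The sketch below mirrors that decision; IsImmAddSubStub is a deliberately simplified assumption standing in for vixl::aarch64::Assembler::IsImmAddSub (the real check accepts a 12-bit immediate, optionally shifted by 12), not VIXL's actual implementation:

    #include <cstdint>

    // Simplified stand-in for vixl::aarch64::Assembler::IsImmAddSub.
    bool IsImmAddSubStub(int64_t value) { return value >= 0 && value < (1 << 12); }

    bool AddSubCanEncodeAsImmediate(int64_t value) {
      // If `value` does not fit but `-value` does, the opposite instruction is used.
      return IsImmAddSubStub(value) || IsImmAddSubStub(-value);
    }

    // Mirrors the Min/Max branch above: a single-use constant may ride on the
    // Cmp immediate; otherwise only 0, 1 and -1 avoid a materialized register,
    // because the select can be csel/csinc/csinv against the zero register.
    bool MinMaxConstantAvoidsRegister(int64_t value, bool has_single_use) {
      if (has_single_use) {
        return AddSubCanEncodeAsImmediate(value);
      }
      return value == 0 || value == 1 || value == -1;
    }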
if (constant->GetUses().HasExactlyOneElement()) { @@ -275,10 +288,7 @@ inline bool Arm64CanEncodeConstantAsImmediate(HConstant* constant, HInstruction* instr->IsSub()) << instr->DebugName(); // Uses aliases of ADD/SUB instructions. - // If `value` does not fit but `-value` does, VIXL will automatically use - // the 'opposite' instruction. - return vixl::aarch64::Assembler::IsImmAddSub(value) - || vixl::aarch64::Assembler::IsImmAddSub(-value); + return AddSubCanEncodeAsImmediate(value); } } diff --git a/compiler/optimizing/constant_folding_test.cc b/compiler/optimizing/constant_folding_test.cc index d27104752b..b1436f863c 100644 --- a/compiler/optimizing/constant_folding_test.cc +++ b/compiler/optimizing/constant_folding_test.cc @@ -16,8 +16,6 @@ #include <functional> -#include "arch/x86/instruction_set_features_x86.h" -#include "code_generator_x86.h" #include "constant_folding.h" #include "dead_code_elimination.h" #include "driver/compiler_options.h" @@ -60,9 +58,6 @@ class ConstantFoldingTest : public OptimizingUnitTest { std::string actual_before = printer_before.str(); EXPECT_EQ(expected_before, actual_before); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegenX86(graph_, *features_x86.get(), CompilerOptions()); HConstantFolding(graph_, "constant_folding").Run(); GraphChecker graph_checker_cf(graph_); graph_checker_cf.Run(); diff --git a/compiler/optimizing/constructor_fence_redundancy_elimination.cc b/compiler/optimizing/constructor_fence_redundancy_elimination.cc index 1a7f9266e9..54bff22e98 100644 --- a/compiler/optimizing/constructor_fence_redundancy_elimination.cc +++ b/compiler/optimizing/constructor_fence_redundancy_elimination.cc @@ -47,7 +47,7 @@ class CFREVisitor : public HGraphVisitor { candidate_fences_.push_back(constructor_fence); for (size_t input_idx = 0; input_idx < constructor_fence->InputCount(); ++input_idx) { - candidate_fence_targets_.Insert(constructor_fence->InputAt(input_idx)); + candidate_fence_targets_.insert(constructor_fence->InputAt(input_idx)); } } @@ -208,13 +208,13 @@ class CFREVisitor : public HGraphVisitor { // there is no benefit to this extra complexity unless we also reordered // the stores to come later. candidate_fences_.clear(); - candidate_fence_targets_.Clear(); + candidate_fence_targets_.clear(); } // A publishing 'store' is only interesting if the value being stored // is one of the fence `targets` in `candidate_fences`. 
bool IsInterestingPublishTarget(HInstruction* store_input) const { - return candidate_fence_targets_.Find(store_input) != candidate_fence_targets_.end(); + return candidate_fence_targets_.find(store_input) != candidate_fence_targets_.end(); } void MaybeMerge(HConstructorFence* target, HConstructorFence* src) { diff --git a/compiler/optimizing/dead_code_elimination_test.cc b/compiler/optimizing/dead_code_elimination_test.cc index adb6ce1187..277453545a 100644 --- a/compiler/optimizing/dead_code_elimination_test.cc +++ b/compiler/optimizing/dead_code_elimination_test.cc @@ -16,8 +16,6 @@ #include "dead_code_elimination.h" -#include "arch/x86/instruction_set_features_x86.h" -#include "code_generator_x86.h" #include "driver/compiler_options.h" #include "graph_checker.h" #include "optimizing_unit_test.h" @@ -45,9 +43,6 @@ void DeadCodeEliminationTest::TestCode(const std::vector<uint16_t>& data, std::string actual_before = printer_before.str(); ASSERT_EQ(actual_before, expected_before); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), CompilerOptions()); HDeadCodeElimination(graph, nullptr /* stats */, "dead_code_elimination").Run(); GraphChecker graph_checker(graph); graph_checker.Run(); diff --git a/compiler/optimizing/emit_swap_mips_test.cc b/compiler/optimizing/emit_swap_mips_test.cc index b63914faf7..293c1ab3f3 100644 --- a/compiler/optimizing/emit_swap_mips_test.cc +++ b/compiler/optimizing/emit_swap_mips_test.cc @@ -28,11 +28,12 @@ namespace art { class EmitSwapMipsTest : public OptimizingUnitTest { public: void SetUp() OVERRIDE { + instruction_set_ = InstructionSet::kMips; + instruction_set_features_ = MipsInstructionSetFeatures::FromCppDefines(); + OptimizingUnitTest::SetUp(); graph_ = CreateGraph(); - isa_features_ = MipsInstructionSetFeatures::FromCppDefines(); - codegen_ = new (graph_->GetAllocator()) mips::CodeGeneratorMIPS(graph_, - *isa_features_.get(), - CompilerOptions()); + codegen_.reset( + new (graph_->GetAllocator()) mips::CodeGeneratorMIPS(graph_, *compiler_options_)); moves_ = new (GetAllocator()) HParallelMove(GetAllocator()); test_helper_.reset( new AssemblerTestInfrastructure(GetArchitectureString(), @@ -47,8 +48,10 @@ class EmitSwapMipsTest : public OptimizingUnitTest { void TearDown() OVERRIDE { test_helper_.reset(); - isa_features_.reset(); + codegen_.reset(); + graph_ = nullptr; ResetPoolAndAllocator(); + OptimizingUnitTest::TearDown(); } // Get the typically used name for this architecture. @@ -106,10 +109,9 @@ class EmitSwapMipsTest : public OptimizingUnitTest { protected: HGraph* graph_; HParallelMove* moves_; - mips::CodeGeneratorMIPS* codegen_; + std::unique_ptr<mips::CodeGeneratorMIPS> codegen_; mips::MipsAssembler* assembler_; std::unique_ptr<AssemblerTestInfrastructure> test_helper_; - std::unique_ptr<const MipsInstructionSetFeatures> isa_features_; }; TEST_F(EmitSwapMipsTest, TwoRegisters) { diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc index 4863718518..e6b6326726 100644 --- a/compiler/optimizing/gvn.cc +++ b/compiler/optimizing/gvn.cc @@ -479,7 +479,10 @@ void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) { HInstruction* next = current->GetNext(); // Do not kill the set with the side effects of the instruction just now: if // the instruction is GVN'ed, we don't need to kill. 
- if (current->CanBeMoved()) { + // + // BoundType is a special case example of an instruction which shouldn't be moved but can be + // GVN'ed. + if (current->CanBeMoved() || current->IsBoundType()) { if (current->IsBinaryOperation() && current->AsBinaryOperation()->IsCommutative()) { // For commutative ops, (x op y) will be treated the same as (y op x) // after fixed ordering. diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 6900cd883a..3ba741472e 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -126,7 +126,7 @@ void HInliner::UpdateInliningBudget() { } bool HInliner::Run() { - if (compiler_driver_->GetCompilerOptions().GetInlineMaxCodeUnits() == 0) { + if (codegen_->GetCompilerOptions().GetInlineMaxCodeUnits() == 0) { // Inlining effectively disabled. return false; } else if (graph_->IsDebuggable()) { @@ -460,9 +460,10 @@ static bool AlwaysThrows(CompilerDriver* const compiler_driver, ArtMethod* metho bool HInliner::TryInline(HInvoke* invoke_instruction) { if (invoke_instruction->IsInvokeUnresolved() || - invoke_instruction->IsInvokePolymorphic()) { - return false; // Don't bother to move further if we know the method is unresolved or an - // invoke-polymorphic. + invoke_instruction->IsInvokePolymorphic() || + invoke_instruction->IsInvokeCustom()) { + return false; // Don't bother to move further if we know the method is unresolved or the + // invocation is polymorphic (invoke-{polymorphic,custom}). } ScopedObjectAccess soa(Thread::Current()); @@ -730,7 +731,7 @@ HInliner::InlineCacheType HInliner::ExtractClassesFromOfflineProfile( offline_profile.dex_references.size()); for (size_t i = 0; i < offline_profile.dex_references.size(); i++) { bool found = false; - for (const DexFile* dex_file : compiler_driver_->GetDexFilesForOatFile()) { + for (const DexFile* dex_file : codegen_->GetCompilerOptions().GetDexFilesForOatFile()) { if (offline_profile.dex_references[i].MatchesDex(dex_file)) { dex_profile_index_to_dex_cache[i] = caller_compilation_unit_.GetClassLinker()->FindDexCache(self, *dex_file); @@ -948,7 +949,7 @@ HInstruction* HInliner::AddTypeGuard(HInstruction* receiver, invoke_instruction->GetDexPc(), /* needs_access_check */ false); HLoadClass::LoadKind kind = HSharpening::ComputeLoadClassKind( - load_class, codegen_, compiler_driver_, caller_compilation_unit_); + load_class, codegen_, caller_compilation_unit_); DCHECK(kind != HLoadClass::LoadKind::kInvalid) << "We should always be able to reference a class for inline caches"; // Load kind must be set before inserting the instruction into the graph. @@ -1417,6 +1418,22 @@ size_t HInliner::CountRecursiveCallsOf(ArtMethod* method) const { return count; } +static inline bool MayInline(const CompilerOptions& compiler_options, + const DexFile& inlined_from, + const DexFile& inlined_into) { + if (kIsTargetBuild) { + return true; + } + + // We're not allowed to inline across dex files if we're the no-inline-from dex file. + if (!IsSameDexFile(inlined_from, inlined_into) && + ContainsElement(compiler_options.GetNoInlineFromDexFile(), &inlined_from)) { + return false; + } + + return true; +} + bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, ArtMethod* method, ReferenceTypeInfo receiver_type, @@ -1438,8 +1455,9 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, // Check whether we're allowed to inline. The outermost compilation unit is the relevant // dex file here (though the transitivity of an inline chain would allow checking the caller). 
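The new MayInline helper above only restricts host (dex2oat) compilation: on target builds it always returns true, and otherwise it rejects cross-dex inlining out of a deny-listed dex file. A small behavioral sketch with simplified stand-in types (identity comparison stands in for IsSameDexFile, which also matches by location; the file names are hypothetical):

    #include <algorithm>
    #include <string>
    #include <vector>

    using DexFile = std::string;  // Simplified stand-in.

    bool MayInlineSketch(const std::vector<const DexFile*>& no_inline_from,
                         const DexFile& inlined_from,
                         const DexFile& inlined_into) {
      if (&inlined_from != &inlined_into &&  // Cross-dex call...
          std::find(no_inline_from.begin(), no_inline_from.end(), &inlined_from) !=
              no_inline_from.end()) {        // ...out of a deny-listed dex file.
        return false;
      }
      return true;
    }

    // Example: a callee in core-oj.dex is not inlined into app.dex when
    // core-oj.dex is on the --no-inline-from list; same-dex calls always pass.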
- if (!compiler_driver_->MayInline(method->GetDexFile(), - outer_compilation_unit_.GetDexFile())) { + if (!MayInline(codegen_->GetCompilerOptions(), + *method->GetDexFile(), + *outer_compilation_unit_.GetDexFile())) { if (TryPatternSubstitution(invoke_instruction, method, return_replacement)) { LOG_SUCCESS() << "Successfully replaced pattern of invoke " << method->PrettyMethod(); @@ -1464,7 +1482,7 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, return false; } - size_t inline_max_code_units = compiler_driver_->GetCompilerOptions().GetInlineMaxCodeUnits(); + size_t inline_max_code_units = codegen_->GetCompilerOptions().GetInlineMaxCodeUnits(); if (accessor.InsnsSizeInCodeUnits() > inline_max_code_units) { LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedCodeItem) << "Method " << method->PrettyMethod() @@ -1765,7 +1783,7 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, graph_->GetArenaStack(), callee_dex_file, method_index, - compiler_driver_->GetInstructionSet(), + codegen_->GetCompilerOptions().GetInstructionSet(), invoke_type, graph_->IsDebuggable(), /* osr */ false, @@ -1802,8 +1820,8 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, return false; } - if (!RegisterAllocator::CanAllocateRegistersFor(*callee_graph, - compiler_driver_->GetInstructionSet())) { + if (!RegisterAllocator::CanAllocateRegistersFor( + *callee_graph, codegen_->GetCompilerOptions().GetInstructionSet())) { LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedRegisterAllocator) << "Method " << callee_dex_file.PrettyMethod(method_index) << " cannot be inlined because of the register allocator"; @@ -2004,8 +2022,8 @@ void HInliner::RunOptimizations(HGraph* callee_graph, // optimization that could lead to a HDeoptimize. The following optimizations do not. HDeadCodeElimination dce(callee_graph, inline_stats_, "dead_code_elimination$inliner"); HConstantFolding fold(callee_graph, "constant_folding$inliner"); - HSharpening sharpening(callee_graph, codegen_, compiler_driver_); - InstructionSimplifier simplify(callee_graph, codegen_, compiler_driver_, inline_stats_); + HSharpening sharpening(callee_graph, codegen_); + InstructionSimplifier simplify(callee_graph, codegen_, inline_stats_); IntrinsicsRecognizer intrinsics(callee_graph, inline_stats_); HOptimization* optimizations[] = { diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc index 24dc2ee9b4..731accd692 100644 --- a/compiler/optimizing/instruction_builder.cc +++ b/compiler/optimizing/instruction_builder.cc @@ -449,11 +449,7 @@ void HInstructionBuilder::BuildIntrinsic(ArtMethod* method) { target_method, HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); RangeInstructionOperands operands(graph_->GetNumberOfVRegs() - in_vregs, in_vregs); - HandleInvoke(invoke, - operands, - dex_file_->GetMethodShorty(method_idx), - /* clinit_check */ nullptr, - /* is_unresolved */ false); + HandleInvoke(invoke, operands, dex_file_->GetMethodShorty(method_idx), /* is_unresolved */ false); // Add the return instruction. 
if (return_type_ == DataType::Type::kVoid) { @@ -916,11 +912,11 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction, uint32_t method_idx, const InstructionOperands& operands) { InvokeType invoke_type = GetInvokeTypeFromOpCode(instruction.Opcode()); - const char* descriptor = dex_file_->GetMethodShorty(method_idx); - DataType::Type return_type = DataType::FromShorty(descriptor[0]); + const char* shorty = dex_file_->GetMethodShorty(method_idx); + DataType::Type return_type = DataType::FromShorty(shorty[0]); // Remove the return type from the 'proto'. - size_t number_of_arguments = strlen(descriptor) - 1; + size_t number_of_arguments = strlen(shorty) - 1; if (invoke_type != kStatic) { // instance call // One extra argument for 'this'. number_of_arguments++; @@ -937,11 +933,7 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction, dex_pc, method_idx, invoke_type); - return HandleInvoke(invoke, - operands, - descriptor, - nullptr /* clinit_check */, - true /* is_unresolved */); + return HandleInvoke(invoke, operands, shorty, /* is_unresolved */ true); } // Replace calls to String.<init> with StringFactory. @@ -968,7 +960,7 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction, invoke_type, target_method, HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit); - return HandleStringInit(invoke, operands, descriptor); + return HandleStringInit(invoke, operands, shorty); } // Potential class initialization check, in the case of a static method call. @@ -1028,29 +1020,39 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction, resolved_method, ImTable::GetImtIndex(resolved_method)); } - - return HandleInvoke(invoke, operands, descriptor, clinit_check, false /* is_unresolved */); + return HandleInvoke(invoke, operands, shorty, /* is_unresolved */ false, clinit_check); } -bool HInstructionBuilder::BuildInvokePolymorphic(const Instruction& instruction ATTRIBUTE_UNUSED, - uint32_t dex_pc, +bool HInstructionBuilder::BuildInvokePolymorphic(uint32_t dex_pc, uint32_t method_idx, dex::ProtoIndex proto_idx, const InstructionOperands& operands) { - const char* descriptor = dex_file_->GetShorty(proto_idx); - DCHECK_EQ(1 + ArtMethod::NumArgRegisters(descriptor), operands.GetNumberOfOperands()); - DataType::Type return_type = DataType::FromShorty(descriptor[0]); - size_t number_of_arguments = strlen(descriptor); + const char* shorty = dex_file_->GetShorty(proto_idx); + DCHECK_EQ(1 + ArtMethod::NumArgRegisters(shorty), operands.GetNumberOfOperands()); + DataType::Type return_type = DataType::FromShorty(shorty[0]); + size_t number_of_arguments = strlen(shorty); HInvoke* invoke = new (allocator_) HInvokePolymorphic(allocator_, number_of_arguments, return_type, dex_pc, method_idx); - return HandleInvoke(invoke, - operands, - descriptor, - nullptr /* clinit_check */, - false /* is_unresolved */); + return HandleInvoke(invoke, operands, shorty, /* is_unresolved */ false); +} + + +bool HInstructionBuilder::BuildInvokeCustom(uint32_t dex_pc, + uint32_t call_site_idx, + const InstructionOperands& operands) { + dex::ProtoIndex proto_idx = dex_file_->GetProtoIndexForCallSite(call_site_idx); + const char* shorty = dex_file_->GetShorty(proto_idx); + DataType::Type return_type = DataType::FromShorty(shorty[0]); + size_t number_of_arguments = strlen(shorty) - 1; + HInvoke* invoke = new (allocator_) HInvokeCustom(allocator_, + number_of_arguments, + call_site_idx, + return_type, + dex_pc); + return HandleInvoke(invoke, operands, shorty, /* is_unresolved */ 
false); } HNewInstance* HInstructionBuilder::BuildNewInstance(dex::TypeIndex type_index, uint32_t dex_pc) { @@ -1197,10 +1199,10 @@ HClinitCheck* HInstructionBuilder::ProcessClinitCheckForInvoke( bool HInstructionBuilder::SetupInvokeArguments(HInvoke* invoke, const InstructionOperands& operands, - const char* descriptor, + const char* shorty, size_t start_index, size_t* argument_index) { - uint32_t descriptor_index = 1; // Skip the return type. + uint32_t shorty_index = 1; // Skip the return type. const size_t number_of_operands = operands.GetNumberOfOperands(); for (size_t i = start_index; // Make sure we don't go over the expected arguments or over the number of @@ -1208,7 +1210,7 @@ bool HInstructionBuilder::SetupInvokeArguments(HInvoke* invoke, // it hasn't been properly checked. (i < number_of_operands) && (*argument_index < invoke->GetNumberOfArguments()); i++, (*argument_index)++) { - DataType::Type type = DataType::FromShorty(descriptor[descriptor_index++]); + DataType::Type type = DataType::FromShorty(shorty[shorty_index++]); bool is_wide = (type == DataType::Type::kInt64) || (type == DataType::Type::kFloat64); if (is_wide && ((i + 1 == number_of_operands) || (operands.GetOperand(i) + 1 != operands.GetOperand(i + 1)))) { @@ -1250,9 +1252,9 @@ bool HInstructionBuilder::SetupInvokeArguments(HInvoke* invoke, bool HInstructionBuilder::HandleInvoke(HInvoke* invoke, const InstructionOperands& operands, - const char* descriptor, - HClinitCheck* clinit_check, - bool is_unresolved) { + const char* shorty, + bool is_unresolved, + HClinitCheck* clinit_check) { DCHECK(!invoke->IsInvokeStaticOrDirect() || !invoke->AsInvokeStaticOrDirect()->IsStringInit()); size_t start_index = 0; @@ -1267,7 +1269,7 @@ bool HInstructionBuilder::HandleInvoke(HInvoke* invoke, argument_index = 1; } - if (!SetupInvokeArguments(invoke, operands, descriptor, start_index, &argument_index)) { + if (!SetupInvokeArguments(invoke, operands, shorty, start_index, &argument_index)) { return false; } @@ -1288,13 +1290,13 @@ bool HInstructionBuilder::HandleInvoke(HInvoke* invoke, bool HInstructionBuilder::HandleStringInit(HInvoke* invoke, const InstructionOperands& operands, - const char* descriptor) { + const char* shorty) { DCHECK(invoke->IsInvokeStaticOrDirect()); DCHECK(invoke->AsInvokeStaticOrDirect()->IsStringInit()); size_t start_index = 1; size_t argument_index = 0; - if (!SetupInvokeArguments(invoke, operands, descriptor, start_index, &argument_index)) { + if (!SetupInvokeArguments(invoke, operands, shorty, start_index, &argument_index)) { return false; } @@ -1306,28 +1308,25 @@ bool HInstructionBuilder::HandleStringInit(HInvoke* invoke, HInstruction* arg_this = LoadLocal(orig_this_reg, DataType::Type::kReference); // Replacing the NewInstance might render it redundant. Keep a list of these - // to be visited once it is clear whether it is has remaining uses. + // to be visited once it is clear whether it has remaining uses. if (arg_this->IsNewInstance()) { ssa_builder_->AddUninitializedString(arg_this->AsNewInstance()); + // Walk over all vregs and replace any occurrence of `arg_this` with `invoke`. + for (size_t vreg = 0, e = current_locals_->size(); vreg < e; ++vreg) { + if ((*current_locals_)[vreg] == arg_this) { + (*current_locals_)[vreg] = invoke; + } + } } else { - // The only reason a HPhi can flow in a String.<init> is when there is an - // irreducible loop, which will create HPhi for all dex registers at loop entry. 
DCHECK(arg_this->IsPhi()); - DCHECK(graph_->HasIrreducibleLoops()); - // Don't bother compiling a method in that situation. While we could look at all - // phis related to the HNewInstance, it's not worth the trouble. - MaybeRecordStat(compilation_stats_, - MethodCompilationStat::kNotCompiledIrreducibleAndStringInit); - return false; - } - - // Walk over all vregs and replace any occurrence of `arg_this` with `invoke`. - for (size_t vreg = 0, e = current_locals_->size(); vreg < e; ++vreg) { - if ((*current_locals_)[vreg] == arg_this) { - (*current_locals_)[vreg] = invoke; - } + // We can get a phi as input of a String.<init> if there is a loop between the + // allocation and the String.<init> call. As we don't know which other phis might alias + // with `arg_this`, we keep a record of these phis and will analyze their inputs and + // uses once the inputs and users are populated (in ssa_builder.cc). + // Note: we only do this for phis, as it is a somewhat more expensive operation than + // what we're doing above when the input is the `HNewInstance`. + ssa_builder_->AddUninitializedStringPhi(arg_this->AsPhi(), invoke); } - return true; } @@ -1774,7 +1773,6 @@ void HInstructionBuilder::BuildLoadString(dex::StringIndex string_index, uint32_ new (allocator_) HLoadString(graph_->GetCurrentMethod(), string_index, *dex_file_, dex_pc); HSharpening::ProcessLoadString(load_string, code_generator_, - compiler_driver_, *dex_compilation_unit_, handles_); AppendInstruction(load_string); @@ -1816,7 +1814,6 @@ HLoadClass* HInstructionBuilder::BuildLoadClass(dex::TypeIndex type_index, HLoadClass::LoadKind load_kind = HSharpening::ComputeLoadClassKind(load_class, code_generator_, - compiler_driver_, *dex_compilation_unit_); if (load_kind == HLoadClass::LoadKind::kInvalid) { @@ -1876,7 +1873,7 @@ void HInstructionBuilder::BuildTypeCheck(const Instruction& instruction, Handle<mirror::Class> klass = ResolveClass(soa, type_index); bool needs_access_check = LoadClassNeedsAccessCheck(klass); TypeCheckKind check_kind = HSharpening::ComputeTypeCheckKind( - klass.Get(), code_generator_, compiler_driver_, needs_access_check); + klass.Get(), code_generator_, needs_access_check); HInstruction* class_or_null = nullptr; HIntConstant* bitstring_path_to_root = nullptr; @@ -2144,14 +2141,28 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, uint32_t args[5]; uint32_t number_of_vreg_arguments = instruction.GetVarArgs(args); VarArgsInstructionOperands operands(args, number_of_vreg_arguments); - return BuildInvokePolymorphic(instruction, dex_pc, method_idx, proto_idx, operands); + return BuildInvokePolymorphic(dex_pc, method_idx, proto_idx, operands); } case Instruction::INVOKE_POLYMORPHIC_RANGE: { uint16_t method_idx = instruction.VRegB_4rcc(); dex::ProtoIndex proto_idx(instruction.VRegH_4rcc()); RangeInstructionOperands operands(instruction.VRegC_4rcc(), instruction.VRegA_4rcc()); - return BuildInvokePolymorphic(instruction, dex_pc, method_idx, proto_idx, operands); + return BuildInvokePolymorphic(dex_pc, method_idx, proto_idx, operands); + } + + case Instruction::INVOKE_CUSTOM: { + uint16_t call_site_idx = instruction.VRegB_35c(); + uint32_t args[5]; + uint32_t number_of_vreg_arguments = instruction.GetVarArgs(args); + VarArgsInstructionOperands operands(args, number_of_vreg_arguments); + return BuildInvokeCustom(dex_pc, call_site_idx, operands); + } + + case Instruction::INVOKE_CUSTOM_RANGE: { + uint16_t call_site_idx = instruction.VRegB_3rc(); + RangeInstructionOperands 
operands(instruction.VRegC_3rc(), instruction.VRegA_3rc()); + return BuildInvokeCustom(dex_pc, call_site_idx, operands); } case Instruction::NEG_INT: { @@ -2933,7 +2944,21 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, break; } - default: + case Instruction::UNUSED_3E: + case Instruction::UNUSED_3F: + case Instruction::UNUSED_40: + case Instruction::UNUSED_41: + case Instruction::UNUSED_42: + case Instruction::UNUSED_43: + case Instruction::UNUSED_79: + case Instruction::UNUSED_7A: + case Instruction::UNUSED_F3: + case Instruction::UNUSED_F4: + case Instruction::UNUSED_F5: + case Instruction::UNUSED_F6: + case Instruction::UNUSED_F7: + case Instruction::UNUSED_F8: + case Instruction::UNUSED_F9: { VLOG(compiler) << "Did not compile " << dex_file_->PrettyMethod(dex_compilation_unit_->GetDexMethodIndex()) << " because of unhandled instruction " @@ -2941,6 +2966,7 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, MaybeRecordStat(compilation_stats_, MethodCompilationStat::kNotCompiledUnhandledInstruction); return false; + } } return true; } // NOLINT(readability/fn_size) diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h index 2218a691ea..af7092a0cf 100644 --- a/compiler/optimizing/instruction_builder.h +++ b/compiler/optimizing/instruction_builder.h @@ -173,12 +173,17 @@ class HInstructionBuilder : public ValueObject { // Builds an invocation node for invoke-polymorphic and returns whether the // instruction is supported. - bool BuildInvokePolymorphic(const Instruction& instruction, - uint32_t dex_pc, + bool BuildInvokePolymorphic(uint32_t dex_pc, uint32_t method_idx, dex::ProtoIndex proto_idx, const InstructionOperands& operands); + // Builds an invocation node for invoke-custom and returns whether the + // instruction is supported. + bool BuildInvokeCustom(uint32_t dex_pc, + uint32_t call_site_idx, + const InstructionOperands& operands); + // Builds a new array node and the instructions that fill it. 
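All of the invoke builders above derive their argument counts from the method shorty. A worked example of that convention (the helper name is ours for illustration, not an ART API):

    #include <cstring>

    // The first shorty character is the return type; the rest are argument
    // types ('L' reference, 'I' int, 'J' long, ...). Instance calls pass an
    // implicit 'this' that the shorty does not mention.
    size_t NumberOfArguments(const char* shorty, bool is_static) {
      size_t n = std::strlen(shorty) - 1;  // Drop the return type.
      return is_static ? n : n + 1u;
    }

    // NumberOfArguments("LLI", /*is_static=*/true) == 2: (ref, int) -> ref.
    // BuildInvokePolymorphic instead keeps strlen(shorty) unchanged: the
    // strlen-1 proto arguments plus the method-handle receiver.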
HNewArray* BuildFilledNewArray(uint32_t dex_pc, dex::TypeIndex type_index, @@ -253,19 +258,19 @@ class HInstructionBuilder : public ValueObject { bool SetupInvokeArguments(HInvoke* invoke, const InstructionOperands& operands, - const char* descriptor, + const char* shorty, size_t start_index, size_t* argument_index); bool HandleInvoke(HInvoke* invoke, const InstructionOperands& operands, - const char* descriptor, - HClinitCheck* clinit_check, - bool is_unresolved); + const char* shorty, + bool is_unresolved, + HClinitCheck* clinit_check = nullptr); bool HandleStringInit(HInvoke* invoke, const InstructionOperands& operands, - const char* descriptor); + const char* shorty); void HandleStringInitResult(HInvokeStaticOrDirect* invoke); HClinitCheck* ProcessClinitCheckForInvoke( diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index 63704a470e..70af49f8f0 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -36,11 +36,9 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { public: InstructionSimplifierVisitor(HGraph* graph, CodeGenerator* codegen, - CompilerDriver* compiler_driver, OptimizingCompilerStats* stats) : HGraphDelegateVisitor(graph), codegen_(codegen), - compiler_driver_(compiler_driver), stats_(stats) {} bool Run(); @@ -117,6 +115,7 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { void SimplifyFP2Int(HInvoke* invoke); void SimplifyStringCharAt(HInvoke* invoke); void SimplifyStringIsEmptyOrLength(HInvoke* invoke); + void SimplifyStringIndexOf(HInvoke* invoke); void SimplifyNPEOnArgN(HInvoke* invoke, size_t); void SimplifyReturnThis(HInvoke* invoke); void SimplifyAllocationIntrinsic(HInvoke* invoke); @@ -126,7 +125,6 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { void SimplifyAbs(HInvoke* invoke, DataType::Type type); CodeGenerator* codegen_; - CompilerDriver* compiler_driver_; OptimizingCompilerStats* stats_; bool simplification_occurred_ = false; int simplifications_at_current_position_ = 0; @@ -143,7 +141,7 @@ bool InstructionSimplifier::Run() { visitor.VisitReversePostOrder(); } - InstructionSimplifierVisitor visitor(graph_, codegen_, compiler_driver_, stats_); + InstructionSimplifierVisitor visitor(graph_, codegen_, stats_); return visitor.Run(); } @@ -637,8 +635,8 @@ void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) { return; } - // Note: The `outcome` is initialized to please valgrind - the compiler can reorder - // the return value check with the `outcome` check, b/27651442 . + // Historical note: The `outcome` was initialized to please Valgrind - the compiler can reorder + // the return value check with the `outcome` check, b/27651442. bool outcome = false; if (TypeCheckHasKnownOutcome(check_cast->GetTargetClassRTI(), object, &outcome)) { if (outcome) { @@ -683,8 +681,8 @@ void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) { return; } - // Note: The `outcome` is initialized to please valgrind - the compiler can reorder - // the return value check with the `outcome` check, b/27651442 . + // Historical note: The `outcome` was initialized to please Valgrind - the compiler can reorder + // the return value check with the `outcome` check, b/27651442. 
bool outcome = false; if (TypeCheckHasKnownOutcome(instruction->GetTargetClassRTI(), object, &outcome)) { MaybeRecordStat(stats_, MethodCompilationStat::kRemovedInstanceOf); @@ -2308,7 +2306,7 @@ void InstructionSimplifierVisitor::SimplifySystemArrayCopy(HInvoke* instruction) // the invoke, as we would need to look it up in the current dex file, and it // is unlikely that it exists. The most usual situation for such typed // arraycopy methods is a direct pointer to the boot image. - HSharpening::SharpenInvokeStaticOrDirect(invoke, codegen_, compiler_driver_); + HSharpening::SharpenInvokeStaticOrDirect(invoke, codegen_); } } } @@ -2417,6 +2415,43 @@ void InstructionSimplifierVisitor::SimplifyStringIsEmptyOrLength(HInvoke* invoke invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, replacement); } +void InstructionSimplifierVisitor::SimplifyStringIndexOf(HInvoke* invoke) { + DCHECK(invoke->GetIntrinsic() == Intrinsics::kStringIndexOf || + invoke->GetIntrinsic() == Intrinsics::kStringIndexOfAfter); + if (invoke->InputAt(0)->IsLoadString()) { + HLoadString* load_string = invoke->InputAt(0)->AsLoadString(); + const DexFile& dex_file = load_string->GetDexFile(); + uint32_t utf16_length; + const char* data = + dex_file.StringDataAndUtf16LengthByIdx(load_string->GetStringIndex(), &utf16_length); + if (utf16_length == 0) { + invoke->ReplaceWith(GetGraph()->GetIntConstant(-1)); + invoke->GetBlock()->RemoveInstruction(invoke); + RecordSimplification(); + return; + } + if (utf16_length == 1 && invoke->GetIntrinsic() == Intrinsics::kStringIndexOf) { + // Simplify to HSelect(HEquals(., load_string.charAt(0)), 0, -1). + // If the sought character is supplementary, this gives the correct result, i.e. -1. + uint32_t c = GetUtf16FromUtf8(&data); + DCHECK_EQ(GetTrailingUtf16Char(c), 0u); + DCHECK_EQ(GetLeadingUtf16Char(c), c); + uint32_t dex_pc = invoke->GetDexPc(); + ArenaAllocator* allocator = GetGraph()->GetAllocator(); + HEqual* equal = + new (allocator) HEqual(invoke->InputAt(1), GetGraph()->GetIntConstant(c), dex_pc); + invoke->GetBlock()->InsertInstructionBefore(equal, invoke); + HSelect* result = new (allocator) HSelect(equal, + GetGraph()->GetIntConstant(0), + GetGraph()->GetIntConstant(-1), + dex_pc); + invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, result); + RecordSimplification(); + return; + } + } +} + // This method should only be used on intrinsics whose sole way of throwing an // exception is raising a NPE when the nth argument is null. If that argument // is provably non-null, we can clear the flag. @@ -2554,6 +2589,10 @@ void InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) { case Intrinsics::kStringLength: SimplifyStringIsEmptyOrLength(instruction); break; + case Intrinsics::kStringIndexOf: + case Intrinsics::kStringIndexOfAfter: + SimplifyStringIndexOf(instruction); + break; case Intrinsics::kStringStringIndexOf: case Intrinsics::kStringStringIndexOfAfter: SimplifyNPEOnArgN(instruction, 1); // 0th has own NullCheck diff --git a/compiler/optimizing/instruction_simplifier.h b/compiler/optimizing/instruction_simplifier.h index f409e873de..2d134e0067 100644 --- a/compiler/optimizing/instruction_simplifier.h +++ b/compiler/optimizing/instruction_simplifier.h @@ -24,7 +24,6 @@ namespace art { class CodeGenerator; -class CompilerDriver; /** * Implements optimizations specific to each instruction. 
@@ -40,12 +39,10 @@ class InstructionSimplifier : public HOptimization { public: InstructionSimplifier(HGraph* graph, CodeGenerator* codegen, - CompilerDriver* compiler_driver, OptimizingCompilerStats* stats = nullptr, const char* name = kInstructionSimplifierPassName) : HOptimization(graph, name, stats), - codegen_(codegen), - compiler_driver_(compiler_driver) {} + codegen_(codegen) {} static constexpr const char* kInstructionSimplifierPassName = "instruction_simplifier"; @@ -53,7 +50,6 @@ class InstructionSimplifier : public HOptimization { private: CodeGenerator* codegen_; - CompilerDriver* compiler_driver_; DISALLOW_COPY_AND_ASSIGN(InstructionSimplifier); }; diff --git a/compiler/optimizing/instruction_simplifier_x86.cc b/compiler/optimizing/instruction_simplifier_x86.cc new file mode 100644 index 0000000000..b3f67d6e84 --- /dev/null +++ b/compiler/optimizing/instruction_simplifier_x86.cc @@ -0,0 +1,149 @@ +/* + * Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "instruction_simplifier_x86.h" +#include "arch/x86/instruction_set_features_x86.h" +#include "mirror/array-inl.h" +#include "code_generator.h" + + +namespace art { + +namespace x86 { + +class InstructionSimplifierX86Visitor : public HGraphVisitor { + public: + InstructionSimplifierX86Visitor(HGraph* graph, + CodeGeneratorX86 *codegen, + OptimizingCompilerStats* stats) + : HGraphVisitor(graph), codegen_(codegen), stats_(stats) {} + + private: + void RecordSimplification() { + MaybeRecordStat(stats_, MethodCompilationStat::kInstructionSimplificationsArch); + } + + bool HasCpuFeatureFlag() { + return (codegen_->GetInstructionSetFeatures().HasAVX2()); + } + + /** + * This simplifier uses a special-purpose BB visitor. + * (1) No need to visit Phi nodes. + * (2) Since statements can be removed in a "forward" fashion, + * the visitor should test if each statement is still there. + */ + void VisitBasicBlock(HBasicBlock* block) OVERRIDE { + // TODO: fragile iteration, provide more robust iterators? 
+ for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + HInstruction* instruction = it.Current(); + if (instruction->IsInBlock()) { + instruction->Accept(this); + } + } + } + + bool TryGenerateVecMultiplyAccumulate(HVecMul* mul); + void VisitVecMul(HVecMul* instruction) OVERRIDE; + + CodeGeneratorX86* codegen_; + OptimizingCompilerStats* stats_; +}; + +/* generic expressions for FMA +a = (b * c) + a +a = (b * c) - a +*/ +bool InstructionSimplifierX86Visitor::TryGenerateVecMultiplyAccumulate(HVecMul* mul) { + if (!(mul->GetPackedType() == DataType::Type::kFloat32 || + mul->GetPackedType() == DataType::Type::kFloat64)) { + return false; + } + ArenaAllocator* allocator = mul->GetBlock()->GetGraph()->GetAllocator(); + if (mul->HasOnlyOneNonEnvironmentUse()) { + HInstruction* use = mul->GetUses().front().GetUser(); + if (use->IsVecAdd() || use->IsVecSub()) { + // Replace code looking like + // VECMUL tmp, x, y + // VECADD dst, acc, tmp or VECADD dst, tmp, acc + // or + // VECSUB dst, tmp, acc + // with + // VECMULACC dst, acc, x, y + + // Note that we do not want to (unconditionally) perform the merge when the + // multiplication has multiple uses and it can be merged in all of them. + // Multiple uses could happen on the same control-flow path, and we would + // then increase the amount of work. In the future we could try to evaluate + // whether all uses are on different control-flow paths (using dominance and + // reverse-dominance information) and only perform the merge when they are. + HInstruction* accumulator = nullptr; + HVecBinaryOperation* binop = use->AsVecBinaryOperation(); + HInstruction* binop_left = binop->GetLeft(); + HInstruction* binop_right = binop->GetRight(); + DCHECK_NE(binop_left, binop_right); + if (use->IsVecSub()) { + if (binop_left == mul) { + accumulator = binop_right; + } + } else { + // VecAdd + if (binop_right == mul) { + accumulator = binop_left; + } else { + DCHECK_EQ(binop_left, mul); + accumulator = binop_right; + } + } + HInstruction::InstructionKind kind = + use->IsVecAdd() ? HInstruction::kAdd : HInstruction::kSub; + + if (accumulator != nullptr) { + HVecMultiplyAccumulate* mulacc = + new (allocator) HVecMultiplyAccumulate(allocator, + kind, + accumulator, + mul->GetLeft(), + mul->GetRight(), + binop->GetPackedType(), + binop->GetVectorLength(), + binop->GetDexPc()); + binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc); + DCHECK(!mul->HasUses()); + mul->GetBlock()->RemoveInstruction(mul); + return true; + } + } + } + return false; +} + +void InstructionSimplifierX86Visitor::VisitVecMul(HVecMul* instruction) { + if (HasCpuFeatureFlag()) { + if (TryGenerateVecMultiplyAccumulate(instruction)) { + RecordSimplification(); + } + } +} + +bool InstructionSimplifierX86::Run() { + InstructionSimplifierX86Visitor visitor(graph_, codegen_, stats_); + visitor.VisitReversePostOrder(); + return true; +} + +} // namespace x86 +} // namespace art diff --git a/compiler/optimizing/instruction_simplifier_x86.h b/compiler/optimizing/instruction_simplifier_x86.h new file mode 100644 index 0000000000..1fb199f728 --- /dev/null +++ b/compiler/optimizing/instruction_simplifier_x86.h @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_H_ +#define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_H_ + +#include "nodes.h" +#include "optimization.h" +#include "code_generator_x86.h" + +namespace art { +namespace x86 { + +class InstructionSimplifierX86 : public HOptimization { + public: + InstructionSimplifierX86(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats) + : HOptimization(graph, kInstructionSimplifierX86PassName, stats), + codegen_(down_cast<CodeGeneratorX86*>(codegen)) {} + + static constexpr const char* kInstructionSimplifierX86PassName = "instruction_simplifier_x86"; + + bool Run() OVERRIDE; + + private: + CodeGeneratorX86* codegen_; +}; + +} // namespace x86 +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_H_ diff --git a/compiler/optimizing/intrinsic_objects.cc b/compiler/optimizing/intrinsic_objects.cc new file mode 100644 index 0000000000..3c20ad698b --- /dev/null +++ b/compiler/optimizing/intrinsic_objects.cc @@ -0,0 +1,120 @@ +/* + * Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "intrinsic_objects.h" + +#include "art_field-inl.h" +#include "base/logging.h" +#include "class_root.h" +#include "handle.h" +#include "obj_ptr-inl.h" +#include "mirror/object_array-inl.h" + +namespace art { + +static ObjPtr<mirror::ObjectArray<mirror::Object>> LookupIntegerCache(Thread* self, + ClassLinker* class_linker) + REQUIRES_SHARED(Locks::mutator_lock_) { + ObjPtr<mirror::Class> integer_cache_class = class_linker->LookupClass( + self, "Ljava/lang/Integer$IntegerCache;", /* class_linker */ nullptr); + if (integer_cache_class == nullptr || !integer_cache_class->IsInitialized()) { + return nullptr; + } + ArtField* cache_field = + integer_cache_class->FindDeclaredStaticField("cache", "[Ljava/lang/Integer;"); + CHECK(cache_field != nullptr); + ObjPtr<mirror::ObjectArray<mirror::Object>> integer_cache = + ObjPtr<mirror::ObjectArray<mirror::Object>>::DownCast( + cache_field->GetObject(integer_cache_class)); + CHECK(integer_cache != nullptr); + return integer_cache; +} + +ObjPtr<mirror::ObjectArray<mirror::Object>> IntrinsicObjects::AllocateBootImageLiveObjects( + Thread* self, + ClassLinker* class_linker) REQUIRES_SHARED(Locks::mutator_lock_) { + // The objects used for the Integer.valueOf() intrinsic must remain live even if references + // to them are removed using reflection. Image roots are not accessible through reflection, + // so the array we construct here shall keep them alive. 
+ StackHandleScope<1> hs(self); + Handle<mirror::ObjectArray<mirror::Object>> integer_cache = + hs.NewHandle(LookupIntegerCache(self, class_linker)); + size_t live_objects_size = + (integer_cache != nullptr) ? (/* cache */ 1u + integer_cache->GetLength()) : 0u; + ObjPtr<mirror::ObjectArray<mirror::Object>> live_objects = + mirror::ObjectArray<mirror::Object>::Alloc( + self, GetClassRoot<mirror::ObjectArray<mirror::Object>>(class_linker), live_objects_size); + int32_t index = 0; + if (integer_cache != nullptr) { + live_objects->Set(index++, integer_cache.Get()); + for (int32_t i = 0, length = integer_cache->GetLength(); i != length; ++i) { + live_objects->Set(index++, integer_cache->Get(i)); + } + } + CHECK_EQ(index, live_objects->GetLength()); + + if (kIsDebugBuild && integer_cache != nullptr) { + CHECK_EQ(integer_cache.Get(), GetIntegerValueOfCache(live_objects)); + for (int32_t i = 0, len = integer_cache->GetLength(); i != len; ++i) { + CHECK_EQ(integer_cache->GetWithoutChecks(i), GetIntegerValueOfObject(live_objects, i)); + } + } + return live_objects; +} + +ObjPtr<mirror::ObjectArray<mirror::Object>> IntrinsicObjects::GetIntegerValueOfCache( + ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects) { + DCHECK(boot_image_live_objects != nullptr); + if (boot_image_live_objects->GetLength() == 0u) { + return nullptr; // No intrinsic objects. + } + // No need for read barrier for boot image object or for verifying the value that was just stored. + ObjPtr<mirror::Object> result = + boot_image_live_objects->GetWithoutChecks<kVerifyNone, kWithoutReadBarrier>(0); + DCHECK(result != nullptr); + DCHECK(result->IsObjectArray()); + DCHECK(result->GetClass()->DescriptorEquals("[Ljava/lang/Integer;")); + return ObjPtr<mirror::ObjectArray<mirror::Object>>::DownCast(result); +} + +ObjPtr<mirror::Object> IntrinsicObjects::GetIntegerValueOfObject( + ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects, + uint32_t index) { + DCHECK(boot_image_live_objects != nullptr); + DCHECK_NE(boot_image_live_objects->GetLength(), 0); + DCHECK_LT(index, + static_cast<uint32_t>(GetIntegerValueOfCache(boot_image_live_objects)->GetLength())); + + // No need for read barrier for boot image object or for verifying the value that was just stored. + ObjPtr<mirror::Object> result = + boot_image_live_objects->GetWithoutChecks<kVerifyNone, kWithoutReadBarrier>( + /* skip the IntegerCache.cache */ 1u + index); + DCHECK(result != nullptr); + DCHECK(result->GetClass()->DescriptorEquals("Ljava/lang/Integer;")); + return result; +} + +MemberOffset IntrinsicObjects::GetIntegerValueOfArrayDataOffset( + ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects) { + DCHECK_NE(boot_image_live_objects->GetLength(), 0); + MemberOffset result = mirror::ObjectArray<mirror::Object>::OffsetOfElement(1u); + DCHECK_EQ(GetIntegerValueOfObject(boot_image_live_objects, 0u), + (boot_image_live_objects + ->GetFieldObject<mirror::Object, kVerifyNone, kWithoutReadBarrier>(result))); + return result; +} + +} // namespace art diff --git a/compiler/optimizing/intrinsic_objects.h b/compiler/optimizing/intrinsic_objects.h new file mode 100644 index 0000000000..863017be38 --- /dev/null +++ b/compiler/optimizing/intrinsic_objects.h @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_INTRINSIC_OBJECTS_H_ +#define ART_COMPILER_OPTIMIZING_INTRINSIC_OBJECTS_H_ + +#include "base/bit_field.h" +#include "base/bit_utils.h" +#include "base/mutex.h" + +namespace art { + +class ClassLinker; +template <class MirrorType> class ObjPtr; +class MemberOffset; +class Thread; + +namespace mirror { +class Object; +template <class T> class ObjectArray; +} // namespace mirror + +class IntrinsicObjects { + public: + enum class PatchType { + kIntegerValueOfObject, + kIntegerValueOfArray, + + kLast = kIntegerValueOfArray + }; + + static uint32_t EncodePatch(PatchType patch_type, uint32_t index = 0u) { + DCHECK(patch_type == PatchType::kIntegerValueOfObject || index == 0u); + return PatchTypeField::Encode(static_cast<uint32_t>(patch_type)) | IndexField::Encode(index); + } + + static PatchType DecodePatchType(uint32_t intrinsic_data) { + return static_cast<PatchType>(PatchTypeField::Decode(intrinsic_data)); + } + + static uint32_t DecodePatchIndex(uint32_t intrinsic_data) { + return IndexField::Decode(intrinsic_data); + } + + static ObjPtr<mirror::ObjectArray<mirror::Object>> AllocateBootImageLiveObjects( + Thread* self, + ClassLinker* class_linker) REQUIRES_SHARED(Locks::mutator_lock_); + + // Functions for retrieving data for Integer.valueOf(). + static ObjPtr<mirror::ObjectArray<mirror::Object>> GetIntegerValueOfCache( + ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects) + REQUIRES_SHARED(Locks::mutator_lock_); + static ObjPtr<mirror::Object> GetIntegerValueOfObject( + ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects, + uint32_t index) REQUIRES_SHARED(Locks::mutator_lock_); + static MemberOffset GetIntegerValueOfArrayDataOffset( + ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects) + REQUIRES_SHARED(Locks::mutator_lock_); + + private: + static constexpr size_t kPatchTypeBits = + MinimumBitsToStore(static_cast<uint32_t>(PatchType::kLast)); + static constexpr size_t kIndexBits = BitSizeOf<uint32_t>() - kPatchTypeBits; + using PatchTypeField = BitField<uint32_t, 0u, kPatchTypeBits>; + using IndexField = BitField<uint32_t, kPatchTypeBits, kIndexBits>; +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_INTRINSIC_OBJECTS_H_ diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc index 056f533398..21efe11f31 100644 --- a/compiler/optimizing/intrinsics.cc +++ b/compiler/optimizing/intrinsics.cc @@ -21,10 +21,12 @@ #include "base/utils.h" #include "class_linker.h" #include "dex/invoke_type.h" -#include "driver/compiler_driver.h" #include "driver/compiler_options.h" -#include "mirror/dex_cache-inl.h" +#include "gc/space/image_space.h" +#include "image-inl.h" +#include "intrinsic_objects.h" #include "nodes.h" +#include "obj_ptr-inl.h" #include "scoped_thread_state_change-inl.h" #include "thread-current-inl.h" @@ -142,6 +144,7 @@ static bool CheckInvokeType(Intrinsics intrinsic, HInvoke* invoke) case kSuper: case kInterface: case kPolymorphic: + case kCustom: return false; } LOG(FATAL) << "Unknown intrinsic invoke type: " << intrinsic_type; @@ -220,112 
+223,315 @@ std::ostream& operator<<(std::ostream& os, const Intrinsics& intrinsic) { return os; } +static const char kIntegerCacheDescriptor[] = "Ljava/lang/Integer$IntegerCache;"; +static const char kIntegerDescriptor[] = "Ljava/lang/Integer;"; +static const char kIntegerArrayDescriptor[] = "[Ljava/lang/Integer;"; +static const char kLowFieldName[] = "low"; +static const char kHighFieldName[] = "high"; +static const char kValueFieldName[] = "value"; + +static ObjPtr<mirror::ObjectArray<mirror::Object>> GetBootImageLiveObjects() + REQUIRES_SHARED(Locks::mutator_lock_) { + gc::Heap* heap = Runtime::Current()->GetHeap(); + const std::vector<gc::space::ImageSpace*>& boot_image_spaces = heap->GetBootImageSpaces(); + DCHECK(!boot_image_spaces.empty()); + const ImageHeader& main_header = boot_image_spaces[0]->GetImageHeader(); + ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects = + ObjPtr<mirror::ObjectArray<mirror::Object>>::DownCast( + main_header.GetImageRoot<kWithoutReadBarrier>(ImageHeader::kBootImageLiveObjects)); + DCHECK(boot_image_live_objects != nullptr); + DCHECK(heap->ObjectIsInBootImageSpace(boot_image_live_objects)); + return boot_image_live_objects; +} + +static ObjPtr<mirror::Class> LookupInitializedClass(Thread* self, + ClassLinker* class_linker, + const char* descriptor) + REQUIRES_SHARED(Locks::mutator_lock_) { + ObjPtr<mirror::Class> klass = + class_linker->LookupClass(self, descriptor, /* class_loader */ nullptr); + DCHECK(klass != nullptr); + DCHECK(klass->IsInitialized()); + return klass; +} + +static ObjPtr<mirror::ObjectArray<mirror::Object>> GetIntegerCacheArray( + ObjPtr<mirror::Class> cache_class) REQUIRES_SHARED(Locks::mutator_lock_) { + ArtField* cache_field = cache_class->FindDeclaredStaticField("cache", kIntegerArrayDescriptor); + DCHECK(cache_field != nullptr); + return ObjPtr<mirror::ObjectArray<mirror::Object>>::DownCast(cache_field->GetObject(cache_class)); +} + +static int32_t GetIntegerCacheField(ObjPtr<mirror::Class> cache_class, const char* field_name) + REQUIRES_SHARED(Locks::mutator_lock_) { + ArtField* field = cache_class->FindDeclaredStaticField(field_name, "I"); + DCHECK(field != nullptr); + return field->GetInt(cache_class); +} + +static bool CheckIntegerCache(Thread* self, + ClassLinker* class_linker, + ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects, + ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_cache) + REQUIRES_SHARED(Locks::mutator_lock_) { + DCHECK(boot_image_cache != nullptr); + + // Since we have a cache in the boot image, both java.lang.Integer and + // java.lang.Integer$IntegerCache must be initialized in the boot image. + ObjPtr<mirror::Class> cache_class = + LookupInitializedClass(self, class_linker, kIntegerCacheDescriptor); + ObjPtr<mirror::Class> integer_class = + LookupInitializedClass(self, class_linker, kIntegerDescriptor); + + // Check that the current cache is the same as the `boot_image_cache`. + ObjPtr<mirror::ObjectArray<mirror::Object>> current_cache = GetIntegerCacheArray(cache_class); + if (current_cache != boot_image_cache) { + return false; // Messed up IntegerCache.cache. + } + + // Check that the range matches the boot image cache length. + int32_t low = GetIntegerCacheField(cache_class, kLowFieldName); + int32_t high = GetIntegerCacheField(cache_class, kHighFieldName); + if (boot_image_cache->GetLength() != high - low + 1) { + return false; // Messed up IntegerCache.low or IntegerCache.high. 
+ } + + // Check that the elements match the boot image intrinsic objects and check their values as well. + ArtField* value_field = integer_class->FindDeclaredInstanceField(kValueFieldName, "I"); + DCHECK(value_field != nullptr); + for (int32_t i = 0, len = boot_image_cache->GetLength(); i != len; ++i) { + ObjPtr<mirror::Object> boot_image_object = + IntrinsicObjects::GetIntegerValueOfObject(boot_image_live_objects, i); + DCHECK(Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boot_image_object)); + // No need for read barrier for comparison with a boot image object. + ObjPtr<mirror::Object> current_object = + boot_image_cache->GetWithoutChecks<kVerifyNone, kWithoutReadBarrier>(i); + if (boot_image_object != current_object) { + return false; // Messed up IntegerCache.cache[i] + } + if (value_field->GetInt(boot_image_object) != low + i) { + return false; // Messed up IntegerCache.cache[i].value. + } + } + + return true; +} + void IntrinsicVisitor::ComputeIntegerValueOfLocations(HInvoke* invoke, CodeGenerator* codegen, Location return_location, Location first_argument_location) { - if (Runtime::Current()->IsAotCompiler()) { - if (codegen->GetCompilerOptions().IsBootImage() || - codegen->GetCompilerOptions().GetCompilePic()) { - // TODO(ngeoffray): Support boot image compilation. + // The intrinsic will call if it needs to allocate a j.l.Integer. + LocationSummary::CallKind call_kind = LocationSummary::kCallOnMainOnly; + const CompilerOptions& compiler_options = codegen->GetCompilerOptions(); + if (compiler_options.IsBootImage()) { + // Piggyback on the method load kind to determine whether we can use PC-relative addressing. + // This should cover both the testing config (non-PIC boot image) and codegens that reject + // PC-relative load kinds and fall back to the runtime call. 
+ if (!invoke->AsInvokeStaticOrDirect()->HasPcRelativeMethodLoadKind()) { + return; + } + if (!compiler_options.IsImageClass(kIntegerCacheDescriptor) || + !compiler_options.IsImageClass(kIntegerDescriptor)) { + return; + } + ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); + Thread* self = Thread::Current(); + ScopedObjectAccess soa(self); + ObjPtr<mirror::Class> cache_class = class_linker->LookupClass( + self, kIntegerCacheDescriptor, /* class_loader */ nullptr); + DCHECK(cache_class != nullptr); + if (UNLIKELY(!cache_class->IsInitialized())) { + LOG(WARNING) << "Image class " << cache_class->PrettyDescriptor() << " is uninitialized."; return; } + ObjPtr<mirror::Class> integer_class = + class_linker->LookupClass(self, kIntegerDescriptor, /* class_loader */ nullptr); + DCHECK(integer_class != nullptr); + if (UNLIKELY(!integer_class->IsInitialized())) { + LOG(WARNING) << "Image class " << integer_class->PrettyDescriptor() << " is uninitialized."; + return; + } + int32_t low = GetIntegerCacheField(cache_class, kLowFieldName); + int32_t high = GetIntegerCacheField(cache_class, kHighFieldName); + if (kIsDebugBuild) { + ObjPtr<mirror::ObjectArray<mirror::Object>> current_cache = GetIntegerCacheArray(cache_class); + CHECK(current_cache != nullptr); + CHECK_EQ(current_cache->GetLength(), high - low + 1); + ArtField* value_field = integer_class->FindDeclaredInstanceField(kValueFieldName, "I"); + CHECK(value_field != nullptr); + for (int32_t i = 0, len = current_cache->GetLength(); i != len; ++i) { + ObjPtr<mirror::Object> current_object = current_cache->GetWithoutChecks(i); + CHECK(current_object != nullptr); + CHECK_EQ(value_field->GetInt(current_object), low + i); + } + } + if (invoke->InputAt(0)->IsIntConstant()) { + int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue(); + if (static_cast<uint32_t>(value) - static_cast<uint32_t>(low) < + static_cast<uint32_t>(high - low + 1)) { + // No call, we shall use direct pointer to the Integer object. + call_kind = LocationSummary::kNoCall; + } + } + } else { + Runtime* runtime = Runtime::Current(); + if (runtime->GetHeap()->GetBootImageSpaces().empty()) { + return; // Running without boot image, cannot use required boot image objects. + } + Thread* self = Thread::Current(); + ScopedObjectAccess soa(self); + ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects = GetBootImageLiveObjects(); + ObjPtr<mirror::ObjectArray<mirror::Object>> cache = + IntrinsicObjects::GetIntegerValueOfCache(boot_image_live_objects); + if (cache == nullptr) { + return; // No cache in the boot image. + } + if (runtime->UseJitCompilation()) { + if (!CheckIntegerCache(self, runtime->GetClassLinker(), boot_image_live_objects, cache)) { + return; // The cache was somehow messed up, probably by using reflection. + } + } else { + DCHECK(runtime->IsAotCompiler()); + DCHECK(CheckIntegerCache(self, runtime->GetClassLinker(), boot_image_live_objects, cache)); + if (invoke->InputAt(0)->IsIntConstant()) { + int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue(); + // Retrieve the `value` from the lowest cached Integer. 
+ ObjPtr<mirror::Object> low_integer = + IntrinsicObjects::GetIntegerValueOfObject(boot_image_live_objects, 0u); + ObjPtr<mirror::Class> integer_class = + low_integer->GetClass<kVerifyNone, kWithoutReadBarrier>(); + ArtField* value_field = integer_class->FindDeclaredInstanceField(kValueFieldName, "I"); + DCHECK(value_field != nullptr); + int32_t low = value_field->GetInt(low_integer); + if (static_cast<uint32_t>(value) - static_cast<uint32_t>(low) < + static_cast<uint32_t>(cache->GetLength())) { + // No call, we shall use direct pointer to the Integer object. Note that we cannot + // do this for JIT as the "low" can change through reflection before emitting the code. + call_kind = LocationSummary::kNoCall; + } + } + } } - IntegerValueOfInfo info = ComputeIntegerValueOfInfo(); - - // Most common case is that we have found all we needed (classes are initialized - // and in the boot image). Bail if not. - if (info.integer_cache == nullptr || - info.integer == nullptr || - info.cache == nullptr || - info.value_offset == 0 || - // low and high cannot be 0, per the spec. - info.low == 0 || - info.high == 0) { - LOG(INFO) << "Integer.valueOf will not be optimized"; - return; + ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator(); + LocationSummary* locations = new (allocator) LocationSummary(invoke, call_kind, kIntrinsified); + if (call_kind == LocationSummary::kCallOnMainOnly) { + locations->SetInAt(0, Location::RegisterOrConstant(invoke->InputAt(0))); + locations->AddTemp(first_argument_location); + locations->SetOut(return_location); + } else { + locations->SetInAt(0, Location::ConstantLocation(invoke->InputAt(0)->AsConstant())); + locations->SetOut(Location::RequiresRegister()); } +} - // The intrinsic will call if it needs to allocate a j.l.Integer. - LocationSummary* locations = new (invoke->GetBlock()->GetGraph()->GetAllocator()) LocationSummary( - invoke, LocationSummary::kCallOnMainOnly, kIntrinsified); - if (!invoke->InputAt(0)->IsConstant()) { - locations->SetInAt(0, Location::RequiresRegister()); - } - locations->AddTemp(first_argument_location); - locations->SetOut(return_location); +static int32_t GetIntegerCacheLowFromIntegerCache(Thread* self, ClassLinker* class_linker) + REQUIRES_SHARED(Locks::mutator_lock_) { + ObjPtr<mirror::Class> cache_class = + LookupInitializedClass(self, class_linker, kIntegerCacheDescriptor); + return GetIntegerCacheField(cache_class, kLowFieldName); +} + +static uint32_t CalculateBootImageOffset(ObjPtr<mirror::Object> object) + REQUIRES_SHARED(Locks::mutator_lock_) { + gc::Heap* heap = Runtime::Current()->GetHeap(); + DCHECK(heap->ObjectIsInBootImageSpace(object)); + return reinterpret_cast<const uint8_t*>(object.Ptr()) - heap->GetBootImageSpaces()[0]->Begin(); } -IntrinsicVisitor::IntegerValueOfInfo IntrinsicVisitor::ComputeIntegerValueOfInfo() { +inline IntrinsicVisitor::IntegerValueOfInfo::IntegerValueOfInfo() + : value_offset(0), + low(0), + length(0u), + integer_boot_image_offset(kInvalidReference), + value_boot_image_reference(kInvalidReference) {} + +IntrinsicVisitor::IntegerValueOfInfo IntrinsicVisitor::ComputeIntegerValueOfInfo( + HInvoke* invoke, const CompilerOptions& compiler_options) { // Note that we could cache all of the data looked up here. but there's no good // location for it. We don't want to add it to WellKnownClasses, to avoid creating global // jni values. Adding it as state to the compiler singleton seems like wrong // separation of concerns. // The need for this data should be pretty rare though. 
- // The most common case is that the classes are in the boot image and initialized, - // which is easy to generate code for. We bail if not. - Thread* self = Thread::Current(); - ScopedObjectAccess soa(self); + // Note that at this point we can no longer abort the code generation. Therefore, + // we need to provide data that shall not lead to a crash even if the fields were + // modified through reflection since ComputeIntegerValueOfLocations() when JITting. + Runtime* runtime = Runtime::Current(); ClassLinker* class_linker = runtime->GetClassLinker(); - gc::Heap* heap = runtime->GetHeap(); - IntegerValueOfInfo info; - info.integer_cache = - class_linker->FindSystemClass(self, "Ljava/lang/Integer$IntegerCache;").Ptr(); - if (info.integer_cache == nullptr) { - self->ClearException(); - return info; - } - if (!heap->ObjectIsInBootImageSpace(info.integer_cache) || !info.integer_cache->IsInitialized()) { - // Optimization only works if the class is initialized and in the boot image. - return info; - } - info.integer = class_linker->FindSystemClass(self, "Ljava/lang/Integer;").Ptr(); - if (info.integer == nullptr) { - self->ClearException(); - return info; - } - if (!heap->ObjectIsInBootImageSpace(info.integer) || !info.integer->IsInitialized()) { - // Optimization only works if the class is initialized and in the boot image. - return info; - } - - ArtField* field = info.integer_cache->FindDeclaredStaticField("cache", "[Ljava/lang/Integer;"); - if (field == nullptr) { - return info; - } - info.cache = static_cast<mirror::ObjectArray<mirror::Object>*>( - field->GetObject(info.integer_cache).Ptr()); - if (info.cache == nullptr) { - return info; - } - - if (!heap->ObjectIsInBootImageSpace(info.cache)) { - // Optimization only works if the object is in the boot image. - return info; - } - - field = info.integer->FindDeclaredInstanceField("value", "I"); - if (field == nullptr) { - return info; - } - info.value_offset = field->GetOffset().Int32Value(); - - field = info.integer_cache->FindDeclaredStaticField("low", "I"); - if (field == nullptr) { - return info; - } - info.low = field->GetInt(info.integer_cache); + Thread* self = Thread::Current(); + ScopedObjectAccess soa(self); - field = info.integer_cache->FindDeclaredStaticField("high", "I"); - if (field == nullptr) { - return info; + IntegerValueOfInfo info; + if (compiler_options.IsBootImage()) { + ObjPtr<mirror::Class> integer_class = + LookupInitializedClass(self, class_linker, kIntegerDescriptor); + ArtField* value_field = integer_class->FindDeclaredInstanceField(kValueFieldName, "I"); + DCHECK(value_field != nullptr); + info.value_offset = value_field->GetOffset().Uint32Value(); + ObjPtr<mirror::Class> cache_class = + LookupInitializedClass(self, class_linker, kIntegerCacheDescriptor); + info.low = GetIntegerCacheField(cache_class, kLowFieldName); + int32_t high = GetIntegerCacheField(cache_class, kHighFieldName); + info.length = dchecked_integral_cast<uint32_t>(high - info.low + 1); + + info.integer_boot_image_offset = IntegerValueOfInfo::kInvalidReference; + if (invoke->InputAt(0)->IsIntConstant()) { + int32_t input_value = invoke->InputAt(0)->AsIntConstant()->GetValue(); + uint32_t index = static_cast<uint32_t>(input_value) - static_cast<uint32_t>(info.low); + if (index < static_cast<uint32_t>(info.length)) { + info.value_boot_image_reference = IntrinsicObjects::EncodePatch( + IntrinsicObjects::PatchType::kIntegerValueOfObject, index); + } else { + // Not in the cache. 
+ info.value_boot_image_reference = IntegerValueOfInfo::kInvalidReference; + } + } else { + info.array_data_boot_image_reference = + IntrinsicObjects::EncodePatch(IntrinsicObjects::PatchType::kIntegerValueOfArray); + } + } else { + ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects = GetBootImageLiveObjects(); + ObjPtr<mirror::Object> low_integer = + IntrinsicObjects::GetIntegerValueOfObject(boot_image_live_objects, 0u); + ObjPtr<mirror::Class> integer_class = low_integer->GetClass<kVerifyNone, kWithoutReadBarrier>(); + ArtField* value_field = integer_class->FindDeclaredInstanceField(kValueFieldName, "I"); + DCHECK(value_field != nullptr); + info.value_offset = value_field->GetOffset().Uint32Value(); + if (runtime->UseJitCompilation()) { + // Use the current `IntegerCache.low` for JIT to avoid truly surprising behavior if the + // code messes up the `value` field in the lowest cached Integer using reflection. + info.low = GetIntegerCacheLowFromIntegerCache(self, class_linker); + } else { + // For app AOT, the `low_integer->value` should be the same as `IntegerCache.low`. + info.low = value_field->GetInt(low_integer); + DCHECK_EQ(info.low, GetIntegerCacheLowFromIntegerCache(self, class_linker)); + } + // Do not look at `IntegerCache.high`, use the immutable length of the cache array instead. + info.length = dchecked_integral_cast<uint32_t>( + IntrinsicObjects::GetIntegerValueOfCache(boot_image_live_objects)->GetLength()); + + info.integer_boot_image_offset = CalculateBootImageOffset(integer_class); + if (invoke->InputAt(0)->IsIntConstant()) { + int32_t input_value = invoke->InputAt(0)->AsIntConstant()->GetValue(); + uint32_t index = static_cast<uint32_t>(input_value) - static_cast<uint32_t>(info.low); + if (index < static_cast<uint32_t>(info.length)) { + ObjPtr<mirror::Object> integer = + IntrinsicObjects::GetIntegerValueOfObject(boot_image_live_objects, index); + info.value_boot_image_reference = CalculateBootImageOffset(integer); + } else { + // Not in the cache. + info.value_boot_image_reference = IntegerValueOfInfo::kInvalidReference; + } + } else { + info.array_data_boot_image_reference = + CalculateBootImageOffset(boot_image_live_objects) + + IntrinsicObjects::GetIntegerValueOfArrayDataOffset(boot_image_live_objects).Uint32Value(); + } } - info.high = field->GetInt(info.integer_cache); - DCHECK_EQ(info.cache->GetLength(), info.high - info.low + 1); return info; } diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h index 30cffac015..993648f765 100644 --- a/compiler/optimizing/intrinsics.h +++ b/compiler/optimizing/intrinsics.h @@ -126,33 +126,41 @@ class IntrinsicVisitor : public ValueObject { Location return_location, Location first_argument_location); - // Temporary data structure for holding Integer.valueOf useful data. We only - // use it if the mirror::Class* are in the boot image, so it is fine to keep raw - // mirror::Class pointers in this structure. + // Temporary data structure for holding Integer.valueOf data for generating code. + // We only use it if the boot image contains the IntegerCache objects. struct IntegerValueOfInfo { - IntegerValueOfInfo() - : integer_cache(nullptr), - integer(nullptr), - cache(nullptr), - low(0), - high(0), - value_offset(0) {} - - // The java.lang.IntegerCache class. - mirror::Class* integer_cache; - // The java.lang.Integer class. - mirror::Class* integer; - // Value of java.lang.IntegerCache#cache. - mirror::ObjectArray<mirror::Object>* cache; - // Value of java.lang.IntegerCache#low. 
+ static constexpr uint32_t kInvalidReference = static_cast<uint32_t>(-1); + + IntegerValueOfInfo(); + + // Offset of the Integer.value field for initializing a newly allocated instance. + uint32_t value_offset; + // The low value in the cache. int32_t low; - // Value of java.lang.IntegerCache#high. - int32_t high; - // The offset of java.lang.Integer.value. - int32_t value_offset; + // The length of the cache array. + uint32_t length; + + // Boot image offset of java.lang.Integer for allocating an instance. + uint32_t integer_boot_image_offset; // Set to kInvalidReference when compiling the boot image. + + // This union contains references to the boot image. For app AOT or JIT compilation, + // these are the boot image offsets of the target. For boot image compilation, the + // location shall be known only at link time, so we encode a symbolic reference using + // IntrinsicObjects::EncodePatch(). + union { + // The target value for a constant input in the cache range. If the constant input + // is out of range (use `low` and `length` to check), this value is bogus (set to + // kInvalidReference) and the code must allocate a new Integer. + uint32_t value_boot_image_reference; + + // The cache array data used for a non-constant input in the cache range. + // If the input is out of range, the code must allocate a new Integer. + uint32_t array_data_boot_image_reference; + }; }; - static IntegerValueOfInfo ComputeIntegerValueOfInfo(); + static IntegerValueOfInfo ComputeIntegerValueOfInfo( + HInvoke* invoke, const CompilerOptions& compiler_options); protected: IntrinsicVisitor() {} diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index c3d643a7d1..4b2bcc8ca8 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -2791,33 +2791,27 @@ void IntrinsicLocationsBuilderARM64::VisitIntegerValueOf(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitIntegerValueOf(HInvoke* invoke) { - IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo(); + IntrinsicVisitor::IntegerValueOfInfo info = + IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions()); LocationSummary* locations = invoke->GetLocations(); MacroAssembler* masm = GetVIXLAssembler(); Register out = RegisterFrom(locations->Out(), DataType::Type::kReference); UseScratchRegisterScope temps(masm); Register temp = temps.AcquireW(); - InvokeRuntimeCallingConvention calling_convention; - Register argument = calling_convention.GetRegisterAt(0); if (invoke->InputAt(0)->IsConstant()) { int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue(); - if (value >= info.low && value <= info.high) { + if (static_cast<uint32_t>(value - info.low) < info.length) { // Just embed the j.l.Integer in the code. - ScopedObjectAccess soa(Thread::Current()); - mirror::Object* boxed = info.cache->Get(value + (-info.low)); - DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed)); - uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed)); - __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address)); + DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference); + codegen_->LoadBootImageAddress(out, info.value_boot_image_reference); } else { + DCHECK(locations->CanCall()); // Allocate and initialize a new j.l.Integer. // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the // JIT object table. 
- uint32_t address = - dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); - __ Ldr(argument.W(), codegen_->DeduplicateBootImageAddressLiteral(address)); - codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(), + info.integer_boot_image_offset); __ Mov(temp.W(), value); __ Str(temp.W(), HeapOperand(out.W(), info.value_offset)); // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation @@ -2825,16 +2819,15 @@ void IntrinsicCodeGeneratorARM64::VisitIntegerValueOf(HInvoke* invoke) { codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); } } else { + DCHECK(locations->CanCall()); Register in = RegisterFrom(locations->InAt(0), DataType::Type::kInt32); // Check bounds of our cache. __ Add(out.W(), in.W(), -info.low); - __ Cmp(out.W(), info.high - info.low + 1); + __ Cmp(out.W(), info.length); vixl::aarch64::Label allocate, done; __ B(&allocate, hs); // If the value is within the bounds, load the j.l.Integer directly from the array. - uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); - uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache)); - __ Ldr(temp.W(), codegen_->DeduplicateBootImageAddressLiteral(data_offset + address)); + codegen_->LoadBootImageAddress(temp, info.array_data_boot_image_reference); MemOperand source = HeapOperand( temp, out.X(), LSL, DataType::SizeShift(DataType::Type::kReference)); codegen_->Load(DataType::Type::kReference, out, source); @@ -2842,10 +2835,8 @@ void IntrinsicCodeGeneratorARM64::VisitIntegerValueOf(HInvoke* invoke) { __ B(&done); __ Bind(&allocate); // Otherwise allocate and initialize a new j.l.Integer. - address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); - __ Ldr(argument.W(), codegen_->DeduplicateBootImageAddressLiteral(address)); - codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(), + info.integer_boot_image_offset); __ Str(in.W(), HeapOperand(out.W(), info.value_offset)); // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation // one. 
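Note on the bounds check used in the backends above and below: replacing the old signed test `value >= info.low && value <= info.high` with `static_cast<uint32_t>(value - info.low) < info.length` folds both bounds into one unsigned comparison. A minimal self-contained sketch of the idiom (the function name is illustrative, not part of the patch):

#include <cstdint>

// True when `value` lies in [low, low + length). If `value < low`, the
// unsigned subtraction wraps around to a large number, so the single `<`
// test rejects it; one compare thus covers both bounds.
inline bool InCacheRange(int32_t value, int32_t low, uint32_t length) {
  return static_cast<uint32_t>(value) - static_cast<uint32_t>(low) < length;
}

For example, with low = -128 and length = 256, a query for -129 wraps to 0xFFFFFFFF and is correctly rejected, while 0 maps to index 128 and is accepted.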
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc index fecf1ccbfa..f11e5a1989 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.cc +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -2940,33 +2940,27 @@ void IntrinsicLocationsBuilderARMVIXL::VisitIntegerValueOf(HInvoke* invoke) { } void IntrinsicCodeGeneratorARMVIXL::VisitIntegerValueOf(HInvoke* invoke) { - IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo(); + IntrinsicVisitor::IntegerValueOfInfo info = + IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions()); LocationSummary* locations = invoke->GetLocations(); ArmVIXLAssembler* const assembler = GetAssembler(); vixl32::Register out = RegisterFrom(locations->Out()); UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); vixl32::Register temp = temps.Acquire(); - InvokeRuntimeCallingConventionARMVIXL calling_convention; - vixl32::Register argument = calling_convention.GetRegisterAt(0); if (invoke->InputAt(0)->IsConstant()) { int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue(); - if (value >= info.low && value <= info.high) { + if (static_cast<uint32_t>(value - info.low) < info.length) { // Just embed the j.l.Integer in the code. - ScopedObjectAccess soa(Thread::Current()); - mirror::Object* boxed = info.cache->Get(value + (-info.low)); - DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed)); - uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed)); - __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address)); + DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference); + codegen_->LoadBootImageAddress(out, info.value_boot_image_reference); } else { + DCHECK(locations->CanCall()); // Allocate and initialize a new j.l.Integer. // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the // JIT object table. - uint32_t address = - dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); - __ Ldr(argument, codegen_->DeduplicateBootImageAddressLiteral(address)); - codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(), + info.integer_boot_image_offset); __ Mov(temp, value); assembler->StoreToOffset(kStoreWord, temp, out, info.value_offset); // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation @@ -2974,25 +2968,22 @@ void IntrinsicCodeGeneratorARMVIXL::VisitIntegerValueOf(HInvoke* invoke) { codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); } } else { + DCHECK(locations->CanCall()); vixl32::Register in = RegisterFrom(locations->InAt(0)); // Check bounds of our cache. __ Add(out, in, -info.low); - __ Cmp(out, info.high - info.low + 1); + __ Cmp(out, info.length); vixl32::Label allocate, done; __ B(hs, &allocate, /* is_far_target */ false); // If the value is within the bounds, load the j.l.Integer directly from the array. 
- uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); - uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache)); - __ Ldr(temp, codegen_->DeduplicateBootImageAddressLiteral(data_offset + address)); + codegen_->LoadBootImageAddress(temp, info.array_data_boot_image_reference); codegen_->LoadFromShiftedRegOffset(DataType::Type::kReference, locations->Out(), temp, out); assembler->MaybeUnpoisonHeapReference(out); __ B(&done); __ Bind(&allocate); // Otherwise allocate and initialize a new j.l.Integer. - address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); - __ Ldr(argument, codegen_->DeduplicateBootImageAddressLiteral(address)); - codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(), + info.integer_boot_image_offset); assembler->StoreToOffset(kStoreWord, in, out, info.value_offset); // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation // one. diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc index ae248a3e5c..01d9f962f2 100644 --- a/compiler/optimizing/intrinsics_mips.cc +++ b/compiler/optimizing/intrinsics_mips.cc @@ -2601,59 +2601,50 @@ void IntrinsicLocationsBuilderMIPS::VisitIntegerValueOf(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS::VisitIntegerValueOf(HInvoke* invoke) { - IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo(); + IntrinsicVisitor::IntegerValueOfInfo info = + IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions()); LocationSummary* locations = invoke->GetLocations(); MipsAssembler* assembler = GetAssembler(); InstructionCodeGeneratorMIPS* icodegen = down_cast<InstructionCodeGeneratorMIPS*>(codegen_->GetInstructionVisitor()); Register out = locations->Out().AsRegister<Register>(); - InvokeRuntimeCallingConvention calling_convention; if (invoke->InputAt(0)->IsConstant()) { int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue(); - if (value >= info.low && value <= info.high) { + if (static_cast<uint32_t>(value - info.low) < info.length) { // Just embed the j.l.Integer in the code. - ScopedObjectAccess soa(Thread::Current()); - mirror::Object* boxed = info.cache->Get(value + (-info.low)); - DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed)); - uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed)); - __ LoadConst32(out, address); + DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference); + codegen_->LoadBootImageAddress(out, info.value_boot_image_reference); } else { + DCHECK(locations->CanCall()); // Allocate and initialize a new j.l.Integer. // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the // JIT object table. 
- uint32_t address = - dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); - __ LoadConst32(calling_convention.GetRegisterAt(0), address); - codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(), + info.integer_boot_image_offset); __ StoreConstToOffset(kStoreWord, value, out, info.value_offset, TMP); // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation // one. icodegen->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); } } else { + DCHECK(locations->CanCall()); Register in = locations->InAt(0).AsRegister<Register>(); MipsLabel allocate, done; - int32_t count = static_cast<uint32_t>(info.high) - info.low + 1; - // Is (info.low <= in) && (in <= info.high)? __ Addiu32(out, in, -info.low); - // As unsigned quantities is out < (info.high - info.low + 1)? - if (IsInt<16>(count)) { - __ Sltiu(AT, out, count); + // As unsigned quantities is out < info.length ? + if (IsUint<15>(info.length)) { + __ Sltiu(AT, out, info.length); } else { - __ LoadConst32(AT, count); + __ LoadConst32(AT, info.length); __ Sltu(AT, out, AT); } - // Branch if out >= (info.high - info.low + 1). - // This means that "in" is outside of the range [info.low, info.high]. + // Branch if out >= info.length. This means that "in" is outside of the valid range. __ Beqz(AT, &allocate); // If the value is within the bounds, load the j.l.Integer directly from the array. - uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); - uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache)); - __ LoadConst32(TMP, data_offset + address); + codegen_->LoadBootImageAddress(TMP, info.array_data_boot_image_reference); __ ShiftAndAdd(out, out, TMP, TIMES_4); __ Lw(out, out, 0); __ MaybeUnpoisonHeapReference(out); @@ -2661,10 +2652,8 @@ void IntrinsicCodeGeneratorMIPS::VisitIntegerValueOf(HInvoke* invoke) { __ Bind(&allocate); // Otherwise allocate and initialize a new j.l.Integer. - address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); - __ LoadConst32(calling_convention.GetRegisterAt(0), address); - codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(), + info.integer_boot_image_offset); __ StoreToOffset(kStoreWord, in, out, info.value_offset); // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation // one. 
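For a non-constant input, every backend in this change emits the same shape: bias the input by `info.low`, compare unsigned against `info.length`, load the boxed object directly from the boot-image cache array on a hit, and fall back to runtime allocation on a miss. A rough C++ sketch of that lowering (the types and helpers here are stand-ins, not APIs from the patch):

#include <cstdint>

struct JLInteger { int32_t value; };  // stand-in for java.lang.Integer
extern JLInteger* cache_data[];       // stand-in for the boot-image array data
JLInteger* AllocateInteger();         // stand-in for the runtime allocation call

JLInteger* ValueOfLowering(int32_t in, int32_t low, uint32_t length) {
  uint32_t index = static_cast<uint32_t>(in) - static_cast<uint32_t>(low);
  if (index < length) {
    return cache_data[index];  // hit: one load from the boot image, no allocation
  }
  JLInteger* out = AllocateInteger();  // miss: the AllocateInstanceForIntrinsic() path
  out->value = in;  // store at info.value_offset; ARM and MIPS follow this with a
                    // StoreStore barrier because `value` is a final field
  return out;
}

The constant-input case is resolved at compile time with the same index computation: in range, the generated code embeds a direct boot-image reference to the cached object; out of range, it unconditionally takes the allocation path.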
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index 9a9ae714bc..0bd69c6ec8 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -2267,54 +2267,45 @@ void IntrinsicLocationsBuilderMIPS64::VisitIntegerValueOf(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitIntegerValueOf(HInvoke* invoke) { - IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo(); + IntrinsicVisitor::IntegerValueOfInfo info = + IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions()); LocationSummary* locations = invoke->GetLocations(); Mips64Assembler* assembler = GetAssembler(); InstructionCodeGeneratorMIPS64* icodegen = down_cast<InstructionCodeGeneratorMIPS64*>(codegen_->GetInstructionVisitor()); GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - InvokeRuntimeCallingConvention calling_convention; if (invoke->InputAt(0)->IsConstant()) { int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue(); - if (value >= info.low && value <= info.high) { + if (static_cast<uint32_t>(value - info.low) < info.length) { // Just embed the j.l.Integer in the code. - ScopedObjectAccess soa(Thread::Current()); - mirror::Object* boxed = info.cache->Get(value + (-info.low)); - DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed)); - uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed)); - __ LoadConst64(out, address); + DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference); + codegen_->LoadBootImageAddress(out, info.value_boot_image_reference); } else { + DCHECK(locations->CanCall()); // Allocate and initialize a new j.l.Integer. // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the // JIT object table. - uint32_t address = - dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); - __ LoadConst64(calling_convention.GetRegisterAt(0), address); - codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(), + info.integer_boot_image_offset); __ StoreConstToOffset(kStoreWord, value, out, info.value_offset, TMP); // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation // one. icodegen->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); } } else { + DCHECK(locations->CanCall()); GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); Mips64Label allocate, done; - int32_t count = static_cast<uint32_t>(info.high) - info.low + 1; - // Is (info.low <= in) && (in <= info.high)? __ Addiu32(out, in, -info.low); - // As unsigned quantities is out < (info.high - info.low + 1)? - __ LoadConst32(AT, count); - // Branch if out >= (info.high - info.low + 1). - // This means that "in" is outside of the range [info.low, info.high]. + // As unsigned quantities is out < info.length ? + __ LoadConst32(AT, info.length); + // Branch if out >= info.length . This means that "in" is outside of the valid range. __ Bgeuc(out, AT, &allocate); // If the value is within the bounds, load the j.l.Integer directly from the array. 
- uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); - uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache)); - __ LoadConst64(TMP, data_offset + address); + codegen_->LoadBootImageAddress(TMP, info.array_data_boot_image_reference); __ Dlsa(out, out, TMP, TIMES_4); __ Lwu(out, out, 0); __ MaybeUnpoisonHeapReference(out); @@ -2322,10 +2313,8 @@ void IntrinsicCodeGeneratorMIPS64::VisitIntegerValueOf(HInvoke* invoke) { __ Bind(&allocate); // Otherwise allocate and initialize a new j.l.Integer. - address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); - __ LoadConst64(calling_convention.GetRegisterAt(0), address); - codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(), + info.integer_boot_image_offset); __ StoreToOffset(kStoreWord, in, out, info.value_offset); // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation // one. diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index f84a33bb8e..98cea35af1 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -2851,16 +2851,30 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitIntegerValueOf(HInvoke* invoke) { + DCHECK(invoke->IsInvokeStaticOrDirect()); InvokeRuntimeCallingConvention calling_convention; IntrinsicVisitor::ComputeIntegerValueOfLocations( invoke, codegen_, Location::RegisterLocation(EAX), Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + + LocationSummary* locations = invoke->GetLocations(); + if (locations != nullptr) { + HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect(); + if (invoke_static_or_direct->HasSpecialInput() && + invoke->InputAt(invoke_static_or_direct->GetSpecialInputIndex()) + ->IsX86ComputeBaseMethodAddress()) { + locations->SetInAt(invoke_static_or_direct->GetSpecialInputIndex(), + Location::RequiresRegister()); + } + } } void IntrinsicCodeGeneratorX86::VisitIntegerValueOf(HInvoke* invoke) { - IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo(); + DCHECK(invoke->IsInvokeStaticOrDirect()); + IntrinsicVisitor::IntegerValueOfInfo info = + IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions()); LocationSummary* locations = invoke->GetLocations(); X86Assembler* assembler = GetAssembler(); @@ -2868,42 +2882,58 @@ void IntrinsicCodeGeneratorX86::VisitIntegerValueOf(HInvoke* invoke) { InvokeRuntimeCallingConvention calling_convention; if (invoke->InputAt(0)->IsConstant()) { int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue(); - if (value >= info.low && value <= info.high) { + if (static_cast<uint32_t>(value - info.low) < info.length) { // Just embed the j.l.Integer in the code. 
- ScopedObjectAccess soa(Thread::Current()); - mirror::Object* boxed = info.cache->Get(value + (-info.low)); - DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed)); - uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed)); - __ movl(out, Immediate(address)); + DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference); + codegen_->LoadBootImageAddress( + out, info.value_boot_image_reference, invoke->AsInvokeStaticOrDirect()); } else { + DCHECK(locations->CanCall()); // Allocate and initialize a new j.l.Integer. // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the // JIT object table. - uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); - __ movl(calling_convention.GetRegisterAt(0), Immediate(address)); - codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(), + info.integer_boot_image_offset); __ movl(Address(out, info.value_offset), Immediate(value)); } } else { + DCHECK(locations->CanCall()); Register in = locations->InAt(0).AsRegister<Register>(); // Check bounds of our cache. __ leal(out, Address(in, -info.low)); - __ cmpl(out, Immediate(info.high - info.low + 1)); + __ cmpl(out, Immediate(info.length)); NearLabel allocate, done; __ j(kAboveEqual, &allocate); // If the value is within the bounds, load the j.l.Integer directly from the array. - uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); - uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache)); - __ movl(out, Address(out, TIMES_4, data_offset + address)); + constexpr size_t kElementSize = sizeof(mirror::HeapReference<mirror::Object>); + static_assert((1u << TIMES_4) == sizeof(mirror::HeapReference<mirror::Object>), + "Check heap reference size."); + if (codegen_->GetCompilerOptions().IsBootImage()) { + DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u); + size_t method_address_index = invoke->AsInvokeStaticOrDirect()->GetSpecialInputIndex(); + HX86ComputeBaseMethodAddress* method_address = + invoke->InputAt(method_address_index)->AsX86ComputeBaseMethodAddress(); + DCHECK(method_address != nullptr); + Register method_address_reg = + invoke->GetLocations()->InAt(method_address_index).AsRegister<Register>(); + __ movl(out, Address(method_address_reg, out, TIMES_4, CodeGeneratorX86::kDummy32BitOffset)); + codegen_->RecordBootImageIntrinsicPatch(method_address, info.array_data_boot_image_reference); + } else { + // Note: We're about to clobber the index in `out`, so we need to use `in` and + // adjust the offset accordingly. + uint32_t mid_array_boot_image_offset = + info.array_data_boot_image_reference - info.low * kElementSize; + codegen_->LoadBootImageAddress( + out, mid_array_boot_image_offset, invoke->AsInvokeStaticOrDirect()); + DCHECK_NE(out, in); + __ movl(out, Address(out, in, TIMES_4, 0)); + } __ MaybeUnpoisonHeapReference(out); __ jmp(&done); __ Bind(&allocate); // Otherwise allocate and initialize a new j.l.Integer. 
- address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); - __ movl(calling_convention.GetRegisterAt(0), Immediate(address)); - codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(), + info.integer_boot_image_offset); __ movl(Address(out, info.value_offset), in); __ Bind(&done); } diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 7627dc9490..ac6eab0834 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -2660,58 +2660,49 @@ void IntrinsicLocationsBuilderX86_64::VisitIntegerValueOf(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86_64::VisitIntegerValueOf(HInvoke* invoke) { - IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo(); + IntrinsicVisitor::IntegerValueOfInfo info = + IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions()); LocationSummary* locations = invoke->GetLocations(); X86_64Assembler* assembler = GetAssembler(); CpuRegister out = locations->Out().AsRegister<CpuRegister>(); InvokeRuntimeCallingConvention calling_convention; - if (invoke->InputAt(0)->IsConstant()) { + CpuRegister argument = CpuRegister(calling_convention.GetRegisterAt(0)); + if (invoke->InputAt(0)->IsIntConstant()) { int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue(); - if (value >= info.low && value <= info.high) { + if (static_cast<uint32_t>(value - info.low) < info.length) { // Just embed the j.l.Integer in the code. - ScopedObjectAccess soa(Thread::Current()); - mirror::Object* boxed = info.cache->Get(value + (-info.low)); - DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed)); - uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed)); - __ movl(out, Immediate(static_cast<int32_t>(address))); + DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference); + codegen_->LoadBootImageAddress(out, info.value_boot_image_reference); } else { + DCHECK(locations->CanCall()); // Allocate and initialize a new j.l.Integer. // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the // JIT object table. - CpuRegister argument = CpuRegister(calling_convention.GetRegisterAt(0)); - uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); - __ movl(argument, Immediate(static_cast<int32_t>(address))); - codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(), + info.integer_boot_image_offset); __ movl(Address(out, info.value_offset), Immediate(value)); } } else { + DCHECK(locations->CanCall()); CpuRegister in = locations->InAt(0).AsRegister<CpuRegister>(); // Check bounds of our cache. __ leal(out, Address(in, -info.low)); - __ cmpl(out, Immediate(info.high - info.low + 1)); + __ cmpl(out, Immediate(info.length)); NearLabel allocate, done; __ j(kAboveEqual, &allocate); // If the value is within the bounds, load the j.l.Integer directly from the array. 
- uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); - uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache)); - if (data_offset + address <= std::numeric_limits<int32_t>::max()) { - __ movl(out, Address(out, TIMES_4, data_offset + address)); - } else { - CpuRegister temp = CpuRegister(calling_convention.GetRegisterAt(0)); - __ movl(temp, Immediate(static_cast<int32_t>(data_offset + address))); - __ movl(out, Address(temp, out, TIMES_4, 0)); - } + DCHECK_NE(out.AsRegister(), argument.AsRegister()); + codegen_->LoadBootImageAddress(argument, info.array_data_boot_image_reference); + static_assert((1u << TIMES_4) == sizeof(mirror::HeapReference<mirror::Object>), + "Check heap reference size."); + __ movl(out, Address(argument, out, TIMES_4, 0)); __ MaybeUnpoisonHeapReference(out); __ jmp(&done); __ Bind(&allocate); // Otherwise allocate and initialize a new j.l.Integer. - CpuRegister argument = CpuRegister(calling_convention.GetRegisterAt(0)); - address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); - __ movl(argument, Immediate(static_cast<int32_t>(address))); - codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(), + info.integer_boot_image_offset); __ movl(Address(out, info.value_offset), in); __ Bind(&done); } diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc index 9fa5b74c62..50bfe843b5 100644 --- a/compiler/optimizing/linearize_test.cc +++ b/compiler/optimizing/linearize_test.cc @@ -16,11 +16,9 @@ #include <fstream> -#include "arch/x86/instruction_set_features_x86.h" #include "base/arena_allocator.h" #include "builder.h" #include "code_generator.h" -#include "code_generator_x86.h" #include "dex/dex_file.h" #include "dex/dex_instruction.h" #include "driver/compiler_options.h" @@ -43,10 +41,8 @@ template <size_t number_of_blocks> void LinearizeTest::TestCode(const std::vector<uint16_t>& data, const uint32_t (&expected_order)[number_of_blocks]) { HGraph* graph = CreateCFG(data); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); + std::unique_ptr<CodeGenerator> codegen = CodeGenerator::Create(graph, *compiler_options_); + SsaLivenessAnalysis liveness(graph, codegen.get(), GetScopedAllocator()); liveness.Analyze(); ASSERT_EQ(graph->GetLinearOrder().size(), number_of_blocks); diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc index 66660662e4..0fb90fb370 100644 --- a/compiler/optimizing/live_ranges_test.cc +++ b/compiler/optimizing/live_ranges_test.cc @@ -14,11 +14,9 @@ * limitations under the License. 
*/ -#include "arch/x86/instruction_set_features_x86.h" #include "base/arena_allocator.h" #include "builder.h" #include "code_generator.h" -#include "code_generator_x86.h" #include "dex/dex_file.h" #include "dex/dex_instruction.h" #include "driver/compiler_options.h" @@ -63,10 +61,8 @@ TEST_F(LiveRangesTest, CFG1) { HGraph* graph = BuildGraph(data); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); + std::unique_ptr<CodeGenerator> codegen = CodeGenerator::Create(graph, *compiler_options_); + SsaLivenessAnalysis liveness(graph, codegen.get(), GetScopedAllocator()); liveness.Analyze(); LiveInterval* interval = liveness.GetInstructionFromSsaIndex(0)->GetLiveInterval(); @@ -109,10 +105,8 @@ TEST_F(LiveRangesTest, CFG2) { Instruction::RETURN | 0 << 8); HGraph* graph = BuildGraph(data); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); + std::unique_ptr<CodeGenerator> codegen = CodeGenerator::Create(graph, *compiler_options_); + SsaLivenessAnalysis liveness(graph, codegen.get(), GetScopedAllocator()); liveness.Analyze(); LiveInterval* interval = liveness.GetInstructionFromSsaIndex(0)->GetLiveInterval(); @@ -158,10 +152,8 @@ TEST_F(LiveRangesTest, CFG3) { Instruction::RETURN | 0 << 8); HGraph* graph = BuildGraph(data); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); + std::unique_ptr<CodeGenerator> codegen = CodeGenerator::Create(graph, *compiler_options_); + SsaLivenessAnalysis liveness(graph, codegen.get(), GetScopedAllocator()); liveness.Analyze(); // Test for the 4 constant. @@ -235,10 +227,8 @@ TEST_F(LiveRangesTest, Loop1) { HGraph* graph = BuildGraph(data); RemoveSuspendChecks(graph); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); + std::unique_ptr<CodeGenerator> codegen = CodeGenerator::Create(graph, *compiler_options_); + SsaLivenessAnalysis liveness(graph, codegen.get(), GetScopedAllocator()); liveness.Analyze(); // Test for the 0 constant. @@ -312,10 +302,8 @@ TEST_F(LiveRangesTest, Loop2) { Instruction::RETURN | 0 << 8); HGraph* graph = BuildGraph(data); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); + std::unique_ptr<CodeGenerator> codegen = CodeGenerator::Create(graph, *compiler_options_); + SsaLivenessAnalysis liveness(graph, codegen.get(), GetScopedAllocator()); liveness.Analyze(); // Test for the 0 constant. 
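The same two-line substitution repeats through every test in live_ranges_test.cc and liveness_test.cc: instead of constructing x86::CodeGeneratorX86 directly, which pinned the tests to one ISA and dragged in x86-only headers, the tests now ask the CodeGenerator::Create factory for whatever back-end the test's compiler_options_ selects. A reduced sketch of that factory shape (illustrative stand-in types, not ART's real class hierarchy):

#include <memory>

enum class Isa { kX86, kArm64 };

struct Options {
  Isa isa;
};

// Minimal stand-ins for the per-ISA code generators.
struct CodeGen {
  virtual ~CodeGen() = default;
};
struct X86CodeGen : CodeGen {};
struct Arm64CodeGen : CodeGen {};

// Callers name only the options; the concrete type stays hidden, so test
// code compiles without any per-architecture includes.
std::unique_ptr<CodeGen> Create(const Options& options) {
  switch (options.isa) {
    case Isa::kX86:
      return std::make_unique<X86CodeGen>();
    case Isa::kArm64:
      return std::make_unique<Arm64CodeGen>();
  }
  return nullptr;  // Unreachable for a well-formed enum value.
}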
@@ -388,10 +376,8 @@ TEST_F(LiveRangesTest, CFG4) { Instruction::RETURN); HGraph* graph = BuildGraph(data); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); + std::unique_ptr<CodeGenerator> codegen = CodeGenerator::Create(graph, *compiler_options_); + SsaLivenessAnalysis liveness(graph, codegen.get(), GetScopedAllocator()); liveness.Analyze(); // Test for the 0 constant. diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc index 6621a03568..72f995e773 100644 --- a/compiler/optimizing/liveness_test.cc +++ b/compiler/optimizing/liveness_test.cc @@ -14,11 +14,9 @@ * limitations under the License. */ -#include "arch/x86/instruction_set_features_x86.h" #include "base/arena_allocator.h" #include "builder.h" #include "code_generator.h" -#include "code_generator_x86.h" #include "dex/dex_file.h" #include "dex/dex_instruction.h" #include "driver/compiler_options.h" @@ -50,10 +48,8 @@ void LivenessTest::TestCode(const std::vector<uint16_t>& data, const char* expec HGraph* graph = CreateCFG(data); // `Inline` conditions into ifs. PrepareForRegisterAllocation(graph).Run(); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); + std::unique_ptr<CodeGenerator> codegen = CodeGenerator::Create(graph, *compiler_options_); + SsaLivenessAnalysis liveness(graph, codegen.get(), GetScopedAllocator()); liveness.Analyze(); std::ostringstream buffer; diff --git a/compiler/optimizing/loop_analysis.cc b/compiler/optimizing/loop_analysis.cc index a2124455e2..efb23e7d3e 100644 --- a/compiler/optimizing/loop_analysis.cc +++ b/compiler/optimizing/loop_analysis.cc @@ -17,19 +17,34 @@ #include "loop_analysis.h" #include "base/bit_vector-inl.h" +#include "induction_var_range.h" namespace art { void LoopAnalysis::CalculateLoopBasicProperties(HLoopInformation* loop_info, - LoopAnalysisInfo* analysis_results) { + LoopAnalysisInfo* analysis_results, + int64_t trip_count) { + analysis_results->trip_count_ = trip_count; + for (HBlocksInLoopIterator block_it(*loop_info); !block_it.Done(); block_it.Advance()) { HBasicBlock* block = block_it.Current(); + // Check whether one of the successors is a loop exit. for (HBasicBlock* successor : block->GetSuccessors()) { if (!loop_info->Contains(*successor)) { analysis_results->exits_num_++; + + // We track the number of invariant loop exits which correspond to an HIf instruction and + // can be eliminated by loop peeling; other control flow instructions are ignored and will + // not cause loop peeling to happen as they either cannot be inside a loop, or by + // definition cannot be loop exits (unconditional instructions), or are not beneficial for + // the optimization.
+ HIf* hif = block->GetLastInstruction()->AsIf(); + if (hif != nullptr && !loop_info->Contains(*hif->InputAt(0)->GetBlock())) { + analysis_results->invariant_exits_num_++; + } } } @@ -48,20 +63,13 @@ void LoopAnalysis::CalculateLoopBasicProperties(HLoopInformation* loop_info, } } -bool LoopAnalysis::HasLoopAtLeastOneInvariantExit(HLoopInformation* loop_info) { - HGraph* graph = loop_info->GetHeader()->GetGraph(); - for (uint32_t block_id : loop_info->GetBlocks().Indexes()) { - HBasicBlock* block = graph->GetBlocks()[block_id]; - DCHECK(block != nullptr); - if (block->EndsWithIf()) { - HIf* hif = block->GetLastInstruction()->AsIf(); - HInstruction* input = hif->InputAt(0); - if (IsLoopExit(loop_info, hif) && !loop_info->Contains(*input->GetBlock())) { - return true; - } - } +int64_t LoopAnalysis::GetLoopTripCount(HLoopInformation* loop_info, + const InductionVarRange* induction_range) { + int64_t trip_count; + if (!induction_range->HasKnownTripCount(loop_info, &trip_count)) { + trip_count = LoopAnalysisInfo::kUnknownTripCount; } - return false; + return trip_count; } // Default implementation of loop helper; used for all targets unless a custom implementation @@ -77,18 +85,22 @@ class ArchDefaultLoopHelper : public ArchNoOptsLoopHelper { // Loop's maximum basic block count. Loops with higher count will not be peeled/unrolled. static constexpr uint32_t kScalarHeuristicMaxBodySizeBlocks = 6; - bool IsLoopNonBeneficialForScalarOpts(LoopAnalysisInfo* loop_analysis_info) const OVERRIDE { - return loop_analysis_info->HasLongTypeInstructions() || - IsLoopTooBig(loop_analysis_info, + bool IsLoopNonBeneficialForScalarOpts(LoopAnalysisInfo* analysis_info) const OVERRIDE { + return analysis_info->HasLongTypeInstructions() || + IsLoopTooBig(analysis_info, kScalarHeuristicMaxBodySizeInstr, kScalarHeuristicMaxBodySizeBlocks); } - uint32_t GetScalarUnrollingFactor(HLoopInformation* loop_info ATTRIBUTE_UNUSED, - uint64_t trip_count) const OVERRIDE { + uint32_t GetScalarUnrollingFactor(const LoopAnalysisInfo* analysis_info) const OVERRIDE { + int64_t trip_count = analysis_info->GetTripCount(); + // Unroll only loops with known trip count. + if (trip_count == LoopAnalysisInfo::kUnknownTripCount) { + return LoopAnalysisInfo::kNoUnrollingFactor; + } uint32_t desired_unrolling_factor = kScalarMaxUnrollFactor; if (trip_count < desired_unrolling_factor || trip_count % desired_unrolling_factor != 0) { - return kNoUnrollingFactor; + return LoopAnalysisInfo::kNoUnrollingFactor; } return desired_unrolling_factor; @@ -136,12 +148,12 @@ class Arm64LoopHelper : public ArchDefaultLoopHelper { // TODO: Unroll loops with unknown trip count. DCHECK_NE(vector_length, 0u); if (trip_count < (2 * vector_length + max_peel)) { - return kNoUnrollingFactor; + return LoopAnalysisInfo::kNoUnrollingFactor; } // Don't unroll for large loop body size. uint32_t instruction_count = block->GetInstructions().CountSize(); if (instruction_count >= kArm64SimdHeuristicMaxBodySizeInstr) { - return kNoUnrollingFactor; + return LoopAnalysisInfo::kNoUnrollingFactor; } // Find a beneficial unroll factor with the following restrictions: // - At least one iteration of the transformed loop should be executed. 
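The new invariant_exits_num_ counter exists to feed the peeling heuristic used later in loop_optimization.cc: an exit guarded by a condition defined outside the loop can be removed from the loop body by peeling one iteration, after which the cloned in-loop check is statically evaluated (TryToEvaluateIfCondition). A source-level picture of the effect (illustrative example, not from the patch):

#include <cstddef>

// Before: `flag` never changes inside the loop, yet it is tested on
// every iteration -- a loop-invariant exit.
int SumUntilFlag(const int* a, size_t n, bool flag) {
  int sum = 0;
  for (size_t i = 0; i < n; ++i) {
    if (flag) break;
    sum += a[i];
  }
  return sum;
}

// After peeling one iteration, the loop proper is entered only when
// `flag` was already seen to be false, so its copy of the check folds
// to a constant and disappears from the loop body.
int SumUntilFlagPeeled(const int* a, size_t n, bool flag) {
  int sum = 0;
  if (n != 0 && !flag) {  // Peeled first iteration.
    sum += a[0];
    for (size_t i = 1; i < n; ++i) {
      sum += a[i];        // No test of `flag` left inside the loop.
    }
  }
  return sum;
}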
diff --git a/compiler/optimizing/loop_analysis.h b/compiler/optimizing/loop_analysis.h index c09d3ff00f..bcb7b70494 100644 --- a/compiler/optimizing/loop_analysis.h +++ b/compiler/optimizing/loop_analysis.h @@ -21,26 +21,33 @@ namespace art { +class InductionVarRange; class LoopAnalysis; -// No loop unrolling factor (just one copy of the loop-body). -static constexpr uint32_t kNoUnrollingFactor = 1; - // Class to hold cached information on properties of the loop. class LoopAnalysisInfo : public ValueObject { public: + // No loop unrolling factor (just one copy of the loop-body). + static constexpr uint32_t kNoUnrollingFactor = 1; + // Used for unknown and non-constant trip counts (see InductionVarRange::HasKnownTripCount). + static constexpr int64_t kUnknownTripCount = -1; + explicit LoopAnalysisInfo(HLoopInformation* loop_info) - : bb_num_(0), + : trip_count_(kUnknownTripCount), + bb_num_(0), instr_num_(0), exits_num_(0), + invariant_exits_num_(0), has_instructions_preventing_scalar_peeling_(false), has_instructions_preventing_scalar_unrolling_(false), has_long_type_instructions_(false), loop_info_(loop_info) {} + int64_t GetTripCount() const { return trip_count_; } size_t GetNumberOfBasicBlocks() const { return bb_num_; } size_t GetNumberOfInstructions() const { return instr_num_; } size_t GetNumberOfExits() const { return exits_num_; } + size_t GetNumberOfInvariantExits() const { return invariant_exits_num_; } bool HasInstructionsPreventingScalarPeeling() const { return has_instructions_preventing_scalar_peeling_; @@ -50,19 +57,27 @@ class LoopAnalysisInfo : public ValueObject { return has_instructions_preventing_scalar_unrolling_; } + bool HasInstructionsPreventingScalarOpts() const { + return HasInstructionsPreventingScalarPeeling() || HasInstructionsPreventingScalarUnrolling(); + } + bool HasLongTypeInstructions() const { return has_long_type_instructions_; } - const HLoopInformation* GetLoopInfo() const { return loop_info_; } + HLoopInformation* GetLoopInfo() const { return loop_info_; } private: + // Trip count of the loop if known, kUnknownTripCount otherwise. + int64_t trip_count_; // Number of basic blocks in the loop body. size_t bb_num_; // Number of instructions in the loop body. size_t instr_num_; // Number of loop's exits. size_t exits_num_; + // Number of "if" loop exits (with HIf instruction) whose condition is loop-invariant. + size_t invariant_exits_num_; // Whether the loop has instructions which make scalar loop peeling non-beneficial. bool has_instructions_preventing_scalar_peeling_; // Whether the loop has instructions which make scalar loop unrolling non-beneficial. @@ -72,7 +87,7 @@ class LoopAnalysisInfo : public ValueObject { bool has_long_type_instructions_; // Corresponding HLoopInformation. - const HLoopInformation* loop_info_; + HLoopInformation* loop_info_; friend class LoopAnalysis; }; @@ -84,20 +99,12 @@ class LoopAnalysis : public ValueObject { // Calculates loops basic properties like body size, exits number, etc. and fills // 'analysis_results' with this information. static void CalculateLoopBasicProperties(HLoopInformation* loop_info, - LoopAnalysisInfo* analysis_results); + LoopAnalysisInfo* analysis_results, + int64_t trip_count); - // Returns whether the loop has at least one loop invariant exit. - static bool HasLoopAtLeastOneInvariantExit(HLoopInformation* loop_info); - - // Returns whether HIf's true or false successor is outside the specified loop. - // - // Prerequisite: HIf must be in the specified loop. 
- static bool IsLoopExit(HLoopInformation* loop_info, const HIf* hif) { - DCHECK(loop_info->Contains(*hif->GetBlock())); - HBasicBlock* true_succ = hif->IfTrueSuccessor(); - HBasicBlock* false_succ = hif->IfFalseSuccessor(); - return (!loop_info->Contains(*true_succ) || !loop_info->Contains(*false_succ)); - } + // Returns the trip count of the loop if it is known and kUnknownTripCount otherwise. + static int64_t GetLoopTripCount(HLoopInformation* loop_info, + const InductionVarRange* induction_range); private: // Returns whether an instruction makes scalar loop peeling/unrolling non-beneficial. @@ -113,9 +120,7 @@ class LoopAnalysis : public ValueObject { instruction->IsUnresolvedStaticFieldGet() || instruction->IsUnresolvedStaticFieldSet() || // TODO: Support loops with intrinsified invokes. - instruction->IsInvoke() || - // TODO: Support loops with ClinitChecks. - instruction->IsClinitCheck()); + instruction->IsInvoke()); } }; @@ -145,9 +150,9 @@ class ArchNoOptsLoopHelper : public ArenaObject<kArenaAllocOptimization> { // Returns optimal scalar unrolling factor for the loop. // // Returns kNoUnrollingFactor by default, should be overridden by particular target loop helper. - virtual uint32_t GetScalarUnrollingFactor(HLoopInformation* loop_info ATTRIBUTE_UNUSED, - uint64_t trip_count ATTRIBUTE_UNUSED) const { - return kNoUnrollingFactor; + virtual uint32_t GetScalarUnrollingFactor( + const LoopAnalysisInfo* analysis_info ATTRIBUTE_UNUSED) const { + return LoopAnalysisInfo::kNoUnrollingFactor; } // Returns whether scalar loop peeling is enabled, @@ -162,7 +167,7 @@ class ArchNoOptsLoopHelper : public ArenaObject<kArenaAllocOptimization> { int64_t trip_count ATTRIBUTE_UNUSED, uint32_t max_peel ATTRIBUTE_UNUSED, uint32_t vector_length ATTRIBUTE_UNUSED) const { - return kNoUnrollingFactor; + return LoopAnalysisInfo::kNoUnrollingFactor; } }; diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index eda6bd1e86..440cd3351e 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -23,7 +23,7 @@ #include "arch/mips64/instruction_set_features_mips64.h" #include "arch/x86/instruction_set_features_x86.h" #include "arch/x86_64/instruction_set_features_x86_64.h" -#include "driver/compiler_driver.h" +#include "driver/compiler_options.h" #include "linear_order.h" #include "mirror/array-inl.h" #include "mirror/string.h" @@ -427,12 +427,12 @@ static void TryToEvaluateIfCondition(HIf* instruction, HGraph* graph) { // HLoopOptimization::HLoopOptimization(HGraph* graph, - CompilerDriver* compiler_driver, + const CompilerOptions* compiler_options, HInductionVarAnalysis* induction_analysis, OptimizingCompilerStats* stats, const char* name) : HOptimization(graph, name, stats), - compiler_driver_(compiler_driver), + compiler_options_(compiler_options), induction_range_(induction_analysis), loop_allocator_(nullptr), global_allocator_(graph_->GetAllocator()), @@ -454,8 +454,8 @@ HLoopOptimization::HLoopOptimization(HGraph* graph, vector_header_(nullptr), vector_body_(nullptr), vector_index_(nullptr), - arch_loop_helper_(ArchNoOptsLoopHelper::Create(compiler_driver_ != nullptr - ? compiler_driver_->GetInstructionSet() + arch_loop_helper_(ArchNoOptsLoopHelper::Create(compiler_options_ != nullptr + ? 
compiler_options_->GetInstructionSet() : InstructionSet::kNone, global_allocator_)) { } @@ -744,100 +744,102 @@ bool HLoopOptimization::TryOptimizeInnerLoopFinite(LoopNode* node) { } bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) { - return TryOptimizeInnerLoopFinite(node) || - TryPeelingForLoopInvariantExitsElimination(node) || - TryUnrollingForBranchPenaltyReduction(node); + return TryOptimizeInnerLoopFinite(node) || TryPeelingAndUnrolling(node); } // -// Loop unrolling: generic part methods. +// Scalar loop peeling and unrolling: generic part methods. // -bool HLoopOptimization::TryUnrollingForBranchPenaltyReduction(LoopNode* node) { - // Don't run peeling/unrolling if compiler_driver_ is nullptr (i.e., running under tests) - // as InstructionSet is needed. - if (compiler_driver_ == nullptr) { +bool HLoopOptimization::TryUnrollingForBranchPenaltyReduction(LoopAnalysisInfo* analysis_info, + bool generate_code) { + if (analysis_info->GetNumberOfExits() > 1) { return false; } - HLoopInformation* loop_info = node->loop_info; - int64_t trip_count = 0; - // Only unroll loops with a known tripcount. - if (!induction_range_.HasKnownTripCount(loop_info, &trip_count)) { + uint32_t unrolling_factor = arch_loop_helper_->GetScalarUnrollingFactor(analysis_info); + if (unrolling_factor == LoopAnalysisInfo::kNoUnrollingFactor) { return false; } - uint32_t unrolling_factor = arch_loop_helper_->GetScalarUnrollingFactor(loop_info, trip_count); - if (unrolling_factor == kNoUnrollingFactor) { - return false; - } + if (generate_code) { + // TODO: support other unrolling factors. + DCHECK_EQ(unrolling_factor, 2u); - LoopAnalysisInfo loop_analysis_info(loop_info); - LoopAnalysis::CalculateLoopBasicProperties(loop_info, &loop_analysis_info); + // Perform unrolling. + HLoopInformation* loop_info = analysis_info->GetLoopInfo(); + PeelUnrollSimpleHelper helper(loop_info); + helper.DoUnrolling(); - // Check "IsLoopClonable" last as it can be time-consuming. - if (loop_analysis_info.HasInstructionsPreventingScalarUnrolling() || - arch_loop_helper_->IsLoopNonBeneficialForScalarOpts(&loop_analysis_info) || - (loop_analysis_info.GetNumberOfExits() > 1) || - !PeelUnrollHelper::IsLoopClonable(loop_info)) { - return false; + // Remove the redundant loop check after unrolling. + HIf* copy_hif = + helper.GetBasicBlockMap()->Get(loop_info->GetHeader())->GetLastInstruction()->AsIf(); + int32_t constant = loop_info->Contains(*copy_hif->IfTrueSuccessor()) ? 1 : 0; + copy_hif->ReplaceInput(graph_->GetIntConstant(constant), 0u); } + return true; +} - // TODO: support other unrolling factors. - DCHECK_EQ(unrolling_factor, 2u); +bool HLoopOptimization::TryPeelingForLoopInvariantExitsElimination(LoopAnalysisInfo* analysis_info, + bool generate_code) { + HLoopInformation* loop_info = analysis_info->GetLoopInfo(); + if (!arch_loop_helper_->IsLoopPeelingEnabled()) { + return false; + } - // Perform unrolling. - PeelUnrollSimpleHelper helper(loop_info); - helper.DoUnrolling(); + if (analysis_info->GetNumberOfInvariantExits() == 0) { + return false; + } - // Remove the redundant loop check after unrolling. - HIf* copy_hif = - helper.GetBasicBlockMap()->Get(loop_info->GetHeader())->GetLastInstruction()->AsIf(); - int32_t constant = loop_info->Contains(*copy_hif->IfTrueSuccessor()) ? 1 : 0; - copy_hif->ReplaceInput(graph_->GetIntConstant(constant), 0u); + if (generate_code) { + // Perform peeling. 
+ PeelUnrollSimpleHelper helper(loop_info); + helper.DoPeeling(); + + // Statically evaluate loop check after peeling for loop invariant condition. + const SuperblockCloner::HInstructionMap* hir_map = helper.GetInstructionMap(); + for (auto entry : *hir_map) { + HInstruction* copy = entry.second; + if (copy->IsIf()) { + TryToEvaluateIfCondition(copy->AsIf(), graph_); + } + } + } return true; } -bool HLoopOptimization::TryPeelingForLoopInvariantExitsElimination(LoopNode* node) { - // Don't run peeling/unrolling if compiler_driver_ is nullptr (i.e., running under tests) +bool HLoopOptimization::TryPeelingAndUnrolling(LoopNode* node) { + // Don't run peeling/unrolling if compiler_options_ is nullptr (i.e., running under tests) // as InstructionSet is needed. - if (compiler_driver_ == nullptr) { + if (compiler_options_ == nullptr) { return false; } HLoopInformation* loop_info = node->loop_info; - // Check 'IsLoopClonable' the last as it might be time-consuming. - if (!arch_loop_helper_->IsLoopPeelingEnabled()) { + int64_t trip_count = LoopAnalysis::GetLoopTripCount(loop_info, &induction_range_); + LoopAnalysisInfo analysis_info(loop_info); + LoopAnalysis::CalculateLoopBasicProperties(loop_info, &analysis_info, trip_count); + + if (analysis_info.HasInstructionsPreventingScalarOpts() || + arch_loop_helper_->IsLoopNonBeneficialForScalarOpts(&analysis_info)) { return false; } - LoopAnalysisInfo loop_analysis_info(loop_info); - LoopAnalysis::CalculateLoopBasicProperties(loop_info, &loop_analysis_info); - - // Check "IsLoopClonable" last as it can be time-consuming. - if (loop_analysis_info.HasInstructionsPreventingScalarPeeling() || - arch_loop_helper_->IsLoopNonBeneficialForScalarOpts(&loop_analysis_info) || - !LoopAnalysis::HasLoopAtLeastOneInvariantExit(loop_info) || - !PeelUnrollHelper::IsLoopClonable(loop_info)) { + if (!TryPeelingForLoopInvariantExitsElimination(&analysis_info, /*generate_code*/ false) && + !TryUnrollingForBranchPenaltyReduction(&analysis_info, /*generate_code*/ false)) { return false; } - // Perform peeling. - PeelUnrollSimpleHelper helper(loop_info); - helper.DoPeeling(); - - const SuperblockCloner::HInstructionMap* hir_map = helper.GetInstructionMap(); - for (auto entry : *hir_map) { - HInstruction* copy = entry.second; - if (copy->IsIf()) { - TryToEvaluateIfCondition(copy->AsIf(), graph_); - } + // Run 'IsLoopClonable' the last as it might be time-consuming. + if (!PeelUnrollHelper::IsLoopClonable(loop_info)) { + return false; } - return true; + return TryPeelingForLoopInvariantExitsElimination(&analysis_info) || + TryUnrollingForBranchPenaltyReduction(&analysis_info); } // @@ -1076,7 +1078,7 @@ void HLoopOptimization::Vectorize(LoopNode* node, vector_index_, ptc, graph_->GetConstant(induc_type, 1), - kNoUnrollingFactor); + LoopAnalysisInfo::kNoUnrollingFactor); } // Generate vector loop, possibly further unrolled: @@ -1103,7 +1105,7 @@ void HLoopOptimization::Vectorize(LoopNode* node, vector_index_, stc, graph_->GetConstant(induc_type, 1), - kNoUnrollingFactor); + LoopAnalysisInfo::kNoUnrollingFactor); } // Link reductions to their final uses. 
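The restructured TryPeelingAndUnrolling drives both transforms off one shared LoopAnalysisInfo in two passes: a dry run with generate_code = false decides whether either transform would fire at all, the potentially expensive PeelUnrollHelper::IsLoopClonable check runs only after that, and only then is the winning transform re-invoked to actually mutate the IR. A stubbed sketch of that control flow (placeholder types and heuristics, not ART code):

#include <cstdint>

// Stand-in for LoopAnalysisInfo; a negative trip count models kUnknownTripCount.
struct LoopInfo {
  int64_t trip_count;
  unsigned invariant_exits;
};

bool TryPeel(LoopInfo* info, bool generate_code = true) {
  if (info->invariant_exits == 0) return false;
  if (generate_code) { /* ...clone blocks, fold the peeled check... */ }
  return true;
}

bool TryUnroll(LoopInfo* info, bool generate_code = true) {
  // Mirrors the factor-2 heuristic: known trip count, evenly divisible.
  if (info->trip_count < 2 || info->trip_count % 2 != 0) return false;
  if (generate_code) { /* ...duplicate the body, drop the redundant check... */ }
  return true;
}

bool IsLoopClonable(const LoopInfo&) { return true; }  // Expensive in reality.

bool TryPeelingAndUnrolling(LoopInfo* info) {
  // Phase 1: pure profitability queries; nothing is modified yet.
  if (!TryPeel(info, /* generate_code */ false) &&
      !TryUnroll(info, /* generate_code */ false)) {
    return false;
  }
  // Pay for the structural check only when some transform would apply.
  if (!IsLoopClonable(*info)) {
    return false;
  }
  // Phase 2: apply, preferring peeling over unrolling.
  return TryPeel(info) || TryUnroll(info);
}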
@@ -1459,7 +1461,7 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node, } uint32_t HLoopOptimization::GetVectorSizeInBytes() { - switch (compiler_driver_->GetInstructionSet()) { + switch (compiler_options_->GetInstructionSet()) { case InstructionSet::kArm: case InstructionSet::kThumb2: return 8; // 64-bit SIMD @@ -1469,8 +1471,8 @@ uint32_t HLoopOptimization::GetVectorSizeInBytes() { } bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrictions) { - const InstructionSetFeatures* features = compiler_driver_->GetInstructionSetFeatures(); - switch (compiler_driver_->GetInstructionSet()) { + const InstructionSetFeatures* features = compiler_options_->GetInstructionSetFeatures(); + switch (compiler_options_->GetInstructionSet()) { case InstructionSet::kArm: case InstructionSet::kThumb2: // Allow vectorization for all ARM devices, because Android assumes that diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h index 191a93da26..bc4792458b 100644 --- a/compiler/optimizing/loop_optimization.h +++ b/compiler/optimizing/loop_optimization.h @@ -27,7 +27,7 @@ namespace art { -class CompilerDriver; +class CompilerOptions; class ArchNoOptsLoopHelper; /** @@ -38,7 +38,7 @@ class ArchNoOptsLoopHelper; class HLoopOptimization : public HOptimization { public: HLoopOptimization(HGraph* graph, - CompilerDriver* compiler_driver, + const CompilerOptions* compiler_options, HInductionVarAnalysis* induction_analysis, OptimizingCompilerStats* stats, const char* name = kLoopOptimizationPassName); @@ -144,12 +144,19 @@ class HLoopOptimization : public HOptimization { bool OptimizeInnerLoop(LoopNode* node); // Tries to apply loop unrolling for branch penalty reduction and better instruction scheduling - // opportunities. Returns whether transformation happened. - bool TryUnrollingForBranchPenaltyReduction(LoopNode* loop_node); + // opportunities. Returns whether transformation happened. 'generate_code' determines whether the + // optimization should be actually applied. + bool TryUnrollingForBranchPenaltyReduction(LoopAnalysisInfo* analysis_info, + bool generate_code = true); // Tries to apply loop peeling for loop invariant exits elimination. Returns whether - // transformation happened. - bool TryPeelingForLoopInvariantExitsElimination(LoopNode* loop_node); + // transformation happened. 'generate_code' determines whether the optimization should be + // actually applied. + bool TryPeelingForLoopInvariantExitsElimination(LoopAnalysisInfo* analysis_info, + bool generate_code = true); + + // Tries to apply scalar loop peeling and unrolling. + bool TryPeelingAndUnrolling(LoopNode* node); // // Vectorization analysis and synthesis. @@ -243,8 +250,8 @@ class HLoopOptimization : public HOptimization { void RemoveDeadInstructions(const HInstructionList& list); bool CanRemoveCycle(); // Whether the current 'iset_' is removable. - // Compiler driver (to query ISA features). - const CompilerDriver* compiler_driver_; + // Compiler options (to query ISA features). + const CompilerOptions* compiler_options_; // Range information based on prior induction variable analysis. 
InductionVarRange induction_range_; diff --git a/compiler/optimizing/loop_optimization_test.cc b/compiler/optimizing/loop_optimization_test.cc index c21bd65d97..c7cc661303 100644 --- a/compiler/optimizing/loop_optimization_test.cc +++ b/compiler/optimizing/loop_optimization_test.cc @@ -29,7 +29,8 @@ class LoopOptimizationTest : public OptimizingUnitTest { LoopOptimizationTest() : graph_(CreateGraph()), iva_(new (GetAllocator()) HInductionVarAnalysis(graph_)), - loop_opt_(new (GetAllocator()) HLoopOptimization(graph_, nullptr, iva_, nullptr)) { + loop_opt_(new (GetAllocator()) HLoopOptimization( + graph_, /* compiler_options */ nullptr, iva_, /* stats */ nullptr)) { BuildGraph(); } diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index ef8a757ad0..d243331dbe 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -1305,6 +1305,19 @@ void HInstruction::ReplaceUsesDominatedBy(HInstruction* dominator, HInstruction* } } +void HInstruction::ReplaceEnvUsesDominatedBy(HInstruction* dominator, HInstruction* replacement) { + const HUseList<HEnvironment*>& uses = GetEnvUses(); + for (auto it = uses.begin(), end = uses.end(); it != end; /* ++it below */) { + HEnvironment* user = it->GetUser(); + size_t index = it->GetIndex(); + // Increment `it` now because `*it` may disappear thanks to user->ReplaceInput(). + ++it; + if (dominator->StrictlyDominates(user->GetHolder())) { + user->ReplaceInput(replacement, index); + } + } +} + void HInstruction::ReplaceInput(HInstruction* replacement, size_t index) { HUserRecord<HInstruction*> input_use = InputRecordAt(index); if (input_use.GetInstruction() == replacement) { @@ -2786,6 +2799,14 @@ void HInstruction::SetReferenceTypeInfo(ReferenceTypeInfo rti) { SetPackedFlag<kFlagReferenceTypeIsExact>(rti.IsExact()); } +bool HBoundType::InstructionDataEquals(const HInstruction* other) const { + const HBoundType* other_bt = other->AsBoundType(); + ScopedObjectAccess soa(Thread::Current()); + return GetUpperBound().IsEqual(other_bt->GetUpperBound()) && + GetUpperCanBeNull() == other_bt->GetUpperCanBeNull() && + CanBeNull() == other_bt->CanBeNull(); +} + void HBoundType::SetUpperBound(const ReferenceTypeInfo& upper_bound, bool can_be_null) { if (kIsDebugBuild) { ScopedObjectAccess soa(Thread::Current()); diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 3fd5b6b02d..cd8d07a17a 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -1380,6 +1380,7 @@ class HLoopInformationOutwardIterator : public ValueObject { M(InvokeStaticOrDirect, Invoke) \ M(InvokeVirtual, Invoke) \ M(InvokePolymorphic, Invoke) \ + M(InvokeCustom, Invoke) \ M(LessThan, Condition) \ M(LessThanOrEqual, Condition) \ M(LoadClass, Instruction) \ @@ -2216,6 +2217,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { void ReplaceWith(HInstruction* instruction); void ReplaceUsesDominatedBy(HInstruction* dominator, HInstruction* replacement); + void ReplaceEnvUsesDominatedBy(HInstruction* dominator, HInstruction* replacement); void ReplaceInput(HInstruction* replacement, size_t index); // This is almost the same as doing `ReplaceWith()`. 
But in this helper, the @@ -4382,6 +4384,38 @@ class HInvokePolymorphic FINAL : public HInvoke { DEFAULT_COPY_CONSTRUCTOR(InvokePolymorphic); }; +class HInvokeCustom FINAL : public HInvoke { + public: + HInvokeCustom(ArenaAllocator* allocator, + uint32_t number_of_arguments, + uint32_t call_site_index, + DataType::Type return_type, + uint32_t dex_pc) + : HInvoke(kInvokeCustom, + allocator, + number_of_arguments, + /* number_of_other_inputs */ 0u, + return_type, + dex_pc, + /* dex_method_index */ dex::kDexNoIndex, + /* resolved_method */ nullptr, + kStatic), + call_site_index_(call_site_index) { + } + + uint32_t GetCallSiteIndex() const { return call_site_index_; } + + bool IsClonable() const OVERRIDE { return true; } + + DECLARE_INSTRUCTION(InvokeCustom); + + protected: + DEFAULT_COPY_CONSTRUCTOR(InvokeCustom); + + private: + uint32_t call_site_index_; +}; + class HInvokeStaticOrDirect FINAL : public HInvoke { public: // Requirements of this method call regarding the class @@ -5122,6 +5156,7 @@ class HDivZeroCheck FINAL : public HExpression<1> { SetRawInputAt(0, value); } + bool IsClonable() const OVERRIDE { return true; } bool CanBeMoved() const OVERRIDE { return true; } bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { @@ -5573,6 +5608,7 @@ class HTypeConversion FINAL : public HExpression<1> { DataType::Type GetInputType() const { return GetInput()->GetType(); } DataType::Type GetResultType() const { return GetType(); } + bool IsClonable() const OVERRIDE { return true; } bool CanBeMoved() const OVERRIDE { return true; } bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { return true; @@ -6510,9 +6546,9 @@ inline void HLoadString::AddSpecialInput(HInstruction* special_input) { class HLoadMethodHandle FINAL : public HInstruction { public: HLoadMethodHandle(HCurrentMethod* current_method, - uint16_t method_handle_idx, - const DexFile& dex_file, - uint32_t dex_pc) + uint16_t method_handle_idx, + const DexFile& dex_file, + uint32_t dex_pc) : HInstruction(kLoadMethodHandle, DataType::Type::kReference, SideEffectsForArchRuntimeCalls(), @@ -6608,8 +6644,7 @@ class HClinitCheck FINAL : public HExpression<1> { dex_pc) { SetRawInputAt(0, constant); } - - bool IsClonable() const OVERRIDE { return true; } + // TODO: Make ClinitCheck clonable. bool CanBeMoved() const OVERRIDE { return true; } bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { return true; @@ -7079,6 +7114,8 @@ class HInstanceOf FINAL : public HTypeCheckInstruction { bitstring_mask, SideEffectsForArchRuntimeCalls(check_kind)) {} + bool IsClonable() const OVERRIDE { return true; } + bool NeedsEnvironment() const OVERRIDE { return CanCallRuntime(GetTypeCheckKind()); } @@ -7109,6 +7146,7 @@ class HBoundType FINAL : public HExpression<1> { SetRawInputAt(0, input); } + bool InstructionDataEquals(const HInstruction* other) const OVERRIDE; bool IsClonable() const OVERRIDE { return true; } // {Get,Set}Upper* should only be used in reference type propagation. @@ -7167,6 +7205,7 @@ class HCheckCast FINAL : public HTypeCheckInstruction { bitstring_mask, SideEffects::CanTriggerGC()) {} + bool IsClonable() const OVERRIDE { return true; } bool NeedsEnvironment() const OVERRIDE { // Instruction may throw a CheckCastError. 
return true; diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h index c5e9a8d036..b4f9993ad6 100644 --- a/compiler/optimizing/nodes_vector.h +++ b/compiler/optimizing/nodes_vector.h @@ -958,6 +958,10 @@ class HVecMultiplyAccumulate FINAL : public HVecOperation { SetRawInputAt(2, mul_right); } + static constexpr int kInputAccumulatorIndex = 0; + static constexpr int kInputMulLeftIndex = 1; + static constexpr int kInputMulRightIndex = 2; + bool CanBeMoved() const OVERRIDE { return true; } bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { diff --git a/compiler/optimizing/optimization.cc b/compiler/optimizing/optimization.cc index d37c43db81..3c803ab627 100644 --- a/compiler/optimizing/optimization.cc +++ b/compiler/optimizing/optimization.cc @@ -28,6 +28,7 @@ #endif #ifdef ART_ENABLE_CODEGEN_x86 #include "pc_relative_fixups_x86.h" +#include "instruction_simplifier_x86.h" #endif #if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64) #include "x86_memory_gen.h" @@ -40,6 +41,7 @@ #include "constructor_fence_redundancy_elimination.h" #include "dead_code_elimination.h" #include "dex/code_item_accessors-inl.h" +#include "driver/compiler_options.h" #include "driver/dex_compilation_unit.h" #include "gvn.h" #include "induction_var_analysis.h" @@ -120,6 +122,8 @@ const char* OptimizationPassName(OptimizationPass pass) { #if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64) case OptimizationPass::kX86MemoryOperandGeneration: return x86::X86MemoryOperandGeneration::kX86MemoryOperandGenerationPassName; + case OptimizationPass::kInstructionSimplifierX86: + return x86::InstructionSimplifierX86::kInstructionSimplifierX86PassName; #endif case OptimizationPass::kNone: LOG(FATAL) << "kNone does not represent an actual pass"; @@ -162,6 +166,7 @@ OptimizationPass OptimizationPassByName(const std::string& pass_name) { #ifdef ART_ENABLE_CODEGEN_x86 X(OptimizationPass::kPcRelativeFixupsX86); X(OptimizationPass::kX86MemoryOperandGeneration); + X(OptimizationPass::kInstructionSimplifierX86); #endif LOG(FATAL) << "Cannot find optimization " << pass_name; UNREACHABLE(); @@ -224,7 +229,7 @@ ArenaVector<HOptimization*> ConstructOptimizations( case OptimizationPass::kLoopOptimization: CHECK(most_recent_induction != nullptr); opt = new (allocator) HLoopOptimization( - graph, driver, most_recent_induction, stats, pass_name); + graph, &codegen->GetCompilerOptions(), most_recent_induction, stats, pass_name); break; case OptimizationPass::kBoundsCheckElimination: CHECK(most_recent_side_effects != nullptr && most_recent_induction != nullptr); @@ -264,13 +269,13 @@ ArenaVector<HOptimization*> ConstructOptimizations( break; } case OptimizationPass::kSharpening: - opt = new (allocator) HSharpening(graph, codegen, driver, pass_name); + opt = new (allocator) HSharpening(graph, codegen, pass_name); break; case OptimizationPass::kSelectGenerator: opt = new (allocator) HSelectGenerator(graph, handles, stats, pass_name); break; case OptimizationPass::kInstructionSimplifier: - opt = new (allocator) InstructionSimplifier(graph, codegen, driver, stats, pass_name); + opt = new (allocator) InstructionSimplifier(graph, codegen, stats, pass_name); break; case OptimizationPass::kIntrinsicsRecognizer: opt = new (allocator) IntrinsicsRecognizer(graph, stats, pass_name); @@ -286,7 +291,7 @@ ArenaVector<HOptimization*> ConstructOptimizations( break; case OptimizationPass::kScheduling: opt = new (allocator) HInstructionScheduling( - graph, 
driver->GetInstructionSet(), codegen, pass_name); + graph, codegen->GetCompilerOptions().GetInstructionSet(), codegen, pass_name); break; // // Arch-specific passes. @@ -322,6 +327,10 @@ ArenaVector<HOptimization*> ConstructOptimizations( DCHECK(alt_name == nullptr) << "arch-specific pass does not support alternative name"; opt = new (allocator) x86::X86MemoryOperandGeneration(graph, codegen, stats); break; + case OptimizationPass::kInstructionSimplifierX86: + DCHECK(alt_name == nullptr) << "arch-specific pass does not support alternative name"; + opt = new (allocator) x86::InstructionSimplifierX86(graph, codegen, stats); + break; #endif case OptimizationPass::kNone: LOG(FATAL) << "kNone does not represent an actual pass"; diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h index 88b283cebf..a9fafa0864 100644 --- a/compiler/optimizing/optimization.h +++ b/compiler/optimizing/optimization.h @@ -101,6 +101,7 @@ enum class OptimizationPass { #endif #if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64) kX86MemoryOperandGeneration, + kInstructionSimplifierX86, #endif kNone, kLast = kNone diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc index 2e189fdd14..1c1cf28294 100644 --- a/compiler/optimizing/optimizing_cfi_test.cc +++ b/compiler/optimizing/optimizing_cfi_test.cc @@ -47,25 +47,20 @@ class OptimizingCFITest : public CFITest, public OptimizingUnitTestHelper { static constexpr bool kGenerateExpected = false; OptimizingCFITest() - : pool_and_allocator_(), - opts_(), - isa_features_(), - graph_(nullptr), + : graph_(nullptr), code_gen_(), blocks_(GetAllocator()->Adapter()) {} - ArenaAllocator* GetAllocator() { return pool_and_allocator_.GetAllocator(); } - void SetUpFrame(InstructionSet isa) { + OverrideInstructionSetFeatures(isa, "default"); + // Ensure that slow-debug is off, so that there is no unexpected read-barrier check emitted. SetRuntimeDebugFlagsEnabled(false); // Setup simple context. - std::string error; - isa_features_ = InstructionSetFeatures::FromVariant(isa, "default", &error); graph_ = CreateGraph(); // Generate simple frame with some spills. 
- code_gen_ = CodeGenerator::Create(graph_, isa, *isa_features_, opts_); + code_gen_ = CodeGenerator::Create(graph_, *compiler_options_); code_gen_->GetAssembler()->cfi().SetEnabled(true); code_gen_->InitializeCodeGenerationData(); const int frame_size = 64; @@ -148,9 +143,6 @@ class OptimizingCFITest : public CFITest, public OptimizingUnitTestHelper { DISALLOW_COPY_AND_ASSIGN(InternalCodeAllocator); }; - ArenaPoolAndAllocator pool_and_allocator_; - CompilerOptions opts_; - std::unique_ptr<const InstructionSetFeatures> isa_features_; HGraph* graph_; std::unique_ptr<CodeGenerator> code_gen_; ArenaVector<HBasicBlock*> blocks_; diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 79ac6b9b9d..5352f26e46 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -287,7 +287,7 @@ class OptimizingCompiler FINAL : public Compiler { uintptr_t GetEntryPointOf(ArtMethod* method) const OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) { return reinterpret_cast<uintptr_t>(method->GetEntryPointFromQuickCompiledCodePtrSize( - InstructionSetPointerSize(GetCompilerDriver()->GetInstructionSet()))); + InstructionSetPointerSize(GetCompilerDriver()->GetCompilerOptions().GetInstructionSet()))); } void Init() OVERRIDE; @@ -460,7 +460,7 @@ bool OptimizingCompiler::RunArchOptimizations(HGraph* graph, const DexCompilationUnit& dex_compilation_unit, PassObserver* pass_observer, VariableSizedHandleScope* handles) const { - switch (GetCompilerDriver()->GetInstructionSet()) { + switch (codegen->GetCompilerOptions().GetInstructionSet()) { #if defined(ART_ENABLE_CODEGEN_arm) case InstructionSet::kThumb2: case InstructionSet::kArm: { @@ -530,7 +530,8 @@ bool OptimizingCompiler::RunArchOptimizations(HGraph* graph, OptDef(OptimizationPass::kSideEffectsAnalysis), OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"), OptDef(OptimizationPass::kPcRelativeFixupsX86), - OptDef(OptimizationPass::kX86MemoryOperandGeneration) + OptDef(OptimizationPass::kX86MemoryOperandGeneration), + OptDef(OptimizationPass::kInstructionSimplifierX86) }; return RunOptimizations(graph, codegen, @@ -545,7 +546,8 @@ bool OptimizingCompiler::RunArchOptimizations(HGraph* graph, OptimizationDef x86_64_optimizations[] = { OptDef(OptimizationPass::kSideEffectsAnalysis), OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"), - OptDef(OptimizationPass::kX86MemoryOperandGeneration) + OptDef(OptimizationPass::kX86MemoryOperandGeneration), + OptDef(OptimizationPass::kInstructionSimplifierX86) }; return RunOptimizations(graph, codegen, @@ -758,7 +760,8 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator, VariableSizedHandleScope* handles) const { MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kAttemptBytecodeCompilation); CompilerDriver* compiler_driver = GetCompilerDriver(); - InstructionSet instruction_set = compiler_driver->GetInstructionSet(); + const CompilerOptions& compiler_options = compiler_driver->GetCompilerOptions(); + InstructionSet instruction_set = compiler_options.GetInstructionSet(); const DexFile& dex_file = *dex_compilation_unit.GetDexFile(); uint32_t method_idx = dex_compilation_unit.GetDexMethodIndex(); const DexFile::CodeItem* code_item = dex_compilation_unit.GetCodeItem(); @@ -782,7 +785,6 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator, // Implementation of the space filter: do not compile a code item whose size in // code units is bigger than 128. 
static constexpr size_t kSpaceFilterOptimizingThreshold = 128; - const CompilerOptions& compiler_options = compiler_driver->GetCompilerOptions(); if ((compiler_options.GetCompilerFilter() == CompilerFilter::kSpace) && (CodeItemInstructionAccessor(dex_file, code_item).InsnsSizeInCodeUnits() > kSpaceFilterOptimizingThreshold)) { @@ -796,7 +798,7 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator, arena_stack, dex_file, method_idx, - compiler_driver->GetInstructionSet(), + compiler_options.GetInstructionSet(), kInvalidInvokeType, compiler_driver->GetCompilerOptions().GetDebuggable(), osr); @@ -813,9 +815,7 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator, std::unique_ptr<CodeGenerator> codegen( CodeGenerator::Create(graph, - instruction_set, - *compiler_driver->GetInstructionSetFeatures(), - compiler_driver->GetCompilerOptions(), + compiler_options, compilation_stats_.get())); if (codegen.get() == nullptr) { MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kNotCompiledNoCodegen); @@ -848,23 +848,23 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator, case kAnalysisSkipped: { MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kNotCompiledSkipped); - } break; + } case kAnalysisInvalidBytecode: { MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kNotCompiledInvalidBytecode); - } break; + } case kAnalysisFailThrowCatchLoop: { MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kNotCompiledThrowCatchLoop); - } break; + } case kAnalysisFailAmbiguousArrayOp: { MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kNotCompiledAmbiguousArrayOp); - } break; + } case kAnalysisSuccess: UNREACHABLE(); } @@ -903,7 +903,8 @@ CodeGenerator* OptimizingCompiler::TryCompileIntrinsic( VariableSizedHandleScope* handles) const { MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kAttemptIntrinsicCompilation); CompilerDriver* compiler_driver = GetCompilerDriver(); - InstructionSet instruction_set = compiler_driver->GetInstructionSet(); + const CompilerOptions& compiler_options = compiler_driver->GetCompilerOptions(); + InstructionSet instruction_set = compiler_options.GetInstructionSet(); const DexFile& dex_file = *dex_compilation_unit.GetDexFile(); uint32_t method_idx = dex_compilation_unit.GetDexMethodIndex(); @@ -921,7 +922,7 @@ CodeGenerator* OptimizingCompiler::TryCompileIntrinsic( arena_stack, dex_file, method_idx, - compiler_driver->GetInstructionSet(), + compiler_driver->GetCompilerOptions().GetInstructionSet(), kInvalidInvokeType, compiler_driver->GetCompilerOptions().GetDebuggable(), /* osr */ false); @@ -932,15 +933,12 @@ CodeGenerator* OptimizingCompiler::TryCompileIntrinsic( std::unique_ptr<CodeGenerator> codegen( CodeGenerator::Create(graph, - instruction_set, - *compiler_driver->GetInstructionSetFeatures(), - compiler_driver->GetCompilerOptions(), + compiler_options, compilation_stats_.get())); if (codegen.get() == nullptr) { return nullptr; } - codegen->GetAssembler()->cfi().SetEnabled( - compiler_driver->GetCompilerOptions().GenerateAnyDebugInfo()); + codegen->GetAssembler()->cfi().SetEnabled(compiler_options.GenerateAnyDebugInfo()); PassObserver pass_observer(graph, codegen.get(), @@ -1095,7 +1093,7 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, if (kIsDebugBuild && IsCompilingWithCoreImage() && - IsInstructionSetSupported(compiler_driver->GetInstructionSet())) { + 
IsInstructionSetSupported(compiler_driver->GetCompilerOptions().GetInstructionSet())) { // For testing purposes, we put a special marker on method names // that should be compiled with this compiler (when the // instruction set is supported). This makes sure we're not @@ -1112,7 +1110,8 @@ CompiledMethod* OptimizingCompiler::JniCompile(uint32_t access_flags, uint32_t method_idx, const DexFile& dex_file, Handle<mirror::DexCache> dex_cache) const { - if (GetCompilerDriver()->GetCompilerOptions().IsBootImage()) { + const CompilerOptions& compiler_options = GetCompilerDriver()->GetCompilerOptions(); + if (compiler_options.IsBootImage()) { ScopedObjectAccess soa(Thread::Current()); Runtime* runtime = Runtime::Current(); ArtMethod* method = runtime->GetClassLinker()->LookupResolvedMethod( @@ -1154,7 +1153,7 @@ CompiledMethod* OptimizingCompiler::JniCompile(uint32_t access_flags, } JniCompiledMethod jni_compiled_method = ArtQuickJniCompileMethod( - GetCompilerDriver(), access_flags, method_idx, dex_file); + compiler_options, access_flags, method_idx, dex_file); MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kCompiledNativeStub); return CompiledMethod::SwapAllocCompiledMethod( GetCompilerDriver(), @@ -1218,8 +1217,9 @@ bool OptimizingCompiler::JitCompile(Thread* self, ArenaAllocator allocator(runtime->GetJitArenaPool()); if (UNLIKELY(method->IsNative())) { + const CompilerOptions& compiler_options = GetCompilerDriver()->GetCompilerOptions(); JniCompiledMethod jni_compiled_method = ArtQuickJniCompileMethod( - GetCompilerDriver(), access_flags, method_idx, *dex_file); + compiler_options, access_flags, method_idx, *dex_file); ScopedNullHandle<mirror::ObjectArray<mirror::Object>> roots; ArenaSet<ArtMethod*, std::less<ArtMethod*>> cha_single_implementation_list( allocator.Adapter(kArenaAllocCHA)); @@ -1243,7 +1243,6 @@ bool OptimizingCompiler::JitCompile(Thread* self, return false; } - const CompilerOptions& compiler_options = GetCompilerDriver()->GetCompilerOptions(); if (compiler_options.GenerateAnyDebugInfo()) { const auto* method_header = reinterpret_cast<const OatQuickMethodHeader*>(code); const uintptr_t code_address = reinterpret_cast<uintptr_t>(method_header->GetCode()); @@ -1420,8 +1419,8 @@ void OptimizingCompiler::GenerateJitDebugInfo(ArtMethod* method, debug::MethodDe // Create entry for the single method that we just compiled. 
std::vector<uint8_t> elf_file = debug::MakeElfFileForJIT( - GetCompilerDriver()->GetInstructionSet(), - GetCompilerDriver()->GetInstructionSetFeatures(), + compiler_options.GetInstructionSet(), + compiler_options.GetInstructionSetFeatures(), mini_debug_info, ArrayRef<const debug::MethodDebugInfo>(&info, 1)); MutexLock mu(Thread::Current(), *Locks::native_debug_interface_lock_); diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h index f246228074..9a26f2f6c4 100644 --- a/compiler/optimizing/optimizing_compiler_stats.h +++ b/compiler/optimizing/optimizing_compiler_stats.h @@ -50,7 +50,6 @@ enum class MethodCompilationStat { kNotCompiledThrowCatchLoop, kNotCompiledAmbiguousArrayOp, kNotCompiledHugeMethod, - kNotCompiledIrreducibleAndStringInit, kNotCompiledLargeMethodNoBranches, kNotCompiledMalformedOpcode, kNotCompiledNoCodegen, diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h index a9bc5664c0..f903f82d50 100644 --- a/compiler/optimizing/optimizing_unit_test.h +++ b/compiler/optimizing/optimizing_unit_test.h @@ -29,6 +29,7 @@ #include "dex/dex_instruction.h" #include "dex/standard_dex_file.h" #include "driver/dex_compilation_unit.h" +#include "graph_checker.h" #include "handle_scope-inl.h" #include "mirror/class_loader.h" #include "mirror/dex_cache.h" @@ -187,6 +188,77 @@ class OptimizingUnitTestHelper { class OptimizingUnitTest : public CommonCompilerTest, public OptimizingUnitTestHelper {}; +// OptimizingUnitTest with some handy functions to ease the graph creation. +class ImprovedOptimizingUnitTest : public OptimizingUnitTest { + public: + ImprovedOptimizingUnitTest() : graph_(CreateGraph()), + entry_block_(nullptr), + return_block_(nullptr), + exit_block_(nullptr), + parameter_(nullptr) {} + + virtual ~ImprovedOptimizingUnitTest() {} + + void InitGraph() { + entry_block_ = new (GetAllocator()) HBasicBlock(graph_); + graph_->AddBlock(entry_block_); + graph_->SetEntryBlock(entry_block_); + + return_block_ = new (GetAllocator()) HBasicBlock(graph_); + graph_->AddBlock(return_block_); + + exit_block_ = new (GetAllocator()) HBasicBlock(graph_); + graph_->AddBlock(exit_block_); + graph_->SetExitBlock(exit_block_); + + entry_block_->AddSuccessor(return_block_); + return_block_->AddSuccessor(exit_block_); + + parameter_ = new (GetAllocator()) HParameterValue(graph_->GetDexFile(), + dex::TypeIndex(0), + 0, + DataType::Type::kInt32); + entry_block_->AddInstruction(parameter_); + return_block_->AddInstruction(new (GetAllocator()) HReturnVoid()); + exit_block_->AddInstruction(new (GetAllocator()) HExit()); + } + + bool CheckGraph() { + GraphChecker checker(graph_); + checker.Run(); + if (!checker.IsValid()) { + for (const std::string& error : checker.GetErrors()) { + std::cout << error << std::endl; + } + return false; + } + return true; + } + + HEnvironment* ManuallyBuildEnvFor(HInstruction* instruction, + ArenaVector<HInstruction*>* current_locals) { + HEnvironment* environment = new (GetAllocator()) HEnvironment( + (GetAllocator()), + current_locals->size(), + graph_->GetArtMethod(), + instruction->GetDexPc(), + instruction); + + environment->CopyFrom(ArrayRef<HInstruction* const>(*current_locals)); + instruction->SetRawEnvironment(environment); + return environment; + } + + protected: + HGraph* graph_; + + HBasicBlock* entry_block_; + HBasicBlock* return_block_; + HBasicBlock* exit_block_; + + HInstruction* parameter_; +}; + // Naive string diff data type. 
typedef std::list<std::pair<std::string, std::string>> diff_t; diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc index 9049457da5..05ec765b19 100644 --- a/compiler/optimizing/pc_relative_fixups_x86.cc +++ b/compiler/optimizing/pc_relative_fixups_x86.cc @@ -193,18 +193,19 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { } void HandleInvoke(HInvoke* invoke) { - // If this is an invoke-static/-direct with PC-relative dex cache array - // addressing, we need the PC-relative address base. HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect(); - // We can't add a pointer to the constant area if we already have a current - // method pointer. This may arise when sharpening doesn't remove the current - // method pointer from the invoke. - if (invoke_static_or_direct != nullptr && - invoke_static_or_direct->HasCurrentMethodInput()) { + + // We can't add the method address if we already have a current method pointer. + // This may arise when sharpening doesn't remove the current method pointer from the invoke. + if (invoke_static_or_direct != nullptr && invoke_static_or_direct->HasCurrentMethodInput()) { + // Note: This happens only for recursive calls (including compiling an intrinsic + // by faking a call to itself; we use kRuntimeCall for this case). DCHECK(!invoke_static_or_direct->HasPcRelativeMethodLoadKind()); return; } + // If this is an invoke-static/-direct with PC-relative addressing (within boot image + // or using .bss or .data.bimg.rel.ro), we need the PC-relative address base. bool base_added = false; if (invoke_static_or_direct != nullptr && invoke_static_or_direct->HasPcRelativeMethodLoadKind() && @@ -224,7 +225,6 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { } } - // These intrinsics need the constant area. switch (invoke->GetIntrinsic()) { case Intrinsics::kMathAbsDouble: case Intrinsics::kMathAbsFloat: @@ -235,7 +235,15 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { LOG(FATAL) << "Unreachable min/max/abs: intrinsics should have been lowered " "to IR nodes by instruction simplifier"; UNREACHABLE(); + case Intrinsics::kIntegerValueOf: + // This intrinsic can be call free if it loads the address of the boot image object. + // If we're compiling PIC, we need the address base for loading from .data.bimg.rel.ro. + if (!codegen_->GetCompilerOptions().GetCompilePic()) { + break; + } + FALLTHROUGH_INTENDED; case Intrinsics::kMathRoundFloat: + // This intrinsic needs the constant area. if (!base_added) { DCHECK(invoke_static_or_direct != nullptr); DCHECK(!invoke_static_or_direct->HasCurrentMethodInput()); diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc index fa7ad82316..42e6498148 100644 --- a/compiler/optimizing/register_allocator_graph_color.cc +++ b/compiler/optimizing/register_allocator_graph_color.cc @@ -1183,7 +1183,7 @@ static bool CheckInputOutputCanOverlap(InterferenceNode* in_node, InterferenceNo void ColoringIteration::BuildInterferenceGraph( const ScopedArenaVector<LiveInterval*>& intervals, const ScopedArenaVector<InterferenceNode*>& physical_nodes) { - DCHECK(interval_node_map_.Empty() && prunable_nodes_.empty()); + DCHECK(interval_node_map_.empty() && prunable_nodes_.empty()); // Build the interference graph efficiently by ordering range endpoints // by position and doing a linear sweep to find interferences. 
(That is, we // jump from endpoint to endpoint, maintaining a set of intervals live at each @@ -1208,7 +1208,7 @@ void ColoringIteration::BuildInterferenceGraph( if (range != nullptr) { InterferenceNode* node = new (allocator_) InterferenceNode(sibling, register_allocator_->liveness_); - interval_node_map_.Insert(std::make_pair(sibling, node)); + interval_node_map_.insert(std::make_pair(sibling, node)); if (sibling->HasRegister()) { // Fixed nodes should alias the canonical node for the corresponding register. @@ -1303,7 +1303,7 @@ void ColoringIteration::FindCoalesceOpportunities() { // Coalesce siblings. LiveInterval* next_sibling = interval->GetNextSibling(); if (next_sibling != nullptr && interval->GetEnd() == next_sibling->GetStart()) { - auto it = interval_node_map_.Find(next_sibling); + auto it = interval_node_map_.find(next_sibling); if (it != interval_node_map_.end()) { InterferenceNode* sibling_node = it->second; CreateCoalesceOpportunity(node, @@ -1318,7 +1318,7 @@ void ColoringIteration::FindCoalesceOpportunities() { if (parent->HasRegister() && parent->GetNextSibling() == interval && parent->GetEnd() == interval->GetStart()) { - auto it = interval_node_map_.Find(parent); + auto it = interval_node_map_.find(parent); if (it != interval_node_map_.end()) { InterferenceNode* parent_node = it->second; CreateCoalesceOpportunity(node, @@ -1341,7 +1341,7 @@ void ColoringIteration::FindCoalesceOpportunities() { size_t position = predecessor->GetLifetimeEnd() - 1; LiveInterval* existing = interval->GetParent()->GetSiblingAt(position); if (existing != nullptr) { - auto it = interval_node_map_.Find(existing); + auto it = interval_node_map_.find(existing); if (it != interval_node_map_.end()) { InterferenceNode* existing_node = it->second; CreateCoalesceOpportunity(node, @@ -1364,7 +1364,7 @@ void ColoringIteration::FindCoalesceOpportunities() { size_t position = predecessors[i]->GetLifetimeEnd() - 1; LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(position); - auto it = interval_node_map_.Find(input_interval); + auto it = interval_node_map_.find(input_interval); if (it != interval_node_map_.end()) { InterferenceNode* input_node = it->second; CreateCoalesceOpportunity(node, input_node, CoalesceKind::kPhi, position); @@ -1380,7 +1380,7 @@ void ColoringIteration::FindCoalesceOpportunities() { = defined_by->InputAt(0)->GetLiveInterval()->GetSiblingAt(interval->GetStart() - 1); // TODO: Could we consider lifetime holes here? 
if (input_interval->GetEnd() == interval->GetStart()) { - auto it = interval_node_map_.Find(input_interval); + auto it = interval_node_map_.find(input_interval); if (it != interval_node_map_.end()) { InterferenceNode* input_node = it->second; CreateCoalesceOpportunity(node, @@ -1407,7 +1407,7 @@ void ColoringIteration::FindCoalesceOpportunities() { LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(def_point); if (input_interval != nullptr && input_interval->HasHighInterval() == interval->HasHighInterval()) { - auto it = interval_node_map_.Find(input_interval); + auto it = interval_node_map_.find(input_interval); if (it != interval_node_map_.end()) { InterferenceNode* input_node = it->second; CreateCoalesceOpportunity(node, diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc index a70b0664dc..7144775c2b 100644 --- a/compiler/optimizing/register_allocator_test.cc +++ b/compiler/optimizing/register_allocator_test.cc @@ -40,6 +40,12 @@ using Strategy = RegisterAllocator::Strategy; class RegisterAllocatorTest : public OptimizingUnitTest { protected: + void SetUp() OVERRIDE { + // This test is using the x86 ISA. + OverrideInstructionSetFeatures(InstructionSet::kX86, "default"); + OptimizingUnitTest::SetUp(); + } + // These functions need to access private variables of LocationSummary, so we declare it // as a member of RegisterAllocatorTest, which we make a friend class. void SameAsFirstInputHint(Strategy strategy); @@ -81,9 +87,7 @@ TEST_F(RegisterAllocatorTest, test_name##_GraphColor) {\ bool RegisterAllocatorTest::Check(const std::vector<uint16_t>& data, Strategy strategy) { HGraph* graph = CreateCFG(data); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); + x86::CodeGeneratorX86 codegen(graph, *compiler_options_); SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); std::unique_ptr<RegisterAllocator> register_allocator = @@ -98,9 +102,7 @@ bool RegisterAllocatorTest::Check(const std::vector<uint16_t>& data, Strategy st */ TEST_F(RegisterAllocatorTest, ValidateIntervals) { HGraph* graph = CreateGraph(); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); + x86::CodeGeneratorX86 codegen(graph, *compiler_options_); ScopedArenaVector<LiveInterval*> intervals(GetScopedAllocator()->Adapter()); // Test with two intervals of the same range. 
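A minimal sketch of the test pattern these register_allocator_test.cc hunks converge on, assuming a hypothetical fixture name; OverrideInstructionSetFeatures(), compiler_options_ and the two-argument CodeGeneratorX86 constructor are taken from the hunks themselves:

// Hypothetical fixture sketching the updated test setup; not part of this change.
class SampleX86Test : public OptimizingUnitTest {
 protected:
  void SetUp() OVERRIDE {
    // Pin the test to the x86 ISA before the base class SetUp() runs.
    OverrideInstructionSetFeatures(InstructionSet::kX86, "default");
    OptimizingUnitTest::SetUp();
  }
};

TEST_F(SampleX86Test, BuildsCodegenFromSharedOptions) {
  HGraph* graph = CreateGraph();
  // The shared CompilerOptions now carries the ISA and its features, so the
  // per-test X86InstructionSetFeatures::FromCppDefines() boilerplate is gone.
  x86::CodeGeneratorX86 codegen(graph, *compiler_options_);
  EXPECT_EQ(InstructionSet::kX86, codegen.GetCompilerOptions().GetInstructionSet());
}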
@@ -324,9 +326,7 @@ void RegisterAllocatorTest::Loop3(Strategy strategy) { Instruction::GOTO | 0xF900); HGraph* graph = CreateCFG(data); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); + x86::CodeGeneratorX86 codegen(graph, *compiler_options_); SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); std::unique_ptr<RegisterAllocator> register_allocator = @@ -359,9 +359,7 @@ TEST_F(RegisterAllocatorTest, FirstRegisterUse) { Instruction::RETURN_VOID); HGraph* graph = CreateCFG(data); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); + x86::CodeGeneratorX86 codegen(graph, *compiler_options_); SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); @@ -412,9 +410,7 @@ void RegisterAllocatorTest::DeadPhi(Strategy strategy) { HGraph* graph = CreateCFG(data); SsaDeadPhiElimination(graph).Run(); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); + x86::CodeGeneratorX86 codegen(graph, *compiler_options_); SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); std::unique_ptr<RegisterAllocator> register_allocator = @@ -438,9 +434,7 @@ TEST_F(RegisterAllocatorTest, FreeUntil) { HGraph* graph = CreateCFG(data); SsaDeadPhiElimination(graph).Run(); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); + x86::CodeGeneratorX86 codegen(graph, *compiler_options_); SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); RegisterAllocatorLinearScan register_allocator(GetScopedAllocator(), &codegen, liveness); @@ -566,9 +560,7 @@ void RegisterAllocatorTest::PhiHint(Strategy strategy) { { HGraph* graph = BuildIfElseWithPhi(&phi, &input1, &input2); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); + x86::CodeGeneratorX86 codegen(graph, *compiler_options_); SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); @@ -584,9 +576,7 @@ void RegisterAllocatorTest::PhiHint(Strategy strategy) { { HGraph* graph = BuildIfElseWithPhi(&phi, &input1, &input2); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); + x86::CodeGeneratorX86 codegen(graph, *compiler_options_); SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); @@ -604,9 +594,7 @@ void RegisterAllocatorTest::PhiHint(Strategy strategy) { { HGraph* graph = BuildIfElseWithPhi(&phi, &input1, &input2); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); + x86::CodeGeneratorX86 codegen(graph, *compiler_options_); SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); 
liveness.Analyze(); @@ -624,9 +612,7 @@ void RegisterAllocatorTest::PhiHint(Strategy strategy) { { HGraph* graph = BuildIfElseWithPhi(&phi, &input1, &input2); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); + x86::CodeGeneratorX86 codegen(graph, *compiler_options_); SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); @@ -689,9 +675,7 @@ void RegisterAllocatorTest::ExpectedInRegisterHint(Strategy strategy) { { HGraph* graph = BuildFieldReturn(&field, &ret); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); + x86::CodeGeneratorX86 codegen(graph, *compiler_options_); SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); @@ -705,9 +689,7 @@ void RegisterAllocatorTest::ExpectedInRegisterHint(Strategy strategy) { { HGraph* graph = BuildFieldReturn(&field, &ret); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); + x86::CodeGeneratorX86 codegen(graph, *compiler_options_); SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); @@ -761,9 +743,7 @@ void RegisterAllocatorTest::SameAsFirstInputHint(Strategy strategy) { { HGraph* graph = BuildTwoSubs(&first_sub, &second_sub); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); + x86::CodeGeneratorX86 codegen(graph, *compiler_options_); SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); @@ -778,9 +758,7 @@ void RegisterAllocatorTest::SameAsFirstInputHint(Strategy strategy) { { HGraph* graph = BuildTwoSubs(&first_sub, &second_sub); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); + x86::CodeGeneratorX86 codegen(graph, *compiler_options_); SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); @@ -834,9 +812,7 @@ HGraph* RegisterAllocatorTest::BuildDiv(HInstruction** div) { void RegisterAllocatorTest::ExpectedExactInRegisterAndSameOutputHint(Strategy strategy) { HInstruction *div; HGraph* graph = BuildDiv(&div); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); + x86::CodeGeneratorX86 codegen(graph, *compiler_options_); SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); @@ -934,9 +910,7 @@ TEST_F(RegisterAllocatorTest, SpillInactive) { new (GetAllocator()) LocationSummary(fourth->GetDefinedBy(), LocationSummary::kNoCall); locations->SetOut(Location::RequiresRegister()); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); + x86::CodeGeneratorX86 codegen(graph, *compiler_options_); SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); // Populate the 
instructions in the liveness object, to please the register allocator. for (size_t i = 0; i < 32; ++i) { diff --git a/compiler/optimizing/scheduler.h b/compiler/optimizing/scheduler.h index 8e98f192d8..c7683e04a7 100644 --- a/compiler/optimizing/scheduler.h +++ b/compiler/optimizing/scheduler.h @@ -262,14 +262,14 @@ class SchedulingGraph : public ValueObject { std::unique_ptr<SchedulingNode> node( new (allocator_) SchedulingNode(instr, allocator_, is_scheduling_barrier)); SchedulingNode* result = node.get(); - nodes_map_.Insert(std::make_pair(instr, std::move(node))); + nodes_map_.insert(std::make_pair(instr, std::move(node))); contains_scheduling_barrier_ |= is_scheduling_barrier; AddDependencies(instr, is_scheduling_barrier); return result; } void Clear() { - nodes_map_.Clear(); + nodes_map_.clear(); contains_scheduling_barrier_ = false; } @@ -278,7 +278,7 @@ class SchedulingGraph : public ValueObject { } SchedulingNode* GetNode(const HInstruction* instr) const { - auto it = nodes_map_.Find(instr); + auto it = nodes_map_.find(instr); if (it == nodes_map_.end()) { return nullptr; } else { @@ -294,7 +294,7 @@ class SchedulingGraph : public ValueObject { bool HasImmediateOtherDependency(const HInstruction* node, const HInstruction* other) const; size_t Size() const { - return nodes_map_.Size(); + return nodes_map_.size(); } // Dump the scheduling graph, in dot file format, appending it to the file diff --git a/compiler/optimizing/scheduler_test.cc b/compiler/optimizing/scheduler_test.cc index d4cae72c7e..7079e07ae1 100644 --- a/compiler/optimizing/scheduler_test.cc +++ b/compiler/optimizing/scheduler_test.cc @@ -192,7 +192,9 @@ class SchedulerTest : public OptimizingUnitTest { HInstructionScheduling scheduling(graph, target_config.GetInstructionSet()); scheduling.Run(/*only_optimize_loop_blocks*/ false, /*schedule_randomly*/ true); + OverrideInstructionSetFeatures(target_config.GetInstructionSet(), "default"); RunCode(target_config, + *compiler_options_, graph, [](HGraph* graph_arg) { RemoveSuspendChecks(graph_arg); }, has_result, expected); diff --git a/compiler/optimizing/select_generator.cc b/compiler/optimizing/select_generator.cc index 0d0f7cc748..dcc7f77fc2 100644 --- a/compiler/optimizing/select_generator.cc +++ b/compiler/optimizing/select_generator.cc @@ -45,7 +45,9 @@ static bool IsSimpleBlock(HBasicBlock* block) { HInstruction* instruction = it.Current(); if (instruction->IsControlFlow()) { return instruction->IsGoto() || instruction->IsReturn(); - } else if (instruction->CanBeMoved() && !instruction->HasSideEffects()) { + } else if (instruction->CanBeMoved() && + !instruction->HasSideEffects() && + !instruction->CanThrow()) { if (instruction->IsSelect() && instruction->AsSelect()->GetCondition()->GetBlock() == block) { // Count one HCondition and HSelect in the same block as a single instruction. @@ -119,10 +121,14 @@ bool HSelectGenerator::Run() { // TODO(dbrazdil): This puts an instruction between If and its condition. // Implement moving of conditions to first users if possible. 
while (!true_block->IsSingleGoto() && !true_block->IsSingleReturn()) { - true_block->GetFirstInstruction()->MoveBefore(if_instruction); + HInstruction* instr = true_block->GetFirstInstruction(); + DCHECK(!instr->CanThrow()); + instr->MoveBefore(if_instruction); } while (!false_block->IsSingleGoto() && !false_block->IsSingleReturn()) { - false_block->GetFirstInstruction()->MoveBefore(if_instruction); + HInstruction* instr = false_block->GetFirstInstruction(); + DCHECK(!instr->CanThrow()); + instr->MoveBefore(if_instruction); } DCHECK(true_block->IsSingleGoto() || true_block->IsSingleReturn()); DCHECK(false_block->IsSingleGoto() || false_block->IsSingleReturn()); diff --git a/compiler/optimizing/select_generator_test.cc b/compiler/optimizing/select_generator_test.cc new file mode 100644 index 0000000000..6e6549737c --- /dev/null +++ b/compiler/optimizing/select_generator_test.cc @@ -0,0 +1,96 @@ +/* + * Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "select_generator.h" + +#include "base/arena_allocator.h" +#include "builder.h" +#include "nodes.h" +#include "optimizing_unit_test.h" +#include "side_effects_analysis.h" + +namespace art { + +class SelectGeneratorTest : public ImprovedOptimizingUnitTest { + public: + void ConstructBasicGraphForSelect(HInstruction* instr) { + HBasicBlock* if_block = new (GetAllocator()) HBasicBlock(graph_); + HBasicBlock* then_block = new (GetAllocator()) HBasicBlock(graph_); + HBasicBlock* else_block = new (GetAllocator()) HBasicBlock(graph_); + + graph_->AddBlock(if_block); + graph_->AddBlock(then_block); + graph_->AddBlock(else_block); + + entry_block_->ReplaceSuccessor(return_block_, if_block); + + if_block->AddSuccessor(then_block); + if_block->AddSuccessor(else_block); + then_block->AddSuccessor(return_block_); + else_block->AddSuccessor(return_block_); + + HParameterValue* bool_param = new (GetAllocator()) HParameterValue(graph_->GetDexFile(), + dex::TypeIndex(0), + 1, + DataType::Type::kBool); + entry_block_->AddInstruction(bool_param); + HIntConstant* const1 = graph_->GetIntConstant(1); + + if_block->AddInstruction(new (GetAllocator()) HIf(bool_param)); + + then_block->AddInstruction(instr); + then_block->AddInstruction(new (GetAllocator()) HGoto()); + + else_block->AddInstruction(new (GetAllocator()) HGoto()); + + HPhi* phi = new (GetAllocator()) HPhi(GetAllocator(), 0, 0, DataType::Type::kInt32); + return_block_->AddPhi(phi); + phi->AddInput(instr); + phi->AddInput(const1); + } + + bool CheckGraphAndTrySelectGenerator() { + graph_->BuildDominatorTree(); + EXPECT_TRUE(CheckGraph()); + + SideEffectsAnalysis side_effects(graph_); + side_effects.Run(); + return HSelectGenerator(graph_, /*handles*/ nullptr, /*stats*/ nullptr).Run(); + } +}; + +// HDivZeroCheck might throw and should not be hoisted from the conditional to an unconditional. 
+TEST_F(SelectGeneratorTest, testZeroCheck) { + InitGraph(); + HDivZeroCheck* instr = new (GetAllocator()) HDivZeroCheck(parameter_, 0); + ConstructBasicGraphForSelect(instr); + + ArenaVector<HInstruction*> current_locals({parameter_, graph_->GetIntConstant(1)}, + GetAllocator()->Adapter(kArenaAllocInstruction)); + ManuallyBuildEnvFor(instr, &current_locals); + + EXPECT_FALSE(CheckGraphAndTrySelectGenerator()); +} + +// Test that SelectGenerator succeeds with HAdd. +TEST_F(SelectGeneratorTest, testAdd) { + InitGraph(); + HAdd* instr = new (GetAllocator()) HAdd(DataType::Type::kInt32, parameter_, parameter_, 0); + ConstructBasicGraphForSelect(instr); + EXPECT_TRUE(CheckGraphAndTrySelectGenerator()); +} + +} // namespace art diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc index 6541043046..27482ac5bf 100644 --- a/compiler/optimizing/sharpening.cc +++ b/compiler/optimizing/sharpening.cc @@ -21,7 +21,6 @@ #include "base/enums.h" #include "class_linker.h" #include "code_generator.h" -#include "driver/compiler_driver.h" #include "driver/compiler_options.h" #include "driver/dex_compilation_unit.h" #include "gc/heap.h" @@ -42,9 +41,7 @@ bool HSharpening::Run() { for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* instruction = it.Current(); if (instruction->IsInvokeStaticOrDirect()) { - SharpenInvokeStaticOrDirect(instruction->AsInvokeStaticOrDirect(), - codegen_, - compiler_driver_); + SharpenInvokeStaticOrDirect(instruction->AsInvokeStaticOrDirect(), codegen_); } // TODO: Move the sharpening of invoke-virtual/-interface/-super from HGraphBuilder // here. Rewrite it to avoid the CompilerDriver's reliance on verifier data @@ -70,21 +67,17 @@ static bool AOTCanEmbedMethod(ArtMethod* method, const CompilerOptions& options) return IsInBootImage(method) && !options.GetCompilePic(); } -static bool BootImageAOTCanEmbedMethod(ArtMethod* method, CompilerDriver* compiler_driver) { - DCHECK(compiler_driver->GetCompilerOptions().IsBootImage()); - if (!compiler_driver->GetSupportBootImageFixup()) { - return false; - } +static bool BootImageAOTCanEmbedMethod(ArtMethod* method, const CompilerOptions& compiler_options) { + DCHECK(compiler_options.IsBootImage()); ScopedObjectAccess soa(Thread::Current()); ObjPtr<mirror::Class> klass = method->GetDeclaringClass(); DCHECK(klass != nullptr); const DexFile& dex_file = klass->GetDexFile(); - return compiler_driver->IsImageClass(dex_file.StringByTypeIdx(klass->GetDexTypeIndex())); + return compiler_options.IsImageClass(dex_file.StringByTypeIdx(klass->GetDexTypeIndex())); } void HSharpening::SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke, - CodeGenerator* codegen, - CompilerDriver* compiler_driver) { + CodeGenerator* codegen) { if (invoke->IsStringInit()) { // Not using the dex cache arrays. But we could still try to use a better dispatch... // TODO: Use direct_method and direct_code for the appropriate StringFactory method. @@ -111,21 +104,29 @@ void HSharpening::SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke, // We don't optimize for debuggable as it would prevent us from obsoleting the method in some // situations. + const CompilerOptions& compiler_options = codegen->GetCompilerOptions(); if (callee == codegen->GetGraph()->GetArtMethod() && !codegen->GetGraph()->IsDebuggable()) { // Recursive call.
method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kRecursive; code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallSelf; + } else if (compiler_options.IsBootImage()) { + if (!compiler_options.GetCompilePic()) { + // Test configuration, do not sharpen. + method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall; + } else if (BootImageAOTCanEmbedMethod(callee, compiler_options)) { + method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative; + } else { + // Use PC-relative access to the .bss methods array. + method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kBssEntry; + } + code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod; } else if (Runtime::Current()->UseJitCompilation() || - AOTCanEmbedMethod(callee, codegen->GetCompilerOptions())) { + AOTCanEmbedMethod(callee, compiler_options)) { // JIT or on-device AOT compilation referencing a boot image method. // Use the method address directly. method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress; method_load_data = reinterpret_cast<uintptr_t>(callee); code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod; - } else if (codegen->GetCompilerOptions().IsBootImage() && - BootImageAOTCanEmbedMethod(callee, compiler_driver)) { - method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative; - code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod; } else if (IsInBootImage(callee)) { // Use PC-relative access to the .data.bimg.rel.ro methods array. method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo; @@ -153,7 +154,6 @@ void HSharpening::SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke, HLoadClass::LoadKind HSharpening::ComputeLoadClassKind( HLoadClass* load_class, CodeGenerator* codegen, - CompilerDriver* compiler_driver, const DexCompilationUnit& dex_compilation_unit) { Handle<mirror::Class> klass = load_class->GetClass(); DCHECK(load_class->GetLoadKind() == HLoadClass::LoadKind::kRuntimeCall || @@ -177,26 +177,27 @@ HLoadClass::LoadKind HSharpening::ComputeLoadClassKind( bool is_in_boot_image = false; HLoadClass::LoadKind desired_load_kind = HLoadClass::LoadKind::kInvalid; Runtime* runtime = Runtime::Current(); - if (codegen->GetCompilerOptions().IsBootImage()) { + const CompilerOptions& compiler_options = codegen->GetCompilerOptions(); + if (compiler_options.IsBootImage()) { // Compiling boot image. Check if the class is a boot image class. DCHECK(!runtime->UseJitCompilation()); - if (!compiler_driver->GetSupportBootImageFixup()) { - // compiler_driver_test. Do not sharpen. + if (!compiler_options.GetCompilePic()) { + // Test configuration, do not sharpen. desired_load_kind = HLoadClass::LoadKind::kRuntimeCall; } else if ((klass != nullptr) && - compiler_driver->IsImageClass(dex_file.StringByTypeIdx(type_index))) { + compiler_options.IsImageClass(dex_file.StringByTypeIdx(type_index))) { is_in_boot_image = true; desired_load_kind = HLoadClass::LoadKind::kBootImageLinkTimePcRelative; } else { // Not a boot image class. 
- DCHECK(ContainsElement(compiler_driver->GetDexFilesForOatFile(), &dex_file)); + DCHECK(ContainsElement(compiler_options.GetDexFilesForOatFile(), &dex_file)); desired_load_kind = HLoadClass::LoadKind::kBssEntry; } } else { is_in_boot_image = (klass != nullptr) && runtime->GetHeap()->ObjectIsInBootImageSpace(klass.Get()); if (runtime->UseJitCompilation()) { - DCHECK(!codegen->GetCompilerOptions().GetCompilePic()); + DCHECK(!compiler_options.GetCompilePic()); if (is_in_boot_image) { // TODO: Use direct pointers for all non-moving spaces, not just boot image. Bug: 29530787 desired_load_kind = HLoadClass::LoadKind::kBootImageAddress; @@ -241,9 +242,7 @@ HLoadClass::LoadKind HSharpening::ComputeLoadClassKind( return load_kind; } -static inline bool CanUseTypeCheckBitstring(ObjPtr<mirror::Class> klass, - CodeGenerator* codegen, - CompilerDriver* compiler_driver) +static inline bool CanUseTypeCheckBitstring(ObjPtr<mirror::Class> klass, CodeGenerator* codegen) REQUIRES_SHARED(Locks::mutator_lock_) { DCHECK(!klass->IsProxyClass()); DCHECK(!klass->IsArrayClass()); @@ -252,7 +251,7 @@ static inline bool CanUseTypeCheckBitstring(ObjPtr<mirror::Class> klass, // If we're JITting, try to assign a type check bitstring (fall through). } else if (codegen->GetCompilerOptions().IsBootImage()) { const char* descriptor = klass->GetDexFile().StringByTypeIdx(klass->GetDexTypeIndex()); - if (!compiler_driver->IsImageClass(descriptor)) { + if (!codegen->GetCompilerOptions().IsImageClass(descriptor)) { return false; } // If the target is a boot image class, try to assign a type check bitstring (fall through). @@ -265,7 +264,7 @@ static inline bool CanUseTypeCheckBitstring(ObjPtr<mirror::Class> klass, // Try to assign a type check bitstring. MutexLock subtype_check_lock(Thread::Current(), *Locks::subtype_check_lock_); - if ((false) && // FIXME: Inliner does not respect compiler_driver->IsClassToCompile() + if ((false) && // FIXME: Inliner does not respect CompilerDriver::IsClassToCompile() // and we're hitting an unassigned bitstring in dex2oat_image_test. b/26687569 kIsDebugBuild && codegen->GetCompilerOptions().IsBootImage() && @@ -281,7 +280,6 @@ static inline bool CanUseTypeCheckBitstring(ObjPtr<mirror::Class> klass, TypeCheckKind HSharpening::ComputeTypeCheckKind(ObjPtr<mirror::Class> klass, CodeGenerator* codegen, - CompilerDriver* compiler_driver, bool needs_access_check) { if (klass == nullptr) { return TypeCheckKind::kUnresolvedCheck; @@ -299,7 +297,7 @@ TypeCheckKind HSharpening::ComputeTypeCheckKind(ObjPtr<mirror::Class> klass, return TypeCheckKind::kExactCheck; } else if (kBitstringSubtypeCheckEnabled && !needs_access_check && - CanUseTypeCheckBitstring(klass, codegen, compiler_driver)) { + CanUseTypeCheckBitstring(klass, codegen)) { // TODO: We should not need the `!needs_access_check` check but getting rid of that // requires rewriting some optimizations in instruction simplifier. 
return TypeCheckKind::kBitstringCheck; @@ -313,7 +311,6 @@ TypeCheckKind HSharpening::ComputeTypeCheckKind(ObjPtr<mirror::Class> klass, void HSharpening::ProcessLoadString( HLoadString* load_string, CodeGenerator* codegen, - CompilerDriver* compiler_driver, const DexCompilationUnit& dex_compilation_unit, VariableSizedHandleScope* handles) { DCHECK_EQ(load_string->GetLoadKind(), HLoadString::LoadKind::kRuntimeCall); @@ -332,17 +329,18 @@ void HSharpening::ProcessLoadString( : hs.NewHandle(class_linker->FindDexCache(soa.Self(), dex_file)); ObjPtr<mirror::String> string = nullptr; - if (codegen->GetCompilerOptions().IsBootImage()) { + const CompilerOptions& compiler_options = codegen->GetCompilerOptions(); + if (compiler_options.IsBootImage()) { // Compiling boot image. Resolve the string and allocate it if needed, to ensure // the string will be added to the boot image. DCHECK(!runtime->UseJitCompilation()); string = class_linker->ResolveString(string_index, dex_cache); CHECK(string != nullptr); - if (compiler_driver->GetSupportBootImageFixup()) { - DCHECK(ContainsElement(compiler_driver->GetDexFilesForOatFile(), &dex_file)); + if (compiler_options.GetCompilePic()) { + DCHECK(ContainsElement(compiler_options.GetDexFilesForOatFile(), &dex_file)); desired_load_kind = HLoadString::LoadKind::kBootImageLinkTimePcRelative; } else { - // compiler_driver_test. Do not sharpen. + // Test configuration, do not sharpen. desired_load_kind = HLoadString::LoadKind::kRuntimeCall; } } else if (runtime->UseJitCompilation()) { diff --git a/compiler/optimizing/sharpening.h b/compiler/optimizing/sharpening.h index 9ccbcaf220..cbac361891 100644 --- a/compiler/optimizing/sharpening.h +++ b/compiler/optimizing/sharpening.h @@ -23,7 +23,6 @@ namespace art { class CodeGenerator; -class CompilerDriver; class DexCompilationUnit; // Optimization that tries to improve the way we dispatch methods and access types, @@ -34,45 +33,37 @@ class HSharpening : public HOptimization { public: HSharpening(HGraph* graph, CodeGenerator* codegen, - CompilerDriver* compiler_driver, const char* name = kSharpeningPassName) : HOptimization(graph, name), - codegen_(codegen), - compiler_driver_(compiler_driver) { } + codegen_(codegen) { } bool Run() OVERRIDE; static constexpr const char* kSharpeningPassName = "sharpening"; // Used by Sharpening and InstructionSimplifier. - static void SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke, - CodeGenerator* codegen, - CompilerDriver* compiler_driver); + static void SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke, CodeGenerator* codegen); // Used by the builder and the inliner. static HLoadClass::LoadKind ComputeLoadClassKind(HLoadClass* load_class, CodeGenerator* codegen, - CompilerDriver* compiler_driver, const DexCompilationUnit& dex_compilation_unit) REQUIRES_SHARED(Locks::mutator_lock_); // Used by the builder. static TypeCheckKind ComputeTypeCheckKind(ObjPtr<mirror::Class> klass, CodeGenerator* codegen, - CompilerDriver* compiler_driver, bool needs_access_check) REQUIRES_SHARED(Locks::mutator_lock_); // Used by the builder. 
static void ProcessLoadString(HLoadString* load_string, CodeGenerator* codegen, - CompilerDriver* compiler_driver, const DexCompilationUnit& dex_compilation_unit, VariableSizedHandleScope* handles); private: CodeGenerator* codegen_; - CompilerDriver* compiler_driver_; }; } // namespace art diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index dd54468217..dda29a1b4b 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -440,6 +440,62 @@ static bool HasAliasInEnvironments(HInstruction* instruction) { return false; } +void SsaBuilder::ReplaceUninitializedStringPhis() { + ScopedArenaHashSet<HInstruction*> seen_instructions( + local_allocator_->Adapter(kArenaAllocGraphBuilder)); + ScopedArenaVector<HInstruction*> worklist(local_allocator_->Adapter(kArenaAllocGraphBuilder)); + + // Iterate over all inputs and uses of the phi, recursively, until all related instructions + // have been visited. + for (const auto& pair : uninitialized_string_phis_) { + HPhi* string_phi = pair.first; + HInvoke* invoke = pair.second; + worklist.push_back(string_phi); + HNewInstance* found_instance = nullptr; + do { + HInstruction* current = worklist.back(); + worklist.pop_back(); + if (seen_instructions.find(current) != seen_instructions.end()) { + continue; + } + seen_instructions.insert(current); + if (current->IsNewInstance()) { + // If it is the first time we see the allocation, replace its uses. We don't register + // it through `RemoveRedundantUninitializedStrings`, as that method makes assumption about + // aliasing and environment uses that don't hold when the string escapes to phis. + // Note that this also means we will keep the (useless) allocation. + if (found_instance == nullptr) { + found_instance = current->AsNewInstance(); + } else { + DCHECK(found_instance == current); + } + } else if (current->IsPhi()) { + // Push all inputs to the worklist. Those should be Phis or NewInstance. + for (HInstruction* input : current->GetInputs()) { + DCHECK(input->IsPhi() || input->IsNewInstance()) << input->DebugName(); + worklist.push_back(input); + } + } else { + // The verifier prevents any other DEX uses of the uninitialized string. + DCHECK(current->IsEqual() || current->IsNotEqual()); + continue; + } + current->ReplaceUsesDominatedBy(invoke, invoke); + current->ReplaceEnvUsesDominatedBy(invoke, invoke); + // Push all users to the worklist. Now that we have replaced + // the uses dominated by the invokes, the remaining users should only + // be Phi, or Equal/NotEqual. + for (const HUseListNode<HInstruction*>& use : current->GetUses()) { + HInstruction* user = use.GetUser(); + DCHECK(user->IsPhi() || user->IsEqual() || user->IsNotEqual()) << user->DebugName(); + worklist.push_back(user); + } + } while (!worklist.empty()); + seen_instructions.clear(); + DCHECK(found_instance != nullptr); + } +} + void SsaBuilder::RemoveRedundantUninitializedStrings() { if (graph_->IsDebuggable()) { // Do not perform the optimization for consistency with the interpreter @@ -488,27 +544,32 @@ void SsaBuilder::RemoveRedundantUninitializedStrings() { GraphAnalysisResult SsaBuilder::BuildSsa() { DCHECK(!graph_->IsInSsaForm()); - // 1) Propagate types of phis. At this point, phis are typed void in the general + // Replace Phis that feed in a String.<init>, as well as their aliases, with + // the actual String allocation invocation. We do this first, as the phis stored in + // the data structure might get removed from the graph in later stages during `BuildSsa`. 
+ ReplaceUninitializedStringPhis(); + + // Propagate types of phis. At this point, phis are typed void in the general // case, or float/double/reference if we created an equivalent phi. So we need // to propagate the types across phis to give them a correct type. If a type // conflict is detected in this stage, the phi is marked dead. RunPrimitiveTypePropagation(); - // 2) Now that the correct primitive types have been assigned, we can get rid + // Now that the correct primitive types have been assigned, we can get rid // of redundant phis. Note that we cannot do this phase before type propagation, // otherwise we could get rid of phi equivalents, whose presence is a requirement // for the type propagation phase. Note that this is to satisfy statement (a) // of the SsaBuilder (see ssa_builder.h). SsaRedundantPhiElimination(graph_).Run(); - // 3) Fix the type for null constants which are part of an equality comparison. + // Fix the type for null constants which are part of an equality comparison. // We need to do this after redundant phi elimination, to ensure the only cases // that we can see are reference comparison against 0. The redundant phi // elimination ensures we do not see a phi taking two 0 constants in a HEqual // or HNotEqual. FixNullConstantType(); - // 4) Compute type of reference type instructions. The pass assumes that + // Compute type of reference type instructions. The pass assumes that // NullConstant has been fixed up. ReferenceTypePropagation(graph_, class_loader_, @@ -516,7 +577,7 @@ GraphAnalysisResult SsaBuilder::BuildSsa() { handles_, /* is_first_run */ true).Run(); - // 5) HInstructionBuilder duplicated ArrayGet instructions with ambiguous type + // HInstructionBuilder duplicated ArrayGet instructions with ambiguous type // (int/float or long/double) and marked ArraySets with ambiguous input type. // Now that RTP computed the type of the array input, the ambiguity can be // resolved and the correct equivalents kept. @@ -524,13 +585,13 @@ GraphAnalysisResult SsaBuilder::BuildSsa() { return kAnalysisFailAmbiguousArrayOp; } - // 6) Mark dead phis. This will mark phis which are not used by instructions + // Mark dead phis. This will mark phis which are not used by instructions // or other live phis. If compiling as debuggable code, phis will also be kept // live if they have an environment use. SsaDeadPhiElimination dead_phi_elimimation(graph_); dead_phi_elimimation.MarkDeadPhis(); - // 7) Make sure environments use the right phi equivalent: a phi marked dead + // Make sure environments use the right phi equivalent: a phi marked dead // can have a phi equivalent that is not dead. In that case we have to replace // it with the live equivalent because deoptimization and try/catch rely on // environments containing values of all live vregs at that point. Note that @@ -539,14 +600,14 @@ GraphAnalysisResult SsaBuilder::BuildSsa() { // environments to just reference one. FixEnvironmentPhis(); - // 8) Now that the right phis are used for the environments, we can eliminate + // Now that the right phis are used for the environments, we can eliminate // phis we do not need. Regardless of the debuggable status, this phase is // necessary for statement (b) of the SsaBuilder (see ssa_builder.h), as well // as for the code generation, which does not deal with phis of conflicting // input types.
dead_phi_elimimation.EliminateDeadPhis(); - // 9) HInstructionBuilder replaced uses of NewInstances of String with the + // HInstructionBuilder replaced uses of NewInstances of String with the // results of their corresponding StringFactory calls. Unless the String // objects are used before they are initialized, they can be replaced with // NullConstant. Note that this optimization is valid only if unsimplified diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h index 60831a9e6a..765544508e 100644 --- a/compiler/optimizing/ssa_builder.h +++ b/compiler/optimizing/ssa_builder.h @@ -61,7 +61,8 @@ class SsaBuilder : public ValueObject { local_allocator_(local_allocator), ambiguous_agets_(local_allocator->Adapter(kArenaAllocGraphBuilder)), ambiguous_asets_(local_allocator->Adapter(kArenaAllocGraphBuilder)), - uninitialized_strings_(local_allocator->Adapter(kArenaAllocGraphBuilder)) { + uninitialized_strings_(local_allocator->Adapter(kArenaAllocGraphBuilder)), + uninitialized_string_phis_(local_allocator->Adapter(kArenaAllocGraphBuilder)) { graph_->InitializeInexactObjectRTI(handles); } @@ -96,6 +97,10 @@ class SsaBuilder : public ValueObject { } } + void AddUninitializedStringPhi(HPhi* phi, HInvoke* invoke) { + uninitialized_string_phis_.push_back(std::make_pair(phi, invoke)); + } + private: void SetLoopHeaderPhiInputs(); void FixEnvironmentPhis(); @@ -118,6 +123,7 @@ class SsaBuilder : public ValueObject { HArrayGet* GetFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget); void RemoveRedundantUninitializedStrings(); + void ReplaceUninitializedStringPhis(); HGraph* const graph_; Handle<mirror::ClassLoader> class_loader_; @@ -131,6 +137,7 @@ class SsaBuilder : public ValueObject { ScopedArenaVector<HArrayGet*> ambiguous_agets_; ScopedArenaVector<HArraySet*> ambiguous_asets_; ScopedArenaVector<HNewInstance*> uninitialized_strings_; + ScopedArenaVector<std::pair<HPhi*, HInvoke*>> uninitialized_string_phis_; DISALLOW_COPY_AND_ASSIGN(SsaBuilder); }; diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc index f6bd05269e..2f782f39fc 100644 --- a/compiler/optimizing/ssa_liveness_analysis.cc +++ b/compiler/optimizing/ssa_liveness_analysis.cc @@ -195,14 +195,19 @@ void SsaLivenessAnalysis::ComputeLiveRanges() { // SsaLivenessAnalysis. for (size_t i = 0, e = environment->Size(); i < e; ++i) { HInstruction* instruction = environment->GetInstructionAt(i); + if (instruction == nullptr) { + continue; + } bool should_be_live = ShouldBeLiveForEnvironment(current, instruction); + // If this environment use does not keep the instruction live, it does not + // affect the live range of that instruction.
if (should_be_live) { CHECK(instruction->HasSsaIndex()) << instruction->DebugName(); live_in->SetBit(instruction->GetSsaIndex()); - } - if (instruction != nullptr) { - instruction->GetLiveInterval()->AddUse( - current, environment, i, /* actual_user */ nullptr, should_be_live); + instruction->GetLiveInterval()->AddUse(current, + environment, + i, + /* actual_user */ nullptr); } } } diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h index f83bb52b69..83ca5bd5fa 100644 --- a/compiler/optimizing/ssa_liveness_analysis.h +++ b/compiler/optimizing/ssa_liveness_analysis.h @@ -300,8 +300,7 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { void AddUse(HInstruction* instruction, HEnvironment* environment, size_t input_index, - HInstruction* actual_user = nullptr, - bool keep_alive = false) { + HInstruction* actual_user = nullptr) { bool is_environment = (environment != nullptr); LocationSummary* locations = instruction->GetLocations(); if (actual_user == nullptr) { @@ -359,12 +358,6 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { uses_.push_front(*new_use); } - if (is_environment && !keep_alive) { - // If this environment use does not keep the instruction live, it does not - // affect the live range of that instruction. - return; - } - size_t start_block_position = instruction->GetBlock()->GetLifetimeStart(); if (first_range_ == nullptr) { // First time we see a use of that interval. @@ -1157,8 +1150,11 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { * of an instruction that has a primitive type make the instruction live. * If the graph does not have the debuggable property, the environment * use has no effect, and may get a 'none' value after register allocation. + * (d) When compiling in OSR mode, all loops in the compiled method may be entered + * from the interpreter via SuspendCheck; such use in SuspendCheck makes the instruction + * live. * - * (b) and (c) are implemented through SsaLivenessAnalysis::ShouldBeLiveForEnvironment. + * (b), (c) and (d) are implemented through SsaLivenessAnalysis::ShouldBeLiveForEnvironment. */ class SsaLivenessAnalysis : public ValueObject { public: @@ -1259,14 +1255,18 @@ class SsaLivenessAnalysis : public ValueObject { // Returns whether `instruction` in an HEnvironment held by `env_holder` // should be kept live by the HEnvironment. static bool ShouldBeLiveForEnvironment(HInstruction* env_holder, HInstruction* instruction) { - if (instruction == nullptr) return false; + DCHECK(instruction != nullptr); // A value that's not live in compiled code may still be needed in interpreter, // due to code motion, etc. if (env_holder->IsDeoptimize()) return true; // A value live at a throwing instruction in a try block may be copied by // the exception handler to its location at the top of the catch block. if (env_holder->CanThrowIntoCatchBlock()) return true; - if (instruction->GetBlock()->GetGraph()->IsDebuggable()) return true; + HGraph* graph = instruction->GetBlock()->GetGraph(); + if (graph->IsDebuggable()) return true; + // When compiling in OSR mode, all loops in the compiled method may be entered + // from the interpreter via SuspendCheck; thus we need to preserve the environment. 
+ if (env_holder->IsSuspendCheck() && graph->IsCompilingOsr()) return true; return instruction->GetType() == DataType::Type::kReference; } diff --git a/compiler/optimizing/ssa_liveness_analysis_test.cc b/compiler/optimizing/ssa_liveness_analysis_test.cc index b9bfbaa173..a683c698d9 100644 --- a/compiler/optimizing/ssa_liveness_analysis_test.cc +++ b/compiler/optimizing/ssa_liveness_analysis_test.cc @@ -28,18 +28,11 @@ namespace art { class SsaLivenessAnalysisTest : public OptimizingUnitTest { - public: - SsaLivenessAnalysisTest() - : graph_(CreateGraph()), - compiler_options_(), - instruction_set_(kRuntimeISA) { - std::string error_msg; - instruction_set_features_ = - InstructionSetFeatures::FromVariant(instruction_set_, "default", &error_msg); - codegen_ = CodeGenerator::Create(graph_, - instruction_set_, - *instruction_set_features_, - compiler_options_); + protected: + void SetUp() OVERRIDE { + OptimizingUnitTest::SetUp(); + graph_ = CreateGraph(); + codegen_ = CodeGenerator::Create(graph_, *compiler_options_); CHECK(codegen_ != nullptr) << instruction_set_ << " is not a supported target architecture."; // Create entry block. entry_ = new (GetAllocator()) HBasicBlock(graph_); @@ -57,9 +50,6 @@ class SsaLivenessAnalysisTest : public OptimizingUnitTest { } HGraph* graph_; - CompilerOptions compiler_options_; - InstructionSet instruction_set_; - std::unique_ptr<const InstructionSetFeatures> instruction_set_features_; std::unique_ptr<CodeGenerator> codegen_; HBasicBlock* entry_; }; @@ -134,12 +124,12 @@ TEST_F(SsaLivenessAnalysisTest, TestAput) { static const char* const expected[] = { "ranges: { [2,21) }, uses: { 15 17 21 }, { 15 19 } is_fixed: 0, is_split: 0 is_low: 0 " "is_high: 0", - "ranges: { [4,21) }, uses: { 19 21 }, { 15 19 } is_fixed: 0, is_split: 0 is_low: 0 " + "ranges: { [4,21) }, uses: { 19 21 }, { } is_fixed: 0, is_split: 0 is_low: 0 " "is_high: 0", - "ranges: { [6,21) }, uses: { 21 }, { 15 19 } is_fixed: 0, is_split: 0 is_low: 0 " + "ranges: { [6,21) }, uses: { 21 }, { } is_fixed: 0, is_split: 0 is_low: 0 " "is_high: 0", // Environment uses do not keep the non-reference argument alive. - "ranges: { [8,10) }, uses: { }, { 15 19 } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0", + "ranges: { [8,10) }, uses: { }, { } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0", // Environment uses keep the reference argument alive. "ranges: { [10,19) }, uses: { }, { 15 19 } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0", }; @@ -207,11 +197,11 @@ TEST_F(SsaLivenessAnalysisTest, TestDeoptimize) { static const char* const expected[] = { "ranges: { [2,23) }, uses: { 15 17 23 }, { 15 21 } is_fixed: 0, is_split: 0 is_low: 0 " "is_high: 0", - "ranges: { [4,23) }, uses: { 19 23 }, { 15 21 } is_fixed: 0, is_split: 0 is_low: 0 " + "ranges: { [4,23) }, uses: { 19 23 }, { 21 } is_fixed: 0, is_split: 0 is_low: 0 " "is_high: 0", - "ranges: { [6,23) }, uses: { 23 }, { 15 21 } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0", + "ranges: { [6,23) }, uses: { 23 }, { 21 } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0", // Environment use in HDeoptimize keeps even the non-reference argument alive. - "ranges: { [8,21) }, uses: { }, { 15 21 } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0", + "ranges: { [8,21) }, uses: { }, { 21 } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0", // Environment uses keep the reference argument alive. 
"ranges: { [10,21) }, uses: { }, { 15 21 } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0", }; diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc index 3685ab2df4..3e1a36dc9b 100644 --- a/compiler/optimizing/stack_map_stream.cc +++ b/compiler/optimizing/stack_map_stream.cc @@ -31,35 +31,63 @@ namespace art { constexpr static bool kVerifyStackMaps = kIsDebugBuild; uint32_t StackMapStream::GetStackMapNativePcOffset(size_t i) { - return StackMap::UnpackNativePc(stack_maps_[i].packed_native_pc, instruction_set_); + return StackMap::UnpackNativePc(stack_maps_[i][StackMap::kPackedNativePc], instruction_set_); } void StackMapStream::SetStackMapNativePcOffset(size_t i, uint32_t native_pc_offset) { - stack_maps_[i].packed_native_pc = StackMap::PackNativePc(native_pc_offset, instruction_set_); + stack_maps_[i][StackMap::kPackedNativePc] = + StackMap::PackNativePc(native_pc_offset, instruction_set_); +} + +void StackMapStream::BeginMethod(size_t frame_size_in_bytes, + size_t core_spill_mask, + size_t fp_spill_mask, + uint32_t num_dex_registers) { + DCHECK(!in_method_) << "Mismatched Begin/End calls"; + in_method_ = true; + DCHECK_EQ(frame_size_in_bytes_, 0u) << "BeginMethod was already called"; + + frame_size_in_bytes_ = frame_size_in_bytes; + core_spill_mask_ = core_spill_mask; + fp_spill_mask_ = fp_spill_mask; + num_dex_registers_ = num_dex_registers; +} + +void StackMapStream::EndMethod() { + DCHECK(in_method_) << "Mismatched Begin/End calls"; + in_method_ = false; } void StackMapStream::BeginStackMapEntry(uint32_t dex_pc, uint32_t native_pc_offset, uint32_t register_mask, BitVector* stack_mask, - uint32_t num_dex_registers, - uint8_t inlining_depth) { + StackMap::Kind kind) { + DCHECK(in_method_) << "Call BeginMethod first"; DCHECK(!in_stack_map_) << "Mismatched Begin/End calls"; in_stack_map_ = true; - current_stack_map_ = StackMapEntry { - .packed_native_pc = StackMap::PackNativePc(native_pc_offset, instruction_set_), - .dex_pc = dex_pc, - .register_mask_index = kNoValue, - .stack_mask_index = kNoValue, - .inline_info_index = kNoValue, - .dex_register_mask_index = kNoValue, - .dex_register_map_index = kNoValue, - }; + current_stack_map_ = BitTableBuilder<StackMap>::Entry(); + current_stack_map_[StackMap::kKind] = static_cast<uint32_t>(kind); + current_stack_map_[StackMap::kPackedNativePc] = + StackMap::PackNativePc(native_pc_offset, instruction_set_); + current_stack_map_[StackMap::kDexPc] = dex_pc; + if (stack_maps_.size() > 0) { + // Check that non-catch stack maps are sorted by pc. + // Catch stack maps are at the end and may be unordered. + if (stack_maps_.back()[StackMap::kKind] == StackMap::Kind::Catch) { + DCHECK(current_stack_map_[StackMap::kKind] == StackMap::Kind::Catch); + } else if (current_stack_map_[StackMap::kKind] != StackMap::Kind::Catch) { + DCHECK_LE(stack_maps_.back()[StackMap::kPackedNativePc], + current_stack_map_[StackMap::kPackedNativePc]); + } + } if (register_mask != 0) { uint32_t shift = LeastSignificantBit(register_mask); - RegisterMaskEntry entry = { register_mask >> shift, shift }; - current_stack_map_.register_mask_index = register_masks_.Dedup(&entry); + BitTableBuilder<RegisterMask>::Entry entry; + entry[RegisterMask::kValue] = register_mask >> shift; + entry[RegisterMask::kShift] = shift; + current_stack_map_[StackMap::kRegisterMaskIndex] = register_masks_.Dedup(&entry); } // The compiler assumes the bit vector will be read during PrepareForFillIn(), // and it might modify the data before that. 
Therefore, just store the pointer. @@ -67,15 +95,24 @@ void StackMapStream::BeginStackMapEntry(uint32_t dex_pc, lazy_stack_masks_.push_back(stack_mask); current_inline_infos_.clear(); current_dex_registers_.clear(); - expected_num_dex_registers_ = num_dex_registers; + expected_num_dex_registers_ = num_dex_registers_; if (kVerifyStackMaps) { size_t stack_map_index = stack_maps_.size(); // Create a lambda which will be executed at the very end to verify the data. // Parameters and local variables will be captured (stored) by the lambda's "[=]" capture. dchecks_.emplace_back([=](const CodeInfo& code_info) { + if (kind == StackMap::Kind::Default || kind == StackMap::Kind::OSR) { + StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, + instruction_set_); + CHECK_EQ(stack_map.Row(), stack_map_index); + } else if (kind == StackMap::Kind::Catch) { + StackMap stack_map = code_info.GetCatchStackMapForDexPc(dex_pc); + CHECK_EQ(stack_map.Row(), stack_map_index); + } StackMap stack_map = code_info.GetStackMapAt(stack_map_index); CHECK_EQ(stack_map.GetNativePcOffset(instruction_set_), native_pc_offset); + CHECK_EQ(stack_map.GetKind(), static_cast<uint32_t>(kind)); CHECK_EQ(stack_map.GetDexPc(), dex_pc); CHECK_EQ(code_info.GetRegisterMaskOf(stack_map), register_mask); BitMemoryRegion seen_stack_mask = code_info.GetStackMaskOf(stack_map); @@ -83,9 +120,6 @@ void StackMapStream::BeginStackMapEntry(uint32_t dex_pc, for (size_t b = 0; b < seen_stack_mask.size_in_bits(); b++) { CHECK_EQ(seen_stack_mask.LoadBit(b), stack_mask != nullptr && stack_mask->IsBitSet(b)); } - CHECK_EQ(stack_map.HasInlineInfo(), (inlining_depth != 0)); - CHECK_EQ(code_info.GetInlineDepthOf(stack_map), inlining_depth); - CHECK_EQ(stack_map.HasDexRegisterMap(), (num_dex_registers != 0)); }); } }
- if (current_dex_registers_.size() == expected_num_dex_registers_) { - CreateDexRegisterMap(); + if (kVerifyStackMaps) { + size_t stack_map_index = stack_maps_.size() - 1; + dchecks_.emplace_back([=](const CodeInfo& code_info) { + StackMap stack_map = code_info.GetStackMapAt(stack_map_index); + CHECK_EQ(stack_map.HasDexRegisterMap(), (num_dex_registers != 0)); + CHECK_EQ(stack_map.HasInlineInfo(), (inlining_depth != 0)); + CHECK_EQ(code_info.GetInlineInfosOf(stack_map).size(), inlining_depth); + }); } } void StackMapStream::AddInvoke(InvokeType invoke_type, uint32_t dex_method_index) { - uint32_t packed_native_pc = current_stack_map_.packed_native_pc; + uint32_t packed_native_pc = current_stack_map_[StackMap::kPackedNativePc]; size_t invoke_info_index = invoke_infos_.size(); - invoke_infos_.Add(InvokeInfoEntry { - .packed_native_pc = packed_native_pc, - .invoke_type = invoke_type, - .method_info_index = method_infos_.Dedup(&dex_method_index), - }); + BitTableBuilder<InvokeInfo>::Entry entry; + entry[InvokeInfo::kPackedNativePc] = packed_native_pc; + entry[InvokeInfo::kInvokeType] = invoke_type; + entry[InvokeInfo::kMethodInfoIndex] = method_infos_.Dedup({dex_method_index}); + invoke_infos_.Add(entry); if (kVerifyStackMaps) { dchecks_.emplace_back([=](const CodeInfo& code_info) { @@ -129,7 +171,7 @@ void StackMapStream::AddInvoke(InvokeType invoke_type, uint32_t dex_method_index CHECK_EQ(invoke_info.GetNativePcOffset(instruction_set_), StackMap::UnpackNativePc(packed_native_pc, instruction_set_)); CHECK_EQ(invoke_info.GetInvokeType(), invoke_type); - CHECK_EQ(method_infos_[invoke_info.GetMethodInfoIndex()], dex_method_index); + CHECK_EQ(method_infos_[invoke_info.GetMethodInfoIndex()][0], dex_method_index); }); } } @@ -138,51 +180,45 @@ void StackMapStream::BeginInlineInfoEntry(ArtMethod* method, uint32_t dex_pc, uint32_t num_dex_registers, const DexFile* outer_dex_file) { + DCHECK(in_stack_map_) << "Call BeginStackMapEntry first"; DCHECK(!in_inline_info_) << "Mismatched Begin/End calls"; in_inline_info_ = true; DCHECK_EQ(expected_num_dex_registers_, current_dex_registers_.size()); - InlineInfoEntry entry = { - .is_last = InlineInfo::kMore, - .dex_pc = dex_pc, - .method_info_index = kNoValue, - .art_method_hi = kNoValue, - .art_method_lo = kNoValue, - .dex_register_mask_index = kNoValue, - .dex_register_map_index = kNoValue, - }; + expected_num_dex_registers_ += num_dex_registers; + + BitTableBuilder<InlineInfo>::Entry entry; + entry[InlineInfo::kIsLast] = InlineInfo::kMore; + entry[InlineInfo::kDexPc] = dex_pc; + entry[InlineInfo::kNumberOfDexRegisters] = static_cast<uint32_t>(expected_num_dex_registers_); if (EncodeArtMethodInInlineInfo(method)) { - entry.art_method_hi = High32Bits(reinterpret_cast<uintptr_t>(method)); - entry.art_method_lo = Low32Bits(reinterpret_cast<uintptr_t>(method)); + entry[InlineInfo::kArtMethodHi] = High32Bits(reinterpret_cast<uintptr_t>(method)); + entry[InlineInfo::kArtMethodLo] = Low32Bits(reinterpret_cast<uintptr_t>(method)); } else { if (dex_pc != static_cast<uint32_t>(-1) && kIsDebugBuild) { ScopedObjectAccess soa(Thread::Current()); DCHECK(IsSameDexFile(*outer_dex_file, *method->GetDexFile())); } uint32_t dex_method_index = method->GetDexMethodIndexUnchecked(); - entry.method_info_index = method_infos_.Dedup(&dex_method_index); + entry[InlineInfo::kMethodInfoIndex] = method_infos_.Dedup({dex_method_index}); } current_inline_infos_.push_back(entry); - current_dex_registers_.clear(); - expected_num_dex_registers_ = num_dex_registers; - if (kVerifyStackMaps) 
{ size_t stack_map_index = stack_maps_.size(); size_t depth = current_inline_infos_.size() - 1; dchecks_.emplace_back([=](const CodeInfo& code_info) { StackMap stack_map = code_info.GetStackMapAt(stack_map_index); - InlineInfo inline_info = code_info.GetInlineInfoAtDepth(stack_map, depth); + InlineInfo inline_info = code_info.GetInlineInfosOf(stack_map)[depth]; CHECK_EQ(inline_info.GetDexPc(), dex_pc); bool encode_art_method = EncodeArtMethodInInlineInfo(method); CHECK_EQ(inline_info.EncodesArtMethod(), encode_art_method); if (encode_art_method) { CHECK_EQ(inline_info.GetArtMethod(), method); } else { - CHECK_EQ(method_infos_[inline_info.GetMethodInfoIndex()], + CHECK_EQ(method_infos_[inline_info.GetMethodInfoIndex()][0], method->GetDexMethodIndexUnchecked()); } - CHECK_EQ(inline_info.HasDexRegisterMap(), (num_dex_registers != 0)); }); } } @@ -193,56 +229,68 @@ void StackMapStream::EndInlineInfoEntry() { DCHECK_EQ(expected_num_dex_registers_, current_dex_registers_.size()); } -// Create dex register map (bitmap + indices + catalogue entries) -// based on the currently accumulated list of DexRegisterLocations. +// Create delta-compressed dex register map based on the current list of DexRegisterLocations. +// All dex registers for a stack map are concatenated - inlined registers are just appended. void StackMapStream::CreateDexRegisterMap() { - // Create mask and map based on current registers. + // These are fields rather than local variables so that we can reuse the reserved memory. temp_dex_register_mask_.ClearAllBits(); temp_dex_register_map_.clear(); + + // Ensure that the arrays that hold previous state are big enough to be safely indexed below. + if (previous_dex_registers_.size() < current_dex_registers_.size()) { + previous_dex_registers_.resize(current_dex_registers_.size(), DexRegisterLocation::None()); + dex_register_timestamp_.resize(current_dex_registers_.size(), 0u); + } + + // Set a bit in the mask for each register that has changed since the previous stack map. + // Modified registers are stored in the catalogue and the catalogue index is added to the list. for (size_t i = 0; i < current_dex_registers_.size(); i++) { DexRegisterLocation reg = current_dex_registers_[i]; - if (reg.IsLive()) { - DexRegisterEntry entry = DexRegisterEntry { - .kind = static_cast<uint32_t>(reg.GetKind()), - .packed_value = DexRegisterInfo::PackValue(reg.GetKind(), reg.GetValue()), - }; + // Distance is the difference between this index and the index of the last modification. + uint32_t distance = stack_maps_.size() - dex_register_timestamp_[i]; + if (previous_dex_registers_[i] != reg || distance > kMaxDexRegisterMapSearchDistance) { + BitTableBuilder<DexRegisterInfo>::Entry entry; + entry[DexRegisterInfo::kKind] = static_cast<uint32_t>(reg.GetKind()); + entry[DexRegisterInfo::kPackedValue] = + DexRegisterInfo::PackValue(reg.GetKind(), reg.GetValue()); + uint32_t index = reg.IsLive() ? dex_register_catalog_.Dedup(&entry) : kNoValue; temp_dex_register_mask_.SetBit(i); - temp_dex_register_map_.push_back(dex_register_catalog_.Dedup(&entry)); + temp_dex_register_map_.push_back({index}); + previous_dex_registers_[i] = reg; + dex_register_timestamp_[i] = stack_maps_.size(); } } - // Set the mask and map for the current StackMap/InlineInfo. - uint32_t mask_index = StackMap::kNoValue; // Represents mask with all zero bits. + // Set the mask and map for the current StackMap (which includes inlined registers).
if (temp_dex_register_mask_.GetNumberOfBits() != 0) { - mask_index = dex_register_masks_.Dedup(temp_dex_register_mask_.GetRawStorage(), - temp_dex_register_mask_.GetNumberOfBits()); + current_stack_map_[StackMap::kDexRegisterMaskIndex] = + dex_register_masks_.Dedup(temp_dex_register_mask_.GetRawStorage(), + temp_dex_register_mask_.GetNumberOfBits()); } - uint32_t map_index = dex_register_maps_.Dedup(temp_dex_register_map_.data(), - temp_dex_register_map_.size()); - if (!current_inline_infos_.empty()) { - current_inline_infos_.back().dex_register_mask_index = mask_index; - current_inline_infos_.back().dex_register_map_index = map_index; - } else { - current_stack_map_.dex_register_mask_index = mask_index; - current_stack_map_.dex_register_map_index = map_index; + if (!current_dex_registers_.empty()) { + current_stack_map_[StackMap::kDexRegisterMapIndex] = + dex_register_maps_.Dedup(temp_dex_register_map_.data(), + temp_dex_register_map_.size()); } if (kVerifyStackMaps) { size_t stack_map_index = stack_maps_.size(); - int32_t depth = current_inline_infos_.size() - 1; // We need to make a copy of the current registers for later (when the check is run). auto expected_dex_registers = std::make_shared<dchecked_vector<DexRegisterLocation>>( current_dex_registers_.begin(), current_dex_registers_.end()); dchecks_.emplace_back([=](const CodeInfo& code_info) { StackMap stack_map = code_info.GetStackMapAt(stack_map_index); - size_t num_dex_registers = expected_dex_registers->size(); - DexRegisterMap map = (depth == -1) - ? code_info.GetDexRegisterMapOf(stack_map, num_dex_registers) - : code_info.GetDexRegisterMapAtDepth(depth, stack_map, num_dex_registers); - CHECK_EQ(map.size(), num_dex_registers); - for (size_t r = 0; r < num_dex_registers; r++) { - CHECK_EQ(expected_dex_registers->at(r), map.Get(r)); + uint32_t expected_reg = 0; + for (DexRegisterLocation reg : code_info.GetDexRegisterMapOf(stack_map)) { + CHECK_EQ((*expected_dex_registers)[expected_reg++], reg); } + for (InlineInfo inline_info : code_info.GetInlineInfosOf(stack_map)) { + DexRegisterMap map = code_info.GetInlineDexRegisterMapOf(stack_map, inline_info); + for (DexRegisterLocation reg : map) { + CHECK_EQ((*expected_dex_registers)[expected_reg++], reg); + } + } + CHECK_EQ(expected_reg, expected_dex_registers->size()); }); } } @@ -251,7 +299,7 @@ void StackMapStream::FillInMethodInfo(MemoryRegion region) { { MethodInfo info(region.begin(), method_infos_.size()); for (size_t i = 0; i < method_infos_.size(); ++i) { - info.SetMethodIndex(i, method_infos_[i]); + info.SetMethodIndex(i, method_infos_[i][0]); } } if (kVerifyStackMaps) { @@ -260,53 +308,55 @@ void StackMapStream::FillInMethodInfo(MemoryRegion region) { const size_t count = info.NumMethodIndices(); DCHECK_EQ(count, method_infos_.size()); for (size_t i = 0; i < count; ++i) { - DCHECK_EQ(info.GetMethodIndex(i), method_infos_[i]); + DCHECK_EQ(info.GetMethodIndex(i), method_infos_[i][0]); } } } size_t StackMapStream::PrepareForFillIn() { - static_assert(sizeof(StackMapEntry) == StackMap::kCount * sizeof(uint32_t), "Layout"); - static_assert(sizeof(InvokeInfoEntry) == InvokeInfo::kCount * sizeof(uint32_t), "Layout"); - static_assert(sizeof(InlineInfoEntry) == InlineInfo::kCount * sizeof(uint32_t), "Layout"); - static_assert(sizeof(DexRegisterEntry) == DexRegisterInfo::kCount * sizeof(uint32_t), "Layout"); DCHECK_EQ(out_.size(), 0u); // Read the stack masks now. The compiler might have updated them.
for (size_t i = 0; i < lazy_stack_masks_.size(); i++) { BitVector* stack_mask = lazy_stack_masks_[i]; if (stack_mask != nullptr && stack_mask->GetNumberOfBits() != 0) { - stack_maps_[i].stack_mask_index = + stack_maps_[i][StackMap::kStackMaskIndex] = stack_masks_.Dedup(stack_mask->GetRawStorage(), stack_mask->GetNumberOfBits()); } } - size_t bit_offset = 0; - stack_maps_.Encode(&out_, &bit_offset); - register_masks_.Encode(&out_, &bit_offset); - stack_masks_.Encode(&out_, &bit_offset); - invoke_infos_.Encode(&out_, &bit_offset); - inline_infos_.Encode(&out_, &bit_offset); - dex_register_masks_.Encode(&out_, &bit_offset); - dex_register_maps_.Encode(&out_, &bit_offset); - dex_register_catalog_.Encode(&out_, &bit_offset); - - return UnsignedLeb128Size(out_.size()) + out_.size(); + EncodeUnsignedLeb128(&out_, frame_size_in_bytes_); + EncodeUnsignedLeb128(&out_, core_spill_mask_); + EncodeUnsignedLeb128(&out_, fp_spill_mask_); + EncodeUnsignedLeb128(&out_, num_dex_registers_); + BitMemoryWriter<ScopedArenaVector<uint8_t>> out(&out_, out_.size() * kBitsPerByte); + stack_maps_.Encode(out); + register_masks_.Encode(out); + stack_masks_.Encode(out); + invoke_infos_.Encode(out); + inline_infos_.Encode(out); + dex_register_masks_.Encode(out); + dex_register_maps_.Encode(out); + dex_register_catalog_.Encode(out); + + return out_.size(); } void StackMapStream::FillInCodeInfo(MemoryRegion region) { DCHECK(in_stack_map_ == false) << "Mismatched Begin/End calls"; DCHECK(in_inline_info_ == false) << "Mismatched Begin/End calls"; DCHECK_NE(0u, out_.size()) << "PrepareForFillIn not called before FillIn"; - DCHECK_EQ(region.size(), UnsignedLeb128Size(out_.size()) + out_.size()); + DCHECK_EQ(region.size(), out_.size()); + + region.CopyFromVector(0, out_); - uint8_t* ptr = EncodeUnsignedLeb128(region.begin(), out_.size()); - region.CopyFromVector(ptr - region.begin(), out_); + // Verify that we can load the CodeInfo and check some essentials. + CodeInfo code_info(region); + CHECK_EQ(code_info.Size(), out_.size()); + CHECK_EQ(code_info.GetNumberOfStackMaps(), stack_maps_.size()); // Verify all written data (usually only in debug builds). if (kVerifyStackMaps) { - CodeInfo code_info(region); - CHECK_EQ(code_info.GetNumberOfStackMaps(), stack_maps_.size()); for (const auto& dcheck : dchecks_) { dcheck(code_info); } diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h index d634c703ff..ed865b12f7 100644 --- a/compiler/optimizing/stack_map_stream.h +++ b/compiler/optimizing/stack_map_stream.h @@ -27,11 +27,10 @@ #include "dex_register_location.h" #include "method_info.h" #include "nodes.h" +#include "stack_map.h" namespace art { -class CodeInfo; - /** * Collects and builds stack maps for a method. All the stack maps * for a method are placed in a CodeInfo object. 
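The rewritten CreateDexRegisterMap above encodes dex register locations as deltas between consecutive stack maps: a register is re-emitted into the mask and the catalogue only when its location changed, or when its last emission lies further back than the decoder's bounded search distance. The following is a minimal standalone sketch of that decision rule, not the ART implementation; Location and kMaxSearchDistance are illustrative stand-ins for DexRegisterLocation and kMaxDexRegisterMapSearchDistance, which are defined outside this diff.

#include <cstdint>
#include <vector>

// Illustrative stand-in for DexRegisterLocation.
struct Location {
  uint32_t kind = 0;
  int32_t value = 0;
};
inline bool operator!=(const Location& a, const Location& b) {
  return a.kind != b.kind || a.value != b.value;
}

// Assumed bound; the real limit is kMaxDexRegisterMapSearchDistance.
constexpr uint32_t kMaxSearchDistance = 32;

class DeltaEncoder {
 public:
  // Returns the indices of the registers that must be re-emitted for the
  // stack map with index `map_index`; every other register is recoverable
  // from an earlier stack map.
  std::vector<size_t> ChangedRegisters(const std::vector<Location>& current,
                                       uint32_t map_index) {
    if (previous_.size() < current.size()) {
      previous_.resize(current.size());
      timestamp_.resize(current.size(), 0u);
    }
    std::vector<size_t> changed;
    for (size_t i = 0; i < current.size(); ++i) {
      uint32_t distance = map_index - timestamp_[i];
      if (previous_[i] != current[i] || distance > kMaxSearchDistance) {
        changed.push_back(i);  // Becomes a mask bit plus a catalogue index.
        previous_[i] = current[i];
        timestamp_[i] = map_index;
      }
    }
    return changed;
  }

 private:
  std::vector<Location> previous_;   // Last emitted location per register.
  std::vector<uint32_t> timestamp_;  // Stack map index of the last emission.
};

The distance bound is what keeps decoding cheap: a reader never has to scan back more than a fixed number of stack maps to find the last emission of an unchanged register, at the cost of an occasional redundant re-emission.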
@@ -51,23 +50,31 @@ class StackMapStream : public ValueObject { out_(allocator->Adapter(kArenaAllocStackMapStream)), method_infos_(allocator), lazy_stack_masks_(allocator->Adapter(kArenaAllocStackMapStream)), - in_stack_map_(false), - in_inline_info_(false), + current_stack_map_(), current_inline_infos_(allocator->Adapter(kArenaAllocStackMapStream)), current_dex_registers_(allocator->Adapter(kArenaAllocStackMapStream)), + previous_dex_registers_(allocator->Adapter(kArenaAllocStackMapStream)), + dex_register_timestamp_(allocator->Adapter(kArenaAllocStackMapStream)), temp_dex_register_mask_(allocator, 32, true, kArenaAllocStackMapStream), temp_dex_register_map_(allocator->Adapter(kArenaAllocStackMapStream)) { } + void BeginMethod(size_t frame_size_in_bytes, + size_t core_spill_mask, + size_t fp_spill_mask, + uint32_t num_dex_registers); + void EndMethod(); + void BeginStackMapEntry(uint32_t dex_pc, uint32_t native_pc_offset, - uint32_t register_mask, - BitVector* sp_mask, - uint32_t num_dex_registers, - uint8_t inlining_depth); + uint32_t register_mask = 0, + BitVector* sp_mask = nullptr, + StackMap::Kind kind = StackMap::Kind::Default); void EndStackMapEntry(); - void AddDexRegisterEntry(DexRegisterLocation::Kind kind, int32_t value); + void AddDexRegisterEntry(DexRegisterLocation::Kind kind, int32_t value) { + current_dex_registers_.push_back(DexRegisterLocation(kind, value)); + } void AddInvoke(InvokeType type, uint32_t dex_method_index); @@ -95,76 +102,42 @@ class StackMapStream : public ValueObject { private: static constexpr uint32_t kNoValue = -1; - // The fields must be uint32_t and mirror the StackMap accessor in stack_map.h! - struct StackMapEntry { - uint32_t packed_native_pc; - uint32_t dex_pc; - uint32_t register_mask_index; - uint32_t stack_mask_index; - uint32_t inline_info_index; - uint32_t dex_register_mask_index; - uint32_t dex_register_map_index; - }; - - // The fields must be uint32_t and mirror the InlineInfo accessor in stack_map.h! - struct InlineInfoEntry { - uint32_t is_last; - uint32_t dex_pc; - uint32_t method_info_index; - uint32_t art_method_hi; - uint32_t art_method_lo; - uint32_t dex_register_mask_index; - uint32_t dex_register_map_index; - }; - - // The fields must be uint32_t and mirror the InvokeInfo accessor in stack_map.h! - struct InvokeInfoEntry { - uint32_t packed_native_pc; - uint32_t invoke_type; - uint32_t method_info_index; - }; - - // The fields must be uint32_t and mirror the DexRegisterInfo accessor in stack_map.h! - struct DexRegisterEntry { - uint32_t kind; - uint32_t packed_value; - }; - - // The fields must be uint32_t and mirror the RegisterMask accessor in stack_map.h! 
- struct RegisterMaskEntry { - uint32_t value; - uint32_t shift; - }; - void CreateDexRegisterMap(); const InstructionSet instruction_set_; - BitTableBuilder<StackMapEntry> stack_maps_; - BitTableBuilder<RegisterMaskEntry> register_masks_; + uint32_t frame_size_in_bytes_ = 0; + uint32_t core_spill_mask_ = 0; + uint32_t fp_spill_mask_ = 0; + uint32_t num_dex_registers_ = 0; + BitTableBuilder<StackMap> stack_maps_; + BitTableBuilder<RegisterMask> register_masks_; BitmapTableBuilder stack_masks_; - BitTableBuilder<InvokeInfoEntry> invoke_infos_; - BitTableBuilder<InlineInfoEntry> inline_infos_; + BitTableBuilder<InvokeInfo> invoke_infos_; + BitTableBuilder<InlineInfo> inline_infos_; BitmapTableBuilder dex_register_masks_; - BitTableBuilder<uint32_t> dex_register_maps_; - BitTableBuilder<DexRegisterEntry> dex_register_catalog_; + BitTableBuilder<MaskInfo> dex_register_maps_; + BitTableBuilder<DexRegisterInfo> dex_register_catalog_; ScopedArenaVector<uint8_t> out_; - BitTableBuilder<uint32_t> method_infos_; + BitTableBuilderBase<1> method_infos_; ScopedArenaVector<BitVector*> lazy_stack_masks_; // Variables which track the current state between Begin/End calls; - bool in_stack_map_; - bool in_inline_info_; - StackMapEntry current_stack_map_; - ScopedArenaVector<InlineInfoEntry> current_inline_infos_; + bool in_method_ = false; + bool in_stack_map_ = false; + bool in_inline_info_ = false; + BitTableBuilder<StackMap>::Entry current_stack_map_; + ScopedArenaVector<BitTableBuilder<InlineInfo>::Entry> current_inline_infos_; ScopedArenaVector<DexRegisterLocation> current_dex_registers_; + ScopedArenaVector<DexRegisterLocation> previous_dex_registers_; + ScopedArenaVector<uint32_t> dex_register_timestamp_; // Stack map index of last change. size_t expected_num_dex_registers_; // Temporary variables used in CreateDexRegisterMap. // They are here so that we can reuse the reserved memory. ArenaBitVector temp_dex_register_mask_; - ScopedArenaVector<uint32_t> temp_dex_register_map_; + ScopedArenaVector<BitTableBuilder<DexRegisterMapInfo>::Entry> temp_dex_register_map_; // A set of lambda functions to be executed at the end to verify // the encoded data. It is generally only used in debug builds. diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc index 77aa3ef965..9ed90a4839 100644 --- a/compiler/optimizing/stack_map_test.cc +++ b/compiler/optimizing/stack_map_test.cc @@ -52,14 +52,16 @@ TEST(StackMapTest, Test1) { ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); + stream.BeginMethod(32, 0, 0, 2); ArenaBitVector sp_mask(&allocator, 0, false); size_t number_of_dex_registers = 2; - stream.BeginStackMapEntry(0, 64 * kPcAlign, 0x3, &sp_mask, number_of_dex_registers, 0); + stream.BeginStackMapEntry(0, 64 * kPcAlign, 0x3, &sp_mask); stream.AddDexRegisterEntry(Kind::kInStack, 0); // Short location. stream.AddDexRegisterEntry(Kind::kConstant, -2); // Short location. 
stream.EndStackMapEntry(); + stream.EndMethod(); size_t size = stream.PrepareForFillIn(); void* memory = allocator.Alloc(size, kArenaAllocMisc); MemoryRegion region(memory, size); @@ -81,16 +83,16 @@ TEST(StackMapTest, Test1) { ASSERT_TRUE(CheckStackMask(code_info, stack_map, sp_mask)); ASSERT_TRUE(stack_map.HasDexRegisterMap()); - DexRegisterMap dex_register_map = - code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers); - ASSERT_TRUE(dex_register_map.IsDexRegisterLive(0)); - ASSERT_TRUE(dex_register_map.IsDexRegisterLive(1)); + DexRegisterMap dex_register_map = code_info.GetDexRegisterMapOf(stack_map); + ASSERT_EQ(number_of_dex_registers, dex_register_map.size()); + ASSERT_TRUE(dex_register_map[0].IsLive()); + ASSERT_TRUE(dex_register_map[1].IsLive()); ASSERT_EQ(2u, dex_register_map.GetNumberOfLiveDexRegisters()); - ASSERT_EQ(Kind::kInStack, dex_register_map.GetLocationKind(0)); - ASSERT_EQ(Kind::kConstant, dex_register_map.GetLocationKind(1)); - ASSERT_EQ(0, dex_register_map.GetStackOffsetInBytes(0)); - ASSERT_EQ(-2, dex_register_map.GetConstant(1)); + ASSERT_EQ(Kind::kInStack, dex_register_map[0].GetKind()); + ASSERT_EQ(Kind::kConstant, dex_register_map[1].GetKind()); + ASSERT_EQ(0, dex_register_map[0].GetStackOffsetInBytes()); + ASSERT_EQ(-2, dex_register_map[1].GetConstant()); DexRegisterLocation location0 = code_info.GetDexRegisterCatalogEntry(0); DexRegisterLocation location1 = code_info.GetDexRegisterCatalogEntry(1); @@ -107,6 +109,7 @@ TEST(StackMapTest, Test2) { ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); + stream.BeginMethod(32, 0, 0, 2); ArtMethod art_method; ArenaBitVector sp_mask1(&allocator, 0, true); @@ -114,7 +117,7 @@ TEST(StackMapTest, Test2) { sp_mask1.SetBit(4); size_t number_of_dex_registers = 2; size_t number_of_dex_registers_in_inline_info = 0; - stream.BeginStackMapEntry(0, 64 * kPcAlign, 0x3, &sp_mask1, number_of_dex_registers, 2); + stream.BeginStackMapEntry(0, 64 * kPcAlign, 0x3, &sp_mask1); stream.AddDexRegisterEntry(Kind::kInStack, 0); // Short location. stream.AddDexRegisterEntry(Kind::kConstant, -2); // Large location. stream.BeginInlineInfoEntry(&art_method, 3, number_of_dex_registers_in_inline_info); @@ -126,7 +129,7 @@ TEST(StackMapTest, Test2) { ArenaBitVector sp_mask2(&allocator, 0, true); sp_mask2.SetBit(3); sp_mask2.SetBit(8); - stream.BeginStackMapEntry(1, 128 * kPcAlign, 0xFF, &sp_mask2, number_of_dex_registers, 0); + stream.BeginStackMapEntry(1, 128 * kPcAlign, 0xFF, &sp_mask2); stream.AddDexRegisterEntry(Kind::kInRegister, 18); // Short location. stream.AddDexRegisterEntry(Kind::kInFpuRegister, 3); // Short location. stream.EndStackMapEntry(); @@ -134,7 +137,7 @@ TEST(StackMapTest, Test2) { ArenaBitVector sp_mask3(&allocator, 0, true); sp_mask3.SetBit(1); sp_mask3.SetBit(5); - stream.BeginStackMapEntry(2, 192 * kPcAlign, 0xAB, &sp_mask3, number_of_dex_registers, 0); + stream.BeginStackMapEntry(2, 192 * kPcAlign, 0xAB, &sp_mask3); stream.AddDexRegisterEntry(Kind::kInRegister, 6); // Short location. stream.AddDexRegisterEntry(Kind::kInRegisterHigh, 8); // Short location. 
stream.EndStackMapEntry(); @@ -142,11 +145,12 @@ TEST(StackMapTest, Test2) { ArenaBitVector sp_mask4(&allocator, 0, true); sp_mask4.SetBit(6); sp_mask4.SetBit(7); - stream.BeginStackMapEntry(3, 256 * kPcAlign, 0xCD, &sp_mask4, number_of_dex_registers, 0); + stream.BeginStackMapEntry(3, 256 * kPcAlign, 0xCD, &sp_mask4); stream.AddDexRegisterEntry(Kind::kInFpuRegister, 3); // Short location, same in stack map 2. stream.AddDexRegisterEntry(Kind::kInFpuRegisterHigh, 1); // Short location. stream.EndStackMapEntry(); + stream.EndMethod(); size_t size = stream.PrepareForFillIn(); void* memory = allocator.Alloc(size, kArenaAllocMisc); MemoryRegion region(memory, size); @@ -170,16 +174,16 @@ TEST(StackMapTest, Test2) { ASSERT_TRUE(CheckStackMask(code_info, stack_map, sp_mask1)); ASSERT_TRUE(stack_map.HasDexRegisterMap()); - DexRegisterMap dex_register_map = - code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers); - ASSERT_TRUE(dex_register_map.IsDexRegisterLive(0)); - ASSERT_TRUE(dex_register_map.IsDexRegisterLive(1)); + DexRegisterMap dex_register_map = code_info.GetDexRegisterMapOf(stack_map); + ASSERT_EQ(number_of_dex_registers, dex_register_map.size()); + ASSERT_TRUE(dex_register_map[0].IsLive()); + ASSERT_TRUE(dex_register_map[1].IsLive()); ASSERT_EQ(2u, dex_register_map.GetNumberOfLiveDexRegisters()); - ASSERT_EQ(Kind::kInStack, dex_register_map.GetLocationKind(0)); - ASSERT_EQ(Kind::kConstant, dex_register_map.GetLocationKind(1)); - ASSERT_EQ(0, dex_register_map.GetStackOffsetInBytes(0)); - ASSERT_EQ(-2, dex_register_map.GetConstant(1)); + ASSERT_EQ(Kind::kInStack, dex_register_map[0].GetKind()); + ASSERT_EQ(Kind::kConstant, dex_register_map[1].GetKind()); + ASSERT_EQ(0, dex_register_map[0].GetStackOffsetInBytes()); + ASSERT_EQ(-2, dex_register_map[1].GetConstant()); DexRegisterLocation location0 = code_info.GetDexRegisterCatalogEntry(0); DexRegisterLocation location1 = code_info.GetDexRegisterCatalogEntry(1); @@ -189,13 +193,12 @@ TEST(StackMapTest, Test2) { ASSERT_EQ(-2, location1.GetValue()); ASSERT_TRUE(stack_map.HasInlineInfo()); - InlineInfo inline_info0 = code_info.GetInlineInfoAtDepth(stack_map, 0); - InlineInfo inline_info1 = code_info.GetInlineInfoAtDepth(stack_map, 1); - ASSERT_EQ(2u, code_info.GetInlineDepthOf(stack_map)); - ASSERT_EQ(3u, inline_info0.GetDexPc()); - ASSERT_EQ(2u, inline_info1.GetDexPc()); - ASSERT_TRUE(inline_info0.EncodesArtMethod()); - ASSERT_TRUE(inline_info1.EncodesArtMethod()); + auto inline_infos = code_info.GetInlineInfosOf(stack_map); + ASSERT_EQ(2u, inline_infos.size()); + ASSERT_EQ(3u, inline_infos[0].GetDexPc()); + ASSERT_EQ(2u, inline_infos[1].GetDexPc()); + ASSERT_TRUE(inline_infos[0].EncodesArtMethod()); + ASSERT_TRUE(inline_infos[1].EncodesArtMethod()); } // Second stack map. 
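These assertions reflect the reader-side change that accompanies the stream rewrite: a DexRegisterMap now knows its own size and is indexed directly for DexRegisterLocation values, and inline infos come back as an indexable range rather than through per-depth getters. A hedged sketch of a decoder walking every location recorded for one stack map, assuming only the accessors exercised in these tests (GetDexRegisterMapOf, GetInlineInfosOf, GetInlineDexRegisterMapOf from stack_map.h):

#include <vector>

// Collects every live location for `stack_map`: the outermost frame first,
// then each inlined frame in depth order. This mirrors the traversal done by
// the kVerifyStackMaps lambda in StackMapStream::EndStackMapEntry.
std::vector<DexRegisterLocation> CollectLocations(const CodeInfo& code_info,
                                                  const StackMap& stack_map) {
  std::vector<DexRegisterLocation> locations;
  for (DexRegisterLocation reg : code_info.GetDexRegisterMapOf(stack_map)) {
    if (reg.IsLive()) {
      locations.push_back(reg);  // Registers of the outermost frame.
    }
  }
  for (InlineInfo inline_info : code_info.GetInlineInfosOf(stack_map)) {
    DexRegisterMap map = code_info.GetInlineDexRegisterMapOf(stack_map, inline_info);
    for (DexRegisterLocation reg : map) {
      if (reg.IsLive()) {
        locations.push_back(reg);  // Registers of this inlined frame.
      }
    }
  }
  return locations;
}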
@@ -210,16 +213,16 @@ TEST(StackMapTest, Test2) { ASSERT_TRUE(CheckStackMask(code_info, stack_map, sp_mask2)); ASSERT_TRUE(stack_map.HasDexRegisterMap()); - DexRegisterMap dex_register_map = - code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers); - ASSERT_TRUE(dex_register_map.IsDexRegisterLive(0)); - ASSERT_TRUE(dex_register_map.IsDexRegisterLive(1)); + DexRegisterMap dex_register_map = code_info.GetDexRegisterMapOf(stack_map); + ASSERT_EQ(number_of_dex_registers, dex_register_map.size()); + ASSERT_TRUE(dex_register_map[0].IsLive()); + ASSERT_TRUE(dex_register_map[1].IsLive()); ASSERT_EQ(2u, dex_register_map.GetNumberOfLiveDexRegisters()); - ASSERT_EQ(Kind::kInRegister, dex_register_map.GetLocationKind(0)); - ASSERT_EQ(Kind::kInFpuRegister, dex_register_map.GetLocationKind(1)); - ASSERT_EQ(18, dex_register_map.GetMachineRegister(0)); - ASSERT_EQ(3, dex_register_map.GetMachineRegister(1)); + ASSERT_EQ(Kind::kInRegister, dex_register_map[0].GetKind()); + ASSERT_EQ(Kind::kInFpuRegister, dex_register_map[1].GetKind()); + ASSERT_EQ(18, dex_register_map[0].GetMachineRegister()); + ASSERT_EQ(3, dex_register_map[1].GetMachineRegister()); DexRegisterLocation location0 = code_info.GetDexRegisterCatalogEntry(2); DexRegisterLocation location1 = code_info.GetDexRegisterCatalogEntry(3); @@ -243,16 +246,16 @@ TEST(StackMapTest, Test2) { ASSERT_TRUE(CheckStackMask(code_info, stack_map, sp_mask3)); ASSERT_TRUE(stack_map.HasDexRegisterMap()); - DexRegisterMap dex_register_map = - code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers); - ASSERT_TRUE(dex_register_map.IsDexRegisterLive(0)); - ASSERT_TRUE(dex_register_map.IsDexRegisterLive(1)); + DexRegisterMap dex_register_map = code_info.GetDexRegisterMapOf(stack_map); + ASSERT_EQ(number_of_dex_registers, dex_register_map.size()); + ASSERT_TRUE(dex_register_map[0].IsLive()); + ASSERT_TRUE(dex_register_map[1].IsLive()); ASSERT_EQ(2u, dex_register_map.GetNumberOfLiveDexRegisters()); - ASSERT_EQ(Kind::kInRegister, dex_register_map.GetLocationKind(0)); - ASSERT_EQ(Kind::kInRegisterHigh, dex_register_map.GetLocationKind(1)); - ASSERT_EQ(6, dex_register_map.GetMachineRegister(0)); - ASSERT_EQ(8, dex_register_map.GetMachineRegister(1)); + ASSERT_EQ(Kind::kInRegister, dex_register_map[0].GetKind()); + ASSERT_EQ(Kind::kInRegisterHigh, dex_register_map[1].GetKind()); + ASSERT_EQ(6, dex_register_map[0].GetMachineRegister()); + ASSERT_EQ(8, dex_register_map[1].GetMachineRegister()); DexRegisterLocation location0 = code_info.GetDexRegisterCatalogEntry(4); DexRegisterLocation location1 = code_info.GetDexRegisterCatalogEntry(5); @@ -276,16 +279,16 @@ TEST(StackMapTest, Test2) { ASSERT_TRUE(CheckStackMask(code_info, stack_map, sp_mask4)); ASSERT_TRUE(stack_map.HasDexRegisterMap()); - DexRegisterMap dex_register_map = - code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers); - ASSERT_TRUE(dex_register_map.IsDexRegisterLive(0)); - ASSERT_TRUE(dex_register_map.IsDexRegisterLive(1)); + DexRegisterMap dex_register_map = code_info.GetDexRegisterMapOf(stack_map); + ASSERT_EQ(number_of_dex_registers, dex_register_map.size()); + ASSERT_TRUE(dex_register_map[0].IsLive()); + ASSERT_TRUE(dex_register_map[1].IsLive()); ASSERT_EQ(2u, dex_register_map.GetNumberOfLiveDexRegisters()); - ASSERT_EQ(Kind::kInFpuRegister, dex_register_map.GetLocationKind(0)); - ASSERT_EQ(Kind::kInFpuRegisterHigh, dex_register_map.GetLocationKind(1)); - ASSERT_EQ(3, dex_register_map.GetMachineRegister(0)); - ASSERT_EQ(1, dex_register_map.GetMachineRegister(1)); + 
ASSERT_EQ(Kind::kInFpuRegister, dex_register_map[0].GetKind()); + ASSERT_EQ(Kind::kInFpuRegisterHigh, dex_register_map[1].GetKind()); + ASSERT_EQ(3, dex_register_map[0].GetMachineRegister()); + ASSERT_EQ(1, dex_register_map[1].GetMachineRegister()); DexRegisterLocation location0 = code_info.GetDexRegisterCatalogEntry(3); DexRegisterLocation location1 = code_info.GetDexRegisterCatalogEntry(6); @@ -303,6 +306,7 @@ TEST(StackMapTest, TestDeduplicateInlineInfoDexRegisterMap) { ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); + stream.BeginMethod(32, 0, 0, 2); ArtMethod art_method; ArenaBitVector sp_mask1(&allocator, 0, true); @@ -310,7 +314,7 @@ TEST(StackMapTest, TestDeduplicateInlineInfoDexRegisterMap) { sp_mask1.SetBit(4); const size_t number_of_dex_registers = 2; const size_t number_of_dex_registers_in_inline_info = 2; - stream.BeginStackMapEntry(0, 64 * kPcAlign, 0x3, &sp_mask1, number_of_dex_registers, 1); + stream.BeginStackMapEntry(0, 64 * kPcAlign, 0x3, &sp_mask1); stream.AddDexRegisterEntry(Kind::kInStack, 0); // Short location. stream.AddDexRegisterEntry(Kind::kConstant, -2); // Large location. stream.BeginInlineInfoEntry(&art_method, 3, number_of_dex_registers_in_inline_info); @@ -319,6 +323,7 @@ TEST(StackMapTest, TestDeduplicateInlineInfoDexRegisterMap) { stream.EndInlineInfoEntry(); stream.EndStackMapEntry(); + stream.EndMethod(); size_t size = stream.PrepareForFillIn(); void* memory = allocator.Alloc(size, kArenaAllocMisc); MemoryRegion region(memory, size); @@ -342,15 +347,16 @@ TEST(StackMapTest, TestDeduplicateInlineInfoDexRegisterMap) { ASSERT_TRUE(CheckStackMask(code_info, stack_map, sp_mask1)); ASSERT_TRUE(stack_map.HasDexRegisterMap()); - DexRegisterMap map(code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers)); - ASSERT_TRUE(map.IsDexRegisterLive(0)); - ASSERT_TRUE(map.IsDexRegisterLive(1)); + DexRegisterMap map(code_info.GetDexRegisterMapOf(stack_map)); + ASSERT_EQ(number_of_dex_registers, map.size()); + ASSERT_TRUE(map[0].IsLive()); + ASSERT_TRUE(map[1].IsLive()); ASSERT_EQ(2u, map.GetNumberOfLiveDexRegisters()); - ASSERT_EQ(Kind::kInStack, map.GetLocationKind(0)); - ASSERT_EQ(Kind::kConstant, map.GetLocationKind(1)); - ASSERT_EQ(0, map.GetStackOffsetInBytes(0)); - ASSERT_EQ(-2, map.GetConstant(1)); + ASSERT_EQ(Kind::kInStack, map[0].GetKind()); + ASSERT_EQ(Kind::kConstant, map[1].GetKind()); + ASSERT_EQ(0, map[0].GetStackOffsetInBytes()); + ASSERT_EQ(-2, map[1].GetConstant()); DexRegisterLocation location0 = code_info.GetDexRegisterCatalogEntry(0); DexRegisterLocation location1 = code_info.GetDexRegisterCatalogEntry(1); @@ -358,13 +364,6 @@ TEST(StackMapTest, TestDeduplicateInlineInfoDexRegisterMap) { ASSERT_EQ(Kind::kConstant, location1.GetKind()); ASSERT_EQ(0, location0.GetValue()); ASSERT_EQ(-2, location1.GetValue()); - - // Test that the inline info dex register map deduplicated to the same offset as the stack map - // one. 
- ASSERT_TRUE(stack_map.HasInlineInfo()); - InlineInfo inline_info = code_info.GetInlineInfoAtDepth(stack_map, 0); - EXPECT_EQ(inline_info.GetDexRegisterMapIndex(), - stack_map.GetDexRegisterMapIndex()); } } @@ -373,14 +372,16 @@ TEST(StackMapTest, TestNonLiveDexRegisters) { ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); + stream.BeginMethod(32, 0, 0, 2); ArenaBitVector sp_mask(&allocator, 0, false); uint32_t number_of_dex_registers = 2; - stream.BeginStackMapEntry(0, 64 * kPcAlign, 0x3, &sp_mask, number_of_dex_registers, 0); + stream.BeginStackMapEntry(0, 64 * kPcAlign, 0x3, &sp_mask); stream.AddDexRegisterEntry(Kind::kNone, 0); // No location. stream.AddDexRegisterEntry(Kind::kConstant, -2); // Large location. stream.EndStackMapEntry(); + stream.EndMethod(); size_t size = stream.PrepareForFillIn(); void* memory = allocator.Alloc(size, kArenaAllocMisc); MemoryRegion region(memory, size); @@ -400,15 +401,15 @@ TEST(StackMapTest, TestNonLiveDexRegisters) { ASSERT_EQ(0x3u, code_info.GetRegisterMaskOf(stack_map)); ASSERT_TRUE(stack_map.HasDexRegisterMap()); - DexRegisterMap dex_register_map = - code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers); - ASSERT_FALSE(dex_register_map.IsDexRegisterLive(0)); - ASSERT_TRUE(dex_register_map.IsDexRegisterLive(1)); + DexRegisterMap dex_register_map = code_info.GetDexRegisterMapOf(stack_map); + ASSERT_EQ(number_of_dex_registers, dex_register_map.size()); + ASSERT_FALSE(dex_register_map[0].IsLive()); + ASSERT_TRUE(dex_register_map[1].IsLive()); ASSERT_EQ(1u, dex_register_map.GetNumberOfLiveDexRegisters()); - ASSERT_EQ(Kind::kNone, dex_register_map.GetLocationKind(0)); - ASSERT_EQ(Kind::kConstant, dex_register_map.GetLocationKind(1)); - ASSERT_EQ(-2, dex_register_map.GetConstant(1)); + ASSERT_EQ(Kind::kNone, dex_register_map[0].GetKind()); + ASSERT_EQ(Kind::kConstant, dex_register_map[1].GetKind()); + ASSERT_EQ(-2, dex_register_map[1].GetConstant()); DexRegisterLocation location1 = code_info.GetDexRegisterCatalogEntry(0); ASSERT_EQ(Kind::kConstant, location1.GetKind()); @@ -422,25 +423,27 @@ TEST(StackMapTest, TestShareDexRegisterMap) { ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); + stream.BeginMethod(32, 0, 0, 2); ArenaBitVector sp_mask(&allocator, 0, false); uint32_t number_of_dex_registers = 2; // First stack map. - stream.BeginStackMapEntry(0, 64 * kPcAlign, 0x3, &sp_mask, number_of_dex_registers, 0); + stream.BeginStackMapEntry(0, 64 * kPcAlign, 0x3, &sp_mask); stream.AddDexRegisterEntry(Kind::kInRegister, 0); // Short location. stream.AddDexRegisterEntry(Kind::kConstant, -2); // Large location. stream.EndStackMapEntry(); // Second stack map, which should share the same dex register map. - stream.BeginStackMapEntry(0, 64 * kPcAlign, 0x3, &sp_mask, number_of_dex_registers, 0); + stream.BeginStackMapEntry(0, 65 * kPcAlign, 0x3, &sp_mask); stream.AddDexRegisterEntry(Kind::kInRegister, 0); // Short location. stream.AddDexRegisterEntry(Kind::kConstant, -2); // Large location. stream.EndStackMapEntry(); // Third stack map (doesn't share the dex register map). - stream.BeginStackMapEntry(0, 64 * kPcAlign, 0x3, &sp_mask, number_of_dex_registers, 0); + stream.BeginStackMapEntry(0, 66 * kPcAlign, 0x3, &sp_mask); stream.AddDexRegisterEntry(Kind::kInRegister, 2); // Short location. stream.AddDexRegisterEntry(Kind::kConstant, -2); // Large location. 
stream.EndStackMapEntry(); + stream.EndMethod(); size_t size = stream.PrepareForFillIn(); void* memory = allocator.Alloc(size, kArenaAllocMisc); MemoryRegion region(memory, size); @@ -450,29 +453,28 @@ TEST(StackMapTest, TestShareDexRegisterMap) { // Verify first stack map. StackMap sm0 = ci.GetStackMapAt(0); - DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm0, number_of_dex_registers); - ASSERT_EQ(0, dex_registers0.GetMachineRegister(0)); - ASSERT_EQ(-2, dex_registers0.GetConstant(1)); + DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm0); + ASSERT_EQ(number_of_dex_registers, dex_registers0.size()); + ASSERT_EQ(0, dex_registers0[0].GetMachineRegister()); + ASSERT_EQ(-2, dex_registers0[1].GetConstant()); // Verify second stack map. StackMap sm1 = ci.GetStackMapAt(1); - DexRegisterMap dex_registers1 = ci.GetDexRegisterMapOf(sm1, number_of_dex_registers); - ASSERT_EQ(0, dex_registers1.GetMachineRegister(0)); - ASSERT_EQ(-2, dex_registers1.GetConstant(1)); + DexRegisterMap dex_registers1 = ci.GetDexRegisterMapOf(sm1); + ASSERT_EQ(number_of_dex_registers, dex_registers1.size()); + ASSERT_EQ(0, dex_registers1[0].GetMachineRegister()); + ASSERT_EQ(-2, dex_registers1[1].GetConstant()); // Verify third stack map. StackMap sm2 = ci.GetStackMapAt(2); - DexRegisterMap dex_registers2 = ci.GetDexRegisterMapOf(sm2, number_of_dex_registers); - ASSERT_EQ(2, dex_registers2.GetMachineRegister(0)); - ASSERT_EQ(-2, dex_registers2.GetConstant(1)); - - // Verify dex register map offsets. - ASSERT_EQ(sm0.GetDexRegisterMapIndex(), - sm1.GetDexRegisterMapIndex()); - ASSERT_NE(sm0.GetDexRegisterMapIndex(), - sm2.GetDexRegisterMapIndex()); - ASSERT_NE(sm1.GetDexRegisterMapIndex(), - sm2.GetDexRegisterMapIndex()); + DexRegisterMap dex_registers2 = ci.GetDexRegisterMapOf(sm2); + ASSERT_EQ(number_of_dex_registers, dex_registers2.size()); + ASSERT_EQ(2, dex_registers2[0].GetMachineRegister()); + ASSERT_EQ(-2, dex_registers2[1].GetConstant()); + + // Verify dex register mask offsets. + ASSERT_FALSE(sm1.HasDexRegisterMaskIndex()); // No delta. + ASSERT_TRUE(sm2.HasDexRegisterMaskIndex()); // Has delta. } TEST(StackMapTest, TestNoDexRegisterMap) { @@ -480,17 +482,19 @@ TEST(StackMapTest, TestNoDexRegisterMap) { ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); + stream.BeginMethod(32, 0, 0, 1); ArenaBitVector sp_mask(&allocator, 0, false); uint32_t number_of_dex_registers = 0; - stream.BeginStackMapEntry(0, 64 * kPcAlign, 0x3, &sp_mask, number_of_dex_registers, 0); + stream.BeginStackMapEntry(0, 64 * kPcAlign, 0x3, &sp_mask); stream.EndStackMapEntry(); number_of_dex_registers = 1; - stream.BeginStackMapEntry(1, 68 * kPcAlign, 0x4, &sp_mask, number_of_dex_registers, 0); + stream.BeginStackMapEntry(1, 68 * kPcAlign, 0x4, &sp_mask); stream.AddDexRegisterEntry(Kind::kNone, 0); stream.EndStackMapEntry(); + stream.EndMethod(); size_t size = stream.PrepareForFillIn(); void* memory = allocator.Alloc(size, kArenaAllocMisc); MemoryRegion region(memory, size); @@ -528,6 +532,7 @@ TEST(StackMapTest, InlineTest) { ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); + stream.BeginMethod(32, 0, 0, 2); ArtMethod art_method; ArenaBitVector sp_mask1(&allocator, 0, true); @@ -535,7 +540,7 @@ TEST(StackMapTest, InlineTest) { sp_mask1.SetBit(4); // First stack map. 
- stream.BeginStackMapEntry(0, 64 * kPcAlign, 0x3, &sp_mask1, 2, 2); + stream.BeginStackMapEntry(0, 10 * kPcAlign, 0x3, &sp_mask1); stream.AddDexRegisterEntry(Kind::kInStack, 0); stream.AddDexRegisterEntry(Kind::kConstant, 4); @@ -551,7 +556,7 @@ TEST(StackMapTest, InlineTest) { stream.EndStackMapEntry(); // Second stack map. - stream.BeginStackMapEntry(2, 22 * kPcAlign, 0x3, &sp_mask1, 2, 3); + stream.BeginStackMapEntry(2, 22 * kPcAlign, 0x3, &sp_mask1); stream.AddDexRegisterEntry(Kind::kInStack, 56); stream.AddDexRegisterEntry(Kind::kConstant, 0); @@ -569,13 +574,13 @@ TEST(StackMapTest, InlineTest) { stream.EndStackMapEntry(); // Third stack map. - stream.BeginStackMapEntry(4, 56 * kPcAlign, 0x3, &sp_mask1, 2, 0); + stream.BeginStackMapEntry(4, 56 * kPcAlign, 0x3, &sp_mask1); stream.AddDexRegisterEntry(Kind::kNone, 0); stream.AddDexRegisterEntry(Kind::kConstant, 4); stream.EndStackMapEntry(); // Fourth stack map. - stream.BeginStackMapEntry(6, 78 * kPcAlign, 0x3, &sp_mask1, 2, 3); + stream.BeginStackMapEntry(6, 78 * kPcAlign, 0x3, &sp_mask1); stream.AddDexRegisterEntry(Kind::kInStack, 56); stream.AddDexRegisterEntry(Kind::kConstant, 0); @@ -591,6 +596,7 @@ TEST(StackMapTest, InlineTest) { stream.EndStackMapEntry(); + stream.EndMethod(); size_t size = stream.PrepareForFillIn(); void* memory = allocator.Alloc(size, kArenaAllocMisc); MemoryRegion region(memory, size); @@ -602,64 +608,66 @@ TEST(StackMapTest, InlineTest) { // Verify first stack map. StackMap sm0 = ci.GetStackMapAt(0); - DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm0, 2); - ASSERT_EQ(0, dex_registers0.GetStackOffsetInBytes(0)); - ASSERT_EQ(4, dex_registers0.GetConstant(1)); - - InlineInfo if0_0 = ci.GetInlineInfoAtDepth(sm0, 0); - InlineInfo if0_1 = ci.GetInlineInfoAtDepth(sm0, 1); - ASSERT_EQ(2u, ci.GetInlineDepthOf(sm0)); - ASSERT_EQ(2u, if0_0.GetDexPc()); - ASSERT_TRUE(if0_0.EncodesArtMethod()); - ASSERT_EQ(3u, if0_1.GetDexPc()); - ASSERT_TRUE(if0_1.EncodesArtMethod()); - - DexRegisterMap dex_registers1 = ci.GetDexRegisterMapAtDepth(0, sm0, 1); - ASSERT_EQ(8, dex_registers1.GetStackOffsetInBytes(0)); - - DexRegisterMap dex_registers2 = ci.GetDexRegisterMapAtDepth(1, sm0, 3); - ASSERT_EQ(16, dex_registers2.GetStackOffsetInBytes(0)); - ASSERT_EQ(20, dex_registers2.GetConstant(1)); - ASSERT_EQ(15, dex_registers2.GetMachineRegister(2)); + DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm0); + ASSERT_EQ(2u, dex_registers0.size()); + ASSERT_EQ(0, dex_registers0[0].GetStackOffsetInBytes()); + ASSERT_EQ(4, dex_registers0[1].GetConstant()); + + auto inline_infos = ci.GetInlineInfosOf(sm0); + ASSERT_EQ(2u, inline_infos.size()); + ASSERT_EQ(2u, inline_infos[0].GetDexPc()); + ASSERT_TRUE(inline_infos[0].EncodesArtMethod()); + ASSERT_EQ(3u, inline_infos[1].GetDexPc()); + ASSERT_TRUE(inline_infos[1].EncodesArtMethod()); + + DexRegisterMap dex_registers1 = ci.GetInlineDexRegisterMapOf(sm0, inline_infos[0]); + ASSERT_EQ(1u, dex_registers1.size()); + ASSERT_EQ(8, dex_registers1[0].GetStackOffsetInBytes()); + + DexRegisterMap dex_registers2 = ci.GetInlineDexRegisterMapOf(sm0, inline_infos[1]); + ASSERT_EQ(3u, dex_registers2.size()); + ASSERT_EQ(16, dex_registers2[0].GetStackOffsetInBytes()); + ASSERT_EQ(20, dex_registers2[1].GetConstant()); + ASSERT_EQ(15, dex_registers2[2].GetMachineRegister()); } { // Verify second stack map. 
StackMap sm1 = ci.GetStackMapAt(1); - DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm1, 2); - ASSERT_EQ(56, dex_registers0.GetStackOffsetInBytes(0)); - ASSERT_EQ(0, dex_registers0.GetConstant(1)); - - InlineInfo if1_0 = ci.GetInlineInfoAtDepth(sm1, 0); - InlineInfo if1_1 = ci.GetInlineInfoAtDepth(sm1, 1); - InlineInfo if1_2 = ci.GetInlineInfoAtDepth(sm1, 2); - ASSERT_EQ(3u, ci.GetInlineDepthOf(sm1)); - ASSERT_EQ(2u, if1_0.GetDexPc()); - ASSERT_TRUE(if1_0.EncodesArtMethod()); - ASSERT_EQ(3u, if1_1.GetDexPc()); - ASSERT_TRUE(if1_1.EncodesArtMethod()); - ASSERT_EQ(5u, if1_2.GetDexPc()); - ASSERT_TRUE(if1_2.EncodesArtMethod()); - - DexRegisterMap dex_registers1 = ci.GetDexRegisterMapAtDepth(0, sm1, 1); - ASSERT_EQ(12, dex_registers1.GetStackOffsetInBytes(0)); - - DexRegisterMap dex_registers2 = ci.GetDexRegisterMapAtDepth(1, sm1, 3); - ASSERT_EQ(80, dex_registers2.GetStackOffsetInBytes(0)); - ASSERT_EQ(10, dex_registers2.GetConstant(1)); - ASSERT_EQ(5, dex_registers2.GetMachineRegister(2)); - - ASSERT_FALSE(if1_2.HasDexRegisterMap()); + DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm1); + ASSERT_EQ(2u, dex_registers0.size()); + ASSERT_EQ(56, dex_registers0[0].GetStackOffsetInBytes()); + ASSERT_EQ(0, dex_registers0[1].GetConstant()); + + auto inline_infos = ci.GetInlineInfosOf(sm1); + ASSERT_EQ(3u, inline_infos.size()); + ASSERT_EQ(2u, inline_infos[0].GetDexPc()); + ASSERT_TRUE(inline_infos[0].EncodesArtMethod()); + ASSERT_EQ(3u, inline_infos[1].GetDexPc()); + ASSERT_TRUE(inline_infos[1].EncodesArtMethod()); + ASSERT_EQ(5u, inline_infos[2].GetDexPc()); + ASSERT_TRUE(inline_infos[2].EncodesArtMethod()); + + DexRegisterMap dex_registers1 = ci.GetInlineDexRegisterMapOf(sm1, inline_infos[0]); + ASSERT_EQ(1u, dex_registers1.size()); + ASSERT_EQ(12, dex_registers1[0].GetStackOffsetInBytes()); + + DexRegisterMap dex_registers2 = ci.GetInlineDexRegisterMapOf(sm1, inline_infos[1]); + ASSERT_EQ(3u, dex_registers2.size()); + ASSERT_EQ(80, dex_registers2[0].GetStackOffsetInBytes()); + ASSERT_EQ(10, dex_registers2[1].GetConstant()); + ASSERT_EQ(5, dex_registers2[2].GetMachineRegister()); } { // Verify third stack map. StackMap sm2 = ci.GetStackMapAt(2); - DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm2, 2); - ASSERT_FALSE(dex_registers0.IsDexRegisterLive(0)); - ASSERT_EQ(4, dex_registers0.GetConstant(1)); + DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm2); + ASSERT_EQ(2u, dex_registers0.size()); + ASSERT_FALSE(dex_registers0[0].IsLive()); + ASSERT_EQ(4, dex_registers0[1].GetConstant()); ASSERT_FALSE(sm2.HasInlineInfo()); } @@ -667,29 +675,28 @@ TEST(StackMapTest, InlineTest) { // Verify fourth stack map. 
 StackMap sm3 = ci.GetStackMapAt(3);
- DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm3, 2);
- ASSERT_EQ(56, dex_registers0.GetStackOffsetInBytes(0));
- ASSERT_EQ(0, dex_registers0.GetConstant(1));
-
- InlineInfo if2_0 = ci.GetInlineInfoAtDepth(sm3, 0);
- InlineInfo if2_1 = ci.GetInlineInfoAtDepth(sm3, 1);
- InlineInfo if2_2 = ci.GetInlineInfoAtDepth(sm3, 2);
- ASSERT_EQ(3u, ci.GetInlineDepthOf(sm3));
- ASSERT_EQ(2u, if2_0.GetDexPc());
- ASSERT_TRUE(if2_0.EncodesArtMethod());
- ASSERT_EQ(5u, if2_1.GetDexPc());
- ASSERT_TRUE(if2_1.EncodesArtMethod());
- ASSERT_EQ(10u, if2_2.GetDexPc());
- ASSERT_TRUE(if2_2.EncodesArtMethod());
-
- ASSERT_FALSE(if2_0.HasDexRegisterMap());
-
- DexRegisterMap dex_registers1 = ci.GetDexRegisterMapAtDepth(1, sm3, 1);
- ASSERT_EQ(2, dex_registers1.GetMachineRegister(0));
-
- DexRegisterMap dex_registers2 = ci.GetDexRegisterMapAtDepth(2, sm3, 2);
- ASSERT_FALSE(dex_registers2.IsDexRegisterLive(0));
- ASSERT_EQ(3, dex_registers2.GetMachineRegister(1));
+ DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm3);
+ ASSERT_EQ(2u, dex_registers0.size());
+ ASSERT_EQ(56, dex_registers0[0].GetStackOffsetInBytes());
+ ASSERT_EQ(0, dex_registers0[1].GetConstant());
+
+ auto inline_infos = ci.GetInlineInfosOf(sm3);
+ ASSERT_EQ(3u, inline_infos.size());
+ ASSERT_EQ(2u, inline_infos[0].GetDexPc());
+ ASSERT_TRUE(inline_infos[0].EncodesArtMethod());
+ ASSERT_EQ(5u, inline_infos[1].GetDexPc());
+ ASSERT_TRUE(inline_infos[1].EncodesArtMethod());
+ ASSERT_EQ(10u, inline_infos[2].GetDexPc());
+ ASSERT_TRUE(inline_infos[2].EncodesArtMethod());
+
+ DexRegisterMap dex_registers1 = ci.GetInlineDexRegisterMapOf(sm3, inline_infos[1]);
+ ASSERT_EQ(1u, dex_registers1.size());
+ ASSERT_EQ(2, dex_registers1[0].GetMachineRegister());
+
+ DexRegisterMap dex_registers2 = ci.GetInlineDexRegisterMapOf(sm3, inline_infos[2]);
+ ASSERT_EQ(2u, dex_registers2.size());
+ ASSERT_FALSE(dex_registers2[0].IsLive());
+ ASSERT_EQ(3, dex_registers2[1].GetMachineRegister());
 }
 }
@@ -726,15 +733,17 @@ TEST(StackMapTest, TestDeduplicateStackMask) {
 ArenaStack arena_stack(&pool);
 ScopedArenaAllocator allocator(&arena_stack);
 StackMapStream stream(&allocator, kRuntimeISA);
+ stream.BeginMethod(32, 0, 0, 0);
 ArenaBitVector sp_mask(&allocator, 0, true);
 sp_mask.SetBit(1);
 sp_mask.SetBit(4);
- stream.BeginStackMapEntry(0, 4 * kPcAlign, 0x3, &sp_mask, 0, 0);
+ stream.BeginStackMapEntry(0, 4 * kPcAlign, 0x3, &sp_mask);
 stream.EndStackMapEntry();
- stream.BeginStackMapEntry(0, 8 * kPcAlign, 0x3, &sp_mask, 0, 0);
+ stream.BeginStackMapEntry(0, 8 * kPcAlign, 0x3, &sp_mask);
 stream.EndStackMapEntry();
+ stream.EndMethod();
 size_t size = stream.PrepareForFillIn();
 void* memory = allocator.Alloc(size, kArenaAllocMisc);
 MemoryRegion region(memory, size);
@@ -754,19 +763,21 @@ TEST(StackMapTest, TestInvokeInfo) {
 ArenaStack arena_stack(&pool);
 ScopedArenaAllocator allocator(&arena_stack);
 StackMapStream stream(&allocator, kRuntimeISA);
+ stream.BeginMethod(32, 0, 0, 0);
 ArenaBitVector sp_mask(&allocator, 0, true);
 sp_mask.SetBit(1);
- stream.BeginStackMapEntry(0, 4 * kPcAlign, 0x3, &sp_mask, 0, 0);
+ stream.BeginStackMapEntry(0, 4 * kPcAlign, 0x3, &sp_mask);
 stream.AddInvoke(kSuper, 1);
 stream.EndStackMapEntry();
- stream.BeginStackMapEntry(0, 8 * kPcAlign, 0x3, &sp_mask, 0, 0);
+ stream.BeginStackMapEntry(0, 8 * kPcAlign, 0x3, &sp_mask);
 stream.AddInvoke(kStatic, 3);
 stream.EndStackMapEntry();
- stream.BeginStackMapEntry(0, 16 * kPcAlign, 0x3, &sp_mask, 0, 0);
+ stream.BeginStackMapEntry(0, 16 * kPcAlign, 0x3, &sp_mask);
 stream.AddInvoke(kDirect, 65535);
 stream.EndStackMapEntry();
+ stream.EndMethod();
 const size_t code_info_size = stream.PrepareForFillIn();
 MemoryRegion code_info_region(allocator.Alloc(code_info_size, kArenaAllocMisc), code_info_size);
 stream.FillInCodeInfo(code_info_region);
diff --git a/compiler/optimizing/superblock_cloner.cc b/compiler/optimizing/superblock_cloner.cc
index 1b43618538..878967cc6e 100644
--- a/compiler/optimizing/superblock_cloner.cc
+++ b/compiler/optimizing/superblock_cloner.cc
@@ -72,12 +72,12 @@ static bool ArePhiInputsTheSame(const HPhi* phi) {
 // Returns whether two Edge sets are equal (ArenaHashSet doesn't have "Equal" method).
 static bool EdgeHashSetsEqual(const HEdgeSet* set1, const HEdgeSet* set2) {
- if (set1->Size() != set2->Size()) {
+ if (set1->size() != set2->size()) {
 return false;
 }
 for (auto e : *set1) {
- if (set2->Find(e) == set2->end()) {
+ if (set2->find(e) == set2->end()) {
 return false;
 }
 }
@@ -472,8 +472,8 @@ void SuperblockCloner::RemapEdgesSuccessors() {
 continue;
 }
- auto orig_redir = remap_orig_internal_->Find(HEdge(orig_block_id, orig_succ_id));
- auto copy_redir = remap_copy_internal_->Find(HEdge(orig_block_id, orig_succ_id));
+ auto orig_redir = remap_orig_internal_->find(HEdge(orig_block_id, orig_succ_id));
+ auto copy_redir = remap_copy_internal_->find(HEdge(orig_block_id, orig_succ_id));
 // Due to construction all successors of copied block were set to original.
 if (copy_redir != remap_copy_internal_->end()) {
@@ -864,9 +864,9 @@ bool SuperblockCloner::IsFastCase() const {
 EdgeHashSetsEqual(&remap_copy_internal, remap_copy_internal_) &&
 EdgeHashSetsEqual(&remap_incoming, remap_incoming_);
- remap_orig_internal.Clear();
- remap_copy_internal.Clear();
- remap_incoming.Clear();
+ remap_orig_internal.clear();
+ remap_copy_internal.clear();
+ remap_incoming.clear();
 // Check whether remapping info corresponds to loop peeling.
 CollectRemappingInfoForPeelUnroll(/* to_unroll*/ false,
@@ -1022,16 +1022,16 @@ void CollectRemappingInfoForPeelUnroll(bool to_unroll,
 for (HBasicBlock* back_edge_block : loop_info->GetBackEdges()) {
 HEdge e = HEdge(back_edge_block, loop_header);
 if (to_unroll) {
- remap_orig_internal->Insert(e);
- remap_copy_internal->Insert(e);
+ remap_orig_internal->insert(e);
+ remap_copy_internal->insert(e);
 } else {
- remap_copy_internal->Insert(e);
+ remap_copy_internal->insert(e);
 }
 }
 // Set up remap_incoming edges set.
 if (!to_unroll) {
- remap_incoming->Insert(HEdge(loop_info->GetPreHeader(), loop_header));
+ remap_incoming->insert(HEdge(loop_info->GetPreHeader(), loop_header));
 }
 }
diff --git a/compiler/optimizing/superblock_cloner_test.cc b/compiler/optimizing/superblock_cloner_test.cc
index df2e517aff..31114b6dcc 100644
--- a/compiler/optimizing/superblock_cloner_test.cc
+++ b/compiler/optimizing/superblock_cloner_test.cc
@@ -30,38 +30,8 @@ using HEdgeSet = SuperblockCloner::HEdgeSet;
 // This class provides methods and helpers for testing various cloning and copying routines:
 // individual instruction cloning and cloning of the more coarse-grain structures.
-class SuperblockClonerTest : public OptimizingUnitTest {
+class SuperblockClonerTest : public ImprovedOptimizingUnitTest {
 public:
- SuperblockClonerTest() : graph_(CreateGraph()),
- entry_block_(nullptr),
- return_block_(nullptr),
- exit_block_(nullptr),
- parameter_(nullptr) {}
-
- void InitGraph() {
- entry_block_ = new (GetAllocator()) HBasicBlock(graph_);
- graph_->AddBlock(entry_block_);
- graph_->SetEntryBlock(entry_block_);
-
- return_block_ = new (GetAllocator()) HBasicBlock(graph_);
- graph_->AddBlock(return_block_);
-
- exit_block_ = new (GetAllocator()) HBasicBlock(graph_);
- graph_->AddBlock(exit_block_);
- graph_->SetExitBlock(exit_block_);
-
- entry_block_->AddSuccessor(return_block_);
- return_block_->AddSuccessor(exit_block_);
-
- parameter_ = new (GetAllocator()) HParameterValue(graph_->GetDexFile(),
- dex::TypeIndex(0),
- 0,
- DataType::Type::kInt32);
- entry_block_->AddInstruction(parameter_);
- return_block_->AddInstruction(new (GetAllocator()) HReturnVoid());
- exit_block_->AddInstruction(new (GetAllocator()) HExit());
- }
-
 void CreateBasicLoopControlFlow(HBasicBlock* position,
 HBasicBlock* successor,
 /* out */ HBasicBlock** header_p,
@@ -137,40 +107,6 @@ class SuperblockClonerTest : public OptimizingUnitTest {
 null_check->CopyEnvironmentFrom(env);
 bounds_check->CopyEnvironmentFrom(env);
 }
-
- HEnvironment* ManuallyBuildEnvFor(HInstruction* instruction,
- ArenaVector<HInstruction*>* current_locals) {
- HEnvironment* environment = new (GetAllocator()) HEnvironment(
- (GetAllocator()),
- current_locals->size(),
- graph_->GetArtMethod(),
- instruction->GetDexPc(),
- instruction);
-
- environment->CopyFrom(ArrayRef<HInstruction* const>(*current_locals));
- instruction->SetRawEnvironment(environment);
- return environment;
- }
-
- bool CheckGraph() {
- GraphChecker checker(graph_);
- checker.Run();
- if (!checker.IsValid()) {
- for (const std::string& error : checker.GetErrors()) {
- std::cout << error << std::endl;
- }
- return false;
- }
- return true;
- }
-
- HGraph* graph_;
-
- HBasicBlock* entry_block_;
- HBasicBlock* return_block_;
- HBasicBlock* exit_block_;
-
- HInstruction* parameter_;
 };
 TEST_F(SuperblockClonerTest, IndividualInstrCloner) {
@@ -708,8 +644,8 @@ TEST_F(SuperblockClonerTest, FastCaseCheck) {
 orig_bb_set.SetBit(preheader->GetBlockId());
 // Adjust incoming edges.
- remap_incoming.Clear();
- remap_incoming.Insert(HEdge(preheader->GetSinglePredecessor(), preheader));
+ remap_incoming.clear();
+ remap_incoming.insert(HEdge(preheader->GetSinglePredecessor(), preheader));
 HBasicBlockMap bb_map(std::less<HBasicBlock*>(), arena->Adapter(kArenaAllocSuperblockCloner));
 HInstructionMap hir_map(std::less<HInstruction*>(), arena->Adapter(kArenaAllocSuperblockCloner));
diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
index 2c428fac7e..c6c764e3a9 100644
--- a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
+++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
@@ -120,11 +120,10 @@ void ArmVIXLJNIMacroAssembler::BuildFrame(size_t frame_size,
 // Write out entry spills.
 int32_t offset = frame_size + kFramePointerSize;
- for (size_t i = 0; i < entry_spills.size(); ++i) {
- ArmManagedRegister reg = entry_spills.at(i).AsArm();
+ for (const ManagedRegisterSpill& spill : entry_spills) {
+ ArmManagedRegister reg = spill.AsArm();
 if (reg.IsNoRegister()) {
 // only increment stack offset.
- ManagedRegisterSpill spill = entry_spills.at(i);
 offset += spill.getSize();
 } else if (reg.IsCoreRegister()) {
 asm_.StoreToOffset(kStoreWord, AsVIXLRegister(reg), sp, offset);
diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.cc b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
index a5aa1c12b3..d6ce03387c 100644
--- a/compiler/utils/arm64/jni_macro_assembler_arm64.cc
+++ b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
@@ -719,11 +719,10 @@ void Arm64JNIMacroAssembler::BuildFrame(size_t frame_size,
 // Write out entry spills
 int32_t offset = frame_size + static_cast<size_t>(kArm64PointerSize);
- for (size_t i = 0; i < entry_spills.size(); ++i) {
- Arm64ManagedRegister reg = entry_spills.at(i).AsArm64();
+ for (const ManagedRegisterSpill& spill : entry_spills) {
+ Arm64ManagedRegister reg = spill.AsArm64();
 if (reg.IsNoRegister()) {
 // only increment stack offset.
- ManagedRegisterSpill spill = entry_spills.at(i);
 offset += spill.getSize();
 } else if (reg.IsXRegister()) {
 StoreToOffset(reg.AsXRegister(), SP, offset);
diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc
index 19c405e517..e76e98a2a3 100644
--- a/compiler/utils/assembler_thumb_test_expected.cc.inc
+++ b/compiler/utils/assembler_thumb_test_expected.cc.inc
@@ -153,7 +153,7 @@ const char* const VixlJniHelpersResults[] = {
 " 21c: f8d9 8034 ldr.w r8, [r9, #52] ; 0x34\n",
 " 220: 4770 bx lr\n",
 " 222: 4660 mov r0, ip\n",
- " 224: f8d9 c2cc ldr.w ip, [r9, #716] ; 0x2cc\n",
+ " 224: f8d9 c2d0 ldr.w ip, [r9, #720] ; 0x2d0\n",
 " 228: 47e0 blx ip\n",
 nullptr
 };
diff --git a/compiler/utils/dedupe_set-inl.h b/compiler/utils/dedupe_set-inl.h
index c866504e62..4e892f2616 100644
--- a/compiler/utils/dedupe_set-inl.h
+++ b/compiler/utils/dedupe_set-inl.h
@@ -71,13 +71,13 @@ class DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::Shard {
 const StoreKey* Add(Thread* self, size_t hash, const InKey& in_key) REQUIRES(!lock_) {
 MutexLock lock(self, lock_);
 HashedKey<InKey> hashed_in_key(hash, &in_key);
- auto it = keys_.Find(hashed_in_key);
+ auto it = keys_.find(hashed_in_key);
 if (it != keys_.end()) {
 DCHECK(it->Key() != nullptr);
 return it->Key();
 }
 const StoreKey* store_key = alloc_.Copy(in_key);
- keys_.Insert(HashedKey<StoreKey> { hash, store_key });
+ keys_.insert(HashedKey<StoreKey> { hash, store_key });
 return store_key;
 }
@@ -90,7 +90,7 @@ class DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::Shard {
 // Note: The total_probe_distance will be updated with the current state.
 // It may have been higher before a re-hash.
 global_stats->total_probe_distance += keys_.TotalProbeDistance();
- global_stats->total_size += keys_.Size();
+ global_stats->total_size += keys_.size();
 for (const HashedKey<StoreKey>& key : keys_) {
 auto it = stats.find(key.Hash());
 if (it == stats.end()) {
diff --git a/compiler/utils/managed_register.h b/compiler/utils/managed_register.h
index 2b7b2aa7ce..db9c36cc75 100644
--- a/compiler/utils/managed_register.h
+++ b/compiler/utils/managed_register.h
@@ -101,11 +101,11 @@ class ManagedRegisterSpill : public ManagedRegister {
 ManagedRegisterSpill(const ManagedRegister& other, int32_t size)
 : ManagedRegister(other), size_(size), spill_offset_(-1) { }
- int32_t getSpillOffset() {
+ int32_t getSpillOffset() const {
 return spill_offset_;
 }
- int32_t getSize() {
+ int32_t getSize() const {
 return size_;
 }
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index dce5b95fec..c0b6f988d4 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -4801,10 +4801,9 @@ void MipsAssembler::BuildFrame(size_t frame_size,
 // Write out entry spills.
 int32_t offset = frame_size + kFramePointerSize;
- for (size_t i = 0; i < entry_spills.size(); ++i) {
- MipsManagedRegister reg = entry_spills.at(i).AsMips();
+ for (const ManagedRegisterSpill& spill : entry_spills) {
+ MipsManagedRegister reg = spill.AsMips();
 if (reg.IsNoRegister()) {
- ManagedRegisterSpill spill = entry_spills.at(i);
 offset += spill.getSize();
 } else if (reg.IsCoreRegister()) {
 StoreToOffset(kStoreWord, reg.AsCoreRegister(), SP, offset);
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index bb1bb82fa5..5b1c5d9e01 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -3633,9 +3633,8 @@ void Mips64Assembler::BuildFrame(size_t frame_size,
 // Write out entry spills.
 int32_t offset = frame_size + kFramePointerSize;
- for (size_t i = 0; i < entry_spills.size(); ++i) {
- Mips64ManagedRegister reg = entry_spills[i].AsMips64();
- ManagedRegisterSpill spill = entry_spills.at(i);
+ for (const ManagedRegisterSpill& spill : entry_spills) {
+ Mips64ManagedRegister reg = spill.AsMips64();
 int32_t size = spill.getSize();
 if (reg.IsNoRegister()) {
 // only increment stack offset.
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 86f9010ea3..c2ce03b1f2 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -525,6 +525,58 @@ void X86Assembler::divss(XmmRegister dst, const Address& src) {
 EmitOperand(dst, src);
 }
+void X86Assembler::vfmadd231ps(XmmRegister acc, XmmRegister mul_left, XmmRegister mul_right) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ uint8_t byte_zero = EmitVexByteZero(false /*is_two_byte*/);
+ uint8_t byte_one = EmitVexByte1(false, false, false, 2);
+ uint8_t byte_two = EmitVexByte2(false, 128, X86ManagedRegister::FromXmmRegister(mul_left), 1);
+ EmitUint8(byte_zero);
+ EmitUint8(byte_one);
+ EmitUint8(byte_two);
+ // Opcode field.
+ EmitUint8(0xB8);
+ EmitXmmRegisterOperand(acc, mul_right);
+}
+
+void X86Assembler::vfmsub231ps(XmmRegister acc, XmmRegister mul_left, XmmRegister mul_right) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ uint8_t byte_zero = EmitVexByteZero(false /*is_two_byte*/);
+ uint8_t byte_one = EmitVexByte1(false, false, false, 2);
+ uint8_t byte_two = EmitVexByte2(false, 128, X86ManagedRegister::FromXmmRegister(mul_left), 1);
+ EmitUint8(byte_zero);
+ EmitUint8(byte_one);
+ EmitUint8(byte_two);
+ // Opcode field.
+ EmitUint8(0xBA);
+ EmitXmmRegisterOperand(acc, mul_right);
+}
+
+void X86Assembler::vfmadd231pd(XmmRegister acc, XmmRegister mul_left, XmmRegister mul_right) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ uint8_t byte_zero = EmitVexByteZero(false /*is_two_byte*/);
+ uint8_t byte_one = EmitVexByte1(false, false, false, 2);
+ uint8_t byte_two = EmitVexByte2(true, 128, X86ManagedRegister::FromXmmRegister(mul_left), 1);
+ EmitUint8(byte_zero);
+ EmitUint8(byte_one);
+ EmitUint8(byte_two);
+ // Opcode field.
+ EmitUint8(0xB8);
+ EmitXmmRegisterOperand(acc, mul_right);
+}
+
+void X86Assembler::vfmsub231pd(XmmRegister acc, XmmRegister mul_left, XmmRegister mul_right) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ uint8_t byte_zero = EmitVexByteZero(false /*is_two_byte*/);
+ uint8_t byte_one = EmitVexByte1(false, false, false, 2);
+ uint8_t byte_two = EmitVexByte2(true, 128, X86ManagedRegister::FromXmmRegister(mul_left), 1);
+ EmitUint8(byte_zero);
+ EmitUint8(byte_one);
+ EmitUint8(byte_two);
+ // Opcode field.
+ EmitUint8(0xBA);
+ EmitXmmRegisterOperand(acc, mul_right);
+}
+
 void X86Assembler::addps(XmmRegister dst, XmmRegister src) {
 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
@@ -2898,6 +2950,99 @@ void X86Assembler::EmitLabelLink(NearLabel* label) {
 }
+uint8_t X86Assembler::EmitVexByteZero(bool is_two_byte) {
+ uint8_t vex_zero = 0xC0;
+ if (!is_two_byte) {
+ vex_zero |= 0xC4;
+ } else {
+ vex_zero |= 0xC5;
+ }
+ return vex_zero;
+}
+
+uint8_t X86Assembler::EmitVexByte1(bool r, bool x, bool b, int mmmmm) {
+ // VEX Byte 1.
+ uint8_t vex_prefix = 0;
+ if (!r) {
+ vex_prefix |= 0x80; // VEX.R.
+ }
+ if (!x) {
+ vex_prefix |= 0x40; // VEX.X.
+ }
+ if (!b) {
+ vex_prefix |= 0x20; // VEX.B.
+ }
+
+ // VEX.mmmmm.
+ switch (mmmmm) {
+ case 1:
+ // Implied 0F leading opcode byte.
+ vex_prefix |= 0x01;
+ break;
+ case 2:
+ // Implied leading 0F 38 opcode byte.
+ vex_prefix |= 0x02;
+ break;
+ case 3:
+ // Implied leading 0F 3A opcode byte.
+ vex_prefix |= 0x03;
+ break;
+ default:
+ LOG(FATAL) << "unknown opcode bytes";
+ }
+ return vex_prefix;
+}
+
+uint8_t X86Assembler::EmitVexByte2(bool w, int l, X86ManagedRegister operand, int pp) {
+ uint8_t vex_prefix = 0;
+ // VEX Byte 2.
+ if (w) {
+ vex_prefix |= 0x80;
+ }
+
+ // VEX.vvvv.
+ if (operand.IsXmmRegister()) {
+ XmmRegister vvvv = operand.AsXmmRegister();
+ int inverted_reg = 15 - static_cast<int>(vvvv);
+ uint8_t reg = static_cast<uint8_t>(inverted_reg);
+ vex_prefix |= ((reg & 0x0F) << 3);
+ } else if (operand.IsCpuRegister()) {
+ Register vvvv = operand.AsCpuRegister();
+ int inverted_reg = 15 - static_cast<int>(vvvv);
+ uint8_t reg = static_cast<uint8_t>(inverted_reg);
+ vex_prefix |= ((reg & 0x0F) << 3);
+ }
+
+ // VEX.L.
+ if (l == 256) {
+ vex_prefix |= 0x04;
+ }
+
+ // VEX.pp.
+ switch (pp) {
+ case 0:
+ // SIMD Prefix - None.
+ vex_prefix |= 0x00;
+ break;
+ case 1:
+ // SIMD Prefix - 66.
+ vex_prefix |= 0x01;
+ break;
+ case 2:
+ // SIMD Prefix - F3.
+ vex_prefix |= 0x02;
+ break;
+ case 3:
+ // SIMD Prefix - F2.
+ vex_prefix |= 0x03;
+ break;
+ default:
+ LOG(FATAL) << "unknown SIMD Prefix";
+ }
+
+ return vex_prefix;
+}
 void X86Assembler::EmitGenericShift(int reg_or_opcode,
 const Operand& operand,
 const Immediate& imm) {
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index e42c4c986a..8c9ce82687 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -397,6 +397,12 @@ class X86Assembler FINAL : public Assembler {
 void divss(XmmRegister dst, XmmRegister src);
 void divss(XmmRegister dst, const Address& src);
+ // FMA MAC (multiply-accumulate) instructions.
+ void vfmadd231ps(XmmRegister dst, XmmRegister src1, XmmRegister src2);
+ void vfmadd231pd(XmmRegister dst, XmmRegister src1, XmmRegister src2);
+ void vfmsub231ps(XmmRegister dst, XmmRegister src1, XmmRegister src2);
+ void vfmsub231pd(XmmRegister dst, XmmRegister src1, XmmRegister src2);
+
 void addps(XmmRegister dst, XmmRegister src); // no addr variant (for now)
 void subps(XmmRegister dst, XmmRegister src);
 void mulps(XmmRegister dst, XmmRegister src);
@@ -834,6 +840,11 @@ class X86Assembler FINAL : public Assembler {
 void EmitLabelLink(Label* label);
 void EmitLabelLink(NearLabel* label);
+ // Emit a 3-byte VEX prefix.
+ uint8_t EmitVexByteZero(bool is_two_byte);
+ uint8_t EmitVexByte1(bool r, bool x, bool b, int mmmmm);
+ uint8_t EmitVexByte2(bool w, int l, X86ManagedRegister vvv, int pp);
+
 void EmitGenericShift(int rm, const Operand& operand, const Immediate& imm);
 void EmitGenericShift(int rm, const Operand& operand, Register shifter);
diff --git a/compiler/utils/x86/jni_macro_assembler_x86.cc b/compiler/utils/x86/jni_macro_assembler_x86.cc
index 7e29c4aa26..dd99f03aa7 100644
--- a/compiler/utils/x86/jni_macro_assembler_x86.cc
+++ b/compiler/utils/x86/jni_macro_assembler_x86.cc
@@ -67,8 +67,7 @@ void X86JNIMacroAssembler::BuildFrame(size_t frame_size,
 cfi().AdjustCFAOffset(kFramePointerSize);
 DCHECK_EQ(static_cast<size_t>(cfi().GetCurrentCFAOffset()), frame_size);
- for (size_t i = 0; i < entry_spills.size(); ++i) {
- ManagedRegisterSpill spill = entry_spills.at(i);
+ for (const ManagedRegisterSpill& spill : entry_spills) {
 if (spill.AsX86().IsCpuRegister()) {
 int offset = frame_size + spill.getSpillOffset();
 __ movl(Address(ESP, offset), spill.AsX86().AsCpuRegister());
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index bd31561937..9983eaeeea 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -603,6 +603,56 @@ void X86_64Assembler::divss(XmmRegister dst, const Address& src) {
 }
+void X86_64Assembler::vfmadd231ps(XmmRegister acc, XmmRegister mul_left, XmmRegister mul_right) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ uint8_t byte_zero = EmitVexByteZero(false /*is_two_byte*/);
+ uint8_t byte_one = EmitVexByte1(acc.NeedsRex(), false, mul_right.NeedsRex(), 2);
+ uint8_t byte_two = EmitVexByte2(false, 128, X86_64ManagedRegister::FromXmmRegister(mul_left.AsFloatRegister()), 1);
+ EmitUint8(byte_zero);
+ EmitUint8(byte_one);
+ EmitUint8(byte_two);
+ // Opcode field.
+ EmitUint8(0xB8);
+ EmitXmmRegisterOperand(acc.LowBits(), mul_right);
+}
+
+
+void X86_64Assembler::vfmsub231ps(XmmRegister acc, XmmRegister mul_left, XmmRegister mul_right) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ uint8_t byte_zero = EmitVexByteZero(false /*is_two_byte*/);
+ uint8_t byte_one = EmitVexByte1(acc.NeedsRex(), false, mul_right.NeedsRex(), 2);
+ uint8_t byte_two = EmitVexByte2(false, 128, X86_64ManagedRegister::FromXmmRegister(mul_left.AsFloatRegister()), 1);
+ EmitUint8(byte_zero);
+ EmitUint8(byte_one);
+ EmitUint8(byte_two);
+ // Opcode field.
+ EmitUint8(0xBA);
+ EmitXmmRegisterOperand(acc.LowBits(), mul_right);
+}
+
+void X86_64Assembler::vfmadd231pd(XmmRegister acc, XmmRegister mul_left, XmmRegister mul_right) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ uint8_t byte_zero = EmitVexByteZero(false /*is_two_byte*/);
+ uint8_t byte_one = EmitVexByte1(acc.NeedsRex(), false, mul_right.NeedsRex(), 2);
+ uint8_t byte_two = EmitVexByte2(true, 128, X86_64ManagedRegister::FromXmmRegister(mul_left.AsFloatRegister()), 1);
+ EmitUint8(byte_zero);
+ EmitUint8(byte_one);
+ EmitUint8(byte_two);
+ EmitUint8(0xB8);
+ EmitXmmRegisterOperand(acc.LowBits(), mul_right);
+}
+
+void X86_64Assembler::vfmsub231pd(XmmRegister acc, XmmRegister mul_left, XmmRegister mul_right) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ uint8_t byte_zero = EmitVexByteZero(false /*is_two_byte*/);
+ uint8_t byte_one = EmitVexByte1(acc.NeedsRex(), false, mul_right.NeedsRex(), 2);
+ uint8_t byte_two = EmitVexByte2(true, 128, X86_64ManagedRegister::FromXmmRegister(mul_left.AsFloatRegister()), 1);
+ EmitUint8(byte_zero);
+ EmitUint8(byte_one);
+ EmitUint8(byte_two);
+ EmitUint8(0xBA);
+ EmitXmmRegisterOperand(acc.LowBits(), mul_right);
+}
 void X86_64Assembler::addps(XmmRegister dst, XmmRegister src) {
 AssemblerBuffer::EnsureCapacity ensured(&buffer_);
 EmitOptionalRex32(dst, src);
@@ -3544,6 +3594,98 @@ void X86_64Assembler::EmitLabelLink(NearLabel* label) {
 label->LinkTo(position);
 }
+uint8_t X86_64Assembler::EmitVexByteZero(bool is_two_byte) {
+ uint8_t vex_zero = 0xC0;
+ if (!is_two_byte) {
+ vex_zero |= 0xC4;
+ } else {
+ vex_zero |= 0xC5;
+ }
+ return vex_zero;
+}
+
+uint8_t X86_64Assembler::EmitVexByte1(bool r, bool x, bool b, int mmmmm) {
+ // VEX Byte 1.
+ uint8_t vex_prefix = 0;
+ if (!r) {
+ vex_prefix |= 0x80; // VEX.R.
+ }
+ if (!x) {
+ vex_prefix |= 0x40; // VEX.X.
+ }
+ if (!b) {
+ vex_prefix |= 0x20; // VEX.B.
+ }
+
+ // VEX.mmmmm.
+ switch (mmmmm) {
+ case 1:
+ // Implied 0F leading opcode byte.
+ vex_prefix |= 0x01;
+ break;
+ case 2:
+ // Implied leading 0F 38 opcode byte.
+ vex_prefix |= 0x02;
+ break;
+ case 3:
+ // Implied leading 0F 3A opcode byte.
+ vex_prefix |= 0x03;
+ break;
+ default:
+ LOG(FATAL) << "unknown opcode bytes";
+ }
+
+ return vex_prefix;
+}
+
+uint8_t X86_64Assembler::EmitVexByte2(bool w, int l, X86_64ManagedRegister operand, int pp) {
+ // VEX Byte 2.
+ uint8_t vex_prefix = 0;
+ if (w) {
+ vex_prefix |= 0x80;
+ }
+ // VEX.vvvv.
+ if (operand.IsXmmRegister()) {
+ XmmRegister vvvv = operand.AsXmmRegister();
+ int inverted_reg = 15 - static_cast<int>(vvvv.AsFloatRegister());
+ uint8_t reg = static_cast<uint8_t>(inverted_reg);
+ vex_prefix |= ((reg & 0x0F) << 3);
+ } else if (operand.IsCpuRegister()) {
+ CpuRegister vvvv = operand.AsCpuRegister();
+ int inverted_reg = 15 - static_cast<int>(vvvv.AsRegister());
+ uint8_t reg = static_cast<uint8_t>(inverted_reg);
+ vex_prefix |= ((reg & 0x0F) << 3);
+ }
+
+ // VEX.L.
+ if (l == 256) {
+ vex_prefix |= 0x04;
+ }
+
+ // VEX.pp.
+ switch (pp) {
+ case 0:
+ // SIMD Prefix - None.
+ vex_prefix |= 0x00;
+ break;
+ case 1:
+ // SIMD Prefix - 66.
+ vex_prefix |= 0x01;
+ break;
+ case 2:
+ // SIMD Prefix - F3.
+ vex_prefix |= 0x02;
+ break;
+ case 3:
+ // SIMD Prefix - F2.
+ vex_prefix |= 0x03;
+ break;
+ default:
+ LOG(FATAL) << "unknown SIMD Prefix";
+ }
+
+ return vex_prefix;
+}
 void X86_64Assembler::EmitGenericShift(bool wide,
 int reg_or_opcode,
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index e4d72a7ba2..d5779aa786 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -436,6 +436,16 @@ class X86_64Assembler FINAL : public Assembler {
 void divss(XmmRegister dst, XmmRegister src);
 void divss(XmmRegister dst, const Address& src);
+ // MAC (multiply-accumulate) instructions.
+ // For reference, see the Intel instruction set reference, Volume 2C.
+ // The URL below is split across two lines.
+ // https://www.intel.com/content/www/us/en/architecture-and-technology/
+ // 64-ia-32-architectures-software-developer-vol-2c-manual.html
+ void vfmadd231ps(XmmRegister acc, XmmRegister left, XmmRegister right);
+ void vfmadd231pd(XmmRegister acc, XmmRegister left, XmmRegister right);
+ void vfmsub231ps(XmmRegister acc, XmmRegister left, XmmRegister right);
+ void vfmsub231pd(XmmRegister acc, XmmRegister left, XmmRegister right);
+
 void addps(XmmRegister dst, XmmRegister src); // no addr variant (for now)
 void subps(XmmRegister dst, XmmRegister src);
 void mulps(XmmRegister dst, XmmRegister src);
@@ -921,6 +931,11 @@ class X86_64Assembler FINAL : public Assembler {
 void EmitLabelLink(Label* label);
 void EmitLabelLink(NearLabel* label);
+ // Emit a 3-byte VEX prefix.
+ uint8_t EmitVexByteZero(bool is_two_byte);
+ uint8_t EmitVexByte1(bool r, bool x, bool b, int mmmmm);
+ uint8_t EmitVexByte2(bool w, int l, X86_64ManagedRegister operand, int pp);
+
 void EmitGenericShift(bool wide, int rm, CpuRegister reg, const Immediate& imm);
 void EmitGenericShift(bool wide, int rm, CpuRegister operand, CpuRegister shifter);
diff --git a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
index 9486cb44c5..f6b2f9df34 100644
--- a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
+++ b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
@@ -75,8 +75,7 @@ void X86_64JNIMacroAssembler::BuildFrame(size_t frame_size,
 __ movq(Address(CpuRegister(RSP), 0), method_reg.AsX86_64().AsCpuRegister());
- for (size_t i = 0; i < entry_spills.size(); ++i) {
- ManagedRegisterSpill spill = entry_spills.at(i);
+ for (const ManagedRegisterSpill& spill : entry_spills) {
 if (spill.AsX86_64().IsCpuRegister()) {
 if (spill.getSize() == 8) {
 __ movq(Address(CpuRegister(RSP), frame_size + spill.getSpillOffset()),
diff --git a/compiler/verifier_deps_test.cc b/compiler/verifier_deps_test.cc
index 3fe2ec0ac0..c223549710 100644
--- a/compiler/verifier_deps_test.cc
+++ b/compiler/verifier_deps_test.cc
@@ -129,7 +129,7 @@ class VerifierDepsTest : public CommonCompilerTest {
 for (const DexFile* dex_file : dex_files_) {
 compiler_driver_->GetVerificationResults()->AddDexFile(dex_file);
 }
- compiler_driver_->SetDexFilesForOatFile(dex_files_);
+ SetDexFilesForOatFile(dex_files_);
 }
 void LoadDexFile(ScopedObjectAccess& soa) REQUIRES_SHARED(Locks::mutator_lock_) {
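
A note on the VEX helpers added above: EmitVexByteZero/EmitVexByte1/EmitVexByte2 compose the 3-byte VEX prefix (escape byte 0xC4; then the inverted R/X/B extension bits plus the mmmmm opcode-map selector; then W, the ones-complement vvvv register field, L, and the pp implied-prefix selector). As a cross-check of that bit layout, here is a minimal standalone C++ sketch that re-derives the expected bytes for vfmadd231ps xmm1, xmm2, xmm3 under the VEX.128.66.0F38.W0 B8 /r form from the Intel manual. This is an illustrative re-implementation, not ART code; the names VexByte1, VexByte2, and ModRM are invented for the example.

#include <cstdint>
#include <cstdio>

// Byte 1: inverted R/X/B extension bits plus the mmmmm opcode-map
// selector (0F = 1, 0F 38 = 2, 0F 3A = 3), same layout as EmitVexByte1.
uint8_t VexByte1(bool r, bool x, bool b, int mmmmm) {
  uint8_t v = 0;
  if (!r) v |= 0x80;  // VEX.R, stored inverted.
  if (!x) v |= 0x40;  // VEX.X, stored inverted.
  if (!b) v |= 0x20;  // VEX.B, stored inverted.
  return v | static_cast<uint8_t>(mmmmm);
}

// Byte 2: W, the ones-complement second-source register in vvvv,
// L (0 = 128-bit), and the pp implied-prefix selector (1 = 0x66).
uint8_t VexByte2(bool w, int l, int src2_reg, int pp) {
  uint8_t v = w ? 0x80 : 0x00;
  v |= static_cast<uint8_t>(((15 - src2_reg) & 0x0F) << 3);  // VEX.vvvv, inverted.
  if (l == 256) v |= 0x04;                                   // VEX.L.
  return v | static_cast<uint8_t>(pp);                       // VEX.pp.
}

// ModRM with mod = 11 (register-direct), as EmitXmmRegisterOperand emits.
uint8_t ModRM(int reg, int rm) {
  return static_cast<uint8_t>(0xC0 | (reg << 3) | rm);
}

int main() {
  // vfmadd231ps xmm1, xmm2, xmm3 (VEX.128.66.0F38.W0 B8 /r):
  // xmm1 = accumulator (ModRM.reg), xmm2 = VEX.vvvv, xmm3 = ModRM.rm.
  uint8_t code[5] = {
      0xC4,                              // 3-byte VEX escape.
      VexByte1(false, false, false, 2),  // 0xE2: no REX extensions, 0F 38 map.
      VexByte2(false, 128, 2, 1),        // 0x69: W0, vvvv = ~2, L = 0, pp = 66.
      0xB8,                              // VFMADD231PS opcode.
      ModRM(1, 3),                       // 0xCB: xmm1, xmm3.
  };
  for (uint8_t byte : code) std::printf("%02X ", byte);  // C4 E2 69 B8 CB
  std::printf("\n");
  return 0;
}

The sketch prints C4 E2 69 B8 CB, which should match what the new emitters produce for the same operand triple: the helpers receive un-inverted r/x/b flags (the x86-64 variants pass NeedsRex()) and invert them internally, matching VEX's ones-complement convention, and vvvv carries the ones-complement register number of the second multiplicand.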