169 files changed, 10069 insertions, 1805 deletions
diff --git a/Android.mk b/Android.mk index fcf70ff2eb..0d0003abb0 100644 --- a/Android.mk +++ b/Android.mk @@ -122,6 +122,16 @@ include $(art_path)/build/Android.gtest.mk include $(art_path)/test/Android.run-test.mk include $(art_path)/benchmark/Android.mk +TEST_ART_ADB_ROOT_AND_REMOUNT := \ + (adb root && \ + adb wait-for-device remount && \ + ((adb shell touch /system/testfile && \ + (adb shell rm /system/testfile || true)) || \ + (adb disable-verity && \ + adb reboot && \ + adb wait-for-device root && \ + adb wait-for-device remount))) + # Sync test files to the target, depends upon all things that must be pushed to the target. .PHONY: test-art-target-sync # Check if we need to sync. In case ART_TEST_ANDROID_ROOT is not empty, @@ -130,12 +140,11 @@ include $(art_path)/benchmark/Android.mk ifneq ($(ART_TEST_NO_SYNC),true) ifeq ($(ART_TEST_ANDROID_ROOT),) test-art-target-sync: $(TEST_ART_TARGET_SYNC_DEPS) - adb root - adb wait-for-device remount + $(TEST_ART_ADB_ROOT_AND_REMOUNT) adb sync else test-art-target-sync: $(TEST_ART_TARGET_SYNC_DEPS) - adb root + $(TEST_ART_ADB_ROOT_AND_REMOUNT) adb wait-for-device push $(ANDROID_PRODUCT_OUT)/system $(ART_TEST_ANDROID_ROOT) adb push $(ANDROID_PRODUCT_OUT)/data /data endif @@ -374,8 +383,7 @@ oat-target: $(ART_TARGET_DEPENDENCIES) $(DEFAULT_DEX_PREOPT_INSTALLED_IMAGE) $(O .PHONY: oat-target-sync oat-target-sync: oat-target - adb root - adb wait-for-device remount + $(TEST_ART_ADB_ROOT_AND_REMOUNT) adb sync ######################################################################## diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk index cd9d18d578..a93d8a882a 100644 --- a/build/Android.common_build.mk +++ b/build/Android.common_build.mk @@ -118,7 +118,8 @@ endif ART_TARGET_CLANG_arm := false ART_TARGET_CLANG_arm64 := ART_TARGET_CLANG_mips := -ART_TARGET_CLANG_mips64 := +# b/25928358, illegal instruction on mips64r6 with -O0 +ART_TARGET_CLANG_mips64 := false ART_TARGET_CLANG_x86 := ART_TARGET_CLANG_x86_64 := diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk index 0afec2d5ef..dcde5abbca 100644 --- a/build/Android.gtest.mk +++ b/build/Android.gtest.mk @@ -188,6 +188,7 @@ RUNTIME_GTEST_COMMON_SRC_FILES := \ runtime/gc/accounting/card_table_test.cc \ runtime/gc/accounting/mod_union_table_test.cc \ runtime/gc/accounting/space_bitmap_test.cc \ + runtime/gc/collector/immune_spaces_test.cc \ runtime/gc/heap_test.cc \ runtime/gc/reference_queue_test.cc \ runtime/gc/space/dlmalloc_space_base_test.cc \ diff --git a/cmdline/cmdline_parser_test.cc b/cmdline/cmdline_parser_test.cc index f34b5edcc4..529143d93d 100644 --- a/cmdline/cmdline_parser_test.cc +++ b/cmdline/cmdline_parser_test.cc @@ -457,8 +457,10 @@ TEST_F(CmdlineParserTest, TestJitOptions) { EXPECT_SINGLE_PARSE_VALUE(false, "-Xusejit:false", M::UseJIT); } { - EXPECT_SINGLE_PARSE_VALUE(MemoryKiB(16 * KB), "-Xjitcodecachesize:16K", M::JITCodeCacheCapacity); - EXPECT_SINGLE_PARSE_VALUE(MemoryKiB(16 * MB), "-Xjitcodecachesize:16M", M::JITCodeCacheCapacity); + EXPECT_SINGLE_PARSE_VALUE( + MemoryKiB(16 * KB), "-Xjitinitialsize:16K", M::JITCodeCacheInitialCapacity); + EXPECT_SINGLE_PARSE_VALUE( + MemoryKiB(16 * MB), "-Xjitmaxsize:16M", M::JITCodeCacheMaxCapacity); } { EXPECT_SINGLE_PARSE_VALUE(12345u, "-Xjitthreshold:12345", M::JITCompileThreshold); diff --git a/compiler/Android.mk b/compiler/Android.mk index 42ddfd83ab..564bd7e7bb 100644 --- a/compiler/Android.mk +++ b/compiler/Android.mk @@ -81,6 +81,7 @@ LIBART_COMPILER_SRC_FILES := \ optimizing/load_store_elimination.cc \ 
optimizing/locations.cc \ optimizing/nodes.cc \ + optimizing/nodes_arm64.cc \ optimizing/optimization.cc \ optimizing/optimizing_compiler.cc \ optimizing/parallel_move_resolver.cc \ @@ -219,7 +220,8 @@ LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES_mips := \ utils/mips/assembler_mips.h LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES_mips64 := \ - $(LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES_mips) + $(LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES_mips) \ + utils/mips64/assembler_mips64.h LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES_x86 := LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES_x86_64 := \ diff --git a/compiler/common_compiler_test.h b/compiler/common_compiler_test.h index a121f8b7a0..1b57b7d1d2 100644 --- a/compiler/common_compiler_test.h +++ b/compiler/common_compiler_test.h @@ -128,6 +128,8 @@ class CommonCompilerTest : public CommonRuntimeTest { #define TEST_DISABLED_FOR_READ_BARRIER_WITH_OPTIMIZING_FOR_UNSUPPORTED_INSTRUCTION_SETS() \ if (kUseReadBarrier && GetCompilerKind() == Compiler::kOptimizing) { \ switch (GetInstructionSet()) { \ + case kArm64: \ + case kThumb2: \ case kX86: \ case kX86_64: \ /* Instruction set has read barrier support. */ \ diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index e42a73723b..d67087edd9 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -1114,25 +1114,23 @@ bool CompilerDriver::CanAssumeClassIsLoaded(mirror::Class* klass) { } bool CompilerDriver::CanAssumeTypeIsPresentInDexCache(const DexFile& dex_file, uint32_t type_idx) { - if (IsBootImage() && - IsImageClass(dex_file.StringDataByIdx(dex_file.GetTypeId(type_idx).descriptor_idx_))) { - { - ScopedObjectAccess soa(Thread::Current()); - mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache( - soa.Self(), dex_file, false); - mirror::Class* resolved_class = dex_cache->GetResolvedType(type_idx); - if (resolved_class == nullptr) { - // Erroneous class. - stats_->TypeNotInDexCache(); - return false; - } - } + bool result = false; + if ((IsBootImage() && + IsImageClass(dex_file.StringDataByIdx(dex_file.GetTypeId(type_idx).descriptor_idx_))) || + Runtime::Current()->UseJit()) { + ScopedObjectAccess soa(Thread::Current()); + mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache( + soa.Self(), dex_file, false); + mirror::Class* resolved_class = dex_cache->GetResolvedType(type_idx); + result = (resolved_class != nullptr); + } + + if (result) { stats_->TypeInDexCache(); - return true; } else { stats_->TypeNotInDexCache(); - return false; } + return result; } bool CompilerDriver::CanAssumeStringIsPresentInDexCache(const DexFile& dex_file, diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h index dae785b688..d90d6100b9 100644 --- a/compiler/driver/compiler_driver.h +++ b/compiler/driver/compiler_driver.h @@ -482,6 +482,10 @@ class CompilerDriver { return &compiled_method_storage_; } + // Can we assume that the klass is loaded? + bool CanAssumeClassIsLoaded(mirror::Class* klass) + SHARED_REQUIRES(Locks::mutator_lock_); + private: // Return whether the declaring class of `resolved_member` is // available to `referrer_class` for read or write access using two @@ -516,10 +520,6 @@ class CompilerDriver { bool CanReferrerAssumeClassIsInitialized(mirror::Class* referrer_class, mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_); - // Can we assume that the klass is loaded? 
- bool CanAssumeClassIsLoaded(mirror::Class* klass) - SHARED_REQUIRES(Locks::mutator_lock_); - // These flags are internal to CompilerDriver for collecting INVOKE resolution statistics. // The only external contract is that unresolved method has flags 0 and resolved non-0. enum { diff --git a/compiler/dwarf/debug_info_entry_writer.h b/compiler/dwarf/debug_info_entry_writer.h index d9b367bdf1..a551e4b495 100644 --- a/compiler/dwarf/debug_info_entry_writer.h +++ b/compiler/dwarf/debug_info_entry_writer.h @@ -20,6 +20,7 @@ #include <cstdint> #include <unordered_map> +#include "base/casts.h" #include "dwarf/dwarf_constants.h" #include "dwarf/writer.h" #include "leb128.h" @@ -47,9 +48,9 @@ struct FNVHash { * It also handles generation of abbreviations. * * Usage: - * StartTag(DW_TAG_compile_unit, DW_CHILDREN_yes); + * StartTag(DW_TAG_compile_unit); * WriteStrp(DW_AT_producer, "Compiler name", debug_str); - * StartTag(DW_TAG_subprogram, DW_CHILDREN_no); + * StartTag(DW_TAG_subprogram); * WriteStrp(DW_AT_name, "Foo", debug_str); * EndTag(); * EndTag(); @@ -59,36 +60,40 @@ class DebugInfoEntryWriter FINAL : private Writer<Vector> { static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type"); public: + static constexpr size_t kCompilationUnitHeaderSize = 11; + // Start debugging information entry. - void StartTag(Tag tag, Children children) { - DCHECK(has_children) << "This tag can not have nested tags"; + // Returns offset of the entry in compilation unit. + size_t StartTag(Tag tag) { if (inside_entry_) { // Write abbrev code for the previous entry. - this->UpdateUleb128(abbrev_code_offset_, EndAbbrev()); + // Parent entry is finalized before any children are written. + this->UpdateUleb128(abbrev_code_offset_, EndAbbrev(DW_CHILDREN_yes)); inside_entry_ = false; } - StartAbbrev(tag, children); + StartAbbrev(tag); // Abbrev code placeholder of sufficient size. abbrev_code_offset_ = this->data()->size(); this->PushUleb128(NextAbbrevCode()); depth_++; inside_entry_ = true; - has_children = (children == DW_CHILDREN_yes); + return abbrev_code_offset_ + kCompilationUnitHeaderSize; } // End debugging information entry. void EndTag() { DCHECK_GT(depth_, 0); if (inside_entry_) { - // Write abbrev code for this tag. - this->UpdateUleb128(abbrev_code_offset_, EndAbbrev()); + // Write abbrev code for this entry. + this->UpdateUleb128(abbrev_code_offset_, EndAbbrev(DW_CHILDREN_no)); inside_entry_ = false; - } - if (has_children) { - this->PushUint8(0); // End of children. + // This entry has no children and so there is no terminator. + } else { + // The entry has been already finalized so it must be parent entry + // and we need to write the terminator required by DW_CHILDREN_yes. + this->PushUint8(0); } depth_--; - has_children = true; // Parent tag obviously has children. 
} void WriteAddr(Attribute attrib, uint64_t value) { @@ -101,10 +106,16 @@ class DebugInfoEntryWriter FINAL : private Writer<Vector> { } } - void WriteBlock(Attribute attrib, const void* ptr, int size) { + void WriteBlock(Attribute attrib, const void* ptr, size_t num_bytes) { AddAbbrevAttribute(attrib, DW_FORM_block); - this->PushUleb128(size); - this->PushData(ptr, size); + this->PushUleb128(num_bytes); + this->PushData(ptr, num_bytes); + } + + void WriteExprLoc(Attribute attrib, const void* ptr, size_t num_bytes) { + AddAbbrevAttribute(attrib, DW_FORM_exprloc); + this->PushUleb128(dchecked_integral_cast<uint32_t>(num_bytes)); + this->PushData(ptr, num_bytes); } void WriteData1(Attribute attrib, uint8_t value) { @@ -127,6 +138,11 @@ class DebugInfoEntryWriter FINAL : private Writer<Vector> { this->PushUint64(value); } + void WriteSecOffset(Attribute attrib, uint32_t offset) { + AddAbbrevAttribute(attrib, DW_FORM_sec_offset); + this->PushUint32(offset); + } + void WriteSdata(Attribute attrib, int value) { AddAbbrevAttribute(attrib, DW_FORM_sdata); this->PushSleb128(value); @@ -147,12 +163,12 @@ class DebugInfoEntryWriter FINAL : private Writer<Vector> { this->PushUint8(value ? 1 : 0); } - void WriteRef4(Attribute attrib, int cu_offset) { + void WriteRef4(Attribute attrib, uint32_t cu_offset) { AddAbbrevAttribute(attrib, DW_FORM_ref4); this->PushUint32(cu_offset); } - void WriteRef(Attribute attrib, int cu_offset) { + void WriteRef(Attribute attrib, uint32_t cu_offset) { AddAbbrevAttribute(attrib, DW_FORM_ref_udata); this->PushUleb128(cu_offset); } @@ -162,16 +178,21 @@ class DebugInfoEntryWriter FINAL : private Writer<Vector> { this->PushString(value); } - void WriteStrp(Attribute attrib, int address) { + void WriteStrp(Attribute attrib, size_t debug_str_offset) { AddAbbrevAttribute(attrib, DW_FORM_strp); - this->PushUint32(address); + this->PushUint32(dchecked_integral_cast<uint32_t>(debug_str_offset)); } - void WriteStrp(Attribute attrib, const char* value, std::vector<uint8_t>* debug_str) { + void WriteStrp(Attribute attrib, const char* str, size_t len, + std::vector<uint8_t>* debug_str) { AddAbbrevAttribute(attrib, DW_FORM_strp); - int address = debug_str->size(); - debug_str->insert(debug_str->end(), value, value + strlen(value) + 1); - this->PushUint32(address); + this->PushUint32(debug_str->size()); + debug_str->insert(debug_str->end(), str, str + len); + debug_str->push_back(0); + } + + void WriteStrp(Attribute attrib, const char* str, std::vector<uint8_t>* debug_str) { + WriteStrp(attrib, str, strlen(str), debug_str); } bool Is64bit() const { return is64bit_; } @@ -180,7 +201,11 @@ class DebugInfoEntryWriter FINAL : private Writer<Vector> { return patch_locations_; } + int Depth() const { return depth_; } + using Writer<Vector>::data; + using Writer<Vector>::size; + using Writer<Vector>::UpdateUint32; DebugInfoEntryWriter(bool is64bitArch, Vector* debug_abbrev, @@ -196,16 +221,17 @@ class DebugInfoEntryWriter FINAL : private Writer<Vector> { } ~DebugInfoEntryWriter() { + DCHECK(!inside_entry_); DCHECK_EQ(depth_, 0); } private: // Start abbreviation declaration. - void StartAbbrev(Tag tag, Children children) { - DCHECK(!inside_entry_); + void StartAbbrev(Tag tag) { current_abbrev_.clear(); EncodeUnsignedLeb128(&current_abbrev_, tag); - current_abbrev_.push_back(children); + has_children_offset_ = current_abbrev_.size(); + current_abbrev_.push_back(0); // Place-holder for DW_CHILDREN. } // Add attribute specification.
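In the DebugInfoEntryWriter change above, the DW_CHILDREN flag is no longer passed to StartTag; a placeholder byte is reserved in the abbreviation and patched once it is known whether a nested entry was opened before EndTag(). A standalone sketch of that deferral pattern (illustrative names, not the ART class):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    class TagSketch {
     public:
      void StartTag(uint8_t tag) {
        if (open_) {
          buffer_[flag_offset_] = 1;   // A child appeared: parent is DW_CHILDREN_yes.
          open_ = false;
        }
        buffer_.push_back(tag);
        flag_offset_ = buffer_.size();
        buffer_.push_back(0);          // Placeholder for the children flag.
        open_ = true;
      }
      void EndTag() {
        if (open_) {
          buffer_[flag_offset_] = 0;   // No child was opened: DW_CHILDREN_no.
          open_ = false;
        }
      }
      const std::vector<uint8_t>& data() const { return buffer_; }
     private:
      std::vector<uint8_t> buffer_;
      std::size_t flag_offset_ = 0;
      bool open_ = false;
    };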
@@ -220,8 +246,9 @@ class DebugInfoEntryWriter FINAL : private Writer<Vector> { } // End abbreviation declaration and return its code. - int EndAbbrev() { - DCHECK(inside_entry_); + int EndAbbrev(Children has_children) { + DCHECK(!current_abbrev_.empty()); + current_abbrev_[has_children_offset_] = has_children; auto it = abbrev_codes_.insert(std::make_pair(std::move(current_abbrev_), NextAbbrevCode())); int abbrev_code = it.first->second; @@ -241,6 +268,7 @@ class DebugInfoEntryWriter FINAL : private Writer<Vector> { // Fields for writing and deduplication of abbrevs. Writer<Vector> debug_abbrev_; Vector current_abbrev_; + size_t has_children_offset_ = 0; std::unordered_map<Vector, int, FNVHash<Vector> > abbrev_codes_; @@ -250,7 +278,6 @@ class DebugInfoEntryWriter FINAL : private Writer<Vector> { int depth_ = 0; size_t abbrev_code_offset_ = 0; // Location to patch once we know the code. bool inside_entry_ = false; // Entry ends at first child (if any). - bool has_children = true; std::vector<uintptr_t> patch_locations_; }; diff --git a/compiler/dwarf/dedup_vector.h b/compiler/dwarf/dedup_vector.h new file mode 100644 index 0000000000..7fb21b76e2 --- /dev/null +++ b/compiler/dwarf/dedup_vector.h @@ -0,0 +1,67 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_DWARF_DEDUP_VECTOR_H_ +#define ART_COMPILER_DWARF_DEDUP_VECTOR_H_ + +#include <vector> +#include <unordered_map> + +namespace art { +namespace dwarf { + class DedupVector { + public: + // Returns an offset to previously inserted identical block of data, + // or appends the data at the end of the vector and returns offset to it. + size_t Insert(const uint8_t* ptr, size_t num_bytes) { + // See http://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function + uint32_t hash = 2166136261u; + for (size_t i = 0; i < num_bytes; i++) { + hash = (hash ^ ptr[i]) * 16777619u; + } + // Try to find existing copy of the data. + const auto& range = hash_to_offset_.equal_range(hash); + for (auto it = range.first; it != range.second; ++it) { + const size_t offset = it->second; + if (offset + num_bytes <= vector_.size() && + memcmp(vector_.data() + offset, ptr, num_bytes) == 0) { + return offset; + } + } + // Append the data at the end of the vector. + const size_t new_offset = vector_.size(); + hash_to_offset_.emplace(hash, new_offset); + vector_.insert(vector_.end(), ptr, ptr + num_bytes); + return new_offset; + } + + const std::vector<uint8_t>& Data() const { return vector_; } + + private: + struct IdentityHash { + size_t operator()(uint32_t v) const { return v; } + }; + + // We store the full hash as the key to simplify growing of the table. + // It avoids storing or referencing the actual data in the hash-table. 
+ std::unordered_multimap<uint32_t, size_t, IdentityHash> hash_to_offset_; + + std::vector<uint8_t> vector_; + }; +} // namespace dwarf +} // namespace art + +#endif // ART_COMPILER_DWARF_DEDUP_VECTOR_H_ diff --git a/compiler/dwarf/dwarf_test.cc b/compiler/dwarf/dwarf_test.cc index 6bb22eda2f..e9cd421da9 100644 --- a/compiler/dwarf/dwarf_test.cc +++ b/compiler/dwarf/dwarf_test.cc @@ -285,7 +285,7 @@ TEST_F(DwarfTest, DebugInfo) { constexpr bool is64bit = false; DebugInfoEntryWriter<> info(is64bit, &debug_abbrev_data_); DW_CHECK("Contents of the .debug_info section:"); - info.StartTag(dwarf::DW_TAG_compile_unit, dwarf::DW_CHILDREN_yes); + info.StartTag(dwarf::DW_TAG_compile_unit); DW_CHECK("Abbrev Number: 1 (DW_TAG_compile_unit)"); info.WriteStrp(dwarf::DW_AT_producer, "Compiler name", &debug_str_data_); DW_CHECK_NEXT("DW_AT_producer : (indirect string, offset: 0x0): Compiler name"); @@ -293,7 +293,7 @@ TEST_F(DwarfTest, DebugInfo) { DW_CHECK_NEXT("DW_AT_low_pc : 0x1000000"); info.WriteAddr(dwarf::DW_AT_high_pc, 0x02000000); DW_CHECK_NEXT("DW_AT_high_pc : 0x2000000"); - info.StartTag(dwarf::DW_TAG_subprogram, dwarf::DW_CHILDREN_no); + info.StartTag(dwarf::DW_TAG_subprogram); DW_CHECK("Abbrev Number: 2 (DW_TAG_subprogram)"); info.WriteStrp(dwarf::DW_AT_name, "Foo", &debug_str_data_); DW_CHECK_NEXT("DW_AT_name : (indirect string, offset: 0xe): Foo"); @@ -302,7 +302,7 @@ TEST_F(DwarfTest, DebugInfo) { info.WriteAddr(dwarf::DW_AT_high_pc, 0x01020000); DW_CHECK_NEXT("DW_AT_high_pc : 0x1020000"); info.EndTag(); // DW_TAG_subprogram - info.StartTag(dwarf::DW_TAG_subprogram, dwarf::DW_CHILDREN_no); + info.StartTag(dwarf::DW_TAG_subprogram); DW_CHECK("Abbrev Number: 2 (DW_TAG_subprogram)"); info.WriteStrp(dwarf::DW_AT_name, "Bar", &debug_str_data_); DW_CHECK_NEXT("DW_AT_name : (indirect string, offset: 0x12): Bar"); @@ -313,7 +313,7 @@ TEST_F(DwarfTest, DebugInfo) { info.EndTag(); // DW_TAG_subprogram info.EndTag(); // DW_TAG_compile_unit // Test that previous list was properly terminated and empty children. - info.StartTag(dwarf::DW_TAG_compile_unit, dwarf::DW_CHILDREN_yes); + info.StartTag(dwarf::DW_TAG_compile_unit); info.EndTag(); // DW_TAG_compile_unit // The abbrev table is just side product, but check it as well. @@ -327,7 +327,7 @@ TEST_F(DwarfTest, DebugInfo) { DW_CHECK_NEXT("DW_AT_name DW_FORM_strp"); DW_CHECK_NEXT("DW_AT_low_pc DW_FORM_addr"); DW_CHECK_NEXT("DW_AT_high_pc DW_FORM_addr"); - DW_CHECK("3 DW_TAG_compile_unit [has children]"); + DW_CHECK("3 DW_TAG_compile_unit [no children]"); std::vector<uintptr_t> debug_info_patches; std::vector<uintptr_t> expected_patches { 16, 20, 29, 33, 42, 46 }; // NOLINT diff --git a/compiler/dwarf/headers.h b/compiler/dwarf/headers.h index 633e2f7d88..f76f76f7b6 100644 --- a/compiler/dwarf/headers.h +++ b/compiler/dwarf/headers.h @@ -134,10 +134,11 @@ void WriteDebugInfoCU(uint32_t debug_abbrev_offset, Writer<> writer(debug_info); size_t start = writer.data()->size(); writer.PushUint32(0); // Length placeholder. - writer.PushUint16(3); // Version. + writer.PushUint16(4); // Version. writer.PushUint32(debug_abbrev_offset); writer.PushUint8(entries.Is64bit() ? 8 : 4); size_t entries_offset = writer.data()->size(); + DCHECK_EQ(entries_offset, DebugInfoEntryWriter<Vector>::kCompilationUnitHeaderSize); writer.PushData(*entries.data()); writer.UpdateUint32(start, writer.data()->size() - start - 4); // Copy patch locations and make them relative to .debug_info section. 
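The DCHECK added to WriteDebugInfoCU above relies on the 32-bit DWARF compilation unit header being exactly kCompilationUnitHeaderSize = 11 bytes. A minimal illustration of that arithmetic (illustrative constants, not ART code):

    #include <cstddef>

    // unit_length (4) + version (2) + debug_abbrev_offset (4) + address_size (1).
    constexpr std::size_t kUnitLength = 4;
    constexpr std::size_t kVersion = 2;
    constexpr std::size_t kDebugAbbrevOffset = 4;
    constexpr std::size_t kAddressSize = 1;
    static_assert(kUnitLength + kVersion + kDebugAbbrevOffset + kAddressSize == 11,
                  "32-bit DWARF compilation unit header is 11 bytes");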
@@ -166,10 +167,7 @@ void WriteDebugLineTable(const std::vector<std::string>& include_directories, Writer<> writer(debug_line); size_t header_start = writer.data()->size(); writer.PushUint32(0); // Section-length placeholder. - // Claim DWARF-2 version even though we use some DWARF-3 features. - // DWARF-2 consumers will ignore the unknown opcodes. - // This is what clang currently does. - writer.PushUint16(2); // .debug_line version. + writer.PushUint16(3); // .debug_line version. size_t header_length_pos = writer.data()->size(); writer.PushUint32(0); // Header-length placeholder. writer.PushUint8(1 << opcodes.GetCodeFactorBits()); diff --git a/compiler/dwarf/register.h b/compiler/dwarf/register.h index 70452377dd..b67e8ddc9d 100644 --- a/compiler/dwarf/register.h +++ b/compiler/dwarf/register.h @@ -35,9 +35,10 @@ class Reg { // Arm64 mapping is correct since we already do this there. // libunwind might struggle with the new mapping as well. - static Reg ArmCore(int num) { return Reg(num); } + static Reg ArmCore(int num) { return Reg(num); } // R0-R15. static Reg ArmFp(int num) { return Reg(64 + num); } // S0–S31. - static Reg Arm64Core(int num) { return Reg(num); } + static Reg ArmDp(int num) { return Reg(256 + num); } // D0–D31. + static Reg Arm64Core(int num) { return Reg(num); } // X0-X31. static Reg Arm64Fp(int num) { return Reg(64 + num); } // V0-V31. static Reg MipsCore(int num) { return Reg(num); } static Reg Mips64Core(int num) { return Reg(num); } diff --git a/compiler/dwarf/writer.h b/compiler/dwarf/writer.h index 00b9dfa303..d2add7f026 100644 --- a/compiler/dwarf/writer.h +++ b/compiler/dwarf/writer.h @@ -114,9 +114,9 @@ class Writer { data_->insert(data_->end(), value, value + strlen(value) + 1); } - void PushData(const void* ptr, size_t size) { + void PushData(const void* ptr, size_t num_bytes) { const char* p = reinterpret_cast<const char*>(ptr); - data_->insert(data_->end(), p, p + size); + data_->insert(data_->end(), p, p + num_bytes); } template<typename Vector2> @@ -164,6 +164,10 @@ class Writer { return data_; } + size_t size() const { + return data_->size(); + } + explicit Writer(Vector* buffer) : data_(buffer) { } private: diff --git a/compiler/elf_writer_debug.cc b/compiler/elf_writer_debug.cc index e1ab340b28..81f574f01d 100644 --- a/compiler/elf_writer_debug.cc +++ b/compiler/elf_writer_debug.cc @@ -19,18 +19,59 @@ #include <unordered_set> #include "base/casts.h" +#include "base/stl_util.h" #include "compiled_method.h" #include "driver/compiler_driver.h" #include "dex_file-inl.h" +#include "dwarf/dedup_vector.h" #include "dwarf/headers.h" #include "dwarf/register.h" #include "elf_builder.h" #include "oat_writer.h" #include "utils.h" +#include "stack_map.h" namespace art { namespace dwarf { +static Reg GetDwarfCoreReg(InstructionSet isa, int machine_reg) { + switch (isa) { + case kArm: + case kThumb2: + return Reg::ArmCore(machine_reg); + case kArm64: + return Reg::Arm64Core(machine_reg); + case kX86: + return Reg::X86Core(machine_reg); + case kX86_64: + return Reg::X86_64Core(machine_reg); + case kMips: + return Reg::MipsCore(machine_reg); + case kMips64: + return Reg::Mips64Core(machine_reg); + default: + LOG(FATAL) << "Unknown instruction set: " << isa; + UNREACHABLE(); + } +} + +static Reg GetDwarfFpReg(InstructionSet isa, int machine_reg) { + switch (isa) { + case kArm: + case kThumb2: + return Reg::ArmFp(machine_reg); + case kArm64: + return Reg::Arm64Fp(machine_reg); + case kX86: + return Reg::X86Fp(machine_reg); + case kX86_64: + return 
Reg::X86_64Fp(machine_reg); + default: + LOG(FATAL) << "Unknown instruction set: " << isa; + UNREACHABLE(); + } +} + static void WriteCIE(InstructionSet isa, CFIFormat format, std::vector<uint8_t>* buffer) { @@ -249,10 +290,441 @@ void WriteCFISection(ElfBuilder<ElfTypes>* builder, } } +struct CompilationUnit { + std::vector<const OatWriter::DebugInfo*> methods_; + size_t debug_line_offset_ = 0; + uint32_t low_pc_ = 0xFFFFFFFFU; + uint32_t high_pc_ = 0; +}; + +// Helper class to write .debug_info and its supporting sections. template<typename ElfTypes> class DebugInfoWriter { typedef typename ElfTypes::Addr Elf_Addr; + // Helper class to write one compilation unit. + // It holds helper methods and temporary state. + class CompilationUnitWriter { + public: + explicit CompilationUnitWriter(DebugInfoWriter* owner) + : owner_(owner), + info_(Is64BitInstructionSet(owner_->builder_->GetIsa()), &debug_abbrev_) { + } + + void Write(const CompilationUnit& compilation_unit) { + CHECK(!compilation_unit.methods_.empty()); + const Elf_Addr text_address = owner_->builder_->GetText()->GetAddress(); + + info_.StartTag(DW_TAG_compile_unit); + info_.WriteStrp(DW_AT_producer, owner_->WriteString("Android dex2oat")); + info_.WriteData1(DW_AT_language, DW_LANG_Java); + info_.WriteAddr(DW_AT_low_pc, text_address + compilation_unit.low_pc_); + info_.WriteUdata(DW_AT_high_pc, compilation_unit.high_pc_ - compilation_unit.low_pc_); + info_.WriteSecOffset(DW_AT_stmt_list, compilation_unit.debug_line_offset_); + + const char* last_dex_class_desc = nullptr; + for (auto mi : compilation_unit.methods_) { + const DexFile* dex = mi->dex_file_; + const DexFile::MethodId& dex_method = dex->GetMethodId(mi->dex_method_index_); + const DexFile::ProtoId& dex_proto = dex->GetMethodPrototype(dex_method); + const DexFile::TypeList* dex_params = dex->GetProtoParameters(dex_proto); + const char* dex_class_desc = dex->GetMethodDeclaringClassDescriptor(dex_method); + + // Enclose the method in correct class definition. + if (last_dex_class_desc != dex_class_desc) { + if (last_dex_class_desc != nullptr) { + EndClassTag(last_dex_class_desc); + } + size_t offset = StartClassTag(dex_class_desc); + type_cache_.emplace(dex_class_desc, offset); + // Check that each class is defined only once. + bool unique = owner_->defined_dex_classes_.insert(dex_class_desc).second; + CHECK(unique) << "Redefinition of " << dex_class_desc; + last_dex_class_desc = dex_class_desc; + } + + std::vector<const char*> param_names; + if (mi->code_item_ != nullptr) { + const uint8_t* stream = dex->GetDebugInfoStream(mi->code_item_); + if (stream != nullptr) { + DecodeUnsignedLeb128(&stream); // line. + uint32_t parameters_size = DecodeUnsignedLeb128(&stream); + for (uint32_t i = 0; i < parameters_size; ++i) { + uint32_t id = DecodeUnsignedLeb128P1(&stream); + param_names.push_back(mi->dex_file_->StringDataByIdx(id)); + } + } + } + + int start_depth = info_.Depth(); + info_.StartTag(DW_TAG_subprogram); + WriteName(dex->GetMethodName(dex_method)); + info_.WriteAddr(DW_AT_low_pc, text_address + mi->low_pc_); + info_.WriteUdata(DW_AT_high_pc, mi->high_pc_ - mi->low_pc_); + uint8_t frame_base[] = { DW_OP_call_frame_cfa }; + info_.WriteExprLoc(DW_AT_frame_base, &frame_base, sizeof(frame_base)); + WriteLazyType(dex->GetReturnTypeDescriptor(dex_proto)); + if (dex_params != nullptr) { + uint32_t vreg = mi->code_item_ == nullptr ? 
0 : + mi->code_item_->registers_size_ - mi->code_item_->ins_size_; + if ((mi->access_flags_ & kAccStatic) == 0) { + info_.StartTag(DW_TAG_formal_parameter); + WriteName("this"); + info_.WriteFlag(DW_AT_artificial, true); + WriteLazyType(dex_class_desc); + const bool is64bitValue = false; + WriteRegLocation(mi, vreg, is64bitValue, compilation_unit.low_pc_); + vreg++; + info_.EndTag(); + } + for (uint32_t i = 0; i < dex_params->Size(); ++i) { + info_.StartTag(DW_TAG_formal_parameter); + // Parameter names may not be always available. + if (i < param_names.size() && param_names[i] != nullptr) { + WriteName(param_names[i]); + } + // Write the type. + const char* type_desc = dex->StringByTypeIdx(dex_params->GetTypeItem(i).type_idx_); + WriteLazyType(type_desc); + // Write the stack location of the parameter. + const bool is64bitValue = type_desc[0] == 'D' || type_desc[0] == 'J'; + WriteRegLocation(mi, vreg, is64bitValue, compilation_unit.low_pc_); + vreg += is64bitValue ? 2 : 1; + info_.EndTag(); + } + if (mi->code_item_ != nullptr) { + CHECK_EQ(vreg, mi->code_item_->registers_size_); + } + } + info_.EndTag(); + CHECK_EQ(info_.Depth(), start_depth); // Balanced start/end. + } + if (last_dex_class_desc != nullptr) { + EndClassTag(last_dex_class_desc); + } + CHECK_EQ(info_.Depth(), 1); + FinishLazyTypes(); + info_.EndTag(); // DW_TAG_compile_unit + std::vector<uint8_t> buffer; + buffer.reserve(info_.data()->size() + KB); + const size_t offset = owner_->builder_->GetDebugInfo()->GetSize(); + const size_t debug_abbrev_offset = + owner_->debug_abbrev_.Insert(debug_abbrev_.data(), debug_abbrev_.size()); + WriteDebugInfoCU(debug_abbrev_offset, info_, offset, &buffer, &owner_->debug_info_patches_); + owner_->builder_->GetDebugInfo()->WriteFully(buffer.data(), buffer.size()); + } + + // Write table into .debug_loc which describes location of dex register. + // The dex register might be valid only at some points and it might + // move between machine registers and stack. + void WriteRegLocation(const OatWriter::DebugInfo* method_info, uint16_t vreg, + bool is64bitValue, uint32_t compilation_unit_low_pc) { + using Kind = DexRegisterLocation::Kind; + bool is_optimizing = method_info->compiled_method_->GetQuickCode().size() > 0 && + method_info->compiled_method_->GetVmapTable().size() > 0 && + method_info->compiled_method_->GetGcMap().size() == 0 && + method_info->code_item_ != nullptr; + if (!is_optimizing) { + return; + } + + Writer<> writer(&owner_->debug_loc_); + info_.WriteSecOffset(DW_AT_location, writer.size()); + + const InstructionSet isa = owner_->builder_->GetIsa(); + const bool is64bit = Is64BitInstructionSet(isa); + const CodeInfo code_info(method_info->compiled_method_->GetVmapTable().data()); + const StackMapEncoding encoding = code_info.ExtractEncoding(); + DexRegisterLocation last_reg_lo = DexRegisterLocation::None(); + DexRegisterLocation last_reg_hi = DexRegisterLocation::None(); + size_t offset_of_last_end_address = 0; + for (uint32_t s = 0; s < code_info.GetNumberOfStackMaps(); s++) { + StackMap stack_map = code_info.GetStackMapAt(s, encoding); + DCHECK(stack_map.IsValid()); + + // Find the location of the dex register. 
+ DexRegisterLocation reg_lo = DexRegisterLocation::None(); + DexRegisterLocation reg_hi = DexRegisterLocation::None(); + if (stack_map.HasDexRegisterMap(encoding)) { + DexRegisterMap dex_register_map = code_info.GetDexRegisterMapOf( + stack_map, encoding, method_info->code_item_->registers_size_); + reg_lo = dex_register_map.GetDexRegisterLocation( + vreg, method_info->code_item_->registers_size_, code_info, encoding); + if (is64bitValue) { + reg_hi = dex_register_map.GetDexRegisterLocation( + vreg + 1, method_info->code_item_->registers_size_, code_info, encoding); + } + } + if ((reg_lo == last_reg_lo && reg_hi == last_reg_hi) || + reg_lo.GetKind() == Kind::kNone) { + // Skip identical or undefined locations. + continue; + } + last_reg_lo = reg_lo; + last_reg_hi = reg_hi; + + // Translate dex register location to DWARF expression. + // Note that 64-bit value might be split to two distinct locations. + // (for example, two 32-bit machine registers, or even stack and register) + uint8_t buffer[64]; + uint8_t* pos = buffer; + for (int piece = 0; piece < (is64bitValue ? 2 : 1); piece++) { + DexRegisterLocation reg_loc = (piece == 0 ? reg_lo : reg_hi); + const Kind kind = reg_loc.GetKind(); + const int32_t value = reg_loc.GetValue(); + if (kind == Kind::kInStack) { + const size_t frame_size = method_info->compiled_method_->GetFrameSizeInBytes(); + *(pos++) = DW_OP_fbreg; + // The stack offset is relative to SP. Make it relative to CFA. + pos = EncodeSignedLeb128(pos, value - frame_size); + if (piece == 0 && reg_hi.GetKind() == Kind::kInStack && + reg_hi.GetValue() == value + 4) { + break; // the high word is correctly implied by the low word. + } + } else if (kind == Kind::kInRegister) { + pos = WriteOpReg(pos, GetDwarfCoreReg(isa, value).num()); + if (piece == 0 && reg_hi.GetKind() == Kind::kInRegisterHigh && + reg_hi.GetValue() == value) { + break; // the high word is correctly implied by the low word. + } + } else if (kind == Kind::kInFpuRegister) { + if ((isa == kArm || isa == kThumb2) && + piece == 0 && reg_hi.GetKind() == Kind::kInFpuRegister && + reg_hi.GetValue() == value + 1 && value % 2 == 0) { + // Translate S register pair to D register (e.g. S4+S5 to D2). + pos = WriteOpReg(pos, Reg::ArmDp(value / 2).num()); + break; + } + if (isa == kMips || isa == kMips64) { + // TODO: Find what the DWARF floating point register numbers are on MIPS. + break; + } + pos = WriteOpReg(pos, GetDwarfFpReg(isa, value).num()); + if (piece == 0 && reg_hi.GetKind() == Kind::kInFpuRegisterHigh && + reg_hi.GetValue() == reg_lo.GetValue()) { + break; // the high word is correctly implied by the low word. + } + } else if (kind == Kind::kConstant) { + *(pos++) = DW_OP_consts; + pos = EncodeSignedLeb128(pos, value); + *(pos++) = DW_OP_stack_value; + } else if (kind == Kind::kNone) { + break; + } else { + // kInStackLargeOffset and kConstantLargeValue are hidden by GetKind(). + // kInRegisterHigh and kInFpuRegisterHigh should be handled by + // the special cases above and they should not occur alone. + LOG(ERROR) << "Unexpected register location kind: " + << DexRegisterLocation::PrettyDescriptor(kind); + break; + } + if (is64bitValue) { + // Write the marker which is needed by split 64-bit values. + // This code is skipped by the special cases. + *(pos++) = DW_OP_piece; + pos = EncodeUnsignedLeb128(pos, 4); + } + } + + // Write end address for previous entry. 
+ const uint32_t pc = method_info->low_pc_ + stack_map.GetNativePcOffset(encoding); + if (offset_of_last_end_address != 0) { + if (is64bit) { + writer.UpdateUint64(offset_of_last_end_address, pc - compilation_unit_low_pc); + } else { + writer.UpdateUint32(offset_of_last_end_address, pc - compilation_unit_low_pc); + } + } + offset_of_last_end_address = 0; + + DCHECK_LE(static_cast<size_t>(pos - buffer), sizeof(buffer)); + if (pos > buffer) { + // Write start/end address. + if (is64bit) { + writer.PushUint64(pc - compilation_unit_low_pc); + offset_of_last_end_address = writer.size(); + writer.PushUint64(method_info->high_pc_ - compilation_unit_low_pc); + } else { + writer.PushUint32(pc - compilation_unit_low_pc); + offset_of_last_end_address = writer.size(); + writer.PushUint32(method_info->high_pc_ - compilation_unit_low_pc); + } + // Write the expression. + writer.PushUint16(pos - buffer); + writer.PushData(buffer, pos - buffer); + } else { + // Otherwise leave the address range undefined. + } + } + // Write end-of-list entry. + if (is64bit) { + writer.PushUint64(0); + writer.PushUint64(0); + } else { + writer.PushUint32(0); + writer.PushUint32(0); + } + } + + // Some types are difficult to define as we go since they need + // to be enclosed in the right set of namespaces. Therefore we + // just define all types lazily at the end of compilation unit. + void WriteLazyType(const char* type_descriptor) { + DCHECK(type_descriptor != nullptr); + if (type_descriptor[0] != 'V') { + lazy_types_.emplace(type_descriptor, info_.size()); + info_.WriteRef4(DW_AT_type, 0); + } + } + + void FinishLazyTypes() { + for (const auto& lazy_type : lazy_types_) { + info_.UpdateUint32(lazy_type.second, WriteType(lazy_type.first)); + } + lazy_types_.clear(); + } + + private: + void WriteName(const char* name) { + info_.WriteStrp(DW_AT_name, owner_->WriteString(name)); + } + + // Helper which writes DWARF expression referencing a register. + static uint8_t* WriteOpReg(uint8_t* buffer, uint32_t dwarf_reg_num) { + if (dwarf_reg_num < 32) { + *(buffer++) = DW_OP_reg0 + dwarf_reg_num; + } else { + *(buffer++) = DW_OP_regx; + buffer = EncodeUnsignedLeb128(buffer, dwarf_reg_num); + } + return buffer; + } + + // Convert dex type descriptor to DWARF. + // Returns offset in the compilation unit. + size_t WriteType(const char* desc) { + const auto& it = type_cache_.find(desc); + if (it != type_cache_.end()) { + return it->second; + } + + size_t offset; + if (*desc == 'L') { + // Class type. For example: Lpackage/name; + offset = StartClassTag(desc); + info_.WriteFlag(DW_AT_declaration, true); + EndClassTag(desc); + } else if (*desc == '[') { + // Array type. + size_t element_type = WriteType(desc + 1); + offset = info_.StartTag(DW_TAG_array_type); + info_.WriteRef(DW_AT_type, element_type); + info_.EndTag(); + } else { + // Primitive types. 
+ const char* name; + uint32_t encoding; + uint32_t byte_size; + switch (*desc) { + case 'B': + name = "byte"; + encoding = DW_ATE_signed; + byte_size = 1; + break; + case 'C': + name = "char"; + encoding = DW_ATE_UTF; + byte_size = 2; + break; + case 'D': + name = "double"; + encoding = DW_ATE_float; + byte_size = 8; + break; + case 'F': + name = "float"; + encoding = DW_ATE_float; + byte_size = 4; + break; + case 'I': + name = "int"; + encoding = DW_ATE_signed; + byte_size = 4; + break; + case 'J': + name = "long"; + encoding = DW_ATE_signed; + byte_size = 8; + break; + case 'S': + name = "short"; + encoding = DW_ATE_signed; + byte_size = 2; + break; + case 'Z': + name = "boolean"; + encoding = DW_ATE_boolean; + byte_size = 1; + break; + case 'V': + LOG(FATAL) << "Void type should not be encoded"; + UNREACHABLE(); + default: + LOG(FATAL) << "Unknown dex type descriptor: " << desc; + UNREACHABLE(); + } + offset = info_.StartTag(DW_TAG_base_type); + WriteName(name); + info_.WriteData1(DW_AT_encoding, encoding); + info_.WriteData1(DW_AT_byte_size, byte_size); + info_.EndTag(); + } + + type_cache_.emplace(desc, offset); + return offset; + } + + // Start DW_TAG_class_type tag nested in DW_TAG_namespace tags. + // Returns offset of the class tag in the compilation unit. + size_t StartClassTag(const char* desc) { + DCHECK(desc != nullptr && desc[0] == 'L'); + // Enclose the type in namespace tags. + const char* end; + for (desc = desc + 1; (end = strchr(desc, '/')) != nullptr; desc = end + 1) { + info_.StartTag(DW_TAG_namespace); + WriteName(std::string(desc, end - desc).c_str()); + } + // Start the class tag. + size_t offset = info_.StartTag(DW_TAG_class_type); + end = strchr(desc, ';'); + CHECK(end != nullptr); + WriteName(std::string(desc, end - desc).c_str()); + return offset; + } + + void EndClassTag(const char* desc) { + DCHECK(desc != nullptr && desc[0] == 'L'); + // End the class tag. + info_.EndTag(); + // Close namespace tags. + const char* end; + for (desc = desc + 1; (end = strchr(desc, '/')) != nullptr; desc = end + 1) { + info_.EndTag(); + } + } + + // For access to the ELF sections. + DebugInfoWriter<ElfTypes>* owner_; + // Debug abbrevs for this compilation unit only. + std::vector<uint8_t> debug_abbrev_; + // Temporary buffer to create and store the entries. + DebugInfoEntryWriter<> info_; + // Cache of already translated type descriptors. + std::map<const char*, size_t, CStringLess> type_cache_; // type_desc -> definition_offset. + // 32-bit references which need to be resolved to a type later. + std::multimap<const char*, size_t, CStringLess> lazy_types_; // type_desc -> patch_offset. 
+ }; + public: explicit DebugInfoWriter(ElfBuilder<ElfTypes>* builder) : builder_(builder) { } @@ -261,54 +733,31 @@ class DebugInfoWriter { builder_->GetDebugInfo()->Start(); } - void Write(const std::vector<const OatWriter::DebugInfo*>& method_infos, - size_t debug_line_offset) { - const bool is64bit = Is64BitInstructionSet(builder_->GetIsa()); - const Elf_Addr text_address = builder_->GetText()->GetAddress(); - uint32_t cunit_low_pc = 0xFFFFFFFFU; - uint32_t cunit_high_pc = 0; - for (auto method_info : method_infos) { - cunit_low_pc = std::min(cunit_low_pc, method_info->low_pc_); - cunit_high_pc = std::max(cunit_high_pc, method_info->high_pc_); - } - - size_t debug_abbrev_offset = debug_abbrev_.size(); - DebugInfoEntryWriter<> info(is64bit, &debug_abbrev_); - info.StartTag(DW_TAG_compile_unit, DW_CHILDREN_yes); - info.WriteStrp(DW_AT_producer, "Android dex2oat", &debug_str_); - info.WriteData1(DW_AT_language, DW_LANG_Java); - info.WriteAddr(DW_AT_low_pc, text_address + cunit_low_pc); - info.WriteAddr(DW_AT_high_pc, text_address + cunit_high_pc); - info.WriteData4(DW_AT_stmt_list, debug_line_offset); - for (auto method_info : method_infos) { - std::string method_name = PrettyMethod(method_info->dex_method_index_, - *method_info->dex_file_, true); - info.StartTag(DW_TAG_subprogram, DW_CHILDREN_no); - info.WriteStrp(DW_AT_name, method_name.data(), &debug_str_); - info.WriteAddr(DW_AT_low_pc, text_address + method_info->low_pc_); - info.WriteAddr(DW_AT_high_pc, text_address + method_info->high_pc_); - info.EndTag(); // DW_TAG_subprogram - } - info.EndTag(); // DW_TAG_compile_unit - std::vector<uint8_t> buffer; - buffer.reserve(info.data()->size() + KB); - size_t offset = builder_->GetDebugInfo()->GetSize(); - WriteDebugInfoCU(debug_abbrev_offset, info, offset, &buffer, &debug_info_patches_); - builder_->GetDebugInfo()->WriteFully(buffer.data(), buffer.size()); + void WriteCompilationUnit(const CompilationUnit& compilation_unit) { + CompilationUnitWriter writer(this); + writer.Write(compilation_unit); } void End() { builder_->GetDebugInfo()->End(); builder_->WritePatches(".debug_info.oat_patches", &debug_info_patches_); - builder_->WriteSection(".debug_abbrev", &debug_abbrev_); - builder_->WriteSection(".debug_str", &debug_str_); + builder_->WriteSection(".debug_abbrev", &debug_abbrev_.Data()); + builder_->WriteSection(".debug_str", &debug_str_.Data()); + builder_->WriteSection(".debug_loc", &debug_loc_); } private: + size_t WriteString(const char* str) { + return debug_str_.Insert(reinterpret_cast<const uint8_t*>(str), strlen(str) + 1); + } + ElfBuilder<ElfTypes>* builder_; std::vector<uintptr_t> debug_info_patches_; - std::vector<uint8_t> debug_abbrev_; - std::vector<uint8_t> debug_str_; + DedupVector debug_abbrev_; + DedupVector debug_str_; + std::vector<uint8_t> debug_loc_; + + std::unordered_set<const char*> defined_dex_classes_; // For CHECKs only. }; template<typename ElfTypes> @@ -325,15 +774,11 @@ class DebugLineWriter { // Write line table for given set of methods. // Returns the number of bytes written. 
- size_t Write(const std::vector<const OatWriter::DebugInfo*>& method_infos) { + size_t WriteCompilationUnit(CompilationUnit& compilation_unit) { const bool is64bit = Is64BitInstructionSet(builder_->GetIsa()); const Elf_Addr text_address = builder_->GetText()->GetAddress(); - uint32_t cunit_low_pc = 0xFFFFFFFFU; - uint32_t cunit_high_pc = 0; - for (auto method_info : method_infos) { - cunit_low_pc = std::min(cunit_low_pc, method_info->low_pc_); - cunit_high_pc = std::max(cunit_high_pc, method_info->high_pc_); - } + + compilation_unit.debug_line_offset_ = builder_->GetDebugLine()->GetSize(); std::vector<FileEntry> files; std::unordered_map<std::string, size_t> files_map; @@ -358,11 +803,17 @@ class DebugLineWriter { break; } DebugLineOpCodeWriter<> opcodes(is64bit, code_factor_bits_); - opcodes.SetAddress(text_address + cunit_low_pc); + opcodes.SetAddress(text_address + compilation_unit.low_pc_); if (dwarf_isa != -1) { opcodes.SetISA(dwarf_isa); } - for (const OatWriter::DebugInfo* mi : method_infos) { + for (const OatWriter::DebugInfo* mi : compilation_unit.methods_) { + // Ignore function if we have already generated line table for the same address. + // It would confuse the debugger and the DWARF specification forbids it. + if (mi->deduped_) { + continue; + } + struct DebugInfoCallbacks { static bool NewPosition(void* ctx, uint32_t address, uint32_t line) { auto* context = reinterpret_cast<DebugInfoCallbacks*>(ctx); @@ -461,7 +912,7 @@ class DebugLineWriter { opcodes.AddRow(method_address, 0); } } - opcodes.AdvancePC(text_address + cunit_high_pc); + opcodes.AdvancePC(text_address + compilation_unit.high_pc_); opcodes.EndSequence(); std::vector<uint8_t> buffer; buffer.reserve(opcodes.data()->size() + KB); @@ -484,36 +935,28 @@ class DebugLineWriter { template<typename ElfTypes> void WriteDebugSections(ElfBuilder<ElfTypes>* builder, const std::vector<OatWriter::DebugInfo>& method_infos) { - struct CompilationUnit { - std::vector<const OatWriter::DebugInfo*> methods_; - size_t debug_line_offset_ = 0; - }; - // Group the methods into compilation units based on source file. std::vector<CompilationUnit> compilation_units; const char* last_source_file = nullptr; for (const OatWriter::DebugInfo& mi : method_infos) { - // Attribute given instruction range only to single method. - // Otherwise the debugger might get really confused. - if (!mi.deduped_) { - auto& dex_class_def = mi.dex_file_->GetClassDef(mi.class_def_index_); - const char* source_file = mi.dex_file_->GetSourceFile(dex_class_def); - if (compilation_units.empty() || source_file != last_source_file) { - compilation_units.push_back(CompilationUnit()); - } - compilation_units.back().methods_.push_back(&mi); - last_source_file = source_file; + auto& dex_class_def = mi.dex_file_->GetClassDef(mi.class_def_index_); + const char* source_file = mi.dex_file_->GetSourceFile(dex_class_def); + if (compilation_units.empty() || source_file != last_source_file) { + compilation_units.push_back(CompilationUnit()); } + CompilationUnit& cu = compilation_units.back(); + cu.methods_.push_back(&mi); + cu.low_pc_ = std::min(cu.low_pc_, mi.low_pc_); + cu.high_pc_ = std::max(cu.high_pc_, mi.high_pc_); + last_source_file = source_file; } // Write .debug_line section. 
{ DebugLineWriter<ElfTypes> line_writer(builder); line_writer.Start(); - size_t offset = 0; for (auto& compilation_unit : compilation_units) { - compilation_unit.debug_line_offset_ = offset; - offset += line_writer.Write(compilation_unit.methods_); + line_writer.WriteCompilationUnit(compilation_unit); } line_writer.End(); } @@ -523,7 +966,7 @@ void WriteDebugSections(ElfBuilder<ElfTypes>* builder, DebugInfoWriter<ElfTypes> info_writer(builder); info_writer.Start(); for (const auto& compilation_unit : compilation_units) { - info_writer.Write(compilation_unit.methods_, compilation_unit.debug_line_offset_); + info_writer.WriteCompilationUnit(compilation_unit); } info_writer.End(); } diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc index 3d9e7e7cda..341742e4dc 100644 --- a/compiler/image_writer.cc +++ b/compiler/image_writer.cc @@ -330,10 +330,20 @@ void ImageWriter::SetImageBinSlot(mirror::Object* object, BinSlot bin_slot) { } void ImageWriter::PrepareDexCacheArraySlots() { + // Prepare dex cache array starts based on the ordering specified in the CompilerDriver. + uint32_t size = 0u; + for (const DexFile* dex_file : compiler_driver_.GetDexFilesForOatFile()) { + dex_cache_array_starts_.Put(dex_file, size); + DexCacheArraysLayout layout(target_ptr_size_, dex_file); + size += layout.Size(); + } + // Set the slot size early to avoid DCHECK() failures in IsImageBinSlotAssigned() + // when AssignImageBinSlot() assigns their indexes out or order. + bin_slot_sizes_[kBinDexCacheArray] = size; + ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); Thread* const self = Thread::Current(); ReaderMutexLock mu(self, *class_linker->DexLock()); - uint32_t size = 0u; for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) { mirror::DexCache* dex_cache = down_cast<mirror::DexCache*>(self->DecodeJObject(data.weak_root)); @@ -341,22 +351,18 @@ void ImageWriter::PrepareDexCacheArraySlots() { continue; } const DexFile* dex_file = dex_cache->GetDexFile(); - dex_cache_array_starts_.Put(dex_file, size); DexCacheArraysLayout layout(target_ptr_size_, dex_file); DCHECK(layout.Valid()); + uint32_t start = dex_cache_array_starts_.Get(dex_file); DCHECK_EQ(dex_file->NumTypeIds() != 0u, dex_cache->GetResolvedTypes() != nullptr); - AddDexCacheArrayRelocation(dex_cache->GetResolvedTypes(), size + layout.TypesOffset()); + AddDexCacheArrayRelocation(dex_cache->GetResolvedTypes(), start + layout.TypesOffset()); DCHECK_EQ(dex_file->NumMethodIds() != 0u, dex_cache->GetResolvedMethods() != nullptr); - AddDexCacheArrayRelocation(dex_cache->GetResolvedMethods(), size + layout.MethodsOffset()); + AddDexCacheArrayRelocation(dex_cache->GetResolvedMethods(), start + layout.MethodsOffset()); DCHECK_EQ(dex_file->NumFieldIds() != 0u, dex_cache->GetResolvedFields() != nullptr); - AddDexCacheArrayRelocation(dex_cache->GetResolvedFields(), size + layout.FieldsOffset()); + AddDexCacheArrayRelocation(dex_cache->GetResolvedFields(), start + layout.FieldsOffset()); DCHECK_EQ(dex_file->NumStringIds() != 0u, dex_cache->GetStrings() != nullptr); - AddDexCacheArrayRelocation(dex_cache->GetStrings(), size + layout.StringsOffset()); - size += layout.Size(); + AddDexCacheArrayRelocation(dex_cache->GetStrings(), start + layout.StringsOffset()); } - // Set the slot size early to avoid DCHECK() failures in IsImageBinSlotAssigned() - // when AssignImageBinSlot() assigns their indexes out or order. 
- bin_slot_sizes_[kBinDexCacheArray] = size; } void ImageWriter::AddDexCacheArrayRelocation(void* array, size_t offset) { @@ -586,6 +592,17 @@ bool ImageWriter::IsBootClassLoaderNonImageClass(mirror::Class* klass) { } bool ImageWriter::ContainsBootClassLoaderNonImageClass(mirror::Class* klass) { + bool early_exit = false; + std::unordered_set<mirror::Class*> visited; + return ContainsBootClassLoaderNonImageClassInternal(klass, &early_exit, &visited); +} + +bool ImageWriter::ContainsBootClassLoaderNonImageClassInternal( + mirror::Class* klass, + bool* early_exit, + std::unordered_set<mirror::Class*>* visited) { + DCHECK(early_exit != nullptr); + DCHECK(visited != nullptr); if (klass == nullptr) { return false; } @@ -594,14 +611,22 @@ bool ImageWriter::ContainsBootClassLoaderNonImageClass(mirror::Class* klass) { // Already computed, return the found value. return found->second; } - // Place holder value to prevent infinite recursion. - prune_class_memo_.emplace(klass, false); + // Circular dependencies, return false but do not store the result in the memoization table. + if (visited->find(klass) != visited->end()) { + *early_exit = true; + return false; + } + visited->emplace(klass); bool result = IsBootClassLoaderNonImageClass(klass); + bool my_early_exit = false; // Only for ourselves, ignore caller. if (!result) { // Check interfaces since these wont be visited through VisitReferences.) mirror::IfTable* if_table = klass->GetIfTable(); for (size_t i = 0, num_interfaces = klass->GetIfTableCount(); i < num_interfaces; ++i) { - result = result || ContainsBootClassLoaderNonImageClass(if_table->GetInterface(i)); + result = result || ContainsBootClassLoaderNonImageClassInternal( + if_table->GetInterface(i), + &my_early_exit, + visited); } } // Check static fields and their classes. @@ -615,16 +640,38 @@ bool ImageWriter::ContainsBootClassLoaderNonImageClass(mirror::Class* klass) { mirror::Object* ref = klass->GetFieldObject<mirror::Object>(field_offset); if (ref != nullptr) { if (ref->IsClass()) { - result = result || ContainsBootClassLoaderNonImageClass(ref->AsClass()); + result = result || + ContainsBootClassLoaderNonImageClassInternal( + ref->AsClass(), + &my_early_exit, + visited); } - result = result || ContainsBootClassLoaderNonImageClass(ref->GetClass()); + result = result || + ContainsBootClassLoaderNonImageClassInternal( + ref->GetClass(), + &my_early_exit, + visited); } field_offset = MemberOffset(field_offset.Uint32Value() + sizeof(mirror::HeapReference<mirror::Object>)); } } - result = result || ContainsBootClassLoaderNonImageClass(klass->GetSuperClass()); - prune_class_memo_[klass] = result; + result = result || + ContainsBootClassLoaderNonImageClassInternal( + klass->GetSuperClass(), + &my_early_exit, + visited); + // Erase the element we stored earlier since we are exiting the function. + auto it = visited->find(klass); + DCHECK(it != visited->end()); + visited->erase(it); + // Only store result if it is true or none of the calls early exited due to circular + // dependencies. If visited is empty then we are the root caller, in this case the cycle was in + // a child call and we can remember the result. 
+ if (result == true || !my_early_exit || visited->empty()) { + prune_class_memo_[klass] = result; + } + *early_exit |= my_early_exit; return result; } diff --git a/compiler/image_writer.h b/compiler/image_writer.h index 22cb91a56d..889cd10dc4 100644 --- a/compiler/image_writer.h +++ b/compiler/image_writer.h @@ -343,6 +343,12 @@ class ImageWriter FINAL { bool ContainsBootClassLoaderNonImageClass(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_); + // early_exit is true if we had a cyclic dependency anywhere down the chain. + bool ContainsBootClassLoaderNonImageClassInternal(mirror::Class* klass, + bool* early_exit, + std::unordered_set<mirror::Class*>* visited) + SHARED_REQUIRES(Locks::mutator_lock_); + static Bin BinTypeForNativeRelocationType(NativeObjectRelocationType type); uintptr_t NativeOffsetInImage(void* obj); diff --git a/compiler/optimizing/boolean_simplifier.cc b/compiler/optimizing/boolean_simplifier.cc index f985745e7a..f0cafc847f 100644 --- a/compiler/optimizing/boolean_simplifier.cc +++ b/compiler/optimizing/boolean_simplifier.cc @@ -61,40 +61,6 @@ static bool NegatesCondition(HInstruction* input_true, HInstruction* input_false && input_false->IsIntConstant() && input_false->AsIntConstant()->IsOne(); } -// Returns an instruction with the opposite boolean value from 'cond'. -static HInstruction* GetOppositeCondition(HInstruction* cond) { - HGraph* graph = cond->GetBlock()->GetGraph(); - ArenaAllocator* allocator = graph->GetArena(); - - if (cond->IsCondition()) { - HInstruction* lhs = cond->InputAt(0); - HInstruction* rhs = cond->InputAt(1); - switch (cond->AsCondition()->GetOppositeCondition()) { // get *opposite* - case kCondEQ: return new (allocator) HEqual(lhs, rhs); - case kCondNE: return new (allocator) HNotEqual(lhs, rhs); - case kCondLT: return new (allocator) HLessThan(lhs, rhs); - case kCondLE: return new (allocator) HLessThanOrEqual(lhs, rhs); - case kCondGT: return new (allocator) HGreaterThan(lhs, rhs); - case kCondGE: return new (allocator) HGreaterThanOrEqual(lhs, rhs); - case kCondB: return new (allocator) HBelow(lhs, rhs); - case kCondBE: return new (allocator) HBelowOrEqual(lhs, rhs); - case kCondA: return new (allocator) HAbove(lhs, rhs); - case kCondAE: return new (allocator) HAboveOrEqual(lhs, rhs); - } - } else if (cond->IsIntConstant()) { - HIntConstant* int_const = cond->AsIntConstant(); - if (int_const->IsZero()) { - return graph->GetIntConstant(1); - } else { - DCHECK(int_const->IsOne()); - return graph->GetIntConstant(0); - } - } - // General case when 'cond' is another instruction of type boolean, - // as verified by SSAChecker. 
- return new (allocator) HBooleanNot(cond); -} - void HBooleanSimplifier::TryRemovingBooleanSelection(HBasicBlock* block) { DCHECK(block->EndsWithIf()); @@ -126,10 +92,7 @@ void HBooleanSimplifier::TryRemovingBooleanSelection(HBasicBlock* block) { HInstruction* replacement; if (NegatesCondition(true_value, false_value)) { - replacement = GetOppositeCondition(if_condition); - if (replacement->GetBlock() == nullptr) { - block->InsertInstructionBefore(replacement, if_instruction); - } + replacement = graph_->InsertOppositeCondition(if_condition, if_instruction); } else if (PreservesCondition(true_value, false_value)) { replacement = if_condition; } else { diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index 3257de1858..d7754e8ea9 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -876,12 +876,96 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, clinit_check); } +bool HGraphBuilder::BuildNewInstance(uint16_t type_index, uint32_t dex_pc) { + bool finalizable; + bool can_throw = NeedsAccessCheck(type_index, &finalizable); + + // Only the non-resolved entrypoint handles the finalizable class case. If we + // need access checks, then we haven't resolved the method and the class may + // again be finalizable. + QuickEntrypointEnum entrypoint = (finalizable || can_throw) + ? kQuickAllocObject + : kQuickAllocObjectInitialized; + + ScopedObjectAccess soa(Thread::Current()); + StackHandleScope<3> hs(soa.Self()); + Handle<mirror::DexCache> dex_cache(hs.NewHandle( + dex_compilation_unit_->GetClassLinker()->FindDexCache( + soa.Self(), *dex_compilation_unit_->GetDexFile()))); + Handle<mirror::Class> resolved_class(hs.NewHandle(dex_cache->GetResolvedType(type_index))); + const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile(); + Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle( + outer_compilation_unit_->GetClassLinker()->FindDexCache(soa.Self(), outer_dex_file))); + + if (outer_dex_cache.Get() != dex_cache.Get()) { + // We currently do not support inlining allocations across dex files. + return false; + } + + HLoadClass* load_class = new (arena_) HLoadClass( + graph_->GetCurrentMethod(), + type_index, + outer_dex_file, + IsOutermostCompilingClass(type_index), + dex_pc, + /*needs_access_check*/ can_throw, + compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_file, type_index)); + + current_block_->AddInstruction(load_class); + HInstruction* cls = load_class; + if (!IsInitialized(resolved_class)) { + cls = new (arena_) HClinitCheck(load_class, dex_pc); + current_block_->AddInstruction(cls); + } + + current_block_->AddInstruction(new (arena_) HNewInstance( + cls, + graph_->GetCurrentMethod(), + dex_pc, + type_index, + *dex_compilation_unit_->GetDexFile(), + can_throw, + finalizable, + entrypoint)); + return true; +} + +static bool IsSubClass(mirror::Class* to_test, mirror::Class* super_class) + SHARED_REQUIRES(Locks::mutator_lock_) { + return to_test != nullptr && !to_test->IsInterface() && to_test->IsSubClass(super_class); +} + +bool HGraphBuilder::IsInitialized(Handle<mirror::Class> cls) const { + if (cls.Get() == nullptr) { + return false; + } + + // `CanAssumeClassIsLoaded` will return true if we're JITting, or will + // check whether the class is in an image for the AOT compilation. 
+ if (cls->IsInitialized() && + compiler_driver_->CanAssumeClassIsLoaded(cls.Get())) { + return true; + } + + if (IsSubClass(GetOutermostCompilingClass(), cls.Get())) { + return true; + } + + // TODO: We should walk over the inlined methods, but we don't pass + // that information to the builder. + if (IsSubClass(GetCompilingClass(), cls.Get())) { + return true; + } + + return false; +} + HClinitCheck* HGraphBuilder::ProcessClinitCheckForInvoke( uint32_t dex_pc, uint32_t method_idx, HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement) { ScopedObjectAccess soa(Thread::Current()); - StackHandleScope<4> hs(soa.Self()); + StackHandleScope<5> hs(soa.Self()); Handle<mirror::DexCache> dex_cache(hs.NewHandle( dex_compilation_unit_->GetClassLinker()->FindDexCache( soa.Self(), *dex_compilation_unit_->GetDexFile()))); @@ -896,6 +980,7 @@ HClinitCheck* HGraphBuilder::ProcessClinitCheckForInvoke( Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle( outer_compilation_unit_->GetClassLinker()->FindDexCache(soa.Self(), outer_dex_file))); Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass())); + Handle<mirror::Class> resolved_method_class(hs.NewHandle(resolved_method->GetDeclaringClass())); // The index at which the method's class is stored in the DexCache's type array. uint32_t storage_index = DexFile::kDexNoIndex; @@ -913,41 +998,21 @@ HClinitCheck* HGraphBuilder::ProcessClinitCheckForInvoke( HClinitCheck* clinit_check = nullptr; - if (!outer_class->IsInterface() - && outer_class->IsSubClass(resolved_method->GetDeclaringClass())) { - // If the outer class is the declaring class or a subclass - // of the declaring class, no class initialization is needed - // before the static method call. - // Note that in case of inlining, we do not need to add clinit checks - // to calls that satisfy this subclass check with any inlined methods. This - // will be detected by the optimization passes. + if (IsInitialized(resolved_method_class)) { *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kNone; } else if (storage_index != DexFile::kDexNoIndex) { - // If the method's class type index is available, check - // whether we should add an explicit class initialization - // check for its declaring class before the static method call. - - // TODO: find out why this check is needed. 
- bool is_in_dex_cache = compiler_driver_->CanAssumeTypeIsPresentInDexCache( - *outer_compilation_unit_->GetDexFile(), storage_index); - bool is_initialized = - resolved_method->GetDeclaringClass()->IsInitialized() && is_in_dex_cache; - - if (is_initialized) { - *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kNone; - } else { - *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit; - HLoadClass* load_class = new (arena_) HLoadClass( - graph_->GetCurrentMethod(), - storage_index, - *dex_compilation_unit_->GetDexFile(), - is_outer_class, - dex_pc, - /*needs_access_check*/ false); - current_block_->AddInstruction(load_class); - clinit_check = new (arena_) HClinitCheck(load_class, dex_pc); - current_block_->AddInstruction(clinit_check); - } + *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit; + HLoadClass* load_class = new (arena_) HLoadClass( + graph_->GetCurrentMethod(), + storage_index, + outer_dex_file, + is_outer_class, + dex_pc, + /*needs_access_check*/ false, + compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_file, storage_index)); + current_block_->AddInstruction(load_class); + clinit_check = new (arena_) HClinitCheck(load_class, dex_pc); + current_block_->AddInstruction(clinit_check); } return clinit_check; } @@ -1272,7 +1337,7 @@ bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction, uint16_t field_index = instruction.VRegB_21c(); ScopedObjectAccess soa(Thread::Current()); - StackHandleScope<4> hs(soa.Self()); + StackHandleScope<5> hs(soa.Self()); Handle<mirror::DexCache> dex_cache(hs.NewHandle( dex_compilation_unit_->GetClassLinker()->FindDexCache( soa.Self(), *dex_compilation_unit_->GetDexFile()))); @@ -1318,26 +1383,26 @@ bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction, } } - // TODO: find out why this check is needed. - bool is_in_dex_cache = compiler_driver_->CanAssumeTypeIsPresentInDexCache( - *outer_compilation_unit_->GetDexFile(), storage_index); - bool is_initialized = resolved_field->GetDeclaringClass()->IsInitialized() && is_in_dex_cache; - + bool is_in_cache = + compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_file, storage_index); HLoadClass* constant = new (arena_) HLoadClass(graph_->GetCurrentMethod(), storage_index, - *dex_compilation_unit_->GetDexFile(), + outer_dex_file, is_outer_class, dex_pc, - /*needs_access_check*/ false); + /*needs_access_check*/ false, + is_in_cache); current_block_->AddInstruction(constant); HInstruction* cls = constant; - if (!is_initialized && !is_outer_class) { + + Handle<mirror::Class> klass(hs.NewHandle(resolved_field->GetDeclaringClass())); + if (!IsInitialized(klass)) { cls = new (arena_) HClinitCheck(constant, dex_pc); current_block_->AddInstruction(cls); } - uint16_t class_def_index = resolved_field->GetDeclaringClass()->GetDexClassDefIndex(); + uint16_t class_def_index = klass->GetDexClassDefIndex(); if (is_put) { // We need to keep the class alive before loading the value. 
Temporaries temps(graph_); @@ -1601,19 +1666,20 @@ void HGraphBuilder::BuildTypeCheck(const Instruction& instruction, ScopedObjectAccess soa(Thread::Current()); StackHandleScope<2> hs(soa.Self()); + const DexFile& dex_file = *dex_compilation_unit_->GetDexFile(); Handle<mirror::DexCache> dex_cache(hs.NewHandle( - dex_compilation_unit_->GetClassLinker()->FindDexCache( - soa.Self(), *dex_compilation_unit_->GetDexFile()))); + dex_compilation_unit_->GetClassLinker()->FindDexCache(soa.Self(), dex_file))); Handle<mirror::Class> resolved_class(hs.NewHandle(dex_cache->GetResolvedType(type_index))); HInstruction* object = LoadLocal(reference, Primitive::kPrimNot, dex_pc); HLoadClass* cls = new (arena_) HLoadClass( graph_->GetCurrentMethod(), type_index, - *dex_compilation_unit_->GetDexFile(), + dex_file, IsOutermostCompilingClass(type_index), dex_pc, - !can_access); + !can_access, + compiler_driver_->CanAssumeTypeIsPresentInDexCache(dex_file, type_index)); current_block_->AddInstruction(cls); // The class needs a temporary before being used by the type check. @@ -2509,20 +2575,9 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 current_block_->AddInstruction(fake_string); UpdateLocal(register_index, fake_string, dex_pc); } else { - bool finalizable; - bool can_throw = NeedsAccessCheck(type_index, &finalizable); - QuickEntrypointEnum entrypoint = can_throw - ? kQuickAllocObjectWithAccessCheck - : kQuickAllocObject; - - current_block_->AddInstruction(new (arena_) HNewInstance( - graph_->GetCurrentMethod(), - dex_pc, - type_index, - *dex_compilation_unit_->GetDexFile(), - can_throw, - finalizable, - entrypoint)); + if (!BuildNewInstance(type_index, dex_pc)) { + return false; + } UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc); } break; @@ -2750,10 +2805,11 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 current_block_->AddInstruction(new (arena_) HLoadClass( graph_->GetCurrentMethod(), type_index, - *dex_compilation_unit_->GetDexFile(), + *dex_file_, IsOutermostCompilingClass(type_index), dex_pc, - !can_access)); + !can_access, + compiler_driver_->CanAssumeTypeIsPresentInDexCache(*dex_file_, type_index))); UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction(), dex_pc); break; } diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h index f857ef0e12..5ada93f684 100644 --- a/compiler/optimizing/builder.h +++ b/compiler/optimizing/builder.h @@ -308,6 +308,13 @@ class HGraphBuilder : public ValueObject { uint32_t method_idx, HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement); + // Build a HNewInstance instruction. + bool BuildNewInstance(uint16_t type_index, uint32_t dex_pc); + + // Return whether the compiler can assume `cls` is initialized. 
+ bool IsInitialized(Handle<mirror::Class> cls) const + SHARED_REQUIRES(Locks::mutator_lock_); + ArenaAllocator* const arena_; // A list of the size of the dex code holding block information for diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 77d53fcd8f..0baa0e30dc 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -383,11 +383,11 @@ void CodeGenerator::CreateCommonInvokeLocationSummary( HInvokeStaticOrDirect* call = invoke->AsInvokeStaticOrDirect(); switch (call->GetMethodLoadKind()) { case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: - locations->SetInAt(call->GetCurrentMethodInputIndex(), visitor->GetMethodLocation()); + locations->SetInAt(call->GetSpecialInputIndex(), visitor->GetMethodLocation()); break; case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: locations->AddTemp(visitor->GetMethodLocation()); - locations->SetInAt(call->GetCurrentMethodInputIndex(), Location::RequiresRegister()); + locations->SetInAt(call->GetSpecialInputIndex(), Location::RequiresRegister()); break; default: locations->AddTemp(visitor->GetMethodLocation()); diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 655bbb8a8e..a98d9c68b7 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -34,6 +34,9 @@ namespace art { +template<class MirrorType> +class GcRoot; + namespace arm { static bool ExpectedPairLayout(Location location) { @@ -74,6 +77,7 @@ class NullCheckSlowPathARM : public SlowPathCode { } arm_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pThrowNullPointer), instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowNullPointer, void, void>(); } bool IsFatal() const OVERRIDE { return true; } @@ -98,6 +102,7 @@ class DivZeroCheckSlowPathARM : public SlowPathCode { } arm_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pThrowDivZero), instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowDivZero, void, void>(); } bool IsFatal() const OVERRIDE { return true; } @@ -120,6 +125,7 @@ class SuspendCheckSlowPathARM : public SlowPathCode { SaveLiveRegisters(codegen, instruction_->GetLocations()); arm_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pTestSuspend), instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickTestSuspend, void, void>(); RestoreLiveRegisters(codegen, instruction_->GetLocations()); if (successor_ == nullptr) { __ b(GetReturnLabel()); @@ -176,6 +182,7 @@ class BoundsCheckSlowPathARM : public SlowPathCode { Primitive::kPrimInt); arm_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pThrowArrayBounds), instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>(); } bool IsFatal() const OVERRIDE { return true; } @@ -211,6 +218,11 @@ class LoadClassSlowPathARM : public SlowPathCode { ? QUICK_ENTRY_POINT(pInitializeStaticStorage) : QUICK_ENTRY_POINT(pInitializeType); arm_codegen->InvokeRuntime(entry_point_offset, at_, dex_pc_, this); + if (do_clinit_) { + CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>(); + } else { + CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>(); + } // Move the class to the desired location. 
Location out = locations->Out(); @@ -257,6 +269,7 @@ class LoadStringSlowPathARM : public SlowPathCode { __ LoadImmediate(calling_convention.GetRegisterAt(0), instruction_->GetStringIndex()); arm_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0)); RestoreLiveRegisters(codegen, locations); @@ -286,15 +299,6 @@ class TypeCheckSlowPathARM : public SlowPathCode { CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); __ Bind(GetEntryLabel()); - if (instruction_->IsCheckCast()) { - // The codegen for the instruction overwrites `temp`, so put it back in place. - Register obj = locations->InAt(0).AsRegister<Register>(); - Register temp = locations->GetTemp(0).AsRegister<Register>(); - uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - __ LoadFromOffset(kLoadWord, temp, obj, class_offset); - __ MaybeUnpoisonHeapReference(temp); - } - if (!is_fatal_) { SaveLiveRegisters(codegen, locations); } @@ -315,6 +319,8 @@ class TypeCheckSlowPathARM : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes< + kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>(); arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0)); } else { DCHECK(instruction_->IsCheckCast()); @@ -322,6 +328,7 @@ class TypeCheckSlowPathARM : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>(); } if (!is_fatal_) { @@ -354,6 +361,7 @@ class DeoptimizationSlowPathARM : public SlowPathCode { uint32_t dex_pc = deoptimize->GetDexPc(); CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickDeoptimize, void, void>(); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM"; } @@ -396,6 +404,7 @@ class ArraySetSlowPathARM : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>(); RestoreLiveRegisters(codegen, locations); __ b(GetExitLabel()); } @@ -408,6 +417,221 @@ class ArraySetSlowPathARM : public SlowPathCode { DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM); }; +// Slow path generating a read barrier for a heap reference. +class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode { + public: + ReadBarrierForHeapReferenceSlowPathARM(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) + : instruction_(instruction), + out_(out), + ref_(ref), + obj_(obj), + offset_(offset), + index_(index) { + DCHECK(kEmitCompilerReadBarrier); + // If `obj` is equal to `out` or `ref`, it means the initial object + // has been overwritten by (or after) the heap object reference load + // to be instrumented, e.g.: + // + // __ LoadFromOffset(kLoadWord, out, out, offset); + // codegen_->GenerateReadBarrier(instruction, out_loc, out_loc, out_loc, offset); + // + // In that case, we have lost the information about the original + // object, and the emitted read barrier cannot work properly. 
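The DCHECKs that follow enforce this non-aliasing requirement. A rough standalone illustration of why the holder must survive the fast-path load, with hypothetical types and a placeholder standing in for the pReadBarrierSlow entry point:

  #include <cstddef>

  // Hypothetical types; this models the constraint, not ART's code generator.
  struct Obj { Obj* field; };

  // Placeholder for the slow-path entry point: given the holder and the field
  // offset, it re-reads the field and returns the to-space reference.
  static Obj* ReadBarrierSlow(Obj* holder, size_t offset) {
    return *reinterpret_cast<Obj**>(reinterpret_cast<char*>(holder) + offset);
  }

  static Obj* LoadFieldWithReadBarrier(Obj* holder) {
    Obj* out = holder->field;  // fast-path load into its own location
    // Had `out` been materialized into the same register as `holder`, the
    // holder would already be gone here and the slow path below could not be
    // given the (holder, offset) pair it needs.
    out = ReadBarrierSlow(holder, offsetof(Obj, field));
    return out;
  }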
+ DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out; + DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref; + } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); + LocationSummary* locations = instruction_->GetLocations(); + Register reg_out = out_.AsRegister<Register>(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); + DCHECK(!instruction_->IsInvoke() || + (instruction_->IsInvokeStaticOrDirect() && + instruction_->GetLocations()->Intrinsified())); + + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + // We may have to change the index's value, but as `index_` is a + // constant member (like other "inputs" of this slow path), + // introduce a copy of it, `index`. + Location index = index_; + if (index_.IsValid()) { + // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject. + if (instruction_->IsArrayGet()) { + // Compute the actual memory offset and store it in `index`. + Register index_reg = index_.AsRegister<Register>(); + DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg)); + if (codegen->IsCoreCalleeSaveRegister(index_reg)) { + // We are about to change the value of `index_reg` (see the + // calls to art::arm::Thumb2Assembler::Lsl and + // art::arm::Thumb2Assembler::AddConstant below), but it has + // not been saved by the previous call to + // art::SlowPathCode::SaveLiveRegisters, as it is a + // callee-save register -- + // art::SlowPathCode::SaveLiveRegisters does not consider + // callee-save registers, as it has been designed with the + // assumption that callee-save registers are supposed to be + // handled by the called function. So, as a callee-save + // register, `index_reg` _would_ eventually be saved onto + // the stack, but it would be too late: we would have + // changed its value earlier. Therefore, we manually save + // it here into another freely available register, + // `free_reg`, chosen of course among the caller-save + // registers (as a callee-save `free_reg` register would + // exhibit the same problem). + // + // Note we could have requested a temporary register from + // the register allocator instead; but we prefer not to, as + // this is a slow path, and we know we can find a + // caller-save register that is available. + Register free_reg = FindAvailableCallerSaveRegister(codegen); + __ Mov(free_reg, index_reg); + index_reg = free_reg; + index = Location::RegisterLocation(index_reg); + } else { + // The initial register stored in `index_` has already been + // saved in the call to art::SlowPathCode::SaveLiveRegisters + // (as it is not a callee-save register), so we can freely + // use it. + } + // Shifting the index value contained in `index_reg` by the scale + // factor (2) cannot overflow in practice, as the runtime is + // unable to allocate object arrays with a size larger than + // 2^26 - 1 (that is, 2^28 - 4 bytes). 
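The instructions just below compute the element address in two steps: scale the index by the 4-byte size of a compressed heap reference (a left shift by 2, which the TIMES_4 scale factor denotes), then add the array data offset. As plain arithmetic, with illustrative constants rather than ART's:

  #include <cstdint>

  // element_offset = data_offset + (index << 2): a compressed heap reference is
  // 4 bytes, so scaling by TIMES_4 is a left shift by 2.
  constexpr uint32_t ElementOffset(uint32_t data_offset, uint32_t index) {
    return data_offset + (index << 2);
  }

  static_assert(ElementOffset(/* data_offset */ 12, /* index */ 3) == 24,
                "12 + 3 * 4 bytes");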
+ __ Lsl(index_reg, index_reg, TIMES_4); + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + __ AddConstant(index_reg, index_reg, offset_); + } else { + DCHECK(instruction_->IsInvoke()); + DCHECK(instruction_->GetLocations()->Intrinsified()); + DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) || + (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile)) + << instruction_->AsInvoke()->GetIntrinsic(); + DCHECK_EQ(offset_, 0U); + DCHECK(index_.IsRegisterPair()); + // UnsafeGet's offset location is a register pair, the low + // part contains the correct offset. + index = index_.ToLow(); + } + } + + // We're moving two or three locations to locations that could + // overlap, so we need a parallel move resolver. + InvokeRuntimeCallingConvention calling_convention; + HParallelMove parallel_move(codegen->GetGraph()->GetArena()); + parallel_move.AddMove(ref_, + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + Primitive::kPrimNot, + nullptr); + parallel_move.AddMove(obj_, + Location::RegisterLocation(calling_convention.GetRegisterAt(1)), + Primitive::kPrimNot, + nullptr); + if (index.IsValid()) { + parallel_move.AddMove(index, + Location::RegisterLocation(calling_convention.GetRegisterAt(2)), + Primitive::kPrimInt, + nullptr); + codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); + } else { + codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); + __ LoadImmediate(calling_convention.GetRegisterAt(2), offset_); + } + arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierSlow), + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes< + kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>(); + arm_codegen->Move32(out_, Location::RegisterLocation(R0)); + + RestoreLiveRegisters(codegen, locations); + __ b(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierForHeapReferenceSlowPathARM"; } + + private: + Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) { + size_t ref = static_cast<int>(ref_.AsRegister<Register>()); + size_t obj = static_cast<int>(obj_.AsRegister<Register>()); + for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) { + if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) { + return static_cast<Register>(i); + } + } + // We shall never fail to find a free caller-save register, as + // there are more than two core caller-save registers on ARM + // (meaning it is possible to find one which is different from + // `ref` and `obj`). + DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u); + LOG(FATAL) << "Could not find a free caller-save register"; + UNREACHABLE(); + } + + HInstruction* const instruction_; + const Location out_; + const Location ref_; + const Location obj_; + const uint32_t offset_; + // An additional location containing an index to an array. + // Only used for HArrayGet and the UnsafeGetObject & + // UnsafeGetObjectVolatile intrinsics. + const Location index_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathARM); +}; + +// Slow path generating a read barrier for a GC root.
+class ReadBarrierForRootSlowPathARM : public SlowPathCode { + public: + ReadBarrierForRootSlowPathARM(HInstruction* instruction, Location out, Location root) + : instruction_(instruction), out_(out), root_(root) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + Register reg_out = out_.AsRegister<Register>(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); + DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()); + + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConvention calling_convention; + CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); + arm_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_); + arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierForRootSlow), + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>(); + arm_codegen->Move32(out_, Location::RegisterLocation(R0)); + + RestoreLiveRegisters(codegen, locations); + __ b(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathARM"; } + + private: + HInstruction* const instruction_; + const Location out_; + const Location root_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARM); +}; + #undef __ #define __ down_cast<ArmAssembler*>(GetAssembler())-> @@ -581,7 +805,7 @@ Location CodeGeneratorARM::AllocateFreeRegister(Primitive::Type type) const { LOG(FATAL) << "Unreachable type " << type; } - return Location(); + return Location::NoLocation(); } void CodeGeneratorARM::SetupBlockedRegisters(bool is_baseline) const { @@ -820,7 +1044,7 @@ Location InvokeDexCallingConventionVisitorARM::GetNextLocation(Primitive::Type t LOG(FATAL) << "Unexpected parameter type " << type; break; } - return Location(); + return Location::NoLocation(); } Location InvokeDexCallingConventionVisitorARM::GetReturnLocation(Primitive::Type type) const { @@ -847,7 +1071,7 @@ Location InvokeDexCallingConventionVisitorARM::GetReturnLocation(Primitive::Type } case Primitive::kPrimVoid: - return Location(); + return Location::NoLocation(); } UNREACHABLE(); @@ -1762,29 +1986,39 @@ void LocationsBuilderARM::VisitInvokeInterface(HInvokeInterface* invoke) { void InstructionCodeGeneratorARM::VisitInvokeInterface(HInvokeInterface* invoke) { // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError. - Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>(); + LocationSummary* locations = invoke->GetLocations(); + Register temp = locations->GetTemp(0).AsRegister<Register>(); + Register hidden_reg = locations->GetTemp(1).AsRegister<Register>(); uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset( invoke->GetImtIndex() % mirror::Class::kImtSize, kArmPointerSize).Uint32Value(); - LocationSummary* locations = invoke->GetLocations(); Location receiver = locations->InAt(0); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - // Set the hidden argument. - __ LoadImmediate(invoke->GetLocations()->GetTemp(1).AsRegister<Register>(), - invoke->GetDexMethodIndex()); + // Set the hidden argument. This is safe to do this here, as R12 + // won't be modified thereafter, before the `blx` (call) instruction. 
+ DCHECK_EQ(R12, hidden_reg); + __ LoadImmediate(hidden_reg, invoke->GetDexMethodIndex()); - // temp = object->GetClass(); if (receiver.IsStackSlot()) { __ LoadFromOffset(kLoadWord, temp, SP, receiver.GetStackIndex()); + // /* HeapReference<Class> */ temp = temp->klass_ __ LoadFromOffset(kLoadWord, temp, temp, class_offset); } else { + // /* HeapReference<Class> */ temp = receiver->klass_ __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset); } codegen_->MaybeRecordImplicitNullCheck(invoke); + // Instead of simply (possibly) unpoisoning `temp` here, we should + // emit a read barrier for the previous class reference load. + // However this is not required in practice, as this is an + // intermediate/temporary reference and because the current + // concurrent copying collector keeps the from-space memory + // intact/accessible until the end of the marking phase (the + // concurrent copying collector may not in the future). __ MaybeUnpoisonHeapReference(temp); // temp = temp->GetImtEntryAt(method_offset); - uint32_t entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset( - kArmWordSize).Int32Value(); + uint32_t entry_point = + ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmWordSize).Int32Value(); __ LoadFromOffset(kLoadWord, temp, temp, method_offset); // LR = temp->GetEntryPoint(); __ LoadFromOffset(kLoadWord, LR, temp, entry_point); @@ -2188,6 +2422,7 @@ void InstructionCodeGeneratorARM::VisitTypeConversion(HTypeConversion* conversio conversion, conversion->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickF2l, int64_t, float>(); break; case Primitive::kPrimDouble: @@ -2196,6 +2431,7 @@ void InstructionCodeGeneratorARM::VisitTypeConversion(HTypeConversion* conversio conversion, conversion->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickD2l, int64_t, double>(); break; default: @@ -2241,6 +2477,7 @@ void InstructionCodeGeneratorARM::VisitTypeConversion(HTypeConversion* conversio conversion, conversion->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickL2f, float, int64_t>(); break; case Primitive::kPrimDouble: @@ -2763,6 +3000,7 @@ void InstructionCodeGeneratorARM::VisitDiv(HDiv* div) { DCHECK_EQ(R0, out.AsRegister<Register>()); codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pIdivmod), div, div->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickIdivmod, int32_t, int32_t, int32_t>(); } break; } @@ -2777,6 +3015,7 @@ void InstructionCodeGeneratorARM::VisitDiv(HDiv* div) { DCHECK_EQ(R1, out.AsRegisterPairHigh<Register>()); codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLdiv), div, div->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>(); break; } @@ -2905,22 +3144,26 @@ void InstructionCodeGeneratorARM::VisitRem(HRem* rem) { DCHECK_EQ(R1, out.AsRegister<Register>()); codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pIdivmod), rem, rem->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickIdivmod, int32_t, int32_t, int32_t>(); } break; } case Primitive::kPrimLong: { codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLmod), rem, rem->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>(); break; } case Primitive::kPrimFloat: { codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pFmodf), rem, rem->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickFmodf, float, float, float>(); break; } case Primitive::kPrimDouble: { codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pFmod), rem, rem->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickFmod, double, double, double>(); break; } @@ -3139,7 +3382,19 @@ void 
InstructionCodeGeneratorARM::HandleShift(HBinaryOperation* op) { __ mov(o_l, ShifterOperand(high)); __ LoadImmediate(o_h, 0); } - } else { // shift_value < 32 + } else if (shift_value == 1) { + if (op->IsShl()) { + __ Lsls(o_l, low, 1); + __ adc(o_h, high, ShifterOperand(high)); + } else if (op->IsShr()) { + __ Asrs(o_h, high, 1); + __ Rrx(o_l, low); + } else { + __ Lsrs(o_h, high, 1); + __ Rrx(o_l, low); + } + } else { + DCHECK(2 <= shift_value && shift_value < 32) << shift_value; if (op->IsShl()) { __ Lsl(o_h, high, shift_value); __ orr(o_h, o_h, ShifterOperand(low, LSR, 32 - shift_value)); @@ -3191,20 +3446,19 @@ void LocationsBuilderARM::VisitNewInstance(HNewInstance* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); InvokeRuntimeCallingConvention calling_convention; - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); locations->SetOut(Location::RegisterLocation(R0)); } void InstructionCodeGeneratorARM::VisitNewInstance(HNewInstance* instruction) { - InvokeRuntimeCallingConvention calling_convention; - __ LoadImmediate(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex()); // Note: if heap poisoning is enabled, the entry point takes cares // of poisoning the reference. codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>(); } void LocationsBuilderARM::VisitNewArray(HNewArray* instruction) { @@ -3226,6 +3480,7 @@ void InstructionCodeGeneratorARM::VisitNewArray(HNewArray* instruction) { instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>(); } void LocationsBuilderARM::VisitParameterValue(HParameterValue* instruction) { @@ -3407,6 +3662,9 @@ void InstructionCodeGeneratorARM::GenerateWideAtomicLoad(Register addr, Register out_lo, Register out_hi) { if (offset != 0) { + // Ensure `out_lo` is different from `addr`, so that loading + // `offset` into `out_lo` does not clutter `addr`. + DCHECK_NE(out_lo, addr); __ LoadImmediate(out_lo, offset); __ add(IP, addr, ShifterOperand(out_lo)); addr = IP; @@ -3594,14 +3852,26 @@ void InstructionCodeGeneratorARM::HandleFieldSet(HInstruction* instruction, void LocationsBuilderARM::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) { DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); + + bool object_field_get_with_read_barrier = + kEmitCompilerReadBarrier && (field_info.GetFieldType() == Primitive::kPrimNot); LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetArena()) LocationSummary(instruction, + object_field_get_with_read_barrier ? 
+ LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); bool volatile_for_double = field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimDouble) && !codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd(); - bool overlap = field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimLong); + // The output overlaps in case of volatile long: we don't want the + // code generated by GenerateWideAtomicLoad to overwrite the + // object's location. Likewise, in the case of an object field get + // with read barriers enabled, we do not want the load to overwrite + // the object's location, as we need it to emit the read barrier. + bool overlap = (field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimLong)) || + object_field_get_with_read_barrier; if (Primitive::IsFloatingPointType(instruction->GetType())) { locations->SetOut(Location::RequiresFpuRegister()); @@ -3667,7 +3937,8 @@ void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction, DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); LocationSummary* locations = instruction->GetLocations(); - Register base = locations->InAt(0).AsRegister<Register>(); + Location base_loc = locations->InAt(0); + Register base = base_loc.AsRegister<Register>(); Location out = locations->Out(); bool is_volatile = field_info.IsVolatile(); bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd(); @@ -3747,7 +4018,7 @@ void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction, } if (field_type == Primitive::kPrimNot) { - __ MaybeUnpoisonHeapReference(out.AsRegister<Register>()); + codegen_->MaybeGenerateReadBarrier(instruction, out, out, base_loc, offset); } } @@ -3891,20 +4162,31 @@ void InstructionCodeGeneratorARM::VisitNullCheck(HNullCheck* instruction) { } void LocationsBuilderARM::VisitArrayGet(HArrayGet* instruction) { + bool object_array_get_with_read_barrier = + kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot); LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetArena()) LocationSummary(instruction, + object_array_get_with_read_barrier ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (Primitive::IsFloatingPointType(instruction->GetType())) { locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } else { - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + // The output overlaps in the case of an object array get with + // read barriers enabled: we do not want the move to overwrite the + // array's location, as we need it to emit the read barrier. + locations->SetOut( + Location::RequiresRegister(), + object_array_get_with_read_barrier ? 
Location::kOutputOverlap : Location::kNoOutputOverlap); } } void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).AsRegister<Register>(); + Location obj_loc = locations->InAt(0); + Register obj = obj_loc.AsRegister<Register>(); Location index = locations->InAt(1); Primitive::Type type = instruction->GetType(); @@ -3967,8 +4249,9 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimInt: case Primitive::kPrimNot: { - static_assert(sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::HeapReference<mirror::Object> and int32_t have different sizes."); + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<mirror::Object> and int32_t have different sizes."); uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); Register out = locations->Out().AsRegister<Register>(); if (index.IsConstant()) { @@ -4031,8 +4314,17 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { codegen_->MaybeRecordImplicitNullCheck(instruction); if (type == Primitive::kPrimNot) { - Register out = locations->Out().AsRegister<Register>(); - __ MaybeUnpoisonHeapReference(out); + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); + Location out = locations->Out(); + if (index.IsConstant()) { + uint32_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; + codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset); + } else { + codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, data_offset, index); + } } } @@ -4041,11 +4333,16 @@ void LocationsBuilderARM::VisitArraySet(HArraySet* instruction) { bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); - bool may_need_runtime_call = instruction->NeedsTypeCheck(); + bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); + bool object_array_set_with_read_barrier = + kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( instruction, - may_need_runtime_call ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); + (may_need_runtime_call_for_type_check || object_array_set_with_read_barrier) ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (Primitive::IsFloatingPointType(value_type)) { @@ -4053,7 +4350,6 @@ void LocationsBuilderARM::VisitArraySet(HArraySet* instruction) { } else { locations->SetInAt(2, Location::RequiresRegister()); } - if (needs_write_barrier) { // Temporary registers for the write barrier. locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too. 
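For the array-set case above, two independent questions drive the location summary: does the store need a runtime type check (or a read-barrier-instrumented one), and does it need a card-marking write barrier. A small standalone sketch of those predicates, using plain booleans instead of ART's HInstruction queries; the read-barrier switch is an assumption of the sketch:

  // Plain-boolean stand-ins for the queries used above; illustrative only.
  constexpr bool kReadBarriersEnabled = false;

  struct ArraySetInfo {
    bool value_is_reference;   // component type is an object reference
    bool value_is_null_const;  // the stored value is a known null constant
    bool needs_type_check;     // static types cannot prove the store is safe
  };

  constexpr bool NeedsWriteBarrier(const ArraySetInfo& s) {
    return s.value_is_reference && !s.value_is_null_const;
  }

  constexpr bool MayNeedSlowPathCall(const ArraySetInfo& s) {
    // Either the runtime type check can fail, or read barriers push the
    // type-checking instrumentation onto the slow path.
    return s.needs_type_check || (kReadBarriersEnabled && s.value_is_reference);
  }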
@@ -4063,10 +4359,11 @@ void LocationsBuilderARM::VisitArraySet(HArraySet* instruction) { void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { LocationSummary* locations = instruction->GetLocations(); - Register array = locations->InAt(0).AsRegister<Register>(); + Location array_loc = locations->InAt(0); + Register array = array_loc.AsRegister<Register>(); Location index = locations->InAt(1); Primitive::Type value_type = instruction->GetComponentType(); - bool may_need_runtime_call = locations->CanCall(); + bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); @@ -4103,7 +4400,8 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { case Primitive::kPrimNot: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); - Register value = locations->InAt(2).AsRegister<Register>(); + Location value_loc = locations->InAt(2); + Register value = value_loc.AsRegister<Register>(); Register source = value; if (instruction->InputAt(2)->IsNullConstant()) { @@ -4117,6 +4415,8 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4)); __ StoreToOffset(kStoreWord, source, IP, data_offset); } + DCHECK(!needs_write_barrier); + DCHECK(!may_need_runtime_call_for_type_check); break; } @@ -4129,7 +4429,7 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { Label done; SlowPathCode* slow_path = nullptr; - if (may_need_runtime_call) { + if (may_need_runtime_call_for_type_check) { slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathARM(instruction); codegen_->AddSlowPath(slow_path); if (instruction->GetValueCanBeNull()) { @@ -4149,23 +4449,63 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { __ Bind(&non_zero); } - __ LoadFromOffset(kLoadWord, temp1, array, class_offset); - codegen_->MaybeRecordImplicitNullCheck(instruction); - __ MaybeUnpoisonHeapReference(temp1); - __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset); - __ LoadFromOffset(kLoadWord, temp2, value, class_offset); - // No need to poison/unpoison, we're comparing two poisoined references. - __ cmp(temp1, ShifterOperand(temp2)); - if (instruction->StaticTypeOfArrayIsObjectArray()) { - Label do_put; - __ b(&do_put, EQ); - __ MaybeUnpoisonHeapReference(temp1); - __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset); - // No need to poison/unpoison, we're comparing against null. 
- __ CompareAndBranchIfNonZero(temp1, slow_path->GetEntryLabel()); - __ Bind(&do_put); + if (kEmitCompilerReadBarrier) { + // When read barriers are enabled, the type checking + // instrumentation requires two read barriers: + // + // __ Mov(temp2, temp1); + // // /* HeapReference<Class> */ temp1 = temp1->component_type_ + // __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset); + // codegen_->GenerateReadBarrier( + // instruction, temp1_loc, temp1_loc, temp2_loc, component_offset); + // + // // /* HeapReference<Class> */ temp2 = value->klass_ + // __ LoadFromOffset(kLoadWord, temp2, value, class_offset); + // codegen_->GenerateReadBarrier( + // instruction, temp2_loc, temp2_loc, value_loc, class_offset, temp1_loc); + // + // __ cmp(temp1, ShifterOperand(temp2)); + // + // However, the second read barrier may trash `temp`, as it + // is a temporary register, and as such would not be saved + // along with live registers before calling the runtime (nor + // restored afterwards). So in this case, we bail out and + // delegate the work to the array set slow path. + // + // TODO: Extend the register allocator to support a new + // "(locally) live temp" location so as to avoid always + // going into the slow path when read barriers are enabled. + __ b(slow_path->GetEntryLabel()); } else { - __ b(slow_path->GetEntryLabel(), NE); + // /* HeapReference<Class> */ temp1 = array->klass_ + __ LoadFromOffset(kLoadWord, temp1, array, class_offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ MaybeUnpoisonHeapReference(temp1); + + // /* HeapReference<Class> */ temp1 = temp1->component_type_ + __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset); + // /* HeapReference<Class> */ temp2 = value->klass_ + __ LoadFromOffset(kLoadWord, temp2, value, class_offset); + // If heap poisoning is enabled, no need to unpoison `temp1` + // nor `temp2`, as we are comparing two poisoned references. + __ cmp(temp1, ShifterOperand(temp2)); + + if (instruction->StaticTypeOfArrayIsObjectArray()) { + Label do_put; + __ b(&do_put, EQ); + // If heap poisoning is enabled, the `temp1` reference has + // not been unpoisoned yet; unpoison it now. + __ MaybeUnpoisonHeapReference(temp1); + + // /* HeapReference<Class> */ temp1 = temp1->super_class_ + __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset); + // If heap poisoning is enabled, no need to unpoison + // `temp1`, as we are comparing against null below. 
+ __ CompareAndBranchIfNonZero(temp1, slow_path->GetEntryLabel()); + __ Bind(&do_put); + } else { + __ b(slow_path->GetEntryLabel(), NE); + } } } @@ -4189,7 +4529,7 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { __ StoreToOffset(kStoreWord, source, IP, data_offset); } - if (!may_need_runtime_call) { + if (!may_need_runtime_call_for_type_check) { codegen_->MaybeRecordImplicitNullCheck(instruction); } @@ -4618,7 +4958,8 @@ void LocationsBuilderARM::VisitLoadClass(HLoadClass* cls) { CodeGenerator::CreateLoadClassLocationSummary( cls, Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - Location::RegisterLocation(R0)); + Location::RegisterLocation(R0), + /* code_generator_supports_read_barrier */ true); } void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) { @@ -4629,33 +4970,59 @@ void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) { cls, cls->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>(); return; } - Register out = locations->Out().AsRegister<Register>(); + Location out_loc = locations->Out(); + Register out = out_loc.AsRegister<Register>(); Register current_method = locations->InAt(0).AsRegister<Register>(); + if (cls->IsReferrersClass()) { DCHECK(!cls->CanCallRuntime()); DCHECK(!cls->MustGenerateClinitCheck()); - __ LoadFromOffset( - kLoadWord, out, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); + uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value(); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_) + __ AddConstant(out, current_method, declaring_class_offset); + // /* mirror::Class* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc); + } else { + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + __ LoadFromOffset(kLoadWord, out, current_method, declaring_class_offset); + } } else { - DCHECK(cls->CanCallRuntime()); + // /* GcRoot<mirror::Class>[] */ out = + // current_method.ptr_sized_fields_->dex_cache_resolved_types_ __ LoadFromOffset(kLoadWord, out, current_method, ArtMethod::DexCacheResolvedTypesOffset(kArmPointerSize).Int32Value()); - __ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())); - // TODO: We will need a read barrier here. 
- SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM( - cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); - codegen_->AddSlowPath(slow_path); - __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel()); - if (cls->MustGenerateClinitCheck()) { - GenerateClassInitializationCheck(slow_path, out); + size_t cache_offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex()); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::Class>* */ out = &out[type_index] + __ AddConstant(out, out, cache_offset); + // /* mirror::Class* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc); } else { - __ Bind(slow_path->GetExitLabel()); + // /* GcRoot<mirror::Class> */ out = out[type_index] + __ LoadFromOffset(kLoadWord, out, out, cache_offset); + } + + if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) { + DCHECK(cls->CanCallRuntime()); + SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM( + cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); + codegen_->AddSlowPath(slow_path); + if (!cls->IsInDexCache()) { + __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel()); + } + if (cls->MustGenerateClinitCheck()) { + GenerateClassInitializationCheck(slow_path, out); + } else { + __ Bind(slow_path->GetExitLabel()); + } } } } @@ -4701,13 +5068,35 @@ void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) { codegen_->AddSlowPath(slow_path); LocationSummary* locations = load->GetLocations(); - Register out = locations->Out().AsRegister<Register>(); + Location out_loc = locations->Out(); + Register out = out_loc.AsRegister<Register>(); Register current_method = locations->InAt(0).AsRegister<Register>(); - __ LoadFromOffset( - kLoadWord, out, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); + + uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value(); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_) + __ AddConstant(out, current_method, declaring_class_offset); + // /* mirror::Class* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc); + } else { + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + __ LoadFromOffset(kLoadWord, out, current_method, declaring_class_offset); + } + + // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_ __ LoadFromOffset(kLoadWord, out, out, mirror::Class::DexCacheStringsOffset().Int32Value()); - __ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(load->GetStringIndex())); - // TODO: We will need a read barrier here. 
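Both LoadClass and LoadString now follow the same GC-root pattern: without read barriers the root is loaded directly, while with read barriers the root's address is formed first so the barrier can read through it. A simplified sketch with hypothetical types and a placeholder for the runtime hook:

  // Hypothetical types; the placeholder stands in for pReadBarrierForRootSlow.
  struct ObjectRef {};
  struct GcRootSlot { ObjectRef* ref; };

  static ObjectRef* ReadBarrierForRoot(GcRootSlot* root) {
    return root->ref;  // placeholder: the runtime would mark/forward the root
  }

  static ObjectRef* LoadGcRoot(GcRootSlot* slot, bool read_barriers_enabled) {
    if (read_barriers_enabled) {
      // The address of the root slot is what the barrier consumes, which is
      // why the generated code forms it with AddConstant before the call.
      return ReadBarrierForRoot(slot);
    }
    return slot->ref;  // plain load of the root
  }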
+ + size_t cache_offset = CodeGenerator::GetCacheOffset(load->GetStringIndex()); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::String>* */ out = &out[string_index] + __ AddConstant(out, out, cache_offset); + // /* mirror::String* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc); + } else { + // /* GcRoot<mirror::String> */ out = out[string_index] + __ LoadFromOffset(kLoadWord, out, out, cache_offset); + } + __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } @@ -4746,45 +5135,50 @@ void LocationsBuilderARM::VisitThrow(HThrow* instruction) { void InstructionCodeGeneratorARM::VisitThrow(HThrow* instruction) { codegen_->InvokeRuntime( QUICK_ENTRY_POINT(pDeliverException), instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); } void LocationsBuilderARM::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; - switch (instruction->GetTypeCheckKind()) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: case TypeCheckKind::kArrayObjectCheck: - call_kind = LocationSummary::kNoCall; + call_kind = + kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; break; + case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: - call_kind = LocationSummary::kCall; - break; - case TypeCheckKind::kArrayCheck: call_kind = LocationSummary::kCallOnSlowPath; break; } + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); - if (call_kind != LocationSummary::kCall) { - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - // The out register is used as a temporary, so it overlaps with the inputs. - // Note that TypeCheckSlowPathARM uses this register too. - locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); - } else { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); - locations->SetOut(Location::RegisterLocation(R0)); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + // The "out" register is used as a temporary, so it overlaps with the inputs. + // Note that TypeCheckSlowPathARM uses this register too. + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + // When read barriers are enabled, we need a temporary register for + // some cases. 
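The extra temporary requested below exists because each superclass load in the hierarchy-walking checks is itself a heap reference load that may need a read barrier, and the barrier needs the previous holder. Ignoring the barriers, the walk that kClassHierarchyCheck emits corresponds to this plain loop over a hypothetical class descriptor:

  // Hypothetical class descriptor; not ART's mirror::Class.
  struct ClassDesc { const ClassDesc* super_class; };

  static bool InstanceOfByHierarchyWalk(const ClassDesc* obj_klass, const ClassDesc* target) {
    // Every dereference of super_class below is a heap reference load in the
    // generated code; with read barriers, the previous value is first copied
    // into the reserved temporary so the barrier can be emitted for that load.
    for (const ClassDesc* k = obj_klass; k != nullptr; k = k->super_class) {
      if (k == target) return true;
    }
    return false;
  }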
+ if (kEmitCompilerReadBarrier && + (type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck)) { + locations->AddTemp(Location::RequiresRegister()); } } void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).AsRegister<Register>(); + Location obj_loc = locations->InAt(0); + Register obj = obj_loc.AsRegister<Register>(); Register cls = locations->InAt(1).AsRegister<Register>(); - Register out = locations->Out().AsRegister<Register>(); + Location out_loc = locations->Out(); + Register out = out_loc.AsRegister<Register>(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); @@ -4798,15 +5192,9 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { __ CompareAndBranchIfZero(obj, &zero); } - // In case of an interface/unresolved check, we put the object class into the object register. - // This is safe, as the register is caller-save, and the object must be in another - // register if it survives the runtime call. - Register target = (instruction->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) || - (instruction->GetTypeCheckKind() == TypeCheckKind::kUnresolvedCheck) - ? obj - : out; - __ LoadFromOffset(kLoadWord, target, obj, class_offset); - __ MaybeUnpoisonHeapReference(target); + // /* HeapReference<Class> */ out = obj->klass_ + __ LoadFromOffset(kLoadWord, out, obj, class_offset); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, class_offset); switch (instruction->GetTypeCheckKind()) { case TypeCheckKind::kExactCheck: { @@ -4817,13 +5205,23 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { __ b(&done); break; } + case TypeCheckKind::kAbstractClassCheck: { // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. Label loop; __ Bind(&loop); + Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `out` into `temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp = temp_loc.AsRegister<Register>(); + __ Mov(temp, out); + } + // /* HeapReference<Class> */ out = out->super_class_ __ LoadFromOffset(kLoadWord, out, out, super_offset); - __ MaybeUnpoisonHeapReference(out); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset); // If `out` is null, we use it for the result, and jump to `done`. __ CompareAndBranchIfZero(out, &done); __ cmp(out, ShifterOperand(cls)); @@ -4834,14 +5232,24 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { } break; } + case TypeCheckKind::kClassHierarchyCheck: { // Walk over the class hierarchy to find a match. Label loop, success; __ Bind(&loop); __ cmp(out, ShifterOperand(cls)); __ b(&success, EQ); + Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `out` into `temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. 
+ Register temp = temp_loc.AsRegister<Register>(); + __ Mov(temp, out); + } + // /* HeapReference<Class> */ out = out->super_class_ __ LoadFromOffset(kLoadWord, out, out, super_offset); - __ MaybeUnpoisonHeapReference(out); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset); __ CompareAndBranchIfNonZero(out, &loop); // If `out` is null, we use it for the result, and jump to `done`. __ b(&done); @@ -4852,14 +5260,24 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { } break; } + case TypeCheckKind::kArrayObjectCheck: { // Do an exact check. Label exact_check; __ cmp(out, ShifterOperand(cls)); __ b(&exact_check, EQ); - // Otherwise, we need to check that the object's class is a non primitive array. + // Otherwise, we need to check that the object's class is a non-primitive array. + Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `out` into `temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp = temp_loc.AsRegister<Register>(); + __ Mov(temp, out); + } + // /* HeapReference<Class> */ out = out->component_type_ __ LoadFromOffset(kLoadWord, out, out, component_offset); - __ MaybeUnpoisonHeapReference(out); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, component_offset); // If `out` is null, we use it for the result, and jump to `done`. __ CompareAndBranchIfZero(out, &done); __ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset); @@ -4870,11 +5288,12 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { __ b(&done); break; } + case TypeCheckKind::kArrayCheck: { __ cmp(out, ShifterOperand(cls)); DCHECK(locations->OnlyCallsOnSlowPath()); - slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM( - instruction, /* is_fatal */ false); + slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM(instruction, + /* is_fatal */ false); codegen_->AddSlowPath(slow_path); __ b(slow_path->GetEntryLabel(), NE); __ LoadImmediate(out, 1); @@ -4883,13 +5302,25 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { } break; } + case TypeCheckKind::kUnresolvedCheck: - case TypeCheckKind::kInterfaceCheck: - default: { - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial), - instruction, - instruction->GetDexPc(), - nullptr); + case TypeCheckKind::kInterfaceCheck: { + // Note that we indeed only call on slow path, but we always go + // into the slow path for the unresolved & interface check + // cases. + // + // We cannot directly call the InstanceofNonTrivial runtime + // entry point without resorting to a type checking slow path + // here (i.e. by calling InvokeRuntime directly), as it would + // require to assign fixed registers for the inputs of this + // HInstanceOf instruction (following the runtime calling + // convention), which might be cluttered by the potential first + // read barrier emission at the beginning of this method. 
+ DCHECK(locations->OnlyCallsOnSlowPath()); + slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM(instruction, + /* is_fatal */ false); + codegen_->AddSlowPath(slow_path); + __ b(slow_path->GetEntryLabel()); if (zero.IsLinked()) { __ b(&done); } @@ -4915,57 +5346,61 @@ void LocationsBuilderARM::VisitCheckCast(HCheckCast* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; bool throws_into_catch = instruction->CanThrowIntoCatchBlock(); - switch (instruction->GetTypeCheckKind()) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: case TypeCheckKind::kArrayObjectCheck: - call_kind = throws_into_catch - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall; + call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path. break; + case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: - call_kind = LocationSummary::kCall; - break; - case TypeCheckKind::kArrayCheck: call_kind = LocationSummary::kCallOnSlowPath; break; } - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( - instruction, call_kind); - if (call_kind != LocationSummary::kCall) { - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - // Note that TypeCheckSlowPathARM uses this register too. + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + // Note that TypeCheckSlowPathARM uses this "temp" register too. + locations->AddTemp(Location::RequiresRegister()); + // When read barriers are enabled, we need an additional temporary + // register for some cases. + if (kEmitCompilerReadBarrier && + (type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck)) { locations->AddTemp(Location::RequiresRegister()); - } else { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); } } void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).AsRegister<Register>(); + Location obj_loc = locations->InAt(0); + Register obj = obj_loc.AsRegister<Register>(); Register cls = locations->InAt(1).AsRegister<Register>(); - Register temp = locations->WillCall() - ? 
Register(kNoRegister) - : locations->GetTemp(0).AsRegister<Register>(); - + Location temp_loc = locations->GetTemp(0); + Register temp = temp_loc.AsRegister<Register>(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); - SlowPathCode* slow_path = nullptr; - if (!locations->WillCall()) { - slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM( - instruction, !locations->CanCall()); - codegen_->AddSlowPath(slow_path); - } + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + bool is_type_check_slow_path_fatal = + (type_check_kind == TypeCheckKind::kExactCheck || + type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck) && + !instruction->CanThrowIntoCatchBlock(); + SlowPathCode* type_check_slow_path = + new (GetGraph()->GetArena()) TypeCheckSlowPathARM(instruction, + is_type_check_slow_path_fatal); + codegen_->AddSlowPath(type_check_slow_path); Label done; // Avoid null check if we know obj is not null. @@ -4973,76 +5408,159 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { __ CompareAndBranchIfZero(obj, &done); } - if (locations->WillCall()) { - __ LoadFromOffset(kLoadWord, obj, obj, class_offset); - __ MaybeUnpoisonHeapReference(obj); - } else { - __ LoadFromOffset(kLoadWord, temp, obj, class_offset); - __ MaybeUnpoisonHeapReference(temp); - } + // /* HeapReference<Class> */ temp = obj->klass_ + __ LoadFromOffset(kLoadWord, temp, obj, class_offset); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); - switch (instruction->GetTypeCheckKind()) { + switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kArrayCheck: { __ cmp(temp, ShifterOperand(cls)); // Jump to slow path for throwing the exception or doing a // more involved array check. - __ b(slow_path->GetEntryLabel(), NE); + __ b(type_check_slow_path->GetEntryLabel(), NE); break; } + case TypeCheckKind::kAbstractClassCheck: { // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. - Label loop; + Label loop, compare_classes; __ Bind(&loop); + Location temp2_loc = + kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `temp` into `temp2` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp2 = temp2_loc.AsRegister<Register>(); + __ Mov(temp2, temp); + } + // /* HeapReference<Class> */ temp = temp->super_class_ __ LoadFromOffset(kLoadWord, temp, temp, super_offset); - __ MaybeUnpoisonHeapReference(temp); - // Jump to the slow path to throw the exception. - __ CompareAndBranchIfZero(temp, slow_path->GetEntryLabel()); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset); + + // If the class reference currently in `temp` is not null, jump + // to the `compare_classes` label to compare it with the checked + // class. + __ CompareAndBranchIfNonZero(temp, &compare_classes); + // Otherwise, jump to the slow path to throw the exception. 
+ // + // But before, move back the object's class into `temp` before + // going into the slow path, as it has been overwritten in the + // meantime. + // /* HeapReference<Class> */ temp = obj->klass_ + __ LoadFromOffset(kLoadWord, temp, obj, class_offset); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ b(type_check_slow_path->GetEntryLabel()); + + __ Bind(&compare_classes); __ cmp(temp, ShifterOperand(cls)); __ b(&loop, NE); break; } + case TypeCheckKind::kClassHierarchyCheck: { // Walk over the class hierarchy to find a match. Label loop; __ Bind(&loop); __ cmp(temp, ShifterOperand(cls)); __ b(&done, EQ); + + Location temp2_loc = + kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `temp` into `temp2` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp2 = temp2_loc.AsRegister<Register>(); + __ Mov(temp2, temp); + } + // /* HeapReference<Class> */ temp = temp->super_class_ __ LoadFromOffset(kLoadWord, temp, temp, super_offset); - __ MaybeUnpoisonHeapReference(temp); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset); + + // If the class reference currently in `temp` is not null, jump + // back at the beginning of the loop. __ CompareAndBranchIfNonZero(temp, &loop); - // Jump to the slow path to throw the exception. - __ b(slow_path->GetEntryLabel()); + // Otherwise, jump to the slow path to throw the exception. + // + // But before, move back the object's class into `temp` before + // going into the slow path, as it has been overwritten in the + // meantime. + // /* HeapReference<Class> */ temp = obj->klass_ + __ LoadFromOffset(kLoadWord, temp, obj, class_offset); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ b(type_check_slow_path->GetEntryLabel()); break; } + case TypeCheckKind::kArrayObjectCheck: { // Do an exact check. + Label check_non_primitive_component_type; __ cmp(temp, ShifterOperand(cls)); __ b(&done, EQ); - // Otherwise, we need to check that the object's class is a non primitive array. + + // Otherwise, we need to check that the object's class is a non-primitive array. + Location temp2_loc = + kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `temp` into `temp2` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp2 = temp2_loc.AsRegister<Register>(); + __ Mov(temp2, temp); + } + // /* HeapReference<Class> */ temp = temp->component_type_ __ LoadFromOffset(kLoadWord, temp, temp, component_offset); - __ MaybeUnpoisonHeapReference(temp); - __ CompareAndBranchIfZero(temp, slow_path->GetEntryLabel()); + codegen_->MaybeGenerateReadBarrier( + instruction, temp_loc, temp_loc, temp2_loc, component_offset); + + // If the component type is not null (i.e. the object is indeed + // an array), jump to label `check_non_primitive_component_type` + // to further check that this component type is not a primitive + // type. + __ CompareAndBranchIfNonZero(temp, &check_non_primitive_component_type); + // Otherwise, jump to the slow path to throw the exception. + // + // But before, move back the object's class into `temp` before + // going into the slow path, as it has been overwritten in the + // meantime. 
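// Editor's sketch (annotation, not part of this change): the behaviour that the
// surrounding kAbstractClassCheck check-cast sequence implements, as plain,
// self-contained C++. `Klass` is an illustrative stand-in for art::mirror::Class,
// and the C++ exception stands in for the throwing type check slow path.
#include <stdexcept>

struct Klass { const Klass* super_class_; };

void CheckCastAbstractClass(const Klass* obj_class, const Klass* cls) {
  // As with instanceof, the walk starts at the super class: `cls` is abstract,
  // so the object's exact class can never equal it.
  for (const Klass* k = obj_class->super_class_; k != nullptr; k = k->super_class_) {
    if (k == cls) {
      return;  // Cast succeeds; fall through to `done`.
    }
  }
  // Null super class reached: the generated code re-loads obj->klass_ into
  // `temp` (see just below) and branches to the throwing slow path.
  throw std::runtime_error("ClassCastException");
}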
+ // /* HeapReference<Class> */ temp = obj->klass_ + __ LoadFromOffset(kLoadWord, temp, obj, class_offset); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ b(type_check_slow_path->GetEntryLabel()); + + __ Bind(&check_non_primitive_component_type); __ LoadFromOffset(kLoadUnsignedHalfword, temp, temp, primitive_offset); - static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); - __ CompareAndBranchIfNonZero(temp, slow_path->GetEntryLabel()); + static_assert(Primitive::kPrimNot == 0, "Expected 0 for art::Primitive::kPrimNot"); + __ CompareAndBranchIfZero(temp, &done); + // Same comment as above regarding `temp` and the slow path. + // /* HeapReference<Class> */ temp = obj->klass_ + __ LoadFromOffset(kLoadWord, temp, obj, class_offset); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ b(type_check_slow_path->GetEntryLabel()); break; } + case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: - default: - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), - instruction, - instruction->GetDexPc(), - nullptr); + // We always go into the type check slow path for the unresolved & + // interface check cases. + // + // We cannot directly call the CheckCast runtime entry point + // without resorting to a type checking slow path here (i.e. by + // calling InvokeRuntime directly), as it would require to + // assign fixed registers for the inputs of this HInstanceOf + // instruction (following the runtime calling convention), which + // might be cluttered by the potential first read barrier + // emission at the beginning of this method. + __ b(type_check_slow_path->GetEntryLabel()); break; } __ Bind(&done); - if (slow_path != nullptr) { - __ Bind(slow_path->GetExitLabel()); - } + __ Bind(type_check_slow_path->GetExitLabel()); } void LocationsBuilderARM::VisitMonitorOperation(HMonitorOperation* instruction) { @@ -5058,6 +5576,11 @@ void InstructionCodeGeneratorARM::VisitMonitorOperation(HMonitorOperation* instr instruction, instruction->GetDexPc(), nullptr); + if (instruction->IsEnter()) { + CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); + } else { + CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>(); + } } void LocationsBuilderARM::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction, AND); } @@ -5216,6 +5739,82 @@ void InstructionCodeGeneratorARM::HandleBitwiseOperation(HBinaryOperation* instr } } +void CodeGeneratorARM::GenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { + DCHECK(kEmitCompilerReadBarrier); + + // If heap poisoning is enabled, the unpoisoning of the loaded + // reference will be carried out by the runtime within the slow + // path. + // + // Note that `ref` currently does not get unpoisoned (when heap + // poisoning is enabled), which is alright as the `ref` argument is + // not used by the artReadBarrierSlow entry point. + // + // TODO: Unpoison `ref` when it is used by artReadBarrierSlow. + SlowPathCode* slow_path = new (GetGraph()->GetArena()) + ReadBarrierForHeapReferenceSlowPathARM(instruction, out, ref, obj, offset, index); + AddSlowPath(slow_path); + + // TODO: When read barrier has a fast path, add it here. + /* Currently the read barrier call is inserted after the original load. + * However, if we have a fast path, we need to perform the load of obj.LockWord *before* the + * original load. 
This load-load ordering is required by the read barrier. + * The fast path/slow path (for Baker's algorithm) should look like: + * + * bool isGray = obj.LockWord & kReadBarrierMask; + * lfence; // load fence or artificial data dependence to prevent load-load reordering + * ref = obj.field; // this is the original load + * if (isGray) { + * ref = Mark(ref); // ideally the slow path just does Mark(ref) + * } + */ + + __ b(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + +void CodeGeneratorARM::MaybeGenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { + if (kEmitCompilerReadBarrier) { + // If heap poisoning is enabled, unpoisoning will be taken care of + // by the runtime within the slow path. + GenerateReadBarrier(instruction, out, ref, obj, offset, index); + } else if (kPoisonHeapReferences) { + __ UnpoisonHeapReference(out.AsRegister<Register>()); + } +} + +void CodeGeneratorARM::GenerateReadBarrierForRoot(HInstruction* instruction, + Location out, + Location root) { + DCHECK(kEmitCompilerReadBarrier); + + // Note that GC roots are not affected by heap poisoning, so we do + // not need to do anything special for this here. + SlowPathCode* slow_path = + new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathARM(instruction, out, root); + AddSlowPath(slow_path); + + // TODO: Implement a fast path for ReadBarrierForRoot, performing + // the following operation (for Baker's algorithm): + // + // if (thread.tls32_.is_gc_marking) { + // root = Mark(root); + // } + + __ b(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM::GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, MethodReference target_method) { @@ -5273,7 +5872,7 @@ void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, invoke->GetStringInitOffset()); break; case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: - callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ LoadImmediate(temp.AsRegister<Register>(), invoke->GetMethodAddress()); @@ -5288,7 +5887,7 @@ void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, LOG(FATAL) << "Unsupported"; UNREACHABLE(); case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { - Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); Register method_reg; Register reg = temp.AsRegister<Register>(); if (current_method.IsRegister()) { @@ -5299,10 +5898,11 @@ void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, method_reg = reg; __ LoadFromOffset(kLoadWord, reg, SP, kCurrentMethodStackOffset); } - // temp = current_method->dex_cache_resolved_methods_; - __ LoadFromOffset( - kLoadWord, reg, method_reg, ArtMethod::DexCacheResolvedMethodsOffset( - kArmPointerSize).Int32Value()); + // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_; + __ LoadFromOffset(kLoadWord, + reg, + method_reg, + ArtMethod::DexCacheResolvedMethodsOffset(kArmPointerSize).Int32Value()); // temp = temp[index_in_cache] uint32_t 
index_in_cache = invoke->GetTargetMethod().dex_method_index; __ LoadFromOffset(kLoadWord, reg, reg, CodeGenerator::GetCachePointerOffset(index_in_cache)); @@ -5346,10 +5946,17 @@ void CodeGeneratorARM::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp LocationSummary* locations = invoke->GetLocations(); Location receiver = locations->InAt(0); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - // temp = object->GetClass(); DCHECK(receiver.IsRegister()); + // /* HeapReference<Class> */ temp = receiver->klass_ __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset); MaybeRecordImplicitNullCheck(invoke); + // Instead of simply (possibly) unpoisoning `temp` here, we should + // emit a read barrier for the previous class reference load. + // However this is not required in practice, as this is an + // intermediate/temporary reference and because the current + // concurrent copying collector keeps the from-space memory + // intact/accessible until the end of the marking phase (the + // concurrent copying collector may not in the future). __ MaybeUnpoisonHeapReference(temp); // temp = temp->GetMethodAt(method_offset); uint32_t entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset( diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 32bfe0f0be..89de4f801d 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -373,6 +373,51 @@ class CodeGeneratorARM : public CodeGenerator { void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE; + // Generate a read barrier for a heap reference within `instruction`. + // + // A read barrier for an object reference read from the heap is + // implemented as a call to the artReadBarrierSlow runtime entry + // point, which is passed the values in locations `ref`, `obj`, and + // `offset`: + // + // mirror::Object* artReadBarrierSlow(mirror::Object* ref, + // mirror::Object* obj, + // uint32_t offset); + // + // The `out` location contains the value returned by + // artReadBarrierSlow. + // + // When `index` is provided (i.e. for array accesses), the offset + // value passed to artReadBarrierSlow is adjusted to take `index` + // into account. + void GenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // If read barriers are enabled, generate a read barrier for a heap reference. + // If heap poisoning is enabled, also unpoison the reference in `out`. + void MaybeGenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // Generate a read barrier for a GC root within `instruction`. + // + // A read barrier for an object reference GC root is implemented as + // a call to the artReadBarrierForRootSlow runtime entry point, + // which is passed the value in location `root`: + // + // mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root); + // + // The `out` location contains the value returned by + // artReadBarrierForRootSlow. 
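// Editor's sketch (annotation, not part of this change): a conceptual,
// self-contained model of the Baker-style fast path sketched in the TODO
// comments earlier in code_generator_arm.cc; the change itself only emits the
// unconditional slow-path call documented here. `Holder`, `Obj`, `Mark`, and
// the bit value are illustrative stand-ins, not ART symbols, and the acquire
// fence merely models the required load-load ordering.
#include <atomic>
#include <cstdint>

struct Obj;
struct Holder { uint32_t lock_word_; Obj* field_; };

constexpr uint32_t kReadBarrierBit = 0x10000000u;  // Illustrative bit only.

inline Obj* Mark(Obj* ref) { return ref; }  // Placeholder; the real collector returns the to-space copy.

inline Obj* BakerReadBarrierLoad(const Holder* holder) {
  bool is_gray = (holder->lock_word_ & kReadBarrierBit) != 0;
  std::atomic_thread_fence(std::memory_order_acquire);  // Keep the lock word load before the reference load.
  Obj* ref = holder->field_;  // The original reference load.
  return is_gray ? Mark(ref) : ref;  // Only gray holders pay for the mark call.
}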
+ void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root); + private: using MethodToLiteralMap = ArenaSafeMap<MethodReference, Literal*, MethodReferenceComparator>; diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index d1bddf673a..ac16268834 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -42,6 +42,9 @@ using namespace vixl; // NOLINT(build/namespaces) namespace art { +template<class MirrorType> +class GcRoot; + namespace arm64 { using helpers::CPURegisterFrom; @@ -431,15 +434,6 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 { __ Bind(GetEntryLabel()); - if (instruction_->IsCheckCast()) { - // The codegen for the instruction overwrites `temp`, so put it back in place. - Register obj = InputRegisterAt(instruction_, 0); - Register temp = WRegisterFrom(locations->GetTemp(0)); - uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - __ Ldr(temp, HeapOperand(obj, class_offset)); - arm64_codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp); - } - if (!is_fatal_) { SaveLiveRegisters(codegen, locations); } @@ -454,11 +448,11 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 { if (instruction_->IsInstanceOf()) { arm64_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pInstanceofNonTrivial), instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickInstanceofNonTrivial, uint32_t, + const mirror::Class*, const mirror::Class*>(); Primitive::Type ret_type = instruction_->GetType(); Location ret_loc = calling_convention.GetReturnLocation(ret_type); arm64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type); - CheckEntrypointTypes<kQuickInstanceofNonTrivial, uint32_t, - const mirror::Class*, const mirror::Class*>(); } else { DCHECK(instruction_->IsCheckCast()); arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, dex_pc, this); @@ -494,6 +488,7 @@ class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 { uint32_t dex_pc = deoptimize->GetDexPc(); CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickDeoptimize, void, void>(); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM64"; } @@ -571,6 +566,271 @@ void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) { } } +// Slow path generating a read barrier for a heap reference. +class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { + public: + ReadBarrierForHeapReferenceSlowPathARM64(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) + : instruction_(instruction), + out_(out), + ref_(ref), + obj_(obj), + offset_(offset), + index_(index) { + DCHECK(kEmitCompilerReadBarrier); + // If `obj` is equal to `out` or `ref`, it means the initial object + // has been overwritten by (or after) the heap object reference load + // to be instrumented, e.g.: + // + // __ Ldr(out, HeapOperand(out, class_offset); + // codegen_->GenerateReadBarrier(instruction, out_loc, out_loc, out_loc, offset); + // + // In that case, we have lost the information about the original + // object, and the emitted read barrier cannot work properly. 
+ DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out; + DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref; + } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); + LocationSummary* locations = instruction_->GetLocations(); + Primitive::Type type = Primitive::kPrimNot; + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg())); + DCHECK(!instruction_->IsInvoke() || + (instruction_->IsInvokeStaticOrDirect() && + instruction_->GetLocations()->Intrinsified())); + + __ Bind(GetEntryLabel()); + + // Note: In the case of a HArrayGet instruction, when the base + // address is a HArm64IntermediateAddress instruction, it does not + // point to the array object itself, but to an offset within this + // object. However, the read barrier entry point needs the array + // object address to be passed as first argument. So we + // temporarily set back `obj_` to that address, and restore its + // initial value later. + if (instruction_->IsArrayGet() && + instruction_->AsArrayGet()->GetArray()->IsArm64IntermediateAddress()) { + if (kIsDebugBuild) { + HArm64IntermediateAddress* intermediate_address = + instruction_->AsArrayGet()->GetArray()->AsArm64IntermediateAddress(); + uint32_t intermediate_address_offset = + intermediate_address->GetOffset()->AsIntConstant()->GetValueAsUint64(); + DCHECK_EQ(intermediate_address_offset, offset_); + DCHECK_EQ(mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value(), offset_); + } + Register obj_reg = RegisterFrom(obj_, Primitive::kPrimInt); + __ Sub(obj_reg, obj_reg, offset_); + } + + SaveLiveRegisters(codegen, locations); + + // We may have to change the index's value, but as `index_` is a + // constant member (like other "inputs" of this slow path), + // introduce a copy of it, `index`. + Location index = index_; + if (index_.IsValid()) { + // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject. + if (instruction_->IsArrayGet()) { + // Compute the actual memory offset and store it in `index`. + Register index_reg = RegisterFrom(index_, Primitive::kPrimInt); + DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_.reg())); + if (codegen->IsCoreCalleeSaveRegister(index_.reg())) { + // We are about to change the value of `index_reg` (see the + // calls to vixl::MacroAssembler::Lsl and + // vixl::MacroAssembler::Mov below), but it has + // not been saved by the previous call to + // art::SlowPathCode::SaveLiveRegisters, as it is a + // callee-save register -- + // art::SlowPathCode::SaveLiveRegisters does not consider + // callee-save registers, as it has been designed with the + // assumption that callee-save registers are supposed to be + // handled by the called function. So, as a callee-save + // register, `index_reg` _would_ eventually be saved onto + // the stack, but it would be too late: we would have + // changed its value earlier. Therefore, we manually save + // it here into another freely available register, + // `free_reg`, chosen of course among the caller-save + // registers (as a callee-save `free_reg` register would + // exhibit the same problem). + // + // Note we could have requested a temporary register from + // the register allocator instead; but we prefer not to, as + // this is a slow path, and we know we can find a + // caller-save register that is available. 
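// Editor's sketch (annotation, not part of this change): the index-to-offset
// folding performed just below (Lsl followed by Add), written as self-contained
// C++. `kComponentSizeShift` is 2 because a compressed heap reference is 4
// bytes; `data_offset` stands for mirror::Array::DataOffset for reference arrays.
#include <cstdint>

constexpr uint32_t kComponentSizeShift = 2;  // log2(sizeof(compressed heap reference)) == log2(4).

uint32_t AdjustedArrayElementOffset(uint32_t data_offset, uint32_t index) {
  // Cannot overflow in practice: the runtime caps object arrays at 2^26 - 1 elements.
  return (index << kComponentSizeShift) + data_offset;
}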
+ Register free_reg = FindAvailableCallerSaveRegister(codegen); + __ Mov(free_reg.W(), index_reg); + index_reg = free_reg; + index = LocationFrom(index_reg); + } else { + // The initial register stored in `index_` has already been + // saved in the call to art::SlowPathCode::SaveLiveRegisters + // (as it is not a callee-save register), so we can freely + // use it. + } + // Shifting the index value contained in `index_reg` by the scale + // factor (2) cannot overflow in practice, as the runtime is + // unable to allocate object arrays with a size larger than + // 2^26 - 1 (that is, 2^28 - 4 bytes). + __ Lsl(index_reg, index_reg, Primitive::ComponentSizeShift(type)); + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + __ Add(index_reg, index_reg, Operand(offset_)); + } else { + DCHECK(instruction_->IsInvoke()); + DCHECK(instruction_->GetLocations()->Intrinsified()); + DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) || + (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile)) + << instruction_->AsInvoke()->GetIntrinsic(); + DCHECK_EQ(offset_, 0U); + DCHECK(index_.IsRegisterPair()); + // UnsafeGet's offset location is a register pair, the low + // part contains the correct offset. + index = index_.ToLow(); + } + } + + // We're moving two or three locations to locations that could + // overlap, so we need a parallel move resolver. + InvokeRuntimeCallingConvention calling_convention; + HParallelMove parallel_move(codegen->GetGraph()->GetArena()); + parallel_move.AddMove(ref_, + LocationFrom(calling_convention.GetRegisterAt(0)), + type, + nullptr); + parallel_move.AddMove(obj_, + LocationFrom(calling_convention.GetRegisterAt(1)), + type, + nullptr); + if (index.IsValid()) { + parallel_move.AddMove(index, + LocationFrom(calling_convention.GetRegisterAt(2)), + Primitive::kPrimInt, + nullptr); + codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); + } else { + codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); + arm64_codegen->MoveConstant(LocationFrom(calling_convention.GetRegisterAt(2)), offset_); + } + arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierSlow), + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes< + kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>(); + arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type); + + RestoreLiveRegisters(codegen, locations); + + // Restore the value of `obj_` when it corresponds to a + // HArm64IntermediateAddress instruction.
+ if (instruction_->IsArrayGet() && + instruction_->AsArrayGet()->GetArray()->IsArm64IntermediateAddress()) { + if (kIsDebugBuild) { + HArm64IntermediateAddress* intermediate_address = + instruction_->AsArrayGet()->GetArray()->AsArm64IntermediateAddress(); + uint32_t intermediate_address_offset = + intermediate_address->GetOffset()->AsIntConstant()->GetValueAsUint64(); + DCHECK_EQ(intermediate_address_offset, offset_); + DCHECK_EQ(mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value(), offset_); + } + Register obj_reg = RegisterFrom(obj_, Primitive::kPrimInt); + __ Add(obj_reg, obj_reg, offset_); + } + + __ B(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierForHeapReferenceSlowPathARM64"; } + + private: + Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) { + size_t ref = static_cast<int>(XRegisterFrom(ref_).code()); + size_t obj = static_cast<int>(XRegisterFrom(obj_).code()); + for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) { + if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) { + return Register(VIXLRegCodeFromART(i), kXRegSize); + } + } + // We shall never fail to find a free caller-save register, as + // there are more than two core caller-save registers on ARM64 + // (meaning it is possible to find one which is different from + // `ref` and `obj`). + DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u); + LOG(FATAL) << "Could not find a free register"; + UNREACHABLE(); + } + + HInstruction* const instruction_; + const Location out_; + const Location ref_; + const Location obj_; + const uint32_t offset_; + // An additional location containing an index to an array. + // Only used for HArrayGet and the UnsafeGetObject & + // UnsafeGetObjectVolatile intrinsics. + const Location index_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathARM64); +}; + +// Slow path generating a read barrier for a GC root. +class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 { + public: + ReadBarrierForRootSlowPathARM64(HInstruction* instruction, Location out, Location root) + : instruction_(instruction), out_(out), root_(root) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + Primitive::Type type = Primitive::kPrimNot; + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg())); + DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()); + + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConvention calling_convention; + CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); + // The argument of the ReadBarrierForRootSlow is not a managed + // reference (`mirror::Object*`), but a `GcRoot<mirror::Object>*`; + // thus we need a 64-bit move here, and we cannot use + // + // arm64_codegen->MoveLocation( + // LocationFrom(calling_convention.GetRegisterAt(0)), + // root_, + // type); + // + // which would emit a 32-bit move, as `type` is a (32-bit wide) + // reference type (`Primitive::kPrimNot`). 
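// Editor's sketch (annotation, not part of this change): the size distinction
// behind the explicit 64-bit move explained in the comment above, using
// illustrative stand-ins for the ART types. The pointer-size assertion only
// holds when compiling for a 64-bit target such as arm64.
#include <cstdint>

struct HeapRef { uint32_t reference_; };  // Stand-in for mirror::HeapReference<mirror::Object>.
struct GcRootSlot { HeapRef root_; };     // Stand-in for GcRoot<mirror::Object>.

static_assert(sizeof(HeapRef) == sizeof(uint32_t), "a heap reference is a 32-bit payload");
// What is passed to the entry point is the *address* of the root slot, hence a 64-bit move:
static_assert(sizeof(GcRootSlot*) == 8, "slot addresses are 64-bit on arm64");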
+ __ Mov(calling_convention.GetRegisterAt(0), XRegisterFrom(out_)); + arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierForRootSlow), + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>(); + arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type); + + RestoreLiveRegisters(codegen, locations); + __ B(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathARM64"; } + + private: + HInstruction* const instruction_; + const Location out_; + const Location root_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARM64); +}; + #undef __ Location InvokeDexCallingConventionVisitorARM64::GetNextLocation(Primitive::Type type) { @@ -1401,13 +1661,25 @@ void LocationsBuilderARM64::HandleBinaryOp(HBinaryOperation* instr) { } void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction) { + DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); + + bool object_field_get_with_read_barrier = + kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot); LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetArena()) LocationSummary(instruction, + object_field_get_with_read_barrier ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); if (Primitive::IsFloatingPointType(instruction->GetType())) { locations->SetOut(Location::RequiresFpuRegister()); } else { - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + // The output overlaps for an object field get when read barriers + // are enabled: we do not want the load to overwrite the object's + // location, as we need it to emit the read barrier. + locations->SetOut( + Location::RequiresRegister(), + object_field_get_with_read_barrier ? 
Location::kOutputOverlap : Location::kNoOutputOverlap); } } @@ -1436,7 +1708,11 @@ void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction, } if (field_type == Primitive::kPrimNot) { - GetAssembler()->MaybeUnpoisonHeapReference(OutputCPURegister(instruction).W()); + LocationSummary* locations = instruction->GetLocations(); + Location base = locations->InAt(0); + Location out = locations->Out(); + uint32_t offset = field_info.GetFieldOffset().Uint32Value(); + codegen_->MaybeGenerateReadBarrier(instruction, out, out, base, offset); } } @@ -1613,6 +1889,82 @@ void InstructionCodeGeneratorARM64::VisitAnd(HAnd* instruction) { HandleBinaryOp(instruction); } +void LocationsBuilderARM64::VisitArm64DataProcWithShifterOp( + HArm64DataProcWithShifterOp* instruction) { + DCHECK(instruction->GetType() == Primitive::kPrimInt || + instruction->GetType() == Primitive::kPrimLong); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + if (instruction->GetInstrKind() == HInstruction::kNeg) { + locations->SetInAt(0, Location::ConstantLocation(instruction->InputAt(0)->AsConstant())); + } else { + locations->SetInAt(0, Location::RequiresRegister()); + } + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void InstructionCodeGeneratorARM64::VisitArm64DataProcWithShifterOp( + HArm64DataProcWithShifterOp* instruction) { + Primitive::Type type = instruction->GetType(); + HInstruction::InstructionKind kind = instruction->GetInstrKind(); + DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong); + Register out = OutputRegister(instruction); + Register left; + if (kind != HInstruction::kNeg) { + left = InputRegisterAt(instruction, 0); + } + // If this `HArm64DataProcWithShifterOp` was created by merging a type conversion as the + // shifter operand operation, the IR generating `right_reg` (input to the type + // conversion) can have a different type from the current instruction's type, + // so we manually indicate the type. + Register right_reg = RegisterFrom(instruction->GetLocations()->InAt(1), type); + int64_t shift_amount = (type == Primitive::kPrimInt) + ? static_cast<uint32_t>(instruction->GetShiftAmount() & kMaxIntShiftValue) + : static_cast<uint32_t>(instruction->GetShiftAmount() & kMaxLongShiftValue); + + Operand right_operand(0); + + HArm64DataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind(); + if (HArm64DataProcWithShifterOp::IsExtensionOp(op_kind)) { + right_operand = Operand(right_reg, helpers::ExtendFromOpKind(op_kind)); + } else { + right_operand = Operand(right_reg, helpers::ShiftFromOpKind(op_kind), shift_amount); + } + + // Logical binary operations do not support extension operations in the + // operand. Note that VIXL would still manage if it was passed by generating + // the extension as a separate instruction. + // `HNeg` also does not support extension. See comments in `ShifterOperandSupportsExtension()`. 
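// Editor's sketch (annotation, not part of this change): what folding a shift
// into the shifter operand means for the kAdd case handled below, modelled on
// plain integers. A single `ADD Xd, Xn, Xm, LSL #amount` instruction replaces a
// separate shift followed by an add; `shift_amount` is assumed to be already
// masked to a legal value, as the surrounding code does with kMaxLongShiftValue.
#include <cstdint>

uint64_t AddWithShiftedOperand(uint64_t left, uint64_t right, unsigned shift_amount) {
  // Equivalent to: tmp = right << shift_amount; out = left + tmp;
  return left + (right << shift_amount);
}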
+ DCHECK(!right_operand.IsExtendedRegister() || + (kind != HInstruction::kAnd && kind != HInstruction::kOr && kind != HInstruction::kXor && + kind != HInstruction::kNeg)); + switch (kind) { + case HInstruction::kAdd: + __ Add(out, left, right_operand); + break; + case HInstruction::kAnd: + __ And(out, left, right_operand); + break; + case HInstruction::kNeg: + DCHECK(instruction->InputAt(0)->AsConstant()->IsZero()); + __ Neg(out, right_operand); + break; + case HInstruction::kOr: + __ Orr(out, left, right_operand); + break; + case HInstruction::kSub: + __ Sub(out, left, right_operand); + break; + case HInstruction::kXor: + __ Eor(out, left, right_operand); + break; + default: + LOG(FATAL) << "Unexpected operation kind: " << kind; + UNREACHABLE(); + } +} + void LocationsBuilderARM64::VisitArm64IntermediateAddress(HArm64IntermediateAddress* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); @@ -1628,23 +1980,75 @@ void InstructionCodeGeneratorARM64::VisitArm64IntermediateAddress( Operand(InputOperandAt(instruction, 1))); } +void LocationsBuilderARM64::VisitArm64MultiplyAccumulate(HArm64MultiplyAccumulate* instr) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instr, LocationSummary::kNoCall); + locations->SetInAt(HArm64MultiplyAccumulate::kInputAccumulatorIndex, + Location::RequiresRegister()); + locations->SetInAt(HArm64MultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister()); + locations->SetInAt(HArm64MultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void InstructionCodeGeneratorARM64::VisitArm64MultiplyAccumulate(HArm64MultiplyAccumulate* instr) { + Register res = OutputRegister(instr); + Register accumulator = InputRegisterAt(instr, HArm64MultiplyAccumulate::kInputAccumulatorIndex); + Register mul_left = InputRegisterAt(instr, HArm64MultiplyAccumulate::kInputMulLeftIndex); + Register mul_right = InputRegisterAt(instr, HArm64MultiplyAccumulate::kInputMulRightIndex); + + // Avoid emitting code that could trigger Cortex A53's erratum 835769. + // This fixup should be carried out for all multiply-accumulate instructions: + // madd, msub, smaddl, smsubl, umaddl and umsubl. + if (instr->GetType() == Primitive::kPrimLong && + codegen_->GetInstructionSetFeatures().NeedFixCortexA53_835769()) { + MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen_)->GetVIXLAssembler(); + vixl::Instruction* prev = masm->GetCursorAddress<vixl::Instruction*>() - vixl::kInstructionSize; + if (prev->IsLoadOrStore()) { + // Make sure we emit only exactly one nop. + vixl::CodeBufferCheckScope scope(masm, + vixl::kInstructionSize, + vixl::CodeBufferCheckScope::kCheck, + vixl::CodeBufferCheckScope::kExactSize); + __ nop(); + } + } + + if (instr->GetOpKind() == HInstruction::kAdd) { + __ Madd(res, mul_left, mul_right, accumulator); + } else { + DCHECK(instr->GetOpKind() == HInstruction::kSub); + __ Msub(res, mul_left, mul_right, accumulator); + } +} + void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) { + bool object_array_get_with_read_barrier = + kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot); LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetArena()) LocationSummary(instruction, + object_array_get_with_read_barrier ? 
+ LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (Primitive::IsFloatingPointType(instruction->GetType())) { locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } else { - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + // The output overlaps in the case of an object array get with + // read barriers enabled: we do not want the move to overwrite the + // array's location, as we need it to emit the read barrier. + locations->SetOut( + Location::RequiresRegister(), + object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap); } } void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { Primitive::Type type = instruction->GetType(); Register obj = InputRegisterAt(instruction, 0); - Location index = instruction->GetLocations()->InAt(1); - size_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value(); + LocationSummary* locations = instruction->GetLocations(); + Location index = locations->InAt(1); + uint32_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value(); MemOperand source = HeapOperand(obj); CPURegister dest = OutputCPURegister(instruction); @@ -1676,8 +2080,22 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { codegen_->Load(type, dest, source); codegen_->MaybeRecordImplicitNullCheck(instruction); - if (instruction->GetType() == Primitive::kPrimNot) { - GetAssembler()->MaybeUnpoisonHeapReference(dest.W()); + if (type == Primitive::kPrimNot) { + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + Location obj_loc = locations->InAt(0); + Location out = locations->Out(); + if (index.IsConstant()) { + codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset); + } else { + // Note: when `obj_loc` is a HArm64IntermediateAddress, it does + // not contain the base address of the array object, which is + // needed by the read barrier entry point. So the read barrier + // slow path will temporarily set back `obj_loc` to the right + // address (see ReadBarrierForHeapReferenceSlowPathARM64::EmitNativeCode). + codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset, index); + } } } @@ -1695,12 +2113,19 @@ void InstructionCodeGeneratorARM64::VisitArrayLength(HArrayLength* instruction) } void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) { + Primitive::Type value_type = instruction->GetComponentType(); + + bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); + bool object_array_set_with_read_barrier = + kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( instruction, - instruction->NeedsTypeCheck() ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); + (may_need_runtime_call_for_type_check || object_array_set_with_read_barrier) ? 
+ LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); - if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) { + if (Primitive::IsFloatingPointType(value_type)) { locations->SetInAt(2, Location::RequiresFpuRegister()); } else { locations->SetInAt(2, Location::RequiresRegister()); @@ -1710,7 +2135,7 @@ void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) { void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { Primitive::Type value_type = instruction->GetComponentType(); LocationSummary* locations = instruction->GetLocations(); - bool may_need_runtime_call = locations->CanCall(); + bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); @@ -1724,7 +2149,7 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { BlockPoolsScope block_pools(masm); if (!needs_write_barrier) { - DCHECK(!may_need_runtime_call); + DCHECK(!may_need_runtime_call_for_type_check); if (index.IsConstant()) { offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(value_type); destination = HeapOperand(array, offset); @@ -1774,7 +2199,7 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); - if (may_need_runtime_call) { + if (may_need_runtime_call_for_type_check) { slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathARM64(instruction); codegen_->AddSlowPath(slow_path); if (instruction->GetValueCanBeNull()) { @@ -1789,26 +2214,66 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { __ Bind(&non_zero); } - Register temp2 = temps.AcquireSameSizeAs(array); - __ Ldr(temp, HeapOperand(array, class_offset)); - codegen_->MaybeRecordImplicitNullCheck(instruction); - GetAssembler()->MaybeUnpoisonHeapReference(temp); - __ Ldr(temp, HeapOperand(temp, component_offset)); - __ Ldr(temp2, HeapOperand(Register(value), class_offset)); - // No need to poison/unpoison, we're comparing two poisoned references. - __ Cmp(temp, temp2); - if (instruction->StaticTypeOfArrayIsObjectArray()) { - vixl::Label do_put; - __ B(eq, &do_put); - GetAssembler()->MaybeUnpoisonHeapReference(temp); - __ Ldr(temp, HeapOperand(temp, super_offset)); - // No need to unpoison, we're comparing against null. 
- __ Cbnz(temp, slow_path->GetEntryLabel()); - __ Bind(&do_put); + if (kEmitCompilerReadBarrier) { + // When read barriers are enabled, the type checking + // instrumentation requires two read barriers: + // + // __ Mov(temp2, temp); + // // /* HeapReference<Class> */ temp = temp->component_type_ + // __ Ldr(temp, HeapOperand(temp, component_offset)); + // codegen_->GenerateReadBarrier( + // instruction, temp_loc, temp_loc, temp2_loc, component_offset); + // + // // /* HeapReference<Class> */ temp2 = value->klass_ + // __ Ldr(temp2, HeapOperand(Register(value), class_offset)); + // codegen_->GenerateReadBarrier( + // instruction, temp2_loc, temp2_loc, value_loc, class_offset, temp_loc); + // + // __ Cmp(temp, temp2); + // + // However, the second read barrier may trash `temp`, as it + // is a temporary register, and as such would not be saved + // along with live registers before calling the runtime (nor + // restored afterwards). So in this case, we bail out and + // delegate the work to the array set slow path. + // + // TODO: Extend the register allocator to support a new + // "(locally) live temp" location so as to avoid always + // going into the slow path when read barriers are enabled. + __ B(slow_path->GetEntryLabel()); } else { - __ B(ne, slow_path->GetEntryLabel()); + Register temp2 = temps.AcquireSameSizeAs(array); + // /* HeapReference<Class> */ temp = array->klass_ + __ Ldr(temp, HeapOperand(array, class_offset)); + codegen_->MaybeRecordImplicitNullCheck(instruction); + GetAssembler()->MaybeUnpoisonHeapReference(temp); + + // /* HeapReference<Class> */ temp = temp->component_type_ + __ Ldr(temp, HeapOperand(temp, component_offset)); + // /* HeapReference<Class> */ temp2 = value->klass_ + __ Ldr(temp2, HeapOperand(Register(value), class_offset)); + // If heap poisoning is enabled, no need to unpoison `temp` + // nor `temp2`, as we are comparing two poisoned references. + __ Cmp(temp, temp2); + + if (instruction->StaticTypeOfArrayIsObjectArray()) { + vixl::Label do_put; + __ B(eq, &do_put); + // If heap poisoning is enabled, the `temp` reference has + // not been unpoisoned yet; unpoison it now. + GetAssembler()->MaybeUnpoisonHeapReference(temp); + + // /* HeapReference<Class> */ temp = temp->super_class_ + __ Ldr(temp, HeapOperand(temp, super_offset)); + // If heap poisoning is enabled, no need to unpoison + // `temp`, as we are comparing against null below. + __ Cbnz(temp, slow_path->GetEntryLabel()); + __ Bind(&do_put); + } else { + __ B(ne, slow_path->GetEntryLabel()); + } + temps.Release(temp2); } - temps.Release(temp2); } if (kPoisonHeapReferences) { @@ -1824,7 +2289,7 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { } __ Str(source, destination); - if (!may_need_runtime_call) { + if (!may_need_runtime_call_for_type_check) { codegen_->MaybeRecordImplicitNullCheck(instruction); } } @@ -2491,40 +2956,44 @@ void InstructionCodeGeneratorARM64::VisitInstanceFieldSet(HInstanceFieldSet* ins void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; - switch (instruction->GetTypeCheckKind()) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: case TypeCheckKind::kArrayObjectCheck: - call_kind = LocationSummary::kNoCall; + call_kind = + kEmitCompilerReadBarrier ? 
LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; break; + case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: - call_kind = LocationSummary::kCall; - break; - case TypeCheckKind::kArrayCheck: call_kind = LocationSummary::kCallOnSlowPath; break; } + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); - if (call_kind != LocationSummary::kCall) { - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - // The out register is used as a temporary, so it overlaps with the inputs. - // Note that TypeCheckSlowPathARM64 uses this register too. - locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); - } else { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1))); - locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt)); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + // The "out" register is used as a temporary, so it overlaps with the inputs. + // Note that TypeCheckSlowPathARM64 uses this register too. + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + // When read barriers are enabled, we need a temporary register for + // some cases. + if (kEmitCompilerReadBarrier && + (type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck)) { + locations->AddTemp(Location::RequiresRegister()); } } void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary* locations = instruction->GetLocations(); + Location obj_loc = locations->InAt(0); Register obj = InputRegisterAt(instruction, 0); Register cls = InputRegisterAt(instruction, 1); + Location out_loc = locations->Out(); Register out = OutputRegister(instruction); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); @@ -2540,15 +3009,9 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { __ Cbz(obj, &zero); } - // In case of an interface/unresolved check, we put the object class into the object register. - // This is safe, as the register is caller-save, and the object must be in another - // register if it survives the runtime call. - Register target = (instruction->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) || - (instruction->GetTypeCheckKind() == TypeCheckKind::kUnresolvedCheck) - ? obj - : out; - __ Ldr(target, HeapOperand(obj.W(), class_offset)); - GetAssembler()->MaybeUnpoisonHeapReference(target); + // /* HeapReference<Class> */ out = obj->klass_ + __ Ldr(out, HeapOperand(obj.W(), class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, class_offset); switch (instruction->GetTypeCheckKind()) { case TypeCheckKind::kExactCheck: { @@ -2559,13 +3022,23 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { } break; } + case TypeCheckKind::kAbstractClassCheck: { // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. 
vixl::Label loop, success; __ Bind(&loop); + Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `out` into `temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp = WRegisterFrom(temp_loc); + __ Mov(temp, out); + } + // /* HeapReference<Class> */ out = out->super_class_ __ Ldr(out, HeapOperand(out, super_offset)); - GetAssembler()->MaybeUnpoisonHeapReference(out); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset); // If `out` is null, we use it for the result, and jump to `done`. __ Cbz(out, &done); __ Cmp(out, cls); @@ -2576,14 +3049,24 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { } break; } + case TypeCheckKind::kClassHierarchyCheck: { // Walk over the class hierarchy to find a match. vixl::Label loop, success; __ Bind(&loop); __ Cmp(out, cls); __ B(eq, &success); + Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `out` into `temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp = WRegisterFrom(temp_loc); + __ Mov(temp, out); + } + // /* HeapReference<Class> */ out = out->super_class_ __ Ldr(out, HeapOperand(out, super_offset)); - GetAssembler()->MaybeUnpoisonHeapReference(out); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset); __ Cbnz(out, &loop); // If `out` is null, we use it for the result, and jump to `done`. __ B(&done); @@ -2594,14 +3077,24 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { } break; } + case TypeCheckKind::kArrayObjectCheck: { // Do an exact check. vixl::Label exact_check; __ Cmp(out, cls); __ B(eq, &exact_check); - // Otherwise, we need to check that the object's class is a non primitive array. + // Otherwise, we need to check that the object's class is a non-primitive array. + Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `out` into `temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp = WRegisterFrom(temp_loc); + __ Mov(temp, out); + } + // /* HeapReference<Class> */ out = out->component_type_ __ Ldr(out, HeapOperand(out, component_offset)); - GetAssembler()->MaybeUnpoisonHeapReference(out); + codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, component_offset); // If `out` is null, we use it for the result, and jump to `done`. 
__ Cbz(out, &done); __ Ldrh(out, HeapOperand(out, primitive_offset)); @@ -2612,11 +3105,12 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { __ B(&done); break; } + case TypeCheckKind::kArrayCheck: { __ Cmp(out, cls); DCHECK(locations->OnlyCallsOnSlowPath()); - slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64( - instruction, /* is_fatal */ false); + slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction, + /* is_fatal */ false); codegen_->AddSlowPath(slow_path); __ B(ne, slow_path->GetEntryLabel()); __ Mov(out, 1); @@ -2625,13 +3119,25 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { } break; } + case TypeCheckKind::kUnresolvedCheck: - case TypeCheckKind::kInterfaceCheck: - default: { - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial), - instruction, - instruction->GetDexPc(), - nullptr); + case TypeCheckKind::kInterfaceCheck: { + // Note that we indeed only call on slow path, but we always go + // into the slow path for the unresolved and interface check + // cases. + // + // We cannot directly call the InstanceofNonTrivial runtime + // entry point without resorting to a type checking slow path + // here (i.e. by calling InvokeRuntime directly), as it would + // require to assign fixed registers for the inputs of this + // HInstanceOf instruction (following the runtime calling + // convention), which might be cluttered by the potential first + // read barrier emission at the beginning of this method. + DCHECK(locations->OnlyCallsOnSlowPath()); + slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction, + /* is_fatal */ false); + codegen_->AddSlowPath(slow_path); + __ B(slow_path->GetEntryLabel()); if (zero.IsLinked()) { __ B(&done); } @@ -2657,58 +3163,62 @@ void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; bool throws_into_catch = instruction->CanThrowIntoCatchBlock(); - switch (instruction->GetTypeCheckKind()) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: case TypeCheckKind::kArrayObjectCheck: - call_kind = throws_into_catch - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall; + call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path. break; + case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: - call_kind = LocationSummary::kCall; - break; - case TypeCheckKind::kArrayCheck: call_kind = LocationSummary::kCallOnSlowPath; break; } - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( - instruction, call_kind); - if (call_kind != LocationSummary::kCall) { - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - // Note that TypeCheckSlowPathARM64 uses this register too. 
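The hunk above routes the kUnresolvedCheck and kInterfaceCheck cases of HInstanceOf through TypeCheckSlowPathARM64 instead of invoking pInstanceofNonTrivial directly, so that no fixed input registers have to be reserved ahead of a possible read-barrier emission. As a rough, non-authoritative sketch only (the register shuffling below is paraphrased, not the actual slow-path code), that slow path amounts to:

    // Conceptual instanceof slow path (sketch, not emitted code):
    //   save live registers;
    //   move the object's class and `cls` into the runtime calling
    //   convention registers;                       // done here, not in the fast path
    //   out = pInstanceofNonTrivial(obj_class, cls); // returns uint32_t, 0 or 1
    //   restore live registers;
    //   branch back to the exit label;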
- locations->AddTemp(Location::RequiresRegister()); - } else { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1))); + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + // Note that TypeCheckSlowPathARM64 uses this "temp" register too. + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + // When read barriers are enabled, we need an additional temporary + // register for some cases. + if (kEmitCompilerReadBarrier && + (type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck)) { + locations->AddTemp(Location::RequiresRegister()); } } void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = instruction->GetLocations(); + Location obj_loc = locations->InAt(0); Register obj = InputRegisterAt(instruction, 0); Register cls = InputRegisterAt(instruction, 1); - Register temp; - if (!locations->WillCall()) { - temp = WRegisterFrom(instruction->GetLocations()->GetTemp(0)); - } - + Location temp_loc = locations->GetTemp(0); + Register temp = WRegisterFrom(temp_loc); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); - SlowPathCodeARM64* slow_path = nullptr; - if (!locations->WillCall()) { - slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64( - instruction, !locations->CanCall()); - codegen_->AddSlowPath(slow_path); - } + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + bool is_type_check_slow_path_fatal = + (type_check_kind == TypeCheckKind::kExactCheck || + type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck) && + !instruction->CanThrowIntoCatchBlock(); + SlowPathCodeARM64* type_check_slow_path = + new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction, + is_type_check_slow_path_fatal); + codegen_->AddSlowPath(type_check_slow_path); vixl::Label done; // Avoid null check if we know obj is not null. @@ -2716,76 +3226,159 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { __ Cbz(obj, &done); } - if (locations->WillCall()) { - __ Ldr(obj, HeapOperand(obj, class_offset)); - GetAssembler()->MaybeUnpoisonHeapReference(obj); - } else { - __ Ldr(temp, HeapOperand(obj, class_offset)); - GetAssembler()->MaybeUnpoisonHeapReference(temp); - } + // /* HeapReference<Class> */ temp = obj->klass_ + __ Ldr(temp, HeapOperand(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); - switch (instruction->GetTypeCheckKind()) { + switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kArrayCheck: { __ Cmp(temp, cls); // Jump to slow path for throwing the exception or doing a // more involved array check. 
- __ B(ne, slow_path->GetEntryLabel()); + __ B(ne, type_check_slow_path->GetEntryLabel()); break; } + case TypeCheckKind::kAbstractClassCheck: { // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. - vixl::Label loop; + vixl::Label loop, compare_classes; __ Bind(&loop); + Location temp2_loc = + kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `temp` into `temp2` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp2 = WRegisterFrom(temp2_loc); + __ Mov(temp2, temp); + } + // /* HeapReference<Class> */ temp = temp->super_class_ __ Ldr(temp, HeapOperand(temp, super_offset)); - GetAssembler()->MaybeUnpoisonHeapReference(temp); - // Jump to the slow path to throw the exception. - __ Cbz(temp, slow_path->GetEntryLabel()); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset); + + // If the class reference currently in `temp` is not null, jump + // to the `compare_classes` label to compare it with the checked + // class. + __ Cbnz(temp, &compare_classes); + // Otherwise, jump to the slow path to throw the exception. + // + // But before, move back the object's class into `temp` before + // going into the slow path, as it has been overwritten in the + // meantime. + // /* HeapReference<Class> */ temp = obj->klass_ + __ Ldr(temp, HeapOperand(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ B(type_check_slow_path->GetEntryLabel()); + + __ Bind(&compare_classes); __ Cmp(temp, cls); __ B(ne, &loop); break; } + case TypeCheckKind::kClassHierarchyCheck: { // Walk over the class hierarchy to find a match. vixl::Label loop; __ Bind(&loop); __ Cmp(temp, cls); __ B(eq, &done); + + Location temp2_loc = + kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `temp` into `temp2` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp2 = WRegisterFrom(temp2_loc); + __ Mov(temp2, temp); + } + // /* HeapReference<Class> */ temp = temp->super_class_ __ Ldr(temp, HeapOperand(temp, super_offset)); - GetAssembler()->MaybeUnpoisonHeapReference(temp); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset); + + // If the class reference currently in `temp` is not null, jump + // back at the beginning of the loop. __ Cbnz(temp, &loop); - // Jump to the slow path to throw the exception. - __ B(slow_path->GetEntryLabel()); + // Otherwise, jump to the slow path to throw the exception. + // + // But before, move back the object's class into `temp` before + // going into the slow path, as it has been overwritten in the + // meantime. + // /* HeapReference<Class> */ temp = obj->klass_ + __ Ldr(temp, HeapOperand(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ B(type_check_slow_path->GetEntryLabel()); break; } + case TypeCheckKind::kArrayObjectCheck: { // Do an exact check. + vixl::Label check_non_primitive_component_type; __ Cmp(temp, cls); __ B(eq, &done); - // Otherwise, we need to check that the object's class is a non primitive array. + + // Otherwise, we need to check that the object's class is a non-primitive array. 
+ Location temp2_loc = + kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); + if (kEmitCompilerReadBarrier) { + // Save the value of `temp` into `temp2` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + Register temp2 = WRegisterFrom(temp2_loc); + __ Mov(temp2, temp); + } + // /* HeapReference<Class> */ temp = temp->component_type_ __ Ldr(temp, HeapOperand(temp, component_offset)); - GetAssembler()->MaybeUnpoisonHeapReference(temp); - __ Cbz(temp, slow_path->GetEntryLabel()); + codegen_->MaybeGenerateReadBarrier( + instruction, temp_loc, temp_loc, temp2_loc, component_offset); + + // If the component type is not null (i.e. the object is indeed + // an array), jump to label `check_non_primitive_component_type` + // to further check that this component type is not a primitive + // type. + __ Cbnz(temp, &check_non_primitive_component_type); + // Otherwise, jump to the slow path to throw the exception. + // + // But before, move back the object's class into `temp` before + // going into the slow path, as it has been overwritten in the + // meantime. + // /* HeapReference<Class> */ temp = obj->klass_ + __ Ldr(temp, HeapOperand(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ B(type_check_slow_path->GetEntryLabel()); + + __ Bind(&check_non_primitive_component_type); __ Ldrh(temp, HeapOperand(temp, primitive_offset)); static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); - __ Cbnz(temp, slow_path->GetEntryLabel()); + __ Cbz(temp, &done); + // Same comment as above regarding `temp` and the slow path. + // /* HeapReference<Class> */ temp = obj->klass_ + __ Ldr(temp, HeapOperand(obj, class_offset)); + codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + __ B(type_check_slow_path->GetEntryLabel()); break; } + case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: - default: - codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), - instruction, - instruction->GetDexPc(), - nullptr); + // We always go into the type check slow path for the unresolved + // and interface check cases. + // + // We cannot directly call the CheckCast runtime entry point + // without resorting to a type checking slow path here (i.e. by + // calling InvokeRuntime directly), as it would require to + // assign fixed registers for the inputs of this HInstanceOf + // instruction (following the runtime calling convention), which + // might be cluttered by the potential first read barrier + // emission at the beginning of this method. + __ B(type_check_slow_path->GetEntryLabel()); break; } __ Bind(&done); - if (slow_path != nullptr) { - __ Bind(slow_path->GetExitLabel()); - } + __ Bind(type_check_slow_path->GetExitLabel()); } void LocationsBuilderARM64::VisitIntConstant(HIntConstant* constant) { @@ -2828,10 +3421,11 @@ void LocationsBuilderARM64::VisitInvokeInterface(HInvokeInterface* invoke) { void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invoke) { // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError. 
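When read barriers are enabled, each step of the kAbstractClassCheck, kClassHierarchyCheck and kArrayObjectCheck walks in VisitCheckCast above follows the same save/reload pattern. The following is a hypothetical C++ model of the hierarchy case, not ART code: the struct layout and ReadBarrier() are stand-ins for the heap layout and MaybeGenerateReadBarrier, and only the control flow mirrors the hunk.

    struct Class { const Class* super_class_; };
    struct Object { const Class* klass_; };
    // Identity stand-in for the read barrier emitted after each reference load.
    static const Class* ReadBarrier(const Class* ref) { return ref; }

    bool CheckCastHierarchy(const Object* obj, const Class* cls) {
      const Class* temp = ReadBarrier(obj->klass_);
      while (temp != cls) {
        // The real code saves `temp` into `temp2` before this load so the read
        // barrier can still see the holder of the reference.
        temp = ReadBarrier(temp->super_class_);
        if (temp == nullptr) {
          // The generated code reloads obj->klass_ here, because the type check
          // slow path expects the object's class and the loop has overwritten it.
          return false;  // TypeCheckSlowPathARM64 would throw
        }
      }
      return true;
    }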
- Register temp = XRegisterFrom(invoke->GetLocations()->GetTemp(0)); + LocationSummary* locations = invoke->GetLocations(); + Register temp = XRegisterFrom(locations->GetTemp(0)); uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset( invoke->GetImtIndex() % mirror::Class::kImtSize, kArm64PointerSize).Uint32Value(); - Location receiver = invoke->GetLocations()->InAt(0); + Location receiver = locations->InAt(0); Offset class_offset = mirror::Object::ClassOffset(); Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize); @@ -2843,14 +3437,22 @@ void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invok scratch_scope.Exclude(ip1); __ Mov(ip1, invoke->GetDexMethodIndex()); - // temp = object->GetClass(); if (receiver.IsStackSlot()) { __ Ldr(temp.W(), StackOperandFrom(receiver)); + // /* HeapReference<Class> */ temp = temp->klass_ __ Ldr(temp.W(), HeapOperand(temp.W(), class_offset)); } else { + // /* HeapReference<Class> */ temp = receiver->klass_ __ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset)); } codegen_->MaybeRecordImplicitNullCheck(invoke); + // Instead of simply (possibly) unpoisoning `temp` here, we should + // emit a read barrier for the previous class reference load. + // However this is not required in practice, as this is an + // intermediate/temporary reference and because the current + // concurrent copying collector keeps the from-space memory + // intact/accessible until the end of the marking phase (the + // concurrent copying collector may not in the future). GetAssembler()->MaybeUnpoisonHeapReference(temp.W()); // temp = temp->GetImtEntryAt(method_offset); __ Ldr(temp, MemOperand(temp, method_offset)); @@ -2926,7 +3528,7 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok __ Ldr(XRegisterFrom(temp), MemOperand(tr, invoke->GetStringInitOffset())); break; case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: - callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: // Load method address from literal pool. 
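For orientation, the interface dispatch emitted by VisitInvokeInterface above reduces to the model below. It is a sketch, not ART code: the member names stand in for the offsets used above, and kImtSize == 64 is an assumption about mirror::Class::kImtSize.

    static constexpr size_t kImtSize = 64;  // assumption, see lead-in
    struct ArtMethod { void (*entry_point_from_quick_compiled_code_)(); };
    struct Class { ArtMethod* embedded_imt_[kImtSize]; };
    struct Object { Class* klass_; };

    void InvokeInterface(Object* receiver, uint32_t imt_index) {
      // ip1 is loaded with the dex method index for the conflict trampoline.
      ArtMethod* method = receiver->klass_->embedded_imt_[imt_index % kImtSize];
      method->entry_point_from_quick_compiled_code_();
    }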
@@ -2960,7 +3562,7 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok break; } case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { - Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); Register reg = XRegisterFrom(temp); Register method_reg; if (current_method.IsRegister()) { @@ -2972,7 +3574,7 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok __ Ldr(reg.X(), MemOperand(sp, kCurrentMethodStackOffset)); } - // temp = current_method->dex_cache_resolved_methods_; + // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_; __ Ldr(reg.X(), MemOperand(method_reg.X(), ArtMethod::DexCacheResolvedMethodsOffset(kArm64WordSize).Int32Value())); @@ -3027,8 +3629,16 @@ void CodeGeneratorARM64::GenerateVirtualCall(HInvokeVirtual* invoke, Location te BlockPoolsScope block_pools(GetVIXLAssembler()); DCHECK(receiver.IsRegister()); + // /* HeapReference<Class> */ temp = receiver->klass_ __ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset)); MaybeRecordImplicitNullCheck(invoke); + // Instead of simply (possibly) unpoisoning `temp` here, we should + // emit a read barrier for the previous class reference load. + // However this is not required in practice, as this is an + // intermediate/temporary reference and because the current + // concurrent copying collector keeps the from-space memory + // intact/accessible until the end of the marking phase (the + // concurrent copying collector may not in the future). GetAssembler()->MaybeUnpoisonHeapReference(temp.W()); // temp = temp->GetMethodAt(method_offset); __ Ldr(temp, MemOperand(temp, method_offset)); @@ -3141,7 +3751,8 @@ void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) { CodeGenerator::CreateLoadClassLocationSummary( cls, LocationFrom(calling_convention.GetRegisterAt(0)), - LocationFrom(vixl::x0)); + LocationFrom(vixl::x0), + /* code_generator_supports_read_barrier */ true); } void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) { @@ -3151,30 +3762,56 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) { cls, cls->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>(); return; } + Location out_loc = cls->GetLocations()->Out(); Register out = OutputRegister(cls); Register current_method = InputRegisterAt(cls, 0); if (cls->IsReferrersClass()) { DCHECK(!cls->CanCallRuntime()); DCHECK(!cls->MustGenerateClinitCheck()); - __ Ldr(out, MemOperand(current_method, ArtMethod::DeclaringClassOffset().Int32Value())); + uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value(); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_) + __ Add(out.X(), current_method.X(), declaring_class_offset); + // /* mirror::Class* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc); + } else { + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + __ Ldr(out, MemOperand(current_method, declaring_class_offset)); + } } else { - DCHECK(cls->CanCallRuntime()); MemberOffset resolved_types_offset = ArtMethod::DexCacheResolvedTypesOffset(kArm64PointerSize); + // /* GcRoot<mirror::Class>[] */ out = + // current_method.ptr_sized_fields_->dex_cache_resolved_types_ __ Ldr(out.X(), MemOperand(current_method, resolved_types_offset.Int32Value())); - 
__ Ldr(out, MemOperand(out.X(), CodeGenerator::GetCacheOffset(cls->GetTypeIndex()))); - // TODO: We will need a read barrier here. - SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM64( - cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); - codegen_->AddSlowPath(slow_path); - __ Cbz(out, slow_path->GetEntryLabel()); - if (cls->MustGenerateClinitCheck()) { - GenerateClassInitializationCheck(slow_path, out); + size_t cache_offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex()); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::Class>* */ out = &out[type_index] + __ Add(out.X(), out.X(), cache_offset); + // /* mirror::Class* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc); } else { - __ Bind(slow_path->GetExitLabel()); + // /* GcRoot<mirror::Class> */ out = out[type_index] + __ Ldr(out, MemOperand(out.X(), cache_offset)); + } + + if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) { + DCHECK(cls->CanCallRuntime()); + SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM64( + cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); + codegen_->AddSlowPath(slow_path); + if (!cls->IsInDexCache()) { + __ Cbz(out, slow_path->GetEntryLabel()); + } + if (cls->MustGenerateClinitCheck()) { + GenerateClassInitializationCheck(slow_path, out); + } else { + __ Bind(slow_path->GetExitLabel()); + } } } } @@ -3220,12 +3857,35 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) { SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load); codegen_->AddSlowPath(slow_path); + Location out_loc = load->GetLocations()->Out(); Register out = OutputRegister(load); Register current_method = InputRegisterAt(load, 0); - __ Ldr(out, MemOperand(current_method, ArtMethod::DeclaringClassOffset().Int32Value())); - __ Ldr(out.X(), HeapOperand(out, mirror::Class::DexCacheStringsOffset())); - __ Ldr(out, MemOperand(out.X(), CodeGenerator::GetCacheOffset(load->GetStringIndex()))); - // TODO: We will need a read barrier here. 
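The VisitLoadClass hunk above (and VisitLoadString just below) reads GC roots in two flavours, selected by kEmitCompilerReadBarrier. Condensed into pseudocode, using only the entry point declared later in code_generator_arm64.h:

    // Without read barriers:
    //   out = current_method->declaring_class_;    // plain Ldr of the GcRoot
    // With read barriers:
    //   out = &current_method->declaring_class_;   // Add: address of the GcRoot
    //   out = artReadBarrierForRootSlow(out);      // via ReadBarrierForRootSlowPathARM64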
+ + uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value(); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_) + __ Add(out.X(), current_method.X(), declaring_class_offset); + // /* mirror::Class* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc); + } else { + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + __ Ldr(out, MemOperand(current_method, declaring_class_offset)); + } + + // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_ + __ Ldr(out.X(), HeapOperand(out, mirror::Class::DexCacheStringsOffset().Uint32Value())); + + size_t cache_offset = CodeGenerator::GetCacheOffset(load->GetStringIndex()); + if (kEmitCompilerReadBarrier) { + // /* GcRoot<mirror::String>* */ out = &out[string_index] + __ Add(out.X(), out.X(), cache_offset); + // /* mirror::String* */ out = out->Read() + codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc); + } else { + // /* GcRoot<mirror::String> */ out = out[string_index] + __ Ldr(out, MemOperand(out.X(), cache_offset)); + } + __ Cbz(out, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } @@ -3260,7 +3920,11 @@ void InstructionCodeGeneratorARM64::VisitMonitorOperation(HMonitorOperation* ins instruction, instruction->GetDexPc(), nullptr); - CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); + if (instruction->IsEnter()) { + CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); + } else { + CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>(); + } } void LocationsBuilderARM64::VisitMul(HMul* mul) { @@ -3349,8 +4013,6 @@ void LocationsBuilderARM64::VisitNewArray(HNewArray* instruction) { locations->SetOut(LocationFrom(x0)); locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1))); locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(2))); - CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, - void*, uint32_t, int32_t, ArtMethod*>(); } void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) { @@ -3372,17 +4034,12 @@ void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); InvokeRuntimeCallingConvention calling_convention; - locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1))); + locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot)); - CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>(); } void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) { - LocationSummary* locations = instruction->GetLocations(); - Register type_index = RegisterFrom(locations->GetTemp(0), Primitive::kPrimInt); - DCHECK(type_index.Is(w0)); - __ Mov(type_index, instruction->GetTypeIndex()); // Note: if heap poisoning is enabled, the entry point takes cares // of poisoning the reference. codegen_->InvokeRuntime(instruction->GetEntrypoint(), @@ -3559,6 +4216,11 @@ void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) { int32_t entry_offset = (type == Primitive::kPrimFloat) ? 
QUICK_ENTRY_POINT(pFmodf) : QUICK_ENTRY_POINT(pFmod); codegen_->InvokeRuntime(entry_offset, rem, rem->GetDexPc(), nullptr); + if (type == Primitive::kPrimFloat) { + CheckEntrypointTypes<kQuickFmodf, float, float, float>(); + } else { + CheckEntrypointTypes<kQuickFmod, double, double, double>(); + } break; } @@ -3803,9 +4465,7 @@ void InstructionCodeGeneratorARM64::VisitTypeConversion(HTypeConversion* convers int min_size = std::min(result_size, input_size); Register output = OutputRegister(conversion); Register source = InputRegisterAt(conversion, 0); - if ((result_type == Primitive::kPrimChar) && (input_size < result_size)) { - __ Ubfx(output, source, 0, result_size * kBitsPerByte); - } else if (result_type == Primitive::kPrimInt && input_type == Primitive::kPrimLong) { + if (result_type == Primitive::kPrimInt && input_type == Primitive::kPrimLong) { // 'int' values are used directly as W registers, discarding the top // bits, so we don't need to sign-extend and can just perform a move. // We do not pass the `kDiscardForSameWReg` argument to force clearing the @@ -3814,9 +4474,11 @@ void InstructionCodeGeneratorARM64::VisitTypeConversion(HTypeConversion* convers // 32bit input value as a 64bit value assuming that the top 32 bits are // zero. __ Mov(output.W(), source.W()); - } else if ((result_type == Primitive::kPrimChar) || - ((input_type == Primitive::kPrimChar) && (result_size > input_size))) { - __ Ubfx(output, output.IsX() ? source.X() : source.W(), 0, min_size * kBitsPerByte); + } else if (result_type == Primitive::kPrimChar || + (input_type == Primitive::kPrimChar && input_size < result_size)) { + __ Ubfx(output, + output.IsX() ? source.X() : source.W(), + 0, Primitive::ComponentSize(Primitive::kPrimChar) * kBitsPerByte); } else { __ Sbfx(output, output.IsX() ? source.X() : source.W(), 0, min_size * kBitsPerByte); } @@ -3951,6 +4613,82 @@ void InstructionCodeGeneratorARM64::VisitPackedSwitch(HPackedSwitch* switch_inst } } +void CodeGeneratorARM64::GenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { + DCHECK(kEmitCompilerReadBarrier); + + // If heap poisoning is enabled, the unpoisoning of the loaded + // reference will be carried out by the runtime within the slow + // path. + // + // Note that `ref` currently does not get unpoisoned (when heap + // poisoning is enabled), which is alright as the `ref` argument is + // not used by the artReadBarrierSlow entry point. + // + // TODO: Unpoison `ref` when it is used by artReadBarrierSlow. + SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) + ReadBarrierForHeapReferenceSlowPathARM64(instruction, out, ref, obj, offset, index); + AddSlowPath(slow_path); + + // TODO: When read barrier has a fast path, add it here. + /* Currently the read barrier call is inserted after the original load. + * However, if we have a fast path, we need to perform the load of obj.LockWord *before* the + * original load. This load-load ordering is required by the read barrier. 
+ * The fast path/slow path (for Baker's algorithm) should look like: + * + * bool isGray = obj.LockWord & kReadBarrierMask; + * lfence; // load fence or artificial data dependence to prevent load-load reordering + * ref = obj.field; // this is the original load + * if (isGray) { + * ref = Mark(ref); // ideally the slow path just does Mark(ref) + * } + */ + + __ B(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + +void CodeGeneratorARM64::MaybeGenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { + if (kEmitCompilerReadBarrier) { + // If heap poisoning is enabled, unpoisoning will be taken care of + // by the runtime within the slow path. + GenerateReadBarrier(instruction, out, ref, obj, offset, index); + } else if (kPoisonHeapReferences) { + GetAssembler()->UnpoisonHeapReference(WRegisterFrom(out)); + } +} + +void CodeGeneratorARM64::GenerateReadBarrierForRoot(HInstruction* instruction, + Location out, + Location root) { + DCHECK(kEmitCompilerReadBarrier); + + // Note that GC roots are not affected by heap poisoning, so we do + // not need to do anything special for this here. + SlowPathCodeARM64* slow_path = + new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathARM64(instruction, out, root); + AddSlowPath(slow_path); + + // TODO: Implement a fast path for ReadBarrierForRoot, performing + // the following operation (for Baker's algorithm): + // + // if (thread.tls32_.is_gc_marking) { + // root = Mark(root); + // } + + __ B(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + #undef __ #undef QUICK_ENTRY_POINT diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 881afcc123..7950f078ad 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -424,6 +424,51 @@ class CodeGeneratorARM64 : public CodeGenerator { void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE; + // Generate a read barrier for a heap reference within `instruction`. + // + // A read barrier for an object reference read from the heap is + // implemented as a call to the artReadBarrierSlow runtime entry + // point, which is passed the values in locations `ref`, `obj`, and + // `offset`: + // + // mirror::Object* artReadBarrierSlow(mirror::Object* ref, + // mirror::Object* obj, + // uint32_t offset); + // + // The `out` location contains the value returned by + // artReadBarrierSlow. + // + // When `index` is provided (i.e. for array accesses), the offset + // value passed to artReadBarrierSlow is adjusted to take `index` + // into account. + void GenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // If read barriers are enabled, generate a read barrier for a heap reference. + // If heap poisoning is enabled, also unpoison the reference in `out`. + void MaybeGenerateReadBarrier(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // Generate a read barrier for a GC root within `instruction`. 
+ // + // A read barrier for an object reference GC root is implemented as + // a call to the artReadBarrierForRootSlow runtime entry point, + // which is passed the value in location `root`: + // + // mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root); + // + // The `out` location contains the value returned by + // artReadBarrierForRootSlow. + void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root); + private: using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, vixl::Literal<uint64_t>*>; using MethodToLiteralMap = ArenaSafeMap<MethodReference, diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 919ed2db78..9dc9167824 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -415,13 +415,11 @@ class TypeCheckSlowPathMIPS : public SlowPathCodeMIPS { dex_pc, this, IsDirectEntrypoint(kQuickInstanceofNonTrivial)); + CheckEntrypointTypes< + kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>(); Primitive::Type ret_type = instruction_->GetType(); Location ret_loc = calling_convention.GetReturnLocation(ret_type); mips_codegen->MoveLocation(locations->Out(), ret_loc, ret_type); - CheckEntrypointTypes<kQuickInstanceofNonTrivial, - uint32_t, - const mirror::Class*, - const mirror::Class*>(); } else { DCHECK(instruction_->IsCheckCast()); mips_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), @@ -461,6 +459,7 @@ class DeoptimizationSlowPathMIPS : public SlowPathCodeMIPS { dex_pc, this, IsDirectEntrypoint(kQuickDeoptimize)); + CheckEntrypointTypes<kQuickDeoptimize, void, void>(); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS"; } @@ -2638,6 +2637,7 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction, Register obj = locations->InAt(0).AsRegister<Register>(); LoadOperandType load_type = kLoadUnsignedByte; bool is_volatile = field_info.IsVolatile(); + uint32_t offset = field_info.GetFieldOffset().Uint32Value(); switch (type) { case Primitive::kPrimBoolean: @@ -2668,8 +2668,7 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction, if (is_volatile && load_type == kLoadDoubleword) { InvokeRuntimeCallingConvention calling_convention; - __ Addiu32(locations->GetTemp(0).AsRegister<Register>(), - obj, field_info.GetFieldOffset().Uint32Value()); + __ Addiu32(locations->GetTemp(0).AsRegister<Register>(), obj, offset); // Do implicit Null check __ Lw(ZERO, locations->GetTemp(0).AsRegister<Register>(), 0); codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); @@ -2692,21 +2691,34 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction, if (type == Primitive::kPrimLong) { DCHECK(locations->Out().IsRegisterPair()); dst = locations->Out().AsRegisterPairLow<Register>(); + Register dst_high = locations->Out().AsRegisterPairHigh<Register>(); + if (obj == dst) { + __ LoadFromOffset(kLoadWord, dst_high, obj, offset + kMipsWordSize); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ LoadFromOffset(kLoadWord, dst, obj, offset); + } else { + __ LoadFromOffset(kLoadWord, dst, obj, offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ LoadFromOffset(kLoadWord, dst_high, obj, offset + kMipsWordSize); + } } else { DCHECK(locations->Out().IsRegister()); dst = locations->Out().AsRegister<Register>(); + __ LoadFromOffset(load_type, dst, obj, offset); } - __ LoadFromOffset(load_type, dst, obj, 
field_info.GetFieldOffset().Uint32Value()); } else { DCHECK(locations->Out().IsFpuRegister()); FRegister dst = locations->Out().AsFpuRegister<FRegister>(); if (type == Primitive::kPrimFloat) { - __ LoadSFromOffset(dst, obj, field_info.GetFieldOffset().Uint32Value()); + __ LoadSFromOffset(dst, obj, offset); } else { - __ LoadDFromOffset(dst, obj, field_info.GetFieldOffset().Uint32Value()); + __ LoadDFromOffset(dst, obj, offset); } } - codegen_->MaybeRecordImplicitNullCheck(instruction); + // Longs are handled earlier. + if (type != Primitive::kPrimLong) { + codegen_->MaybeRecordImplicitNullCheck(instruction); + } } if (is_volatile) { @@ -2752,6 +2764,7 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction, Register obj = locations->InAt(0).AsRegister<Register>(); StoreOperandType store_type = kStoreByte; bool is_volatile = field_info.IsVolatile(); + uint32_t offset = field_info.GetFieldOffset().Uint32Value(); switch (type) { case Primitive::kPrimBoolean: @@ -2782,8 +2795,7 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction, if (is_volatile && store_type == kStoreDoubleword) { InvokeRuntimeCallingConvention calling_convention; - __ Addiu32(locations->GetTemp(0).AsRegister<Register>(), - obj, field_info.GetFieldOffset().Uint32Value()); + __ Addiu32(locations->GetTemp(0).AsRegister<Register>(), obj, offset); // Do implicit Null check. __ Lw(ZERO, locations->GetTemp(0).AsRegister<Register>(), 0); codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); @@ -2806,21 +2818,28 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction, if (type == Primitive::kPrimLong) { DCHECK(locations->InAt(1).IsRegisterPair()); src = locations->InAt(1).AsRegisterPairLow<Register>(); + Register src_high = locations->InAt(1).AsRegisterPairHigh<Register>(); + __ StoreToOffset(kStoreWord, src, obj, offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ StoreToOffset(kStoreWord, src_high, obj, offset + kMipsWordSize); } else { DCHECK(locations->InAt(1).IsRegister()); src = locations->InAt(1).AsRegister<Register>(); + __ StoreToOffset(store_type, src, obj, offset); } - __ StoreToOffset(store_type, src, obj, field_info.GetFieldOffset().Uint32Value()); } else { DCHECK(locations->InAt(1).IsFpuRegister()); FRegister src = locations->InAt(1).AsFpuRegister<FRegister>(); if (type == Primitive::kPrimFloat) { - __ StoreSToOffset(src, obj, field_info.GetFieldOffset().Uint32Value()); + __ StoreSToOffset(src, obj, offset); } else { - __ StoreDToOffset(src, obj, field_info.GetFieldOffset().Uint32Value()); + __ StoreDToOffset(src, obj, offset); } } - codegen_->MaybeRecordImplicitNullCheck(instruction); + // Longs are handled earlier. + if (type != Primitive::kPrimLong) { + codegen_->MaybeRecordImplicitNullCheck(instruction); + } } // TODO: memory barriers? 
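The MIPS HandleFieldGet/HandleFieldSet changes above split non-volatile 64-bit accesses into two 32-bit word accesses: the implicit null check must be recorded on the first access that can fault, and for loads the order is flipped when the low destination register aliases the base register, so the base is not clobbered before the second load. A hypothetical C++ model of the load case (not the MIPS emitter; kMipsWordSize == 4 as used above):

    #include <cstdint>

    void LoadLongField(const uint8_t* obj, uint32_t offset,
                       uint32_t* dst_low, uint32_t* dst_high,
                       bool dst_low_aliases_obj) {
      const uint32_t* field = reinterpret_cast<const uint32_t*>(obj + offset);
      if (dst_low_aliases_obj) {
        *dst_high = field[1];  // first access: implicit null check recorded here
        *dst_low  = field[0];  // base no longer needed, safe to overwrite
      } else {
        *dst_low  = field[0];  // first access: implicit null check recorded here
        *dst_high = field[1];
      }
    }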
@@ -3031,7 +3050,7 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke invoke->GetStringInitOffset()); break; case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: - callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ LoadConst32(temp.AsRegister<Register>(), invoke->GetMethodAddress()); @@ -3043,7 +3062,7 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke LOG(FATAL) << "Unsupported"; UNREACHABLE(); case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { - Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); Register reg = temp.AsRegister<Register>(); Register method_reg; if (current_method.IsRegister()) { @@ -3170,6 +3189,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) { cls->GetDexPc(), nullptr, IsDirectEntrypoint(kQuickInitializeTypeAndVerifyAccess)); + CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>(); return; } @@ -3181,21 +3201,26 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) { __ LoadFromOffset(kLoadWord, out, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); } else { - DCHECK(cls->CanCallRuntime()); __ LoadFromOffset(kLoadWord, out, current_method, ArtMethod::DexCacheResolvedTypesOffset(kMipsPointerSize).Int32Value()); __ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())); - SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS( - cls, - cls, - cls->GetDexPc(), - cls->MustGenerateClinitCheck()); - codegen_->AddSlowPath(slow_path); - __ Beqz(out, slow_path->GetEntryLabel()); - if (cls->MustGenerateClinitCheck()) { - GenerateClassInitializationCheck(slow_path, out); - } else { - __ Bind(slow_path->GetExitLabel()); + + if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) { + DCHECK(cls->CanCallRuntime()); + SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS( + cls, + cls, + cls->GetDexPc(), + cls->MustGenerateClinitCheck()); + codegen_->AddSlowPath(slow_path); + if (!cls->IsInDexCache()) { + __ Beqz(out, slow_path->GetEntryLabel()); + } + if (cls->MustGenerateClinitCheck()) { + GenerateClassInitializationCheck(slow_path, out); + } else { + __ Bind(slow_path->GetExitLabel()); + } } } } @@ -3478,17 +3503,12 @@ void LocationsBuilderMIPS::VisitNewInstance(HNewInstance* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); InvokeRuntimeCallingConvention calling_convention; - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot)); } void InstructionCodeGeneratorMIPS::VisitNewInstance(HNewInstance* instruction) { - InvokeRuntimeCallingConvention calling_convention; - Register current_method_register = calling_convention.GetRegisterAt(1); - __ Lw(current_method_register, SP, 
kCurrentMethodStackOffset); - // Move an uint16_t value to a register. - __ LoadConst32(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex()); codegen_->InvokeRuntime( GetThreadOffset<kMipsWordSize>(instruction->GetEntrypoint()).Int32Value(), instruction, @@ -3705,7 +3725,7 @@ void InstructionCodeGeneratorMIPS::VisitRem(HRem* instruction) { instruction, instruction->GetDexPc(), nullptr, IsDirectEntrypoint(kQuickFmodf)); - CheckEntrypointTypes<kQuickL2f, float, int64_t>(); + CheckEntrypointTypes<kQuickFmodf, float, float, float>(); break; } case Primitive::kPrimDouble: { @@ -3713,7 +3733,7 @@ void InstructionCodeGeneratorMIPS::VisitRem(HRem* instruction) { instruction, instruction->GetDexPc(), nullptr, IsDirectEntrypoint(kQuickFmod)); - CheckEntrypointTypes<kQuickL2d, double, int64_t>(); + CheckEntrypointTypes<kQuickFmod, double, double, double>(); break; } default: diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 5864660890..934f24bfb0 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -27,8 +27,8 @@ #include "mirror/class-inl.h" #include "offsets.h" #include "thread.h" -#include "utils/mips64/assembler_mips64.h" #include "utils/assembler.h" +#include "utils/mips64/assembler_mips64.h" #include "utils/stack_checks.h" namespace art { @@ -210,7 +210,7 @@ class LoadClassSlowPathMIPS64 : public SlowPathCodeMIPS64 { } RestoreLiveRegisters(codegen, locations); - __ B(GetExitLabel()); + __ Bc(GetExitLabel()); } const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathMIPS64"; } @@ -257,7 +257,7 @@ class LoadStringSlowPathMIPS64 : public SlowPathCodeMIPS64 { type); RestoreLiveRegisters(codegen, locations); - __ B(GetExitLabel()); + __ Bc(GetExitLabel()); } const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathMIPS64"; } @@ -312,13 +312,13 @@ class SuspendCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { CheckEntrypointTypes<kQuickTestSuspend, void, void>(); RestoreLiveRegisters(codegen, instruction_->GetLocations()); if (successor_ == nullptr) { - __ B(GetReturnLabel()); + __ Bc(GetReturnLabel()); } else { - __ B(mips64_codegen->GetLabelOf(successor_)); + __ Bc(mips64_codegen->GetLabelOf(successor_)); } } - Label* GetReturnLabel() { + Mips64Label* GetReturnLabel() { DCHECK(successor_ == nullptr); return &return_label_; } @@ -331,7 +331,7 @@ class SuspendCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { HBasicBlock* const successor_; // If `successor_` is null, the label to branch to after the suspend check. 
- Label return_label_; + Mips64Label return_label_; DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathMIPS64); }; @@ -366,13 +366,11 @@ class TypeCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { instruction_, dex_pc, this); + CheckEntrypointTypes< + kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>(); Primitive::Type ret_type = instruction_->GetType(); Location ret_loc = calling_convention.GetReturnLocation(ret_type); mips64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type); - CheckEntrypointTypes<kQuickInstanceofNonTrivial, - uint32_t, - const mirror::Class*, - const mirror::Class*>(); } else { DCHECK(instruction_->IsCheckCast()); mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, dex_pc, this); @@ -380,7 +378,7 @@ class TypeCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { } RestoreLiveRegisters(codegen, locations); - __ B(GetExitLabel()); + __ Bc(GetExitLabel()); } const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathMIPS64"; } @@ -404,6 +402,7 @@ class DeoptimizationSlowPathMIPS64 : public SlowPathCodeMIPS64 { uint32_t dex_pc = deoptimize->GetDexPc(); CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickDeoptimize, void, void>(); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS64"; } @@ -441,6 +440,32 @@ CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph, #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMips64WordSize, x).Int32Value() void CodeGeneratorMIPS64::Finalize(CodeAllocator* allocator) { + // Ensure that we fix up branches. + __ FinalizeCode(); + + // Adjust native pc offsets in stack maps. + for (size_t i = 0, num = stack_map_stream_.GetNumberOfStackMaps(); i != num; ++i) { + uint32_t old_position = stack_map_stream_.GetStackMap(i).native_pc_offset; + uint32_t new_position = __ GetAdjustedPosition(old_position); + DCHECK_GE(new_position, old_position); + stack_map_stream_.SetStackMapNativePcOffset(i, new_position); + } + + // Adjust pc offsets for the disassembly information. + if (disasm_info_ != nullptr) { + GeneratedCodeInterval* frame_entry_interval = disasm_info_->GetFrameEntryInterval(); + frame_entry_interval->start = __ GetAdjustedPosition(frame_entry_interval->start); + frame_entry_interval->end = __ GetAdjustedPosition(frame_entry_interval->end); + for (auto& it : *disasm_info_->GetInstructionIntervals()) { + it.second.start = __ GetAdjustedPosition(it.second.start); + it.second.end = __ GetAdjustedPosition(it.second.end); + } + for (auto& it : *disasm_info_->GetSlowPathIntervals()) { + it.code_interval.start = __ GetAdjustedPosition(it.code_interval.start); + it.code_interval.end = __ GetAdjustedPosition(it.code_interval.end); + } + } + CodeGenerator::Finalize(allocator); } @@ -603,6 +628,7 @@ void CodeGeneratorMIPS64::GenerateFrameExit() { } __ Jr(RA); + __ Nop(); __ cfi().RestoreState(); __ cfi().DefCFAOffset(GetFrameSize()); @@ -939,7 +965,7 @@ Location CodeGeneratorMIPS64::GetStackLocation(HLoadLocal* load) const { } void CodeGeneratorMIPS64::MarkGCCard(GpuRegister object, GpuRegister value) { - Label done; + Mips64Label done; GpuRegister card = AT; GpuRegister temp = TMP; __ Beqzc(value, &done); @@ -1048,6 +1074,7 @@ void CodeGeneratorMIPS64::InvokeRuntime(int32_t entry_point_offset, // TODO: anything related to T9/GP/GOT/PIC/.so's? 
__ LoadFromOffset(kLoadDoubleword, T9, TR, entry_point_offset); __ Jalr(T9); + __ Nop(); RecordPcInfo(instruction, dex_pc, slow_path); } @@ -1079,7 +1106,7 @@ void InstructionCodeGeneratorMIPS64::GenerateSuspendCheck(HSuspendCheck* instruc __ Bind(slow_path->GetReturnLabel()); } else { __ Beqzc(TMP, codegen_->GetLabelOf(successor)); - __ B(slow_path->GetEntryLabel()); + __ Bc(slow_path->GetEntryLabel()); // slow_path will return to GetLabelOf(successor). } } @@ -1583,6 +1610,7 @@ void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) { instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>(); } break; } @@ -1669,12 +1697,7 @@ void InstructionCodeGeneratorMIPS64::VisitBoundsCheck(HBoundsCheck* instruction) // length is limited by the maximum positive signed 32-bit integer. // Unsigned comparison of length and index checks for index < 0 // and for length <= index simultaneously. - // Mips R6 requires lhs != rhs for compact branches. - if (index == length) { - __ B(slow_path->GetEntryLabel()); - } else { - __ Bgeuc(index, length, slow_path->GetEntryLabel()); - } + __ Bgeuc(index, length, slow_path->GetEntryLabel()); } void LocationsBuilderMIPS64::VisitCheckCast(HCheckCast* instruction) { @@ -1796,6 +1819,19 @@ void InstructionCodeGeneratorMIPS64::VisitCompare(HCompare* instruction) { : QUICK_ENTRY_POINT(pCmplDouble); } codegen_->InvokeRuntime(entry_point_offset, instruction, instruction->GetDexPc(), nullptr); + if (in_type == Primitive::kPrimFloat) { + if (instruction->IsGtBias()) { + CheckEntrypointTypes<kQuickCmpgFloat, int32_t, float, float>(); + } else { + CheckEntrypointTypes<kQuickCmplFloat, int32_t, float, float>(); + } + } else { + if (instruction->IsGtBias()) { + CheckEntrypointTypes<kQuickCmpgDouble, int32_t, double, double>(); + } else { + CheckEntrypointTypes<kQuickCmplDouble, int32_t, double, double>(); + } + } break; } @@ -2264,7 +2300,7 @@ void InstructionCodeGeneratorMIPS64::VisitDivZeroCheck(HDivZeroCheck* instructio if (value.IsConstant()) { int64_t divisor = codegen_->GetInt64ValueOf(value.GetConstant()->AsConstant()); if (divisor == 0) { - __ B(slow_path->GetEntryLabel()); + __ Bc(slow_path->GetEntryLabel()); } else { // A division by a non-null constant is valid. We don't need to perform // any check, so simply fall through. @@ -2316,7 +2352,7 @@ void InstructionCodeGeneratorMIPS64::HandleGoto(HInstruction* got, HBasicBlock* GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr); } if (!codegen_->GoesToNextBlock(block, successor)) { - __ B(codegen_->GetLabelOf(successor)); + __ Bc(codegen_->GetLabelOf(successor)); } } @@ -2341,8 +2377,8 @@ void InstructionCodeGeneratorMIPS64::VisitTryBoundary(HTryBoundary* try_boundary void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruction, size_t condition_input_index, - Label* true_target, - Label* false_target) { + Mips64Label* true_target, + Mips64Label* false_target) { HInstruction* cond = instruction->InputAt(condition_input_index); if (true_target == nullptr && false_target == nullptr) { @@ -2352,12 +2388,12 @@ void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruc // Constant condition, statically compared against 1. 
if (cond->AsIntConstant()->IsOne()) { if (true_target != nullptr) { - __ B(true_target); + __ Bc(true_target); } } else { DCHECK(cond->AsIntConstant()->IsZero()); if (false_target != nullptr) { - __ B(false_target); + __ Bc(false_target); } } return; @@ -2397,7 +2433,7 @@ void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruc } IfCondition if_cond; - Label* non_fallthrough_target; + Mips64Label* non_fallthrough_target; if (true_target == nullptr) { if_cond = condition->GetOppositeCondition(); non_fallthrough_target = false_target; @@ -2435,7 +2471,7 @@ void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruc __ Bnezc(lhs, non_fallthrough_target); // > 0 if non-zero break; case kCondAE: - __ B(non_fallthrough_target); // always true + __ Bc(non_fallthrough_target); // always true break; } } else { @@ -2443,60 +2479,37 @@ void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruc rhs_reg = TMP; __ LoadConst32(rhs_reg, rhs_imm); } - // It looks like we can get here with lhs == rhs. Should that be possible at all? - // Mips R6 requires lhs != rhs for compact branches. - if (lhs == rhs_reg) { - DCHECK(!use_imm); - switch (if_cond) { - case kCondEQ: - case kCondGE: - case kCondLE: - case kCondBE: - case kCondAE: - // if lhs == rhs for a positive condition, then it is a branch - __ B(non_fallthrough_target); - break; - case kCondNE: - case kCondLT: - case kCondGT: - case kCondB: - case kCondA: - // if lhs == rhs for a negative condition, then it is a NOP - break; - } - } else { - switch (if_cond) { - case kCondEQ: - __ Beqc(lhs, rhs_reg, non_fallthrough_target); - break; - case kCondNE: - __ Bnec(lhs, rhs_reg, non_fallthrough_target); - break; - case kCondLT: - __ Bltc(lhs, rhs_reg, non_fallthrough_target); - break; - case kCondGE: - __ Bgec(lhs, rhs_reg, non_fallthrough_target); - break; - case kCondLE: - __ Bgec(rhs_reg, lhs, non_fallthrough_target); - break; - case kCondGT: - __ Bltc(rhs_reg, lhs, non_fallthrough_target); - break; - case kCondB: - __ Bltuc(lhs, rhs_reg, non_fallthrough_target); - break; - case kCondAE: - __ Bgeuc(lhs, rhs_reg, non_fallthrough_target); - break; - case kCondBE: - __ Bgeuc(rhs_reg, lhs, non_fallthrough_target); - break; - case kCondA: - __ Bltuc(rhs_reg, lhs, non_fallthrough_target); - break; - } + switch (if_cond) { + case kCondEQ: + __ Beqc(lhs, rhs_reg, non_fallthrough_target); + break; + case kCondNE: + __ Bnec(lhs, rhs_reg, non_fallthrough_target); + break; + case kCondLT: + __ Bltc(lhs, rhs_reg, non_fallthrough_target); + break; + case kCondGE: + __ Bgec(lhs, rhs_reg, non_fallthrough_target); + break; + case kCondLE: + __ Bgec(rhs_reg, lhs, non_fallthrough_target); + break; + case kCondGT: + __ Bltc(rhs_reg, lhs, non_fallthrough_target); + break; + case kCondB: + __ Bltuc(lhs, rhs_reg, non_fallthrough_target); + break; + case kCondAE: + __ Bgeuc(lhs, rhs_reg, non_fallthrough_target); + break; + case kCondBE: + __ Bgeuc(rhs_reg, lhs, non_fallthrough_target); + break; + case kCondA: + __ Bltuc(rhs_reg, lhs, non_fallthrough_target); + break; } } } @@ -2504,7 +2517,7 @@ void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruc // If neither branch falls through (case 3), the conditional branch to `true_target` // was already emitted (case 2) and we need to emit a jump to `false_target`. 
if (true_target != nullptr && false_target != nullptr) { - __ B(false_target); + __ Bc(false_target); } } @@ -2518,9 +2531,9 @@ void LocationsBuilderMIPS64::VisitIf(HIf* if_instr) { void InstructionCodeGeneratorMIPS64::VisitIf(HIf* if_instr) { HBasicBlock* true_successor = if_instr->IfTrueSuccessor(); HBasicBlock* false_successor = if_instr->IfFalseSuccessor(); - Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ? + Mips64Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ? nullptr : codegen_->GetLabelOf(true_successor); - Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ? + Mips64Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ? nullptr : codegen_->GetLabelOf(false_successor); GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target); } @@ -2695,7 +2708,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>(); GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - Label done; + Mips64Label done; // Return 0 if `obj` is null. // TODO: Avoid this check if we know `obj` is not null. @@ -2790,6 +2803,7 @@ void InstructionCodeGeneratorMIPS64::VisitInvokeInterface(HInvokeInterface* invo __ LoadFromOffset(kLoadDoubleword, T9, temp, entry_point.Int32Value()); // T9(); __ Jalr(T9); + __ Nop(); DCHECK(!codegen_->IsLeafMethod()); codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } @@ -2822,9 +2836,9 @@ void LocationsBuilderMIPS64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* in // sorted out. if (invoke->HasCurrentMethodInput()) { LocationSummary* locations = invoke->GetLocations(); - Location location = locations->InAt(invoke->GetCurrentMethodInputIndex()); + Location location = locations->InAt(invoke->GetSpecialInputIndex()); if (location.IsUnallocated() && location.GetPolicy() == Location::kRequiresRegister) { - locations->SetInAt(invoke->GetCurrentMethodInputIndex(), Location::NoLocation()); + locations->SetInAt(invoke->GetSpecialInputIndex(), Location::NoLocation()); } } } @@ -2882,7 +2896,7 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo invoke->GetStringInitOffset()); break; case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: - callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ LoadConst64(temp.AsRegister<GpuRegister>(), invoke->GetMethodAddress()); @@ -2894,7 +2908,7 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo LOG(FATAL) << "Unsupported"; UNREACHABLE(); case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { - Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); GpuRegister reg = temp.AsRegister<GpuRegister>(); GpuRegister method_reg; if (current_method.IsRegister()) { @@ -2924,13 +2938,14 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo switch (invoke->GetCodePtrLocation()) { case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf: - __ Jalr(&frame_entry_label_, T9); + __ Jialc(&frame_entry_label_, T9); break; case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect: // LR = 
invoke->GetDirectCodePtr(); __ LoadConst64(T9, invoke->GetDirectCodePtr()); // LR() __ Jalr(T9); + __ Nop(); break; case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup: case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: @@ -2947,6 +2962,7 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo kMips64WordSize).Int32Value()); // T9() __ Jalr(T9); + __ Nop(); break; } DCHECK(!IsLeafMethod()); @@ -2988,6 +3004,7 @@ void CodeGeneratorMIPS64::GenerateVirtualCall(HInvokeVirtual* invoke, Location t __ LoadFromOffset(kLoadDoubleword, T9, temp, entry_point.Int32Value()); // T9(); __ Jalr(T9); + __ Nop(); } void InstructionCodeGeneratorMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke) { @@ -3016,6 +3033,7 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) { cls, cls->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>(); return; } @@ -3027,22 +3045,26 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) { __ LoadFromOffset(kLoadUnsignedWord, out, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); } else { - DCHECK(cls->CanCallRuntime()); __ LoadFromOffset(kLoadDoubleword, out, current_method, ArtMethod::DexCacheResolvedTypesOffset(kMips64PointerSize).Int32Value()); __ LoadFromOffset(kLoadUnsignedWord, out, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())); // TODO: We will need a read barrier here. - SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS64( - cls, - cls, - cls->GetDexPc(), - cls->MustGenerateClinitCheck()); - codegen_->AddSlowPath(slow_path); - __ Beqzc(out, slow_path->GetEntryLabel()); - if (cls->MustGenerateClinitCheck()) { - GenerateClassInitializationCheck(slow_path, out); - } else { - __ Bind(slow_path->GetExitLabel()); + if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) { + DCHECK(cls->CanCallRuntime()); + SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS64( + cls, + cls, + cls->GetDexPc(), + cls->MustGenerateClinitCheck()); + codegen_->AddSlowPath(slow_path); + if (!cls->IsInDexCache()) { + __ Beqzc(out, slow_path->GetEntryLabel()); + } + if (cls->MustGenerateClinitCheck()) { + GenerateClassInitializationCheck(slow_path, out); + } else { + __ Bind(slow_path->GetExitLabel()); + } } } } @@ -3132,7 +3154,11 @@ void InstructionCodeGeneratorMIPS64::VisitMonitorOperation(HMonitorOperation* in instruction, instruction->GetDexPc(), nullptr); - CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); + if (instruction->IsEnter()) { + CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); + } else { + CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>(); + } } void LocationsBuilderMIPS64::VisitMul(HMul* mul) { @@ -3266,15 +3292,12 @@ void LocationsBuilderMIPS64::VisitNewInstance(HNewInstance* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); InvokeRuntimeCallingConvention calling_convention; - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot)); } void 
InstructionCodeGeneratorMIPS64::VisitNewInstance(HNewInstance* instruction) { - LocationSummary* locations = instruction->GetLocations(); - // Move an uint16_t value to a register. - __ LoadConst32(locations->GetTemp(0).AsRegister<GpuRegister>(), instruction->GetTypeIndex()); codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc(), @@ -3454,6 +3477,11 @@ void InstructionCodeGeneratorMIPS64::VisitRem(HRem* instruction) { int32_t entry_offset = (type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pFmodf) : QUICK_ENTRY_POINT(pFmod); codegen_->InvokeRuntime(entry_offset, instruction, instruction->GetDexPc(), nullptr); + if (type == Primitive::kPrimFloat) { + CheckEntrypointTypes<kQuickFmodf, float, float, float>(); + } else { + CheckEntrypointTypes<kQuickFmod, double, double, double>(); + } break; } default: @@ -3763,6 +3791,11 @@ void InstructionCodeGeneratorMIPS64::VisitTypeConversion(HTypeConversion* conver conversion, conversion->GetDexPc(), nullptr); + if (result_type == Primitive::kPrimFloat) { + CheckEntrypointTypes<kQuickL2f, float, int64_t>(); + } else { + CheckEntrypointTypes<kQuickL2d, double, int64_t>(); + } } } else if (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type)) { CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong); @@ -3778,6 +3811,19 @@ void InstructionCodeGeneratorMIPS64::VisitTypeConversion(HTypeConversion* conver conversion, conversion->GetDexPc(), nullptr); + if (result_type != Primitive::kPrimLong) { + if (input_type == Primitive::kPrimFloat) { + CheckEntrypointTypes<kQuickF2iz, int32_t, float>(); + } else { + CheckEntrypointTypes<kQuickD2iz, int32_t, double>(); + } + } else { + if (input_type == Primitive::kPrimFloat) { + CheckEntrypointTypes<kQuickF2l, int64_t, float>(); + } else { + CheckEntrypointTypes<kQuickD2l, int64_t, double>(); + } + } } else if (Primitive::IsFloatingPointType(result_type) && Primitive::IsFloatingPointType(input_type)) { FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>(); @@ -3929,7 +3975,7 @@ void InstructionCodeGeneratorMIPS64::VisitPackedSwitch(HPackedSwitch* switch_ins const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors(); for (int32_t i = 0; i < num_entries; i++) { int32_t case_value = lower_bound + i; - Label* succ = codegen_->GetLabelOf(successors[i]); + Mips64Label* succ = codegen_->GetLabelOf(successors[i]); if (case_value == 0) { __ Beqzc(value_reg, succ); } else { @@ -3940,7 +3986,7 @@ void InstructionCodeGeneratorMIPS64::VisitPackedSwitch(HPackedSwitch* switch_ins // And the default for any other value. 
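Most of the one-line additions in this and the following code generators are CheckEntrypointTypes<...>() calls placed next to InvokeRuntime(). They are compile-time checks that the quick entrypoint being called really has the return and argument types the call site assumes. A self-contained sketch of how such a check can be expressed with a traits table and static_assert; the enum values and traits below are illustrative stand-ins, not ART's actual definitions:

#include <cstdint>
#include <type_traits>

// Hypothetical stand-ins for the real quick-entrypoint declarations.
enum QuickEntrypointEnum { kQuickFmod, kQuickL2d };

template <QuickEntrypointEnum kEntrypoint> struct EntrypointSignature;
template <> struct EntrypointSignature<kQuickFmod> { using type = double (double, double); };
template <> struct EntrypointSignature<kQuickL2d>  { using type = double (int64_t); };

// Usage mirrors the calls added in the diff, e.g.
//   CheckEntrypointTypes<kQuickFmod, double, double, double>();
template <QuickEntrypointEnum kEntrypoint, typename ReturnType, typename... ArgTypes>
void CheckEntrypointTypes() {
  static_assert(std::is_same<typename EntrypointSignature<kEntrypoint>::type,
                             ReturnType(ArgTypes...)>::value,
                "Call site signature does not match the entrypoint declaration");
}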
if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) { - __ B(codegen_->GetLabelOf(default_block)); + __ Bc(codegen_->GetLabelOf(default_block)); } } diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h index a078dd1819..85e3a4a3ce 100644 --- a/compiler/optimizing/code_generator_mips64.h +++ b/compiler/optimizing/code_generator_mips64.h @@ -158,12 +158,12 @@ class SlowPathCodeMIPS64 : public SlowPathCode { public: SlowPathCodeMIPS64() : entry_label_(), exit_label_() {} - Label* GetEntryLabel() { return &entry_label_; } - Label* GetExitLabel() { return &exit_label_; } + Mips64Label* GetEntryLabel() { return &entry_label_; } + Mips64Label* GetExitLabel() { return &exit_label_; } private: - Label entry_label_; - Label exit_label_; + Mips64Label entry_label_; + Mips64Label exit_label_; DISALLOW_COPY_AND_ASSIGN(SlowPathCodeMIPS64); }; @@ -231,8 +231,8 @@ class InstructionCodeGeneratorMIPS64 : public HGraphVisitor { void GenerateExplicitNullCheck(HNullCheck* instruction); void GenerateTestAndBranch(HInstruction* instruction, size_t condition_input_index, - Label* true_target, - Label* false_target); + Mips64Label* true_target, + Mips64Label* false_target); void DivRemOneOrMinusOne(HBinaryOperation* instruction); void DivRemByPowerOfTwo(HBinaryOperation* instruction); void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); @@ -265,7 +265,7 @@ class CodeGeneratorMIPS64 : public CodeGenerator { size_t GetFloatingPointSpillSlotSize() const OVERRIDE { return kMips64WordSize; } uintptr_t GetAddressOf(HBasicBlock* block) const OVERRIDE { - return GetLabelOf(block)->Position(); + return assembler_.GetLabelLocation(GetLabelOf(block)); } HGraphVisitor* GetLocationBuilder() OVERRIDE { return &location_builder_; } @@ -298,12 +298,12 @@ class CodeGeneratorMIPS64 : public CodeGenerator { return isa_features_; } - Label* GetLabelOf(HBasicBlock* block) const { - return CommonGetLabelOf<Label>(block_labels_, block); + Mips64Label* GetLabelOf(HBasicBlock* block) const { + return CommonGetLabelOf<Mips64Label>(block_labels_, block); } void Initialize() OVERRIDE { - block_labels_ = CommonInitializeLabels<Label>(); + block_labels_ = CommonInitializeLabels<Mips64Label>(); } void Finalize(CodeAllocator* allocator) OVERRIDE; @@ -349,8 +349,8 @@ class CodeGeneratorMIPS64 : public CodeGenerator { private: // Labels for each block that will be compiled. - Label* block_labels_; // Indexed by block id. - Label frame_entry_label_; + Mips64Label* block_labels_; // Indexed by block id. 
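In the header hunk above, GetAddressOf() stops reading Label::Position() and instead asks the assembler for the label's location. A plausible reason, given that the file now uses Mips64Label throughout, is that a label's offset is only final once the assembler has resolved (and possibly expanded) its R6 branches, so the code generator defers to it. A minimal usage sketch, assuming only the GetLabelLocation() call visible in the hunk; the helper name is illustrative:

// Sketch: query a block's code address only after the assembler has finalized
// its branches.
uintptr_t AddressOfBlock(Mips64Assembler* assembler, Mips64Label* label) {
  return assembler->GetLabelLocation(label);
}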
+ Mips64Label frame_entry_label_; LocationsBuilderMIPS64 location_builder_; InstructionCodeGeneratorMIPS64 instruction_visitor_; ParallelMoveResolverMIPS64 move_resolver_; diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 999306c34b..1fc09a81bc 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -67,6 +67,7 @@ class NullCheckSlowPathX86 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowNullPointer, void, void>(); } bool IsFatal() const OVERRIDE { return true; } @@ -93,6 +94,7 @@ class DivZeroCheckSlowPathX86 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowDivZero, void, void>(); } bool IsFatal() const OVERRIDE { return true; } @@ -152,6 +154,7 @@ class BoundsCheckSlowPathX86 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>(); } bool IsFatal() const OVERRIDE { return true; } @@ -177,6 +180,7 @@ class SuspendCheckSlowPathX86 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickTestSuspend, void, void>(); RestoreLiveRegisters(codegen, instruction_->GetLocations()); if (successor_ == nullptr) { __ jmp(GetReturnLabel()); @@ -222,6 +226,7 @@ class LoadStringSlowPathX86 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX)); RestoreLiveRegisters(codegen, locations); @@ -257,6 +262,11 @@ class LoadClassSlowPathX86 : public SlowPathCode { x86_codegen->InvokeRuntime(do_clinit_ ? QUICK_ENTRY_POINT(pInitializeStaticStorage) : QUICK_ENTRY_POINT(pInitializeType), at_, dex_pc_, this); + if (do_clinit_) { + CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>(); + } else { + CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>(); + } // Move the class to the desired location. Location out = locations->Out(); @@ -368,6 +378,7 @@ class DeoptimizationSlowPathX86 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickDeoptimize, void, void>(); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86"; } @@ -410,6 +421,7 @@ class ArraySetSlowPathX86 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>(); RestoreLiveRegisters(codegen, locations); __ jmp(GetExitLabel()); } @@ -1908,7 +1920,7 @@ void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invok IntrinsicLocationsBuilderX86 intrinsic(codegen_); if (intrinsic.TryDispatch(invoke)) { if (invoke->GetLocations()->CanCall() && invoke->HasPcRelativeDexCache()) { - invoke->GetLocations()->SetInAt(invoke->GetCurrentMethodInputIndex(), Location::Any()); + invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any()); } return; } @@ -1917,7 +1929,7 @@ void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invok // For PC-relative dex cache the invoke has an extra input, the PC-relative address base. 
if (invoke->HasPcRelativeDexCache()) { - invoke->GetLocations()->SetInAt(invoke->GetCurrentMethodInputIndex(), + invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister()); } @@ -1926,9 +1938,9 @@ void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invok // needs a register. We therefore do not require a register for it, and let // the code generation of the invoke handle it. LocationSummary* locations = invoke->GetLocations(); - Location location = locations->InAt(invoke->GetCurrentMethodInputIndex()); + Location location = locations->InAt(invoke->GetSpecialInputIndex()); if (location.IsUnallocated() && location.GetPolicy() == Location::kRequiresRegister) { - locations->SetInAt(invoke->GetCurrentMethodInputIndex(), Location::NoLocation()); + locations->SetInAt(invoke->GetSpecialInputIndex(), Location::NoLocation()); } } } @@ -2460,6 +2472,7 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio conversion, conversion->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickF2l, int64_t, float>(); break; case Primitive::kPrimDouble: @@ -2468,6 +2481,7 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio conversion, conversion->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickD2l, int64_t, double>(); break; default: @@ -3298,11 +3312,13 @@ void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instr instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>(); } else { codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLmod), instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>(); } break; } @@ -3769,19 +3785,18 @@ void LocationsBuilderX86::VisitNewInstance(HNewInstance* instruction) { new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); locations->SetOut(Location::RegisterLocation(EAX)); InvokeRuntimeCallingConvention calling_convention; - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); } void InstructionCodeGeneratorX86::VisitNewInstance(HNewInstance* instruction) { - InvokeRuntimeCallingConvention calling_convention; - __ movl(calling_convention.GetRegisterAt(0), Immediate(instruction->GetTypeIndex())); // Note: if heap poisoning is enabled, the entry point takes cares // of poisoning the reference. codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>(); DCHECK(!codegen_->IsLeafMethod()); } @@ -3798,13 +3813,13 @@ void LocationsBuilderX86::VisitNewArray(HNewArray* instruction) { void InstructionCodeGeneratorX86::VisitNewArray(HNewArray* instruction) { InvokeRuntimeCallingConvention calling_convention; __ movl(calling_convention.GetRegisterAt(0), Immediate(instruction->GetTypeIndex())); - // Note: if heap poisoning is enabled, the entry point takes cares // of poisoning the reference. 
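The GenerateDivRemIntegral hunk above adds signature checks for pLdiv/pLmod: a 64-bit division or remainder on x86-32 goes through those runtime helpers, both of which take and return int64_t. A tiny, standalone illustration of what the two entrypoints compute (truncated division and the matching remainder), independent of ART:

#include <cstdint>
#include <cassert>

int main() {
  // kQuickLdiv / kQuickLmod semantics: truncated int64 division, so
  // (a / b) * b + (a % b) == a holds for every non-zero divisor.
  int64_t a = -7, b = 2;
  assert(a / b == -3);              // truncated toward zero
  assert(a % b == -1);              // remainder carries the dividend's sign
  assert((a / b) * b + a % b == a);
  return 0;
}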
codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>(); DCHECK(!codegen_->IsLeafMethod()); } @@ -4032,7 +4047,7 @@ HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86::GetSupportedInvokeStaticOr Register CodeGeneratorX86::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp) { DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u); - Location location = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); if (!invoke->GetLocations()->Intrinsified()) { return location.AsRegister<Register>(); } @@ -4063,7 +4078,7 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, __ fs()->movl(temp.AsRegister<Register>(), Address::Absolute(invoke->GetStringInitOffset())); break; case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: - callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ movl(temp.AsRegister<Register>(), Immediate(invoke->GetMethodAddress())); @@ -4084,7 +4099,7 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, break; } case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { - Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); Register method_reg; Register reg = temp.AsRegister<Register>(); if (current_method.IsRegister()) { @@ -4856,7 +4871,7 @@ void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) { // Temporary registers for the write barrier. locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too. // Ensure the card is in a byte register. - locations->AddTemp(Location::RegisterLocation(ECX)); // Possibly used for read barrier too. 
+ locations->AddTemp(Location::RegisterLocation(ECX)); } } @@ -5503,6 +5518,7 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) { cls, cls->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>(); return; } @@ -5524,7 +5540,6 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) { __ movl(out, Address(current_method, declaring_class_offset)); } } else { - DCHECK(cls->CanCallRuntime()); // /* GcRoot<mirror::Class>[] */ out = // current_method.ptr_sized_fields_->dex_cache_resolved_types_ __ movl(out, Address(current_method, @@ -5541,15 +5556,22 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) { __ movl(out, Address(out, cache_offset)); } - SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86( - cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); - codegen_->AddSlowPath(slow_path); - __ testl(out, out); - __ j(kEqual, slow_path->GetEntryLabel()); - if (cls->MustGenerateClinitCheck()) { - GenerateClassInitializationCheck(slow_path, out); - } else { - __ Bind(slow_path->GetExitLabel()); + if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) { + DCHECK(cls->CanCallRuntime()); + SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86( + cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); + codegen_->AddSlowPath(slow_path); + + if (!cls->IsInDexCache()) { + __ testl(out, out); + __ j(kEqual, slow_path->GetEntryLabel()); + } + + if (cls->MustGenerateClinitCheck()) { + GenerateClassInitializationCheck(slow_path, out); + } else { + __ Bind(slow_path->GetExitLabel()); + } } } } @@ -5661,6 +5683,7 @@ void InstructionCodeGeneratorX86::VisitThrow(HThrow* instruction) { instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); } void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) { @@ -6150,6 +6173,11 @@ void InstructionCodeGeneratorX86::VisitMonitorOperation(HMonitorOperation* instr instruction, instruction->GetDexPc(), nullptr); + if (instruction->IsEnter()) { + CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); + } else { + CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>(); + } } void LocationsBuilderX86::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); } diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 4088160b3f..534ee1c5ab 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -65,6 +65,7 @@ class NullCheckSlowPathX86_64 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowNullPointer, void, void>(); } bool IsFatal() const OVERRIDE { return true; } @@ -91,6 +92,7 @@ class DivZeroCheckSlowPathX86_64 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowDivZero, void, void>(); } bool IsFatal() const OVERRIDE { return true; } @@ -149,6 +151,7 @@ class SuspendCheckSlowPathX86_64 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickTestSuspend, void, void>(); RestoreLiveRegisters(codegen, instruction_->GetLocations()); if (successor_ == nullptr) { __ jmp(GetReturnLabel()); @@ -203,6 +206,7 @@ class BoundsCheckSlowPathX86_64 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, 
int32_t>(); } bool IsFatal() const OVERRIDE { return true; } @@ -240,6 +244,11 @@ class LoadClassSlowPathX86_64 : public SlowPathCode { at_, dex_pc_, this); + if (do_clinit_) { + CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>(); + } else { + CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>(); + } Location out = locations->Out(); // Move the class to the desired location. @@ -290,6 +299,7 @@ class LoadStringSlowPathX86_64 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX)); RestoreLiveRegisters(codegen, locations); __ jmp(GetExitLabel()); @@ -386,6 +396,7 @@ class DeoptimizationSlowPathX86_64 : public SlowPathCode { deoptimize, deoptimize->GetDexPc(), this); + CheckEntrypointTypes<kQuickDeoptimize, void, void>(); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86_64"; } @@ -428,6 +439,7 @@ class ArraySetSlowPathX86_64 : public SlowPathCode { instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>(); RestoreLiveRegisters(codegen, locations); __ jmp(GetExitLabel()); } @@ -718,7 +730,7 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo Address::Absolute(invoke->GetStringInitOffset(), true)); break; case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: - callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); break; case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ movq(temp.AsRegister<CpuRegister>(), Immediate(invoke->GetMethodAddress())); @@ -737,7 +749,7 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo __ Bind(&pc_relative_dex_cache_patches_.back().label); break; case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { - Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); Register method_reg; CpuRegister reg = temp.AsRegister<CpuRegister>(); if (current_method.IsRegister()) { @@ -3765,22 +3777,19 @@ void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); InvokeRuntimeCallingConvention calling_convention; - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); locations->SetOut(Location::RegisterLocation(RAX)); } void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) { - InvokeRuntimeCallingConvention calling_convention; - codegen_->Load64BitValue(CpuRegister(calling_convention.GetRegisterAt(0)), - instruction->GetTypeIndex()); // Note: if heap poisoning is enabled, the entry point takes cares // of poisoning the reference. 
- codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>(); DCHECK(!codegen_->IsLeafMethod()); } @@ -3799,13 +3808,13 @@ void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) { InvokeRuntimeCallingConvention calling_convention; codegen_->Load64BitValue(CpuRegister(calling_convention.GetRegisterAt(0)), instruction->GetTypeIndex()); - // Note: if heap poisoning is enabled, the entry point takes cares // of poisoning the reference. codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>(); DCHECK(!codegen_->IsLeafMethod()); } @@ -4500,8 +4509,6 @@ void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) { // This first temporary register is possibly used for heap // reference poisoning and/or read barrier emission too. locations->AddTemp(Location::RequiresRegister()); - // This second temporary register is possibly used for read - // barrier emission too. locations->AddTemp(Location::RequiresRegister()); } } @@ -5129,6 +5136,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) { cls, cls->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>(); return; } @@ -5150,7 +5158,6 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) { __ movl(out, Address(current_method, declaring_class_offset)); } } else { - DCHECK(cls->CanCallRuntime()); // /* GcRoot<mirror::Class>[] */ out = // current_method.ptr_sized_fields_->dex_cache_resolved_types_ __ movq(out, Address(current_method, @@ -5167,15 +5174,20 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) { __ movl(out, Address(out, cache_offset)); } - SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64( - cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); - codegen_->AddSlowPath(slow_path); - __ testl(out, out); - __ j(kEqual, slow_path->GetEntryLabel()); - if (cls->MustGenerateClinitCheck()) { - GenerateClassInitializationCheck(slow_path, out); - } else { - __ Bind(slow_path->GetExitLabel()); + if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) { + DCHECK(cls->CanCallRuntime()); + SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64( + cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); + codegen_->AddSlowPath(slow_path); + if (!cls->IsInDexCache()) { + __ testl(out, out); + __ j(kEqual, slow_path->GetEntryLabel()); + } + if (cls->MustGenerateClinitCheck()) { + GenerateClassInitializationCheck(slow_path, out); + } else { + __ Bind(slow_path->GetExitLabel()); + } } } } @@ -5278,6 +5290,7 @@ void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) { instruction, instruction->GetDexPc(), nullptr); + CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); } void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) { @@ -5772,6 +5785,11 @@ void InstructionCodeGeneratorX86_64::VisitMonitorOperation(HMonitorOperation* in instruction, instruction->GetDexPc(), nullptr); + if (instruction->IsEnter()) { + CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); + } else { + CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>(); + } } void LocationsBuilderX86_64::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); } 
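Across the MIPS64, x86 and x86-64 backends above, VisitNewInstance no longer materializes the type index itself: both runtime arguments now arrive as ordinary HIR inputs fixed to the first two calling-convention registers, so the hand-written constant load (and its temp) disappears. Condensed before/after, quoting the location setup from the hunks:

// Before: the backend loaded the type index into argument register 0 by hand.
//   locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
//   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
//   ...
//   __ movl(calling_convention.GetRegisterAt(0), Immediate(instruction->GetTypeIndex()));
//
// After: the two arguments of kQuickAllocObjectWithAccessCheck (uint32_t type
// index, ArtMethod*) are presumably inputs of HNewInstance itself, so the
// visitor only has to emit the runtime call.
//   locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
//   locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));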
diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h index e1a8c9cc0f..af8b8b562a 100644 --- a/compiler/optimizing/common_arm64.h +++ b/compiler/optimizing/common_arm64.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_OPTIMIZING_COMMON_ARM64_H_ #define ART_COMPILER_OPTIMIZING_COMMON_ARM64_H_ +#include "code_generator.h" #include "locations.h" #include "nodes.h" #include "utils/arm64/assembler_arm64.h" @@ -255,6 +256,67 @@ static inline bool ArtVixlRegCodeCoherentForRegSet(uint32_t art_core_registers, return true; } +static inline vixl::Shift ShiftFromOpKind(HArm64DataProcWithShifterOp::OpKind op_kind) { + switch (op_kind) { + case HArm64DataProcWithShifterOp::kASR: return vixl::ASR; + case HArm64DataProcWithShifterOp::kLSL: return vixl::LSL; + case HArm64DataProcWithShifterOp::kLSR: return vixl::LSR; + default: + LOG(FATAL) << "Unexpected op kind " << op_kind; + UNREACHABLE(); + return vixl::NO_SHIFT; + } +} + +static inline vixl::Extend ExtendFromOpKind(HArm64DataProcWithShifterOp::OpKind op_kind) { + switch (op_kind) { + case HArm64DataProcWithShifterOp::kUXTB: return vixl::UXTB; + case HArm64DataProcWithShifterOp::kUXTH: return vixl::UXTH; + case HArm64DataProcWithShifterOp::kUXTW: return vixl::UXTW; + case HArm64DataProcWithShifterOp::kSXTB: return vixl::SXTB; + case HArm64DataProcWithShifterOp::kSXTH: return vixl::SXTH; + case HArm64DataProcWithShifterOp::kSXTW: return vixl::SXTW; + default: + LOG(FATAL) << "Unexpected op kind " << op_kind; + UNREACHABLE(); + return vixl::NO_EXTEND; + } +} + +static inline bool CanFitInShifterOperand(HInstruction* instruction) { + if (instruction->IsTypeConversion()) { + HTypeConversion* conversion = instruction->AsTypeConversion(); + Primitive::Type result_type = conversion->GetResultType(); + Primitive::Type input_type = conversion->GetInputType(); + // We don't expect to see the same type as input and result. + return Primitive::IsIntegralType(result_type) && Primitive::IsIntegralType(input_type) && + (result_type != input_type); + } else { + return (instruction->IsShl() && instruction->AsShl()->InputAt(1)->IsIntConstant()) || + (instruction->IsShr() && instruction->AsShr()->InputAt(1)->IsIntConstant()) || + (instruction->IsUShr() && instruction->AsUShr()->InputAt(1)->IsIntConstant()); + } +} + +static inline bool HasShifterOperand(HInstruction* instr) { + // `neg` instructions are an alias of `sub` using the zero register as the + // first register input. + bool res = instr->IsAdd() || instr->IsAnd() || instr->IsNeg() || + instr->IsOr() || instr->IsSub() || instr->IsXor(); + return res; +} + +static inline bool ShifterOperandSupportsExtension(HInstruction* instruction) { + DCHECK(HasShifterOperand(instruction)); + // Although the `neg` instruction is an alias of the `sub` instruction, `HNeg` + // does *not* support extension. This is because the `extended register` form + // of the `sub` instruction interprets the left register with code 31 as the + // stack pointer and not the zero register. (So does the `immediate` form.) In + // the other form `shifted register, the register with code 31 is interpreted + // as the zero register. 
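The new common_arm64.h helpers describe which HIR shapes can be folded into an A64 shifted- or extended-register operand; the arm64 instruction simplifier further below uses them to form HArm64DataProcWithShifterOp nodes. A small, standalone illustration of the arithmetic being folded (the A64 mnemonic in the comment is the intended final form; register names are illustrative):

#include <cstdint>
#include <cassert>

int main() {
  // Pattern the simplifier targets:  t = b << 3 (single use);  r = a + t,
  // which can become one data-processing instruction with a shifted operand:
  //   add x_r, x_a, x_b, LSL #3
  int64_t a = 100, b = 5;
  int64_t separate = [&] { int64_t t = b << 3; return a + t; }();
  int64_t folded = a + (b << 3);   // what the single shifted-operand add computes
  assert(separate == folded && folded == 140);
  return 0;
}

Note also the comment on ShifterOperandSupportsExtension() above: extensions (UXTB/SXTH/SXTW/...) are only accepted for add and sub, and in particular not for HNeg, because the extended-register form of sub reads register 31 as the stack pointer rather than the zero register.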
+ return instruction->IsAdd() || instruction->IsSub(); +} + } // namespace helpers } // namespace arm64 } // namespace art diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index 5814d7556f..b3b09d2155 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -735,26 +735,29 @@ void SSAChecker::VisitPhi(HPhi* phi) { } } - // Test phi equivalents. There should not be two of the same type and they - // should only be created for constants which were untyped in DEX. - for (HInstructionIterator phi_it(phi->GetBlock()->GetPhis()); !phi_it.Done(); phi_it.Advance()) { - HPhi* other_phi = phi_it.Current()->AsPhi(); - if (phi != other_phi && phi->GetRegNumber() == other_phi->GetRegNumber()) { - if (phi->GetType() == other_phi->GetType()) { - std::stringstream type_str; - type_str << phi->GetType(); - AddError(StringPrintf("Equivalent phi (%d) found for VReg %d with type: %s.", - phi->GetId(), - phi->GetRegNumber(), - type_str.str().c_str())); - } else { - ArenaBitVector visited(GetGraph()->GetArena(), 0, /* expandable */ true); - if (!IsConstantEquivalent(phi, other_phi, &visited)) { - AddError(StringPrintf("Two phis (%d and %d) found for VReg %d but they " - "are not equivalents of constants.", + // Test phi equivalents. There should not be two of the same type and they should only be + // created for constants which were untyped in DEX. Note that this test can be skipped for + // a synthetic phi (indicated by lack of a virtual register). + if (phi->GetRegNumber() != kNoRegNumber) { + for (HInstructionIterator phi_it(phi->GetBlock()->GetPhis()); !phi_it.Done(); phi_it.Advance()) { + HPhi* other_phi = phi_it.Current()->AsPhi(); + if (phi != other_phi && phi->GetRegNumber() == other_phi->GetRegNumber()) { + if (phi->GetType() == other_phi->GetType()) { + std::stringstream type_str; + type_str << phi->GetType(); + AddError(StringPrintf("Equivalent phi (%d) found for VReg %d with type: %s.", phi->GetId(), - other_phi->GetId(), - phi->GetRegNumber())); + phi->GetRegNumber(), + type_str.str().c_str())); + } else { + ArenaBitVector visited(GetGraph()->GetArena(), 0, /* expandable */ true); + if (!IsConstantEquivalent(phi, other_phi, &visited)) { + AddError(StringPrintf("Two phis (%d and %d) found for VReg %d but they " + "are not equivalents of constants.", + phi->GetId(), + other_phi->GetId(), + phi->GetRegNumber())); + } } } } diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index 2b7790184a..48bcd10b10 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -397,6 +397,9 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { << invoke->IsRecursive() << std::noboolalpha; StartAttributeStream("intrinsic") << invoke->GetIntrinsic(); + if (invoke->IsStatic()) { + StartAttributeStream("clinit_check") << invoke->GetClinitCheckRequirement(); + } } void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* field_access) OVERRIDE { @@ -419,6 +422,19 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { StartAttributeStream("kind") << (try_boundary->IsEntry() ? 
"entry" : "exit"); } +#ifdef ART_ENABLE_CODEGEN_arm64 + void VisitArm64DataProcWithShifterOp(HArm64DataProcWithShifterOp* instruction) OVERRIDE { + StartAttributeStream("kind") << instruction->GetInstrKind() << "+" << instruction->GetOpKind(); + if (HArm64DataProcWithShifterOp::IsShiftOp(instruction->GetOpKind())) { + StartAttributeStream("shift") << instruction->GetShiftAmount(); + } + } + + void VisitArm64MultiplyAccumulate(HArm64MultiplyAccumulate* instruction) OVERRIDE { + StartAttributeStream("kind") << instruction->GetOpKind(); + } +#endif + bool IsPass(const char* name) { return strcmp(pass_name_, name) == 0; } @@ -500,6 +516,18 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { StartAttributeStream("exact") << std::boolalpha << info.IsExact() << std::noboolalpha; } else if (instruction->IsLoadClass()) { StartAttributeStream("klass") << "unresolved"; + } else if (instruction->IsNullConstant()) { + // The NullConstant may be added to the graph during other passes that happen between + // ReferenceTypePropagation and Inliner (e.g. InstructionSimplifier). If the inliner + // doesn't run or doesn't inline anything, the NullConstant remains untyped. + // So we should check NullConstants for validity only after reference type propagation. + // + // Note: The infrastructure to properly type NullConstants everywhere is to complex to add + // for the benefits. + StartAttributeStream("klass") << "not_set"; + DCHECK(!is_after_pass_ + || !IsPass(ReferenceTypePropagation::kReferenceTypePropagationPassName)) + << " Expected a valid rti after reference type propagation"; } else { DCHECK(!is_after_pass_) << "Expected a valid rti after reference type propagation"; diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc index c36de84064..4af111b784 100644 --- a/compiler/optimizing/gvn.cc +++ b/compiler/optimizing/gvn.cc @@ -377,9 +377,10 @@ void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) { HInstruction* current = block->GetFirstInstruction(); while (current != nullptr) { - set->Kill(current->GetSideEffects()); // Save the next instruction in case `current` is removed from the graph. HInstruction* next = current->GetNext(); + // Do not kill the set with the side effects of the instruction just now: if + // the instruction is GVN'ed, we don't need to kill. 
if (current->CanBeMoved()) { if (current->IsBinaryOperation() && current->AsBinaryOperation()->IsCommutative()) { // For commutative ops, (x op y) will be treated the same as (y op x) @@ -395,8 +396,11 @@ void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) { current->ReplaceWith(existing); current->GetBlock()->RemoveInstruction(current); } else { + set->Kill(current->GetSideEffects()); set->Add(current); } + } else { + set->Kill(current->GetSideEffects()); } current = next; } diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index b97dc1a511..2f3df7fc68 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -169,16 +169,6 @@ void InstructionSimplifierVisitor::VisitShift(HBinaryOperation* instruction) { // src instruction->ReplaceWith(input_other); instruction->GetBlock()->RemoveInstruction(instruction); - } else if (instruction->IsShl() && input_cst->IsOne()) { - // Replace Shl looking like - // SHL dst, src, 1 - // with - // ADD dst, src, src - HAdd *add = new(GetGraph()->GetArena()) HAdd(instruction->GetType(), - input_other, - input_other); - instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, add); - RecordSimplification(); } } } @@ -372,9 +362,8 @@ void InstructionSimplifierVisitor::VisitEqual(HEqual* equal) { block->RemoveInstruction(equal); RecordSimplification(); } else if (input_const->AsIntConstant()->IsZero()) { - // Replace (bool_value == false) with !bool_value - block->ReplaceAndRemoveInstructionWith( - equal, new (block->GetGraph()->GetArena()) HBooleanNot(input_value)); + equal->ReplaceWith(GetGraph()->InsertOppositeCondition(input_value, equal)); + block->RemoveInstruction(equal); RecordSimplification(); } else { // Replace (bool_value == integer_not_zero_nor_one_constant) with false @@ -399,9 +388,8 @@ void InstructionSimplifierVisitor::VisitNotEqual(HNotEqual* not_equal) { // We are comparing the boolean to a constant which is of type int and can // be any constant. 
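In VisitEqual above (and VisitNotEqual, continued below), comparing a boolean against a constant no longer materializes an HBooleanNot; the simplifier now asks the graph for the opposite condition of the input (InsertOppositeCondition), which for an input that is itself a comparison presumably yields the inverted compare directly. The underlying identity, as a standalone check:

#include <cassert>

int main() {
  // (x < y) == false  <=>  x >= y,  and  (x < y) != true  <=>  x >= y
  for (int x = -2; x <= 2; ++x) {
    for (int y = -2; y <= 2; ++y) {
      bool cond = (x < y);
      assert((cond == false) == (x >= y));
      assert((cond != true) == (x >= y));
    }
  }
  return 0;
}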
if (input_const->AsIntConstant()->IsOne()) { - // Replace (bool_value != true) with !bool_value - block->ReplaceAndRemoveInstructionWith( - not_equal, new (block->GetGraph()->GetArena()) HBooleanNot(input_value)); + not_equal->ReplaceWith(GetGraph()->InsertOppositeCondition(input_value, not_equal)); + block->RemoveInstruction(not_equal); RecordSimplification(); } else if (input_const->AsIntConstant()->IsZero()) { // Replace (bool_value != false) with bool_value @@ -796,6 +784,34 @@ void InstructionSimplifierVisitor::VisitMul(HMul* instruction) { HShl* shl = new(allocator) HShl(type, input_other, shift); block->ReplaceAndRemoveInstructionWith(instruction, shl); RecordSimplification(); + } else if (IsPowerOfTwo(factor - 1)) { + // Transform code looking like + // MUL dst, src, (2^n + 1) + // into + // SHL tmp, src, n + // ADD dst, src, tmp + HShl* shl = new (allocator) HShl(type, + input_other, + GetGraph()->GetIntConstant(WhichPowerOf2(factor - 1))); + HAdd* add = new (allocator) HAdd(type, input_other, shl); + + block->InsertInstructionBefore(shl, instruction); + block->ReplaceAndRemoveInstructionWith(instruction, add); + RecordSimplification(); + } else if (IsPowerOfTwo(factor + 1)) { + // Transform code looking like + // MUL dst, src, (2^n - 1) + // into + // SHL tmp, src, n + // SUB dst, tmp, src + HShl* shl = new (allocator) HShl(type, + input_other, + GetGraph()->GetIntConstant(WhichPowerOf2(factor + 1))); + HSub* sub = new (allocator) HSub(type, shl, input_other); + + block->InsertInstructionBefore(shl, instruction); + block->ReplaceAndRemoveInstructionWith(instruction, sub); + RecordSimplification(); } } } diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc index eb79f469eb..6a34b13320 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.cc +++ b/compiler/optimizing/instruction_simplifier_arm64.cc @@ -16,11 +16,16 @@ #include "instruction_simplifier_arm64.h" +#include "common_arm64.h" #include "mirror/array-inl.h" namespace art { namespace arm64 { +using helpers::CanFitInShifterOperand; +using helpers::HasShifterOperand; +using helpers::ShifterOperandSupportsExtension; + void InstructionSimplifierArm64Visitor::TryExtractArrayAccessAddress(HInstruction* access, HInstruction* array, HInstruction* index, @@ -62,6 +67,169 @@ void InstructionSimplifierArm64Visitor::TryExtractArrayAccessAddress(HInstructio RecordSimplification(); } +bool InstructionSimplifierArm64Visitor::TryMergeIntoShifterOperand(HInstruction* use, + HInstruction* bitfield_op, + bool do_merge) { + DCHECK(HasShifterOperand(use)); + DCHECK(use->IsBinaryOperation() || use->IsNeg()); + DCHECK(CanFitInShifterOperand(bitfield_op)); + DCHECK(!bitfield_op->HasEnvironmentUses()); + + Primitive::Type type = use->GetType(); + if (type != Primitive::kPrimInt && type != Primitive::kPrimLong) { + return false; + } + + HInstruction* left; + HInstruction* right; + if (use->IsBinaryOperation()) { + left = use->InputAt(0); + right = use->InputAt(1); + } else { + DCHECK(use->IsNeg()); + right = use->AsNeg()->InputAt(0); + left = GetGraph()->GetConstant(right->GetType(), 0); + } + DCHECK(left == bitfield_op || right == bitfield_op); + + if (left == right) { + // TODO: Handle special transformations in this situation? + // For example should we transform `(x << 1) + (x << 1)` into `(x << 2)`? + // Or should this be part of a separate transformation logic? 
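The new VisitMul cases above extend the existing power-of-two strength reduction to constants of the form 2^n +/- 1, trading one multiplication for a shift plus an add or sub. The identities, checked standalone (non-negative values only, to keep the shifts well defined in portable C++; the compiler works on two's-complement machine integers where the same holds for negative inputs):

#include <cassert>
#include <cstdint>

int main() {
  for (int32_t x = 0; x <= 1000; x += 7) {
    assert(x * 9  == (x << 3) + x);   // 9  = 2^3 + 1  ->  SHL 3, ADD
    assert(x * 7  == (x << 3) - x);   // 7  = 2^3 - 1  ->  SHL 3, SUB
    assert(x * 33 == (x << 5) + x);   // 33 = 2^5 + 1  ->  SHL 5, ADD
  }
  return 0;
}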
+ return false; + } + + bool is_commutative = use->IsBinaryOperation() && use->AsBinaryOperation()->IsCommutative(); + HInstruction* other_input; + if (bitfield_op == right) { + other_input = left; + } else { + if (is_commutative) { + other_input = right; + } else { + return false; + } + } + + HArm64DataProcWithShifterOp::OpKind op_kind; + int shift_amount = 0; + HArm64DataProcWithShifterOp::GetOpInfoFromInstruction(bitfield_op, &op_kind, &shift_amount); + + if (HArm64DataProcWithShifterOp::IsExtensionOp(op_kind) && + !ShifterOperandSupportsExtension(use)) { + return false; + } + + if (do_merge) { + HArm64DataProcWithShifterOp* alu_with_op = + new (GetGraph()->GetArena()) HArm64DataProcWithShifterOp(use, + other_input, + bitfield_op->InputAt(0), + op_kind, + shift_amount, + use->GetDexPc()); + use->GetBlock()->ReplaceAndRemoveInstructionWith(use, alu_with_op); + if (bitfield_op->GetUses().IsEmpty()) { + bitfield_op->GetBlock()->RemoveInstruction(bitfield_op); + } + RecordSimplification(); + } + + return true; +} + +// Merge a bitfield move instruction into its uses if it can be merged in all of them. +bool InstructionSimplifierArm64Visitor::TryMergeIntoUsersShifterOperand(HInstruction* bitfield_op) { + DCHECK(CanFitInShifterOperand(bitfield_op)); + + if (bitfield_op->HasEnvironmentUses()) { + return false; + } + + const HUseList<HInstruction*>& uses = bitfield_op->GetUses(); + + // Check whether we can merge the instruction in all its users' shifter operand. + for (HUseIterator<HInstruction*> it_use(uses); !it_use.Done(); it_use.Advance()) { + HInstruction* use = it_use.Current()->GetUser(); + if (!HasShifterOperand(use)) { + return false; + } + if (!CanMergeIntoShifterOperand(use, bitfield_op)) { + return false; + } + } + + // Merge the instruction into its uses. + for (HUseIterator<HInstruction*> it_use(uses); !it_use.Done(); it_use.Advance()) { + HInstruction* use = it_use.Current()->GetUser(); + bool merged = MergeIntoShifterOperand(use, bitfield_op); + DCHECK(merged); + } + + return true; +} + +bool InstructionSimplifierArm64Visitor::TrySimpleMultiplyAccumulatePatterns( + HMul* mul, HBinaryOperation* input_binop, HInstruction* input_other) { + DCHECK(Primitive::IsIntOrLongType(mul->GetType())); + DCHECK(input_binop->IsAdd() || input_binop->IsSub()); + DCHECK_NE(input_binop, input_other); + if (!input_binop->HasOnlyOneNonEnvironmentUse()) { + return false; + } + + // Try to interpret patterns like + // a * (b <+/-> 1) + // as + // (a * b) <+/-> a + HInstruction* input_a = input_other; + HInstruction* input_b = nullptr; // Set to a non-null value if we found a pattern to optimize. + HInstruction::InstructionKind op_kind; + + if (input_binop->IsAdd()) { + if ((input_binop->GetConstantRight() != nullptr) && input_binop->GetConstantRight()->IsOne()) { + // Interpret + // a * (b + 1) + // as + // (a * b) + a + input_b = input_binop->GetLeastConstantLeft(); + op_kind = HInstruction::kAdd; + } + } else { + DCHECK(input_binop->IsSub()); + if (input_binop->GetRight()->IsConstant() && + input_binop->GetRight()->AsConstant()->IsMinusOne()) { + // Interpret + // a * (b - (-1)) + // as + // a + (a * b) + input_b = input_binop->GetLeft(); + op_kind = HInstruction::kAdd; + } else if (input_binop->GetLeft()->IsConstant() && + input_binop->GetLeft()->AsConstant()->IsOne()) { + // Interpret + // a * (1 - b) + // as + // a - (a * b) + input_b = input_binop->GetRight(); + op_kind = HInstruction::kSub; + } + } + + if (input_b == nullptr) { + // We did not find a pattern we can optimize. 
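TrySimpleMultiplyAccumulatePatterns above recognizes a multiply whose other operand is an add or sub involving a +/-1 constant and reassociates it so a single multiply-accumulate node can carry the whole expression. The three identities it relies on, verified standalone:

#include <cassert>

int main() {
  for (int a = -5; a <= 5; ++a) {
    for (int b = -5; b <= 5; ++b) {
      assert(a * (b + 1)    == a * b + a);   // -> (a * b) + a   (accumulate with add)
      assert(a * (b - (-1)) == a + a * b);   // -> a + (a * b)   (accumulate with add)
      assert(a * (1 - b)    == a - a * b);   // -> a - (a * b)   (accumulate with sub)
    }
  }
  return 0;
}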
+ return false; + } + + HArm64MultiplyAccumulate* mulacc = new(GetGraph()->GetArena()) HArm64MultiplyAccumulate( + mul->GetType(), op_kind, input_a, input_a, input_b, mul->GetDexPc()); + + mul->GetBlock()->ReplaceAndRemoveInstructionWith(mul, mulacc); + input_binop->GetBlock()->RemoveInstruction(input_binop); + + return false; +} + void InstructionSimplifierArm64Visitor::VisitArrayGet(HArrayGet* instruction) { TryExtractArrayAccessAddress(instruction, instruction->GetArray(), @@ -76,5 +244,110 @@ void InstructionSimplifierArm64Visitor::VisitArraySet(HArraySet* instruction) { Primitive::ComponentSize(instruction->GetComponentType())); } +void InstructionSimplifierArm64Visitor::VisitMul(HMul* instruction) { + Primitive::Type type = instruction->GetType(); + if (!Primitive::IsIntOrLongType(type)) { + return; + } + + HInstruction* use = instruction->HasNonEnvironmentUses() + ? instruction->GetUses().GetFirst()->GetUser() + : nullptr; + + if (instruction->HasOnlyOneNonEnvironmentUse() && (use->IsAdd() || use->IsSub())) { + // Replace code looking like + // MUL tmp, x, y + // SUB dst, acc, tmp + // with + // MULSUB dst, acc, x, y + // Note that we do not want to (unconditionally) perform the merge when the + // multiplication has multiple uses and it can be merged in all of them. + // Multiple uses could happen on the same control-flow path, and we would + // then increase the amount of work. In the future we could try to evaluate + // whether all uses are on different control-flow paths (using dominance and + // reverse-dominance information) and only perform the merge when they are. + HInstruction* accumulator = nullptr; + HBinaryOperation* binop = use->AsBinaryOperation(); + HInstruction* binop_left = binop->GetLeft(); + HInstruction* binop_right = binop->GetRight(); + // Be careful after GVN. This should not happen since the `HMul` has only + // one use. + DCHECK_NE(binop_left, binop_right); + if (binop_right == instruction) { + accumulator = binop_left; + } else if (use->IsAdd()) { + DCHECK_EQ(binop_left, instruction); + accumulator = binop_right; + } + + if (accumulator != nullptr) { + HArm64MultiplyAccumulate* mulacc = + new (GetGraph()->GetArena()) HArm64MultiplyAccumulate(type, + binop->GetKind(), + accumulator, + instruction->GetLeft(), + instruction->GetRight()); + + binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc); + DCHECK(!instruction->HasUses()); + instruction->GetBlock()->RemoveInstruction(instruction); + RecordSimplification(); + return; + } + } + + // Use multiply accumulate instruction for a few simple patterns. + // We prefer not applying the following transformations if the left and + // right inputs perform the same operation. + // We rely on GVN having squashed the inputs if appropriate. However the + // results are still correct even if that did not happen. 
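The main VisitMul path above merges a multiply with its single Add/Sub user into an HArm64MultiplyAccumulate, which is intended to map onto a single A64 multiply-accumulate instruction. A small illustration of the semantics (register names in the comments are illustrative):

#include <cstdint>
#include <cassert>

int main() {
  // t = Mul(x, y); d = Add(acc, t)   ==>   one multiply-accumulate node, e.g.
  //   madd x_d, x_x, x_y, x_acc      // d = acc + x * y
  //   msub x_d, x_x, x_y, x_acc      // d = acc - x * y   (for the Sub case)
  int64_t acc = 10, x = 6, y = 7;
  assert(acc + x * y == 52);
  assert(acc - x * y == -32);
  return 0;
}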
+ if (instruction->GetLeft() == instruction->GetRight()) { + return; + } + + HInstruction* left = instruction->GetLeft(); + HInstruction* right = instruction->GetRight(); + if ((right->IsAdd() || right->IsSub()) && + TrySimpleMultiplyAccumulatePatterns(instruction, right->AsBinaryOperation(), left)) { + return; + } + if ((left->IsAdd() || left->IsSub()) && + TrySimpleMultiplyAccumulatePatterns(instruction, left->AsBinaryOperation(), right)) { + return; + } +} + +void InstructionSimplifierArm64Visitor::VisitShl(HShl* instruction) { + if (instruction->InputAt(1)->IsConstant()) { + TryMergeIntoUsersShifterOperand(instruction); + } +} + +void InstructionSimplifierArm64Visitor::VisitShr(HShr* instruction) { + if (instruction->InputAt(1)->IsConstant()) { + TryMergeIntoUsersShifterOperand(instruction); + } +} + +void InstructionSimplifierArm64Visitor::VisitTypeConversion(HTypeConversion* instruction) { + Primitive::Type result_type = instruction->GetResultType(); + Primitive::Type input_type = instruction->GetInputType(); + + if (input_type == result_type) { + // We let the arch-independent code handle this. + return; + } + + if (Primitive::IsIntegralType(result_type) && Primitive::IsIntegralType(input_type)) { + TryMergeIntoUsersShifterOperand(instruction); + } +} + +void InstructionSimplifierArm64Visitor::VisitUShr(HUShr* instruction) { + if (instruction->InputAt(1)->IsConstant()) { + TryMergeIntoUsersShifterOperand(instruction); + } +} + } // namespace arm64 } // namespace art diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h index 4b697dba0e..b7f490bb8c 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.h +++ b/compiler/optimizing/instruction_simplifier_arm64.h @@ -39,9 +39,30 @@ class InstructionSimplifierArm64Visitor : public HGraphVisitor { HInstruction* array, HInstruction* index, int access_size); + bool TryMergeIntoUsersShifterOperand(HInstruction* instruction); + bool TryMergeIntoShifterOperand(HInstruction* use, + HInstruction* bitfield_op, + bool do_merge); + bool CanMergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) { + return TryMergeIntoShifterOperand(use, bitfield_op, false); + } + bool MergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) { + DCHECK(CanMergeIntoShifterOperand(use, bitfield_op)); + return TryMergeIntoShifterOperand(use, bitfield_op, true); + } + + bool TrySimpleMultiplyAccumulatePatterns(HMul* mul, + HBinaryOperation* input_binop, + HInstruction* input_other); + // HInstruction visitors, sorted alphabetically. 
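The four new visitors above only attempt the shifter-operand merge when it can actually be encoded: shifts must have a constant amount (the A64 shifted-register operand encodes an immediate shift), and integral-to-integral type conversions map to the extend operands. A standalone example of the conversion case (the A64 form in the comment is the intended result; two's-complement narrowing assumed):

#include <cstdint>
#include <cassert>

int main() {
  // t = (int16_t) b (single use);  r = a + t
  // can become one extended-register add:
  //   add w_r, w_a, w_b, SXTH
  int32_t a = 100000;
  int32_t b = 0x12345;
  int32_t r = a + static_cast<int16_t>(b);   // low 16 bits of b, sign-extended
  assert(static_cast<int16_t>(b) == 0x2345);
  assert(r == 100000 + 0x2345);
  return 0;
}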
void VisitArrayGet(HArrayGet* instruction) OVERRIDE; void VisitArraySet(HArraySet* instruction) OVERRIDE; + void VisitMul(HMul* instruction) OVERRIDE; + void VisitShl(HShl* instruction) OVERRIDE; + void VisitShr(HShr* instruction) OVERRIDE; + void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE; + void VisitUShr(HUShr* instruction) OVERRIDE; OptimizingCompilerStats* stats_; }; diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index 0a5acc3e64..d2017da221 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -44,7 +44,23 @@ using IntrinsicSlowPathARM = IntrinsicSlowPath<InvokeDexCallingConventionVisitor bool IntrinsicLocationsBuilderARM::TryDispatch(HInvoke* invoke) { Dispatch(invoke); LocationSummary* res = invoke->GetLocations(); - return res != nullptr && res->Intrinsified(); + if (res == nullptr) { + return false; + } + if (kEmitCompilerReadBarrier && res->CanCall()) { + // Generating an intrinsic for this HInvoke may produce an + // IntrinsicSlowPathARM slow path. Currently this approach + // does not work when using read barriers, as the emitted + // calling sequence will make use of another slow path + // (ReadBarrierForRootSlowPathARM for HInvokeStaticOrDirect, + // ReadBarrierSlowPathARM for HInvokeVirtual). So we bail + // out in this case. + // + // TODO: Find a way to have intrinsics work with read barriers. + invoke->SetLocations(nullptr); + return false; + } + return res->Intrinsified(); } #define __ assembler-> @@ -662,20 +678,23 @@ static void GenUnsafeGet(HInvoke* invoke, (type == Primitive::kPrimLong) || (type == Primitive::kPrimNot)); ArmAssembler* assembler = codegen->GetAssembler(); - Register base = locations->InAt(1).AsRegister<Register>(); // Object pointer. - Register offset = locations->InAt(2).AsRegisterPairLow<Register>(); // Long offset, lo part only. + Location base_loc = locations->InAt(1); + Register base = base_loc.AsRegister<Register>(); // Object pointer. + Location offset_loc = locations->InAt(2); + Register offset = offset_loc.AsRegisterPairLow<Register>(); // Long offset, lo part only. + Location trg_loc = locations->Out(); if (type == Primitive::kPrimLong) { - Register trg_lo = locations->Out().AsRegisterPairLow<Register>(); + Register trg_lo = trg_loc.AsRegisterPairLow<Register>(); __ add(IP, base, ShifterOperand(offset)); if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) { - Register trg_hi = locations->Out().AsRegisterPairHigh<Register>(); + Register trg_hi = trg_loc.AsRegisterPairHigh<Register>(); __ ldrexd(trg_lo, trg_hi, IP); } else { __ ldrd(trg_lo, Address(IP)); } } else { - Register trg = locations->Out().AsRegister<Register>(); + Register trg = trg_loc.AsRegister<Register>(); __ ldr(trg, Address(base, offset)); } @@ -684,14 +703,18 @@ static void GenUnsafeGet(HInvoke* invoke, } if (type == Primitive::kPrimNot) { - Register trg = locations->Out().AsRegister<Register>(); - __ MaybeUnpoisonHeapReference(trg); + codegen->MaybeGenerateReadBarrier(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc); } } static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + bool can_call = kEmitCompilerReadBarrier && + (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || + invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, + can_call ? 
+ LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); @@ -936,6 +959,7 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat __ Bind(&loop_head); __ ldrex(tmp_lo, tmp_ptr); + // TODO: Do we need a read barrier here when `type == Primitive::kPrimNot`? __ subs(tmp_lo, tmp_lo, ShifterOperand(expected_lo)); @@ -964,7 +988,11 @@ void IntrinsicLocationsBuilderARM::VisitUnsafeCASObject(HInvoke* invoke) { // The UnsafeCASObject intrinsic does not always work when heap // poisoning is enabled (it breaks run-test 004-UnsafeTest); turn it // off temporarily as a quick fix. + // // TODO(rpl): Fix it and turn it back on. + // + // TODO(rpl): Also, we should investigate whether we need a read + // barrier in the generated code. if (kPoisonHeapReferences) { return; } @@ -1400,6 +1428,10 @@ static void CheckPosition(ArmAssembler* assembler, } } +// TODO: Implement read barriers in the SystemArrayCopy intrinsic. +// Note that this code path is not used (yet) because we do not +// intrinsify methods that can go into the IntrinsicSlowPathARM +// slow path. void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) { ArmAssembler* assembler = GetAssembler(); LocationSummary* locations = invoke->GetLocations(); diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 059abf090d..b04dcceb05 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -143,7 +143,23 @@ class IntrinsicSlowPathARM64 : public SlowPathCodeARM64 { bool IntrinsicLocationsBuilderARM64::TryDispatch(HInvoke* invoke) { Dispatch(invoke); LocationSummary* res = invoke->GetLocations(); - return res != nullptr && res->Intrinsified(); + if (res == nullptr) { + return false; + } + if (kEmitCompilerReadBarrier && res->CanCall()) { + // Generating an intrinsic for this HInvoke may produce an + // IntrinsicSlowPathARM64 slow path. Currently this approach + // does not work when using read barriers, as the emitted + // calling sequence will make use of another slow path + // (ReadBarrierForRootSlowPathARM64 for HInvokeStaticOrDirect, + // ReadBarrierSlowPathARM64 for HInvokeVirtual). So we bail + // out in this case. + // + // TODO: Find a way to have intrinsics work with read barriers. + invoke->SetLocations(nullptr); + return false; + } + return res->Intrinsified(); } #define __ masm-> @@ -818,9 +834,12 @@ static void GenUnsafeGet(HInvoke* invoke, (type == Primitive::kPrimLong) || (type == Primitive::kPrimNot)); vixl::MacroAssembler* masm = codegen->GetAssembler()->vixl_masm_; - Register base = WRegisterFrom(locations->InAt(1)); // Object pointer. - Register offset = XRegisterFrom(locations->InAt(2)); // Long offset. - Register trg = RegisterFrom(locations->Out(), type); + Location base_loc = locations->InAt(1); + Register base = WRegisterFrom(base_loc); // Object pointer. + Location offset_loc = locations->InAt(2); + Register offset = XRegisterFrom(offset_loc); // Long offset. 
+ Location trg_loc = locations->Out(); + Register trg = RegisterFrom(trg_loc, type); bool use_acquire_release = codegen->GetInstructionSetFeatures().PreferAcquireRelease(); MemOperand mem_op(base.X(), offset); @@ -837,13 +856,18 @@ static void GenUnsafeGet(HInvoke* invoke, if (type == Primitive::kPrimNot) { DCHECK(trg.IsW()); - codegen->GetAssembler()->MaybeUnpoisonHeapReference(trg); + codegen->MaybeGenerateReadBarrier(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc); } } static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + bool can_call = kEmitCompilerReadBarrier && + (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || + invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, + can_call ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); @@ -1057,6 +1081,9 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat if (use_acquire_release) { __ Bind(&loop_head); __ Ldaxr(tmp_value, MemOperand(tmp_ptr)); + // TODO: Do we need a read barrier here when `type == Primitive::kPrimNot`? + // Note that this code is not (yet) used when read barriers are + // enabled (see IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject). __ Cmp(tmp_value, expected); __ B(&exit_loop, ne); __ Stlxr(tmp_32, value, MemOperand(tmp_ptr)); @@ -1065,6 +1092,9 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat __ Dmb(InnerShareable, BarrierWrites); __ Bind(&loop_head); __ Ldxr(tmp_value, MemOperand(tmp_ptr)); + // TODO: Do we need a read barrier here when `type == Primitive::kPrimNot`? + // Note that this code is not (yet) used when read barriers are + // enabled (see IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject). __ Cmp(tmp_value, expected); __ B(&exit_loop, ne); __ Stxr(tmp_32, value, MemOperand(tmp_ptr)); @@ -1090,7 +1120,11 @@ void IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject(HInvoke* invoke) { // The UnsafeCASObject intrinsic does not always work when heap // poisoning is enabled (it breaks run-test 004-UnsafeTest); turn it // off temporarily as a quick fix. + // // TODO(rpl): Fix it and turn it back on. + // + // TODO(rpl): Also, we should investigate whether we need a read + // barrier in the generated code. 
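  // (One possible reason a read barrier may be needed in the CAS code: with a
  // concurrent-copying collector the reference loaded by the exclusive-load in
  // GenCas may be a from-space pointer while `expected` is a to-space pointer
  // to the same object, or vice versa, so the raw comparison could fail
  // spuriously unless the loaded value goes through a read barrier first.)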
if (kPoisonHeapReferences) { return; } diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc index a94e3a8c23..326844526e 100644 --- a/compiler/optimizing/intrinsics_mips.cc +++ b/compiler/optimizing/intrinsics_mips.cc @@ -138,6 +138,221 @@ bool IntrinsicLocationsBuilderMIPS::TryDispatch(HInvoke* invoke) { #define __ assembler-> +static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); +} + +static void MoveFPToInt(LocationSummary* locations, bool is64bit, MipsAssembler* assembler) { + FRegister in = locations->InAt(0).AsFpuRegister<FRegister>(); + + if (is64bit) { + Register out_lo = locations->Out().AsRegisterPairLow<Register>(); + Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); + + __ Mfc1(out_lo, in); + __ Mfhc1(out_hi, in); + } else { + Register out = locations->Out().AsRegister<Register>(); + + __ Mfc1(out, in); + } +} + +// long java.lang.Double.doubleToRawLongBits(double) +void IntrinsicLocationsBuilderMIPS::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { + CreateFPToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { + MoveFPToInt(invoke->GetLocations(), true, GetAssembler()); +} + +// int java.lang.Float.floatToRawIntBits(float) +void IntrinsicLocationsBuilderMIPS::VisitFloatFloatToRawIntBits(HInvoke* invoke) { + CreateFPToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitFloatFloatToRawIntBits(HInvoke* invoke) { + MoveFPToInt(invoke->GetLocations(), false, GetAssembler()); +} + +static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresFpuRegister()); +} + +static void MoveIntToFP(LocationSummary* locations, bool is64bit, MipsAssembler* assembler) { + FRegister out = locations->Out().AsFpuRegister<FRegister>(); + + if (is64bit) { + Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); + Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); + + __ Mtc1(in_lo, out); + __ Mthc1(in_hi, out); + } else { + Register in = locations->InAt(0).AsRegister<Register>(); + + __ Mtc1(in, out); + } +} + +// double java.lang.Double.longBitsToDouble(long) +void IntrinsicLocationsBuilderMIPS::VisitDoubleLongBitsToDouble(HInvoke* invoke) { + CreateIntToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitDoubleLongBitsToDouble(HInvoke* invoke) { + MoveIntToFP(invoke->GetLocations(), true, GetAssembler()); +} + +// float java.lang.Float.intBitsToFloat(int) +void IntrinsicLocationsBuilderMIPS::VisitFloatIntBitsToFloat(HInvoke* invoke) { + CreateIntToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitFloatIntBitsToFloat(HInvoke* invoke) { + MoveIntToFP(invoke->GetLocations(), false, GetAssembler()); +} + +static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + 
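// For reference, a minimal host-side sketch of the Java semantics implemented by
// two groups of the MIPS intrinsics in this hunk: the raw floating-point bit
// moves above (Mfc1/Mtc1, with Mfhc1/Mthc1 for the high word of a double) and
// the byte reversal emitted by GenReverseBytes below. This is illustrative
// standalone C++, not ART code; the function names are invented for the sketch.
#include <cstdint>
#include <cstring>

// Float.floatToRawIntBits / Float.intBitsToFloat: reinterpret the bits with no
// numeric conversion (Double uses the same pattern on a 64-bit register pair).
static uint32_t FloatToRawIntBitsRef(float value) {
  uint32_t bits;
  std::memcpy(&bits, &value, sizeof(bits));
  return bits;
}

static float IntBitsToFloatRef(uint32_t bits) {
  float value;
  std::memcpy(&value, &bits, sizeof(value));
  return value;
}

// Integer.reverseBytes: rotating by 16 swaps the half-words, and swapping the
// bytes within each half-word (what WSBH does on MIPS32r2 and newer) completes
// the byte reversal; the masked shifts mirror the MIPS32r1 fallback sequence.
static uint32_t ReverseBytes32Ref(uint32_t x) {
  uint32_t half_swapped = (x << 16) | (x >> 16);   // Rotr(x, 16)
  return ((half_swapped & 0x00FF00FFu) << 8) |     // Wsbh equivalent
         ((half_swapped >> 8) & 0x00FF00FFu);
}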
+static void GenReverseBytes(LocationSummary* locations, + Primitive::Type type, + MipsAssembler* assembler, + bool isR2OrNewer) { + DCHECK(type == Primitive::kPrimShort || + type == Primitive::kPrimInt || + type == Primitive::kPrimLong); + + if (type == Primitive::kPrimShort) { + Register in = locations->InAt(0).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); + + if (isR2OrNewer) { + __ Wsbh(out, in); + __ Seh(out, out); + } else { + __ Sll(TMP, in, 24); + __ Sra(TMP, TMP, 16); + __ Sll(out, in, 16); + __ Srl(out, out, 24); + __ Or(out, out, TMP); + } + } else if (type == Primitive::kPrimInt) { + Register in = locations->InAt(0).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); + + if (isR2OrNewer) { + __ Rotr(out, in, 16); + __ Wsbh(out, out); + } else { + // MIPS32r1 + // __ Rotr(out, in, 16); + __ Sll(TMP, in, 16); + __ Srl(out, in, 16); + __ Or(out, out, TMP); + // __ Wsbh(out, out); + __ LoadConst32(AT, 0x00FF00FF); + __ And(TMP, out, AT); + __ Sll(TMP, TMP, 8); + __ Srl(out, out, 8); + __ And(out, out, AT); + __ Or(out, out, TMP); + } + } else if (type == Primitive::kPrimLong) { + Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); + Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register out_lo = locations->Out().AsRegisterPairLow<Register>(); + Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); + + if (isR2OrNewer) { + __ Rotr(AT, in_hi, 16); + __ Rotr(TMP, in_lo, 16); + __ Wsbh(out_lo, AT); + __ Wsbh(out_hi, TMP); + } else { + // When calling CreateIntToIntLocations() we promised that the + // use of the out_lo/out_hi wouldn't overlap with the use of + // in_lo/in_hi. Be very careful not to write to out_lo/out_hi + // until we're completely done reading from in_lo/in_hi. + // __ Rotr(TMP, in_lo, 16); + __ Sll(TMP, in_lo, 16); + __ Srl(AT, in_lo, 16); + __ Or(TMP, TMP, AT); // Hold in TMP until it's safe + // to write to out_hi. + // __ Rotr(out_lo, in_hi, 16); + __ Sll(AT, in_hi, 16); + __ Srl(out_lo, in_hi, 16); // Here we are finally done reading + // from in_lo/in_hi so it's okay to + // write to out_lo/out_hi. 
+ __ Or(out_lo, out_lo, AT); + // __ Wsbh(out_hi, out_hi); + __ LoadConst32(AT, 0x00FF00FF); + __ And(out_hi, TMP, AT); + __ Sll(out_hi, out_hi, 8); + __ Srl(TMP, TMP, 8); + __ And(TMP, TMP, AT); + __ Or(out_hi, out_hi, TMP); + // __ Wsbh(out_lo, out_lo); + __ And(TMP, out_lo, AT); // AT already holds the correct mask value + __ Sll(TMP, TMP, 8); + __ Srl(out_lo, out_lo, 8); + __ And(out_lo, out_lo, AT); + __ Or(out_lo, out_lo, TMP); + } + } +} + +// int java.lang.Integer.reverseBytes(int) +void IntrinsicLocationsBuilderMIPS::VisitIntegerReverseBytes(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitIntegerReverseBytes(HInvoke* invoke) { + GenReverseBytes(invoke->GetLocations(), + Primitive::kPrimInt, + GetAssembler(), + codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2()); +} + +// long java.lang.Long.reverseBytes(long) +void IntrinsicLocationsBuilderMIPS::VisitLongReverseBytes(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitLongReverseBytes(HInvoke* invoke) { + GenReverseBytes(invoke->GetLocations(), + Primitive::kPrimLong, + GetAssembler(), + codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2()); +} + +// short java.lang.Short.reverseBytes(short) +void IntrinsicLocationsBuilderMIPS::VisitShortReverseBytes(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitShortReverseBytes(HInvoke* invoke) { + GenReverseBytes(invoke->GetLocations(), + Primitive::kPrimShort, + GetAssembler(), + codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2()); +} + // boolean java.lang.String.equals(Object anObject) void IntrinsicLocationsBuilderMIPS::VisitStringEquals(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, @@ -250,15 +465,8 @@ void IntrinsicCodeGeneratorMIPS::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) UNIMPLEMENTED_INTRINSIC(IntegerReverse) UNIMPLEMENTED_INTRINSIC(LongReverse) -UNIMPLEMENTED_INTRINSIC(ShortReverseBytes) -UNIMPLEMENTED_INTRINSIC(IntegerReverseBytes) -UNIMPLEMENTED_INTRINSIC(LongReverseBytes) UNIMPLEMENTED_INTRINSIC(LongNumberOfLeadingZeros) UNIMPLEMENTED_INTRINSIC(IntegerNumberOfLeadingZeros) -UNIMPLEMENTED_INTRINSIC(FloatIntBitsToFloat) -UNIMPLEMENTED_INTRINSIC(DoubleLongBitsToDouble) -UNIMPLEMENTED_INTRINSIC(FloatFloatToRawIntBits) -UNIMPLEMENTED_INTRINSIC(DoubleDoubleToRawLongBits) UNIMPLEMENTED_INTRINSIC(MathAbsDouble) UNIMPLEMENTED_INTRINSIC(MathAbsFloat) UNIMPLEMENTED_INTRINSIC(MathAbsInt) diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index ff843ebb1e..ecee11dea6 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -115,7 +115,7 @@ class IntrinsicSlowPathMIPS64 : public SlowPathCodeMIPS64 { } RestoreLiveRegisters(codegen, invoke_->GetLocations()); - __ B(GetExitLabel()); + __ Bc(GetExitLabel()); } const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPathMIPS64"; } @@ -806,7 +806,7 @@ static void GenRoundingMode(LocationSummary* locations, DCHECK_NE(in, out); - Label done; + Mips64Label done; // double floor/ceil(double in) { // if in.isNaN || in.isInfinite || in.isZero { @@ -1256,7 +1256,7 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat // } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value)); // result = tmp_value != 0; - Label loop_head, exit_loop; + Mips64Label 
loop_head, exit_loop; __ Daddu(TMP, base, offset); __ Sync(0); __ Bind(&loop_head); @@ -1391,6 +1391,108 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringCompareTo(HInvoke* invoke) { __ Bind(slow_path->GetExitLabel()); } +// boolean java.lang.String.equals(Object anObject) +void IntrinsicLocationsBuilderMIPS64::VisitStringEquals(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister()); + + // Temporary registers to store lengths of strings and for calculations. + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorMIPS64::VisitStringEquals(HInvoke* invoke) { + Mips64Assembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + GpuRegister str = locations->InAt(0).AsRegister<GpuRegister>(); + GpuRegister arg = locations->InAt(1).AsRegister<GpuRegister>(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + + GpuRegister temp1 = locations->GetTemp(0).AsRegister<GpuRegister>(); + GpuRegister temp2 = locations->GetTemp(1).AsRegister<GpuRegister>(); + GpuRegister temp3 = locations->GetTemp(2).AsRegister<GpuRegister>(); + + Mips64Label loop; + Mips64Label end; + Mips64Label return_true; + Mips64Label return_false; + + // Get offsets of count, value, and class fields within a string object. + const int32_t count_offset = mirror::String::CountOffset().Int32Value(); + const int32_t value_offset = mirror::String::ValueOffset().Int32Value(); + const int32_t class_offset = mirror::Object::ClassOffset().Int32Value(); + + // Note that the null check must have been done earlier. + DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); + + // If the register containing the pointer to "this", and the register + // containing the pointer to "anObject" are the same register then + // "this", and "anObject" are the same object and we can + // short-circuit the logic to a true result. + if (str == arg) { + __ LoadConst64(out, 1); + return; + } + + // Check if input is null, return false if it is. + __ Beqzc(arg, &return_false); + + // Reference equality check, return true if same reference. + __ Beqc(str, arg, &return_true); + + // Instanceof check for the argument by comparing class fields. + // All string objects must have the same type since String cannot be subclassed. + // Receiver must be a string object, so its class field is equal to all strings' class fields. + // If the argument is a string object, its class field must be equal to receiver's class field. + __ Lw(temp1, str, class_offset); + __ Lw(temp2, arg, class_offset); + __ Bnec(temp1, temp2, &return_false); + + // Load lengths of this and argument strings. + __ Lw(temp1, str, count_offset); + __ Lw(temp2, arg, count_offset); + // Check if lengths are equal, return false if they're not. + __ Bnec(temp1, temp2, &return_false); + // Return true if both strings are empty. + __ Beqzc(temp1, &return_true); + + // Don't overwrite input registers + __ Move(TMP, str); + __ Move(temp3, arg); + + // Assertions that must hold in order to compare strings 4 characters at a time. 
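  // (For reference, a standalone C++ sketch of the comparison strategy used
  // below; the padding and alignment it relies on are exactly what the two
  // assertions that follow check. The names `a`, `b` and `char_count` are
  // illustrative stand-ins for the two String value arrays and their common
  // length, not ART identifiers.)
  //
  // static bool StringContentsEqualRef(const uint16_t* a, const uint16_t* b,
  //                                    int32_t char_count) {
  //   const uint64_t* pa = reinterpret_cast<const uint64_t*>(a);
  //   const uint64_t* pb = reinterpret_cast<const uint64_t*>(b);
  //   // Compare 4 UTF-16 chars (8 bytes) per iteration; zero padding makes the
  //   // trailing partial chunk safe to compare as a whole.
  //   for (int32_t remaining = char_count; remaining > 0; remaining -= 4) {
  //     if (*pa++ != *pb++) {
  //       return false;
  //     }
  //   }
  //   return true;
  // }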
+ DCHECK_ALIGNED(value_offset, 8); + static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded"); + + // Loop to compare strings 4 characters at a time starting at the beginning of the string. + // Ok to do this because strings are zero-padded to be 8-byte aligned. + __ Bind(&loop); + __ Ld(out, TMP, value_offset); + __ Ld(temp2, temp3, value_offset); + __ Bnec(out, temp2, &return_false); + __ Daddiu(TMP, TMP, 8); + __ Daddiu(temp3, temp3, 8); + __ Addiu(temp1, temp1, -4); + __ Bgtzc(temp1, &loop); + + // Return true and exit the function. + // If loop does not result in returning false, we return true. + __ Bind(&return_true); + __ LoadConst64(out, 1); + __ Bc(&end); + + // Return false and exit the function. + __ Bind(&return_false); + __ LoadConst64(out, 0); + __ Bind(&end); +} + static void GenerateStringIndexOf(HInvoke* invoke, Mips64Assembler* assembler, CodeGeneratorMIPS64* codegen, @@ -1412,7 +1514,7 @@ static void GenerateStringIndexOf(HInvoke* invoke, // full slow-path down and branch unconditionally. slow_path = new (allocator) IntrinsicSlowPathMIPS64(invoke); codegen->AddSlowPath(slow_path); - __ B(slow_path->GetEntryLabel()); + __ Bc(slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); return; } @@ -1586,8 +1688,6 @@ void IntrinsicCodeGeneratorMIPS64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSE UNIMPLEMENTED_INTRINSIC(MathRoundDouble) UNIMPLEMENTED_INTRINSIC(MathRoundFloat) -UNIMPLEMENTED_INTRINSIC(StringEquals) - UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck) UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc index 068d5db69c..5b89cfef5a 100644 --- a/compiler/optimizing/load_store_elimination.cc +++ b/compiler/optimizing/load_store_elimination.cc @@ -738,8 +738,6 @@ class LSEVisitor : public HGraphVisitor { } if (same_value || possibly_redundant) { possibly_removed_stores_.push_back(instruction); - // Same-value/singleton-field store shouldn't have a null check. - DCHECK(!ref->InputAt(0)->IsNullCheck()); } if (!same_value) { diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 73a44ee2cb..b5ac773505 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -1890,7 +1890,7 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { * | * if_block * / \ - * dummy_block deopt_block + * true_block false_block * \ / * new_pre_header * | @@ -1898,62 +1898,73 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { */ void HGraph::TransformLoopHeaderForBCE(HBasicBlock* header) { DCHECK(header->IsLoopHeader()); - HBasicBlock* pre_header = header->GetDominator(); + HBasicBlock* old_pre_header = header->GetDominator(); - // Need this to avoid critical edge. + // Need extra block to avoid critical edge. HBasicBlock* if_block = new (arena_) HBasicBlock(this, header->GetDexPc()); - // Need this to avoid critical edge. 
- HBasicBlock* dummy_block = new (arena_) HBasicBlock(this, header->GetDexPc()); - HBasicBlock* deopt_block = new (arena_) HBasicBlock(this, header->GetDexPc()); + HBasicBlock* true_block = new (arena_) HBasicBlock(this, header->GetDexPc()); + HBasicBlock* false_block = new (arena_) HBasicBlock(this, header->GetDexPc()); HBasicBlock* new_pre_header = new (arena_) HBasicBlock(this, header->GetDexPc()); AddBlock(if_block); - AddBlock(dummy_block); - AddBlock(deopt_block); + AddBlock(true_block); + AddBlock(false_block); AddBlock(new_pre_header); - header->ReplacePredecessor(pre_header, new_pre_header); - pre_header->successors_.clear(); - pre_header->dominated_blocks_.clear(); - - pre_header->AddSuccessor(if_block); - if_block->AddSuccessor(dummy_block); // True successor - if_block->AddSuccessor(deopt_block); // False successor - dummy_block->AddSuccessor(new_pre_header); - deopt_block->AddSuccessor(new_pre_header); - - pre_header->dominated_blocks_.push_back(if_block); - if_block->SetDominator(pre_header); - if_block->dominated_blocks_.push_back(dummy_block); - dummy_block->SetDominator(if_block); - if_block->dominated_blocks_.push_back(deopt_block); - deopt_block->SetDominator(if_block); + header->ReplacePredecessor(old_pre_header, new_pre_header); + old_pre_header->successors_.clear(); + old_pre_header->dominated_blocks_.clear(); + + old_pre_header->AddSuccessor(if_block); + if_block->AddSuccessor(true_block); // True successor + if_block->AddSuccessor(false_block); // False successor + true_block->AddSuccessor(new_pre_header); + false_block->AddSuccessor(new_pre_header); + + old_pre_header->dominated_blocks_.push_back(if_block); + if_block->SetDominator(old_pre_header); + if_block->dominated_blocks_.push_back(true_block); + true_block->SetDominator(if_block); + if_block->dominated_blocks_.push_back(false_block); + false_block->SetDominator(if_block); if_block->dominated_blocks_.push_back(new_pre_header); new_pre_header->SetDominator(if_block); new_pre_header->dominated_blocks_.push_back(header); header->SetDominator(new_pre_header); + // Fix reverse post order. size_t index_of_header = IndexOfElement(reverse_post_order_, header); MakeRoomFor(&reverse_post_order_, 4, index_of_header - 1); reverse_post_order_[index_of_header++] = if_block; - reverse_post_order_[index_of_header++] = dummy_block; - reverse_post_order_[index_of_header++] = deopt_block; + reverse_post_order_[index_of_header++] = true_block; + reverse_post_order_[index_of_header++] = false_block; reverse_post_order_[index_of_header++] = new_pre_header; - HLoopInformation* info = pre_header->GetLoopInformation(); - if (info != nullptr) { - if_block->SetLoopInformation(info); - dummy_block->SetLoopInformation(info); - deopt_block->SetLoopInformation(info); - new_pre_header->SetLoopInformation(info); - for (HLoopInformationOutwardIterator loop_it(*pre_header); + // Fix loop information. + HLoopInformation* loop_info = old_pre_header->GetLoopInformation(); + if (loop_info != nullptr) { + if_block->SetLoopInformation(loop_info); + true_block->SetLoopInformation(loop_info); + false_block->SetLoopInformation(loop_info); + new_pre_header->SetLoopInformation(loop_info); + // Add blocks to all enveloping loops. 
+ for (HLoopInformationOutwardIterator loop_it(*old_pre_header); !loop_it.Done(); loop_it.Advance()) { loop_it.Current()->Add(if_block); - loop_it.Current()->Add(dummy_block); - loop_it.Current()->Add(deopt_block); + loop_it.Current()->Add(true_block); + loop_it.Current()->Add(false_block); loop_it.Current()->Add(new_pre_header); } } + + // Fix try/catch information. + TryCatchInformation* try_catch_info = old_pre_header->IsTryBlock() + ? old_pre_header->GetTryCatchInformation() + : nullptr; + if_block->SetTryCatchInformation(try_catch_info); + true_block->SetTryCatchInformation(try_catch_info); + false_block->SetTryCatchInformation(try_catch_info); + new_pre_header->SetTryCatchInformation(try_catch_info); } void HInstruction::SetReferenceTypeInfo(ReferenceTypeInfo rti) { @@ -2068,6 +2079,19 @@ void HInvokeStaticOrDirect::RemoveInputAt(size_t index) { } } +std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::ClinitCheckRequirement rhs) { + switch (rhs) { + case HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit: + return os << "explicit"; + case HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit: + return os << "implicit"; + case HInvokeStaticOrDirect::ClinitCheckRequirement::kNone: + return os << "none"; + default: + return os << "unknown:" << static_cast<int>(rhs); + } +} + void HInstruction::RemoveEnvironmentUsers() { for (HUseIterator<HEnvironment*> use_it(GetEnvUses()); !use_it.Done(); use_it.Advance()) { HUseListNode<HEnvironment*>* user_node = use_it.Current(); @@ -2077,4 +2101,46 @@ void HInstruction::RemoveEnvironmentUsers() { env_uses_.Clear(); } +// Returns an instruction with the opposite boolean value from 'cond'. +HInstruction* HGraph::InsertOppositeCondition(HInstruction* cond, HInstruction* cursor) { + ArenaAllocator* allocator = GetArena(); + + if (cond->IsCondition() && + !Primitive::IsFloatingPointType(cond->InputAt(0)->GetType())) { + // Can't reverse floating point conditions. We have to use HBooleanNot in that case. 
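+    // (Why floating-point conditions are excluded above: with a NaN operand both
+    // `a < b` and `a >= b` evaluate to false, so swapping such a condition for its
+    // "opposite" would change behaviour; HBooleanNot of the original condition is
+    // used for that case instead.)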
+ HInstruction* lhs = cond->InputAt(0); + HInstruction* rhs = cond->InputAt(1); + HInstruction* replacement = nullptr; + switch (cond->AsCondition()->GetOppositeCondition()) { // get *opposite* + case kCondEQ: replacement = new (allocator) HEqual(lhs, rhs); break; + case kCondNE: replacement = new (allocator) HNotEqual(lhs, rhs); break; + case kCondLT: replacement = new (allocator) HLessThan(lhs, rhs); break; + case kCondLE: replacement = new (allocator) HLessThanOrEqual(lhs, rhs); break; + case kCondGT: replacement = new (allocator) HGreaterThan(lhs, rhs); break; + case kCondGE: replacement = new (allocator) HGreaterThanOrEqual(lhs, rhs); break; + case kCondB: replacement = new (allocator) HBelow(lhs, rhs); break; + case kCondBE: replacement = new (allocator) HBelowOrEqual(lhs, rhs); break; + case kCondA: replacement = new (allocator) HAbove(lhs, rhs); break; + case kCondAE: replacement = new (allocator) HAboveOrEqual(lhs, rhs); break; + default: + LOG(FATAL) << "Unexpected condition"; + UNREACHABLE(); + } + cursor->GetBlock()->InsertInstructionBefore(replacement, cursor); + return replacement; + } else if (cond->IsIntConstant()) { + HIntConstant* int_const = cond->AsIntConstant(); + if (int_const->IsZero()) { + return GetIntConstant(1); + } else { + DCHECK(int_const->IsOne()); + return GetIntConstant(0); + } + } else { + HInstruction* replacement = new (allocator) HBooleanNot(cond); + cursor->GetBlock()->InsertInstructionBefore(replacement, cursor); + return replacement; + } +} + } // namespace art diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index e3c810e6b1..d5110a7172 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -371,6 +371,11 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { bool HasTryCatch() const { return has_try_catch_; } void SetHasTryCatch(bool value) { has_try_catch_ = value; } + // Returns an instruction with the opposite boolean value from 'cond'. + // The instruction has been inserted into the graph, either as a constant, or + // before cursor. + HInstruction* InsertOppositeCondition(HInstruction* cond, HInstruction* cursor); + private: void FindBackEdges(ArenaBitVector* visited); void RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const; @@ -1096,7 +1101,9 @@ class HLoopInformationOutwardIterator : public ValueObject { #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M) #else #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M) \ - M(Arm64IntermediateAddress, Instruction) + M(Arm64DataProcWithShifterOp, Instruction) \ + M(Arm64IntermediateAddress, Instruction) \ + M(Arm64MultiplyAccumulate, Instruction) #endif #define FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M) @@ -1626,6 +1633,11 @@ class HEnvironment : public ArenaObject<kArenaAllocEnvironment> { return holder_; } + + bool IsFromInlinedInvoke() const { + return GetParent() != nullptr; + } + private: // Record instructions' use entries of this environment for constant-time removal. // It should only be called by HInstruction when a new environment use is added. 
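// A hypothetical caller sketch for HGraph::InsertOppositeCondition() introduced
// above; `graph`, `if_instr` and the helper name are illustrative only, not part
// of this change. The returned instruction is already in the graph (either as a
// constant or inserted before the cursor), so the caller only rewires its use.
static void NegateIfConditionSketch(HGraph* graph, HIf* if_instr) {
  HInstruction* opposite =
      graph->InsertOppositeCondition(if_instr->InputAt(0), /* cursor */ if_instr);
  if_instr->ReplaceInput(opposite, 0);
}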
@@ -3238,7 +3250,7 @@ class HInvoke : public HInstruction { void SetIntrinsic(Intrinsics intrinsic, IntrinsicNeedsEnvironmentOrCache needs_env_or_cache); bool IsFromInlinedInvoke() const { - return GetEnvironment()->GetParent() != nullptr; + return GetEnvironment()->IsFromInlinedInvoke(); } bool CanThrow() const OVERRIDE { return true; } @@ -3434,14 +3446,19 @@ class HInvokeStaticOrDirect : public HInvoke { DCHECK(had_current_method_input || !needs_current_method_input); if (had_current_method_input && !needs_current_method_input) { - DCHECK_EQ(InputAt(GetCurrentMethodInputIndex()), GetBlock()->GetGraph()->GetCurrentMethod()); - RemoveInputAt(GetCurrentMethodInputIndex()); + DCHECK_EQ(InputAt(GetSpecialInputIndex()), GetBlock()->GetGraph()->GetCurrentMethod()); + RemoveInputAt(GetSpecialInputIndex()); } dispatch_info_ = dispatch_info; } - void InsertInputAt(size_t index, HInstruction* input); - void RemoveInputAt(size_t index); + void AddSpecialInput(HInstruction* input) { + // We allow only one special input. + DCHECK(!IsStringInit() && !HasCurrentMethodInput()); + DCHECK(InputCount() == GetSpecialInputIndex() || + (InputCount() == GetSpecialInputIndex() + 1 && IsStaticWithExplicitClinitCheck())); + InsertInputAt(GetSpecialInputIndex(), input); + } bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const OVERRIDE { // We access the method via the dex cache so we can't do an implicit null check. @@ -3453,13 +3470,20 @@ class HInvokeStaticOrDirect : public HInvoke { return return_type_ == Primitive::kPrimNot && !IsStringInit(); } + // Get the index of the special input, if any. + // + // If the invoke IsStringInit(), it initially has a HFakeString special argument + // which is removed by the instruction simplifier; if the invoke HasCurrentMethodInput(), + // the "special input" is the current method pointer; otherwise there may be one + // platform-specific special input, such as PC-relative addressing base. + uint32_t GetSpecialInputIndex() const { return GetNumberOfArguments(); } + InvokeType GetInvokeType() const { return invoke_type_; } MethodLoadKind GetMethodLoadKind() const { return dispatch_info_.method_load_kind; } CodePtrLocation GetCodePtrLocation() const { return dispatch_info_.code_ptr_location; } bool IsRecursive() const { return GetMethodLoadKind() == MethodLoadKind::kRecursive; } bool NeedsDexCacheOfDeclaringClass() const OVERRIDE; bool IsStringInit() const { return GetMethodLoadKind() == MethodLoadKind::kStringInit; } - uint32_t GetCurrentMethodInputIndex() const { return GetNumberOfArguments(); } bool HasMethodAddress() const { return GetMethodLoadKind() == MethodLoadKind::kDirectAddress; } bool HasPcRelativeDexCache() const { return GetMethodLoadKind() == MethodLoadKind::kDexCachePcRelative; @@ -3467,11 +3491,11 @@ class HInvokeStaticOrDirect : public HInvoke { bool HasCurrentMethodInput() const { // This function can be called only after the invoke has been fully initialized by the builder. 
if (NeedsCurrentMethodInput(GetMethodLoadKind())) { - DCHECK(InputAt(GetCurrentMethodInputIndex())->IsCurrentMethod()); + DCHECK(InputAt(GetSpecialInputIndex())->IsCurrentMethod()); return true; } else { - DCHECK(InputCount() == GetCurrentMethodInputIndex() || - !InputAt(GetCurrentMethodInputIndex())->IsCurrentMethod()); + DCHECK(InputCount() == GetSpecialInputIndex() || + !InputAt(GetSpecialInputIndex())->IsCurrentMethod()); return false; } } @@ -3505,20 +3529,19 @@ class HInvokeStaticOrDirect : public HInvoke { return GetInvokeType() == kStatic; } - // Remove the art::HLoadClass instruction set as last input by - // art::PrepareForRegisterAllocation::VisitClinitCheck in lieu of - // the initial art::HClinitCheck instruction (only relevant for - // static calls with explicit clinit check). - void RemoveLoadClassAsLastInput() { + // Remove the HClinitCheck or the replacement HLoadClass (set as last input by + // PrepareForRegisterAllocation::VisitClinitCheck() in lieu of the initial HClinitCheck) + // instruction; only relevant for static calls with explicit clinit check. + void RemoveExplicitClinitCheck(ClinitCheckRequirement new_requirement) { DCHECK(IsStaticWithExplicitClinitCheck()); size_t last_input_index = InputCount() - 1; HInstruction* last_input = InputAt(last_input_index); DCHECK(last_input != nullptr); - DCHECK(last_input->IsLoadClass()) << last_input->DebugName(); + DCHECK(last_input->IsLoadClass() || last_input->IsClinitCheck()) << last_input->DebugName(); RemoveAsUserOfInput(last_input_index); inputs_.pop_back(); - clinit_check_requirement_ = ClinitCheckRequirement::kImplicit; - DCHECK(IsStaticWithImplicitClinitCheck()); + clinit_check_requirement_ = new_requirement; + DCHECK(!IsStaticWithExplicitClinitCheck()); } bool IsStringFactoryFor(HFakeString* str) const { @@ -3539,7 +3562,7 @@ class HInvokeStaticOrDirect : public HInvoke { } // Is this a call to a static method whose declaring class has an - // explicit intialization check in the graph? + // explicit initialization check in the graph? 
bool IsStaticWithExplicitClinitCheck() const { return IsStatic() && (clinit_check_requirement_ == ClinitCheckRequirement::kExplicit); } @@ -3572,6 +3595,9 @@ class HInvokeStaticOrDirect : public HInvoke { return input_record; } + void InsertInputAt(size_t index, HInstruction* input); + void RemoveInputAt(size_t index); + private: const InvokeType invoke_type_; ClinitCheckRequirement clinit_check_requirement_; @@ -3583,6 +3609,7 @@ class HInvokeStaticOrDirect : public HInvoke { DISALLOW_COPY_AND_ASSIGN(HInvokeStaticOrDirect); }; +std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::ClinitCheckRequirement rhs); class HInvokeVirtual : public HInvoke { public: @@ -3637,9 +3664,10 @@ class HInvokeInterface : public HInvoke { DISALLOW_COPY_AND_ASSIGN(HInvokeInterface); }; -class HNewInstance : public HExpression<1> { +class HNewInstance : public HExpression<2> { public: - HNewInstance(HCurrentMethod* current_method, + HNewInstance(HInstruction* cls, + HCurrentMethod* current_method, uint32_t dex_pc, uint16_t type_index, const DexFile& dex_file, @@ -3652,7 +3680,8 @@ class HNewInstance : public HExpression<1> { can_throw_(can_throw), finalizable_(finalizable), entrypoint_(entrypoint) { - SetRawInputAt(0, current_method); + SetRawInputAt(0, cls); + SetRawInputAt(1, current_method); } uint16_t GetTypeIndex() const { return type_index_; } @@ -3672,6 +3701,10 @@ class HNewInstance : public HExpression<1> { QuickEntrypointEnum GetEntrypoint() const { return entrypoint_; } + void SetEntrypoint(QuickEntrypointEnum entrypoint) { + entrypoint_ = entrypoint; + } + DECLARE_INSTRUCTION(NewInstance); private: @@ -3679,7 +3712,7 @@ class HNewInstance : public HExpression<1> { const DexFile& dex_file_; const bool can_throw_; const bool finalizable_; - const QuickEntrypointEnum entrypoint_; + QuickEntrypointEnum entrypoint_; DISALLOW_COPY_AND_ASSIGN(HNewInstance); }; @@ -4287,9 +4320,13 @@ class HPhi : public HInstruction { : HInstruction(SideEffects::None(), dex_pc), inputs_(number_of_inputs, arena->Adapter(kArenaAllocPhiInputs)), reg_number_(reg_number), - type_(type), - is_live_(false), + type_(ToPhiType(type)), + // Phis are constructed live and marked dead if conflicting or unused. + // Individual steps of SsaBuilder should assume that if a phi has been + // marked dead, it can be ignored and will be removed by SsaPhiElimination. + is_live_(true), can_be_null_(true) { + DCHECK_NE(type_, Primitive::kPrimVoid); } // Returns a type equivalent to the given `type`, but that a `HPhi` can hold. @@ -4760,13 +4797,15 @@ class HLoadClass : public HExpression<1> { const DexFile& dex_file, bool is_referrers_class, uint32_t dex_pc, - bool needs_access_check) + bool needs_access_check, + bool is_in_dex_cache) : HExpression(Primitive::kPrimNot, SideEffectsForArchRuntimeCalls(), dex_pc), type_index_(type_index), dex_file_(dex_file), is_referrers_class_(is_referrers_class), generate_clinit_check_(false), needs_access_check_(needs_access_check), + is_in_dex_cache_(is_in_dex_cache), loaded_class_rti_(ReferenceTypeInfo::CreateInvalid()) { // Referrers class should not need access check. We never inline unverified // methods so we can't possibly end up in this situation. @@ -4791,14 +4830,13 @@ class HLoadClass : public HExpression<1> { bool CanBeNull() const OVERRIDE { return false; } bool NeedsEnvironment() const OVERRIDE { - // Will call runtime and load the class if the class is not loaded yet. - // TODO: finer grain decision. 
- return !is_referrers_class_; + return CanCallRuntime(); } bool MustGenerateClinitCheck() const { return generate_clinit_check_; } + void SetMustGenerateClinitCheck(bool generate_clinit_check) { // The entrypoint the code generator is going to call does not do // clinit of the class. @@ -4807,7 +4845,9 @@ class HLoadClass : public HExpression<1> { } bool CanCallRuntime() const { - return MustGenerateClinitCheck() || !is_referrers_class_ || needs_access_check_; + return MustGenerateClinitCheck() || + (!is_referrers_class_ && !is_in_dex_cache_) || + needs_access_check_; } bool NeedsAccessCheck() const { @@ -4815,8 +4855,6 @@ class HLoadClass : public HExpression<1> { } bool CanThrow() const OVERRIDE { - // May call runtime and and therefore can throw. - // TODO: finer grain decision. return CanCallRuntime(); } @@ -4838,6 +4876,8 @@ class HLoadClass : public HExpression<1> { return SideEffects::CanTriggerGC(); } + bool IsInDexCache() const { return is_in_dex_cache_; } + DECLARE_INSTRUCTION(LoadClass); private: @@ -4847,7 +4887,8 @@ class HLoadClass : public HExpression<1> { // Whether this instruction must generate the initialization check. // Used for code generation. bool generate_clinit_check_; - bool needs_access_check_; + const bool needs_access_check_; + const bool is_in_dex_cache_; ReferenceTypeInfo loaded_class_rti_; @@ -4912,6 +4953,7 @@ class HClinitCheck : public HExpression<1> { return true; } + bool CanThrow() const OVERRIDE { return true; } HLoadClass* GetLoadClass() const { return InputAt(0)->AsLoadClass(); } diff --git a/compiler/optimizing/nodes_arm64.cc b/compiler/optimizing/nodes_arm64.cc new file mode 100644 index 0000000000..ac2f093847 --- /dev/null +++ b/compiler/optimizing/nodes_arm64.cc @@ -0,0 +1,84 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "common_arm64.h" +#include "nodes.h" + +namespace art { + +using arm64::helpers::CanFitInShifterOperand; + +void HArm64DataProcWithShifterOp::GetOpInfoFromInstruction(HInstruction* instruction, + /*out*/OpKind* op_kind, + /*out*/int* shift_amount) { + DCHECK(CanFitInShifterOperand(instruction)); + if (instruction->IsShl()) { + *op_kind = kLSL; + *shift_amount = instruction->AsShl()->GetRight()->AsIntConstant()->GetValue(); + } else if (instruction->IsShr()) { + *op_kind = kASR; + *shift_amount = instruction->AsShr()->GetRight()->AsIntConstant()->GetValue(); + } else if (instruction->IsUShr()) { + *op_kind = kLSR; + *shift_amount = instruction->AsUShr()->GetRight()->AsIntConstant()->GetValue(); + } else { + DCHECK(instruction->IsTypeConversion()); + Primitive::Type result_type = instruction->AsTypeConversion()->GetResultType(); + Primitive::Type input_type = instruction->AsTypeConversion()->GetInputType(); + int result_size = Primitive::ComponentSize(result_type); + int input_size = Primitive::ComponentSize(input_type); + int min_size = std::min(result_size, input_size); + // This follows the logic in + // `InstructionCodeGeneratorARM64::VisitTypeConversion()`. + if (result_type == Primitive::kPrimInt && input_type == Primitive::kPrimLong) { + // There is actually nothing to do. The register will be used as a W + // register, discarding the top bits. This is represented by the default + // encoding 'LSL 0'. + *op_kind = kLSL; + *shift_amount = 0; + } else if (result_type == Primitive::kPrimChar || + (input_type == Primitive::kPrimChar && input_size < result_size)) { + *op_kind = kUXTH; + } else { + switch (min_size) { + case 1: *op_kind = kSXTB; break; + case 2: *op_kind = kSXTH; break; + case 4: *op_kind = kSXTW; break; + default: + LOG(FATAL) << "Unexpected min size " << min_size; + } + } + } +} + +std::ostream& operator<<(std::ostream& os, const HArm64DataProcWithShifterOp::OpKind op) { + switch (op) { + case HArm64DataProcWithShifterOp::kLSL: return os << "LSL"; + case HArm64DataProcWithShifterOp::kLSR: return os << "LSR"; + case HArm64DataProcWithShifterOp::kASR: return os << "ASR"; + case HArm64DataProcWithShifterOp::kUXTB: return os << "UXTB"; + case HArm64DataProcWithShifterOp::kUXTH: return os << "UXTH"; + case HArm64DataProcWithShifterOp::kUXTW: return os << "UXTW"; + case HArm64DataProcWithShifterOp::kSXTB: return os << "SXTB"; + case HArm64DataProcWithShifterOp::kSXTH: return os << "SXTH"; + case HArm64DataProcWithShifterOp::kSXTW: return os << "SXTW"; + default: + LOG(FATAL) << "Invalid OpKind " << static_cast<int>(op); + UNREACHABLE(); + } +} + +} // namespace art diff --git a/compiler/optimizing/nodes_arm64.h b/compiler/optimizing/nodes_arm64.h index 885d3a29ee..e8439354af 100644 --- a/compiler/optimizing/nodes_arm64.h +++ b/compiler/optimizing/nodes_arm64.h @@ -19,6 +19,79 @@ namespace art { +class HArm64DataProcWithShifterOp : public HExpression<2> { + public: + enum OpKind { + kLSL, // Logical shift left. + kLSR, // Logical shift right. + kASR, // Arithmetic shift right. + kUXTB, // Unsigned extend byte. + kUXTH, // Unsigned extend half-word. + kUXTW, // Unsigned extend word. + kSXTB, // Signed extend byte. + kSXTH, // Signed extend half-word. + kSXTW, // Signed extend word. + + // Aliases. 
+ kFirstShiftOp = kLSL, + kLastShiftOp = kASR, + kFirstExtensionOp = kUXTB, + kLastExtensionOp = kSXTW + }; + HArm64DataProcWithShifterOp(HInstruction* instr, + HInstruction* left, + HInstruction* right, + OpKind op, + // The shift argument is unused if the operation + // is an extension. + int shift = 0, + uint32_t dex_pc = kNoDexPc) + : HExpression(instr->GetType(), SideEffects::None(), dex_pc), + instr_kind_(instr->GetKind()), op_kind_(op), shift_amount_(shift) { + DCHECK(!instr->HasSideEffects()); + SetRawInputAt(0, left); + SetRawInputAt(1, right); + } + + bool CanBeMoved() const OVERRIDE { return true; } + bool InstructionDataEquals(HInstruction* other_instr) const OVERRIDE { + HArm64DataProcWithShifterOp* other = other_instr->AsArm64DataProcWithShifterOp(); + return instr_kind_ == other->instr_kind_ && + op_kind_ == other->op_kind_ && + shift_amount_ == other->shift_amount_; + } + + static bool IsShiftOp(OpKind op_kind) { + return kFirstShiftOp <= op_kind && op_kind <= kLastShiftOp; + } + + static bool IsExtensionOp(OpKind op_kind) { + return kFirstExtensionOp <= op_kind && op_kind <= kLastExtensionOp; + } + + // Find the operation kind and shift amount from a bitfield move instruction. + static void GetOpInfoFromInstruction(HInstruction* bitfield_op, + /*out*/OpKind* op_kind, + /*out*/int* shift_amount); + + InstructionKind GetInstrKind() const { return instr_kind_; } + OpKind GetOpKind() const { return op_kind_; } + int GetShiftAmount() const { return shift_amount_; } + + DECLARE_INSTRUCTION(Arm64DataProcWithShifterOp); + + private: + InstructionKind instr_kind_; + OpKind op_kind_; + int shift_amount_; + + friend std::ostream& operator<<(std::ostream& os, OpKind op); + + DISALLOW_COPY_AND_ASSIGN(HArm64DataProcWithShifterOp); +}; + +std::ostream& operator<<(std::ostream& os, const HArm64DataProcWithShifterOp::OpKind op); + // This instruction computes an intermediate address pointing in the 'middle' of an object. The // result pointer cannot be handled by GC, so extra care is taken to make sure that this value is // never used across anything that can trigger GC. @@ -42,6 +115,40 @@ class HArm64IntermediateAddress : public HExpression<2> { DISALLOW_COPY_AND_ASSIGN(HArm64IntermediateAddress); }; +class HArm64MultiplyAccumulate : public HExpression<3> { + public: + HArm64MultiplyAccumulate(Primitive::Type type, + InstructionKind op, + HInstruction* accumulator, + HInstruction* mul_left, + HInstruction* mul_right, + uint32_t dex_pc = kNoDexPc) + : HExpression(type, SideEffects::None(), dex_pc), op_kind_(op) { + SetRawInputAt(kInputAccumulatorIndex, accumulator); + SetRawInputAt(kInputMulLeftIndex, mul_left); + SetRawInputAt(kInputMulRightIndex, mul_right); + } + + static constexpr int kInputAccumulatorIndex = 0; + static constexpr int kInputMulLeftIndex = 1; + static constexpr int kInputMulRightIndex = 2; + + bool CanBeMoved() const OVERRIDE { return true; } + bool InstructionDataEquals(HInstruction* other) const OVERRIDE { + return op_kind_ == other->AsArm64MultiplyAccumulate()->op_kind_; + } + + InstructionKind GetOpKind() const { return op_kind_; } + + DECLARE_INSTRUCTION(Arm64MultiplyAccumulate); + + private: + // Indicates if this is a MADD or MSUB. 
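+  // (MADD computes accumulator + mul_left * mul_right and MSUB computes
+  // accumulator - mul_left * mul_right, using the input indices defined above.)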
+ InstructionKind op_kind_; + + DISALLOW_COPY_AND_ASSIGN(HArm64MultiplyAccumulate); +}; + } // namespace art #endif // ART_COMPILER_OPTIMIZING_NODES_ARM64_H_ diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc index 34f1fe5949..2b0d522b31 100644 --- a/compiler/optimizing/optimizing_cfi_test.cc +++ b/compiler/optimizing/optimizing_cfi_test.cc @@ -25,6 +25,7 @@ #include "utils/assembler.h" #include "utils/arm/assembler_thumb2.h" #include "utils/mips/assembler_mips.h" +#include "utils/mips64/assembler_mips64.h" #include "optimizing/optimizing_cfi_test_expected.inc" @@ -212,6 +213,34 @@ TEST_F(OptimizingCFITest, kMipsAdjust) { Check(kMips, "kMips_adjust", expected_asm, expected_cfi); } +TEST_F(OptimizingCFITest, kMips64Adjust) { + // One NOP in forbidden slot, 1 << 15 NOPS have size 1 << 17 which exceeds 18-bit signed maximum. + static constexpr size_t kNumNops = 1u + (1u << 15); + std::vector<uint8_t> expected_asm( + expected_asm_kMips64_adjust_head, + expected_asm_kMips64_adjust_head + arraysize(expected_asm_kMips64_adjust_head)); + expected_asm.resize(expected_asm.size() + kNumNops * 4u, 0u); + expected_asm.insert( + expected_asm.end(), + expected_asm_kMips64_adjust_tail, + expected_asm_kMips64_adjust_tail + arraysize(expected_asm_kMips64_adjust_tail)); + std::vector<uint8_t> expected_cfi( + expected_cfi_kMips64_adjust, + expected_cfi_kMips64_adjust + arraysize(expected_cfi_kMips64_adjust)); + SetUpFrame(kMips64); +#define __ down_cast<mips64::Mips64Assembler*>(GetCodeGenerator()->GetAssembler())-> + mips64::Mips64Label target; + __ Beqc(mips64::A1, mips64::A2, &target); + // Push the target out of range of BEQC. + for (size_t i = 0; i != kNumNops; ++i) { + __ Nop(); + } + __ Bind(&target); +#undef __ + Finish(); + Check(kMips64, "kMips64_adjust", expected_asm, expected_cfi); +} + #endif // __ANDROID__ } // namespace art diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc index 4571ebf2d4..de857295c7 100644 --- a/compiler/optimizing/optimizing_cfi_test_expected.inc +++ b/compiler/optimizing/optimizing_cfi_test_expected.inc @@ -413,3 +413,57 @@ static constexpr uint8_t expected_cfi_kMips_adjust[] = { // 0x0002007c: nop // 0x00020080: .cfi_restore_state // 0x00020080: .cfi_def_cfa_offset: 64 + +static constexpr uint8_t expected_asm_kMips64_adjust_head[] = { + 0xD8, 0xFF, 0xBD, 0x67, 0x20, 0x00, 0xBF, 0xFF, 0x18, 0x00, 0xB1, 0xFF, + 0x10, 0x00, 0xB0, 0xFF, 0x08, 0x00, 0xB9, 0xF7, 0x00, 0x00, 0xB8, 0xF7, + 0xE8, 0xFF, 0xBD, 0x67, 0x00, 0x00, 0xA4, 0xFF, 0x02, 0x00, 0xA6, 0x60, + 0x02, 0x00, 0x3E, 0xEC, 0x0C, 0x00, 0x01, 0xD8, +}; +static constexpr uint8_t expected_asm_kMips64_adjust_tail[] = { + 0x18, 0x00, 0xBD, 0x67, 0x00, 0x00, 0xB8, 0xD7, 0x08, 0x00, 0xB9, 0xD7, + 0x10, 0x00, 0xB0, 0xDF, 0x18, 0x00, 0xB1, 0xDF, 0x20, 0x00, 0xBF, 0xDF, + 0x28, 0x00, 0xBD, 0x67, 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00, +}; +static constexpr uint8_t expected_cfi_kMips64_adjust[] = { + 0x44, 0x0E, 0x28, 0x44, 0x9F, 0x02, 0x44, 0x91, 0x04, 0x44, 0x90, 0x06, + 0x4C, 0x0E, 0x40, 0x04, 0x14, 0x00, 0x02, 0x00, 0x0A, 0x44, 0x0E, 0x28, + 0x4C, 0xD0, 0x44, 0xD1, 0x44, 0xDF, 0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, + 0x40, +}; +// 0x00000000: daddiu r29, r29, -40 +// 0x00000004: .cfi_def_cfa_offset: 40 +// 0x00000004: sd r31, +32(r29) +// 0x00000008: .cfi_offset: r31 at cfa-8 +// 0x00000008: sd r17, +24(r29) +// 0x0000000c: .cfi_offset: r17 at cfa-16 +// 0x0000000c: sd r16, +16(r29) +// 0x00000010: 
.cfi_offset: r16 at cfa-24 +// 0x00000010: sdc1 f25, +8(r29) +// 0x00000014: sdc1 f24, +0(r29) +// 0x00000018: daddiu r29, r29, -24 +// 0x0000001c: .cfi_def_cfa_offset: 64 +// 0x0000001c: sd r4, +0(r29) +// 0x00000020: bnec r5, r6, 0x0000002c ; +12 +// 0x00000024: auipc r1, 2 +// 0x00000028: jic r1, 12 ; b 0x00020030 ; +131080 +// 0x0000002c: nop +// ... +// 0x0002002c: nop +// 0x00020030: .cfi_remember_state +// 0x00020030: daddiu r29, r29, 24 +// 0x00020034: .cfi_def_cfa_offset: 40 +// 0x00020034: ldc1 f24, +0(r29) +// 0x00020038: ldc1 f25, +8(r29) +// 0x0002003c: ld r16, +16(r29) +// 0x00020040: .cfi_restore: r16 +// 0x00020040: ld r17, +24(r29) +// 0x00020044: .cfi_restore: r17 +// 0x00020044: ld r31, +32(r29) +// 0x00020048: .cfi_restore: r31 +// 0x00020048: daddiu r29, r29, 40 +// 0x0002004c: .cfi_def_cfa_offset: 0 +// 0x0002004c: jr r31 +// 0x00020050: nop +// 0x00020054: .cfi_restore_state +// 0x00020054: .cfi_def_cfa_offset: 64 diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 27ee47296c..8440813a87 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -110,24 +110,23 @@ class PassScope; class PassObserver : public ValueObject { public: PassObserver(HGraph* graph, - const char* method_name, CodeGenerator* codegen, std::ostream* visualizer_output, CompilerDriver* compiler_driver) : graph_(graph), - method_name_(method_name), + cached_method_name_(), timing_logger_enabled_(compiler_driver->GetDumpPasses()), - timing_logger_(method_name, true, true), + timing_logger_(timing_logger_enabled_ ? GetMethodName() : "", true, true), disasm_info_(graph->GetArena()), visualizer_enabled_(!compiler_driver->GetDumpCfgFileName().empty()), visualizer_(visualizer_output, graph, *codegen), graph_in_bad_state_(false) { if (timing_logger_enabled_ || visualizer_enabled_) { - if (!IsVerboseMethod(compiler_driver, method_name)) { + if (!IsVerboseMethod(compiler_driver, GetMethodName())) { timing_logger_enabled_ = visualizer_enabled_ = false; } if (visualizer_enabled_) { - visualizer_.PrintHeader(method_name_); + visualizer_.PrintHeader(GetMethodName()); codegen->SetDisassemblyInformation(&disasm_info_); } } @@ -135,7 +134,7 @@ class PassObserver : public ValueObject { ~PassObserver() { if (timing_logger_enabled_) { - LOG(INFO) << "TIMINGS " << method_name_; + LOG(INFO) << "TIMINGS " << GetMethodName(); LOG(INFO) << Dumpable<TimingLogger>(timing_logger_); } } @@ -148,6 +147,14 @@ class PassObserver : public ValueObject { void SetGraphInBadState() { graph_in_bad_state_ = true; } + const char* GetMethodName() { + // PrettyMethod() is expensive, so we delay calling it until we actually have to. + if (cached_method_name_.empty()) { + cached_method_name_ = PrettyMethod(graph_->GetMethodIdx(), graph_->GetDexFile()); + } + return cached_method_name_.c_str(); + } + private: void StartPass(const char* pass_name) { // Dump graph first, then start timer. @@ -206,7 +213,8 @@ class PassObserver : public ValueObject { } HGraph* const graph_; - const char* method_name_; + + std::string cached_method_name_; bool timing_logger_enabled_; TimingLogger timing_logger_; @@ -383,10 +391,12 @@ static bool IsInstructionSetSupported(InstructionSet instruction_set) { || instruction_set == kX86_64; } -// Read barrier are supported only on x86 and x86-64 at the moment. +// Read barrier are supported on ARM, ARM64, x86 and x86-64 at the moment. 
// TODO: Add support for other architectures and remove this function static bool InstructionSetSupportsReadBarrier(InstructionSet instruction_set) { - return instruction_set == kX86 + return instruction_set == kArm64 + || instruction_set == kThumb2 + || instruction_set == kX86 || instruction_set == kX86_64; } @@ -663,13 +673,12 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, jobject class_loader, const DexFile& dex_file, Handle<mirror::DexCache> dex_cache) const { - std::string method_name = PrettyMethod(method_idx, dex_file); MaybeRecordStat(MethodCompilationStat::kAttemptCompilation); CompilerDriver* compiler_driver = GetCompilerDriver(); InstructionSet instruction_set = compiler_driver->GetInstructionSet(); - // Always use the thumb2 assembler: some runtime functionality (like implicit stack - // overflow checks) assume thumb2. + // Always use the Thumb-2 assembler: some runtime functionality + // (like implicit stack overflow checks) assume Thumb-2. if (instruction_set == kArm) { instruction_set = kThumb2; } @@ -727,7 +736,6 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, compiler_driver->GetCompilerOptions().GetGenerateDebugInfo()); PassObserver pass_observer(graph, - method_name.c_str(), codegen.get(), visualizer_output_.get(), compiler_driver); @@ -755,7 +763,7 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, interpreter_metadata, dex_cache); - VLOG(compiler) << "Building " << method_name; + VLOG(compiler) << "Building " << pass_observer.GetMethodName(); { PassScope scope(HGraphBuilder::kBuilderPassName, &pass_observer); @@ -765,13 +773,14 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, } } - VLOG(compiler) << "Optimizing " << method_name; + VLOG(compiler) << "Optimizing " << pass_observer.GetMethodName(); if (run_optimizations_) { { PassScope scope(SsaBuilder::kSsaBuilderPassName, &pass_observer); if (!graph->TryBuildingSsa()) { // We could not transform the graph to SSA, bailout. - LOG(INFO) << "Skipping compilation of " << method_name << ": it contains a non natural loop"; + LOG(INFO) << "Skipping compilation of " << pass_observer.GetMethodName() + << ": it contains a non natural loop"; MaybeRecordStat(MethodCompilationStat::kNotCompiledCannotBuildSSA); pass_observer.SetGraphInBadState(); return nullptr; diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc index c2894c7338..808a1dc6c2 100644 --- a/compiler/optimizing/pc_relative_fixups_x86.cc +++ b/compiler/optimizing/pc_relative_fixups_x86.cc @@ -113,9 +113,8 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { if (invoke_static_or_direct != nullptr && invoke_static_or_direct->HasPcRelativeDexCache()) { InitializePCRelativeBasePointer(invoke); // Add the extra parameter base_. - uint32_t index = invoke_static_or_direct->GetCurrentMethodInputIndex(); DCHECK(!invoke_static_or_direct->HasCurrentMethodInput()); - invoke_static_or_direct->InsertInputAt(index, base_); + invoke_static_or_direct->AddSpecialInput(base_); } // Ensure that we can load FP arguments from the constant area. 
for (size_t i = 0, e = invoke->InputCount(); i < e; i++) { diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index ca928ae0f2..d1770b75ab 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -48,16 +48,85 @@ void PrepareForRegisterAllocation::VisitBoundType(HBoundType* bound_type) { } void PrepareForRegisterAllocation::VisitClinitCheck(HClinitCheck* check) { - HLoadClass* cls = check->GetLoadClass(); - check->ReplaceWith(cls); - if (check->GetPrevious() == cls) { + // Try to find a static invoke or a new-instance from which this check originated. + HInstruction* implicit_clinit = nullptr; + for (HUseIterator<HInstruction*> it(check->GetUses()); !it.Done(); it.Advance()) { + HInstruction* user = it.Current()->GetUser(); + if ((user->IsInvokeStaticOrDirect() || user->IsNewInstance()) && + CanMoveClinitCheck(check, user)) { + implicit_clinit = user; + if (user->IsInvokeStaticOrDirect()) { + DCHECK(user->AsInvokeStaticOrDirect()->IsStaticWithExplicitClinitCheck()); + user->AsInvokeStaticOrDirect()->RemoveExplicitClinitCheck( + HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit); + } else { + DCHECK(user->IsNewInstance()); + // We delegate the initialization duty to the allocation. + if (user->AsNewInstance()->GetEntrypoint() == kQuickAllocObjectInitialized) { + user->AsNewInstance()->SetEntrypoint(kQuickAllocObjectResolved); + } + } + break; + } + } + // If we found a static invoke or new-instance for merging, remove the check + // from dominated static invokes. + if (implicit_clinit != nullptr) { + for (HUseIterator<HInstruction*> it(check->GetUses()); !it.Done(); ) { + HInstruction* user = it.Current()->GetUser(); + // All other uses must be dominated. + DCHECK(implicit_clinit->StrictlyDominates(user) || (implicit_clinit == user)); + it.Advance(); // Advance before we remove the node, reference to the next node is preserved. + if (user->IsInvokeStaticOrDirect()) { + user->AsInvokeStaticOrDirect()->RemoveExplicitClinitCheck( + HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + } + } + } + + HLoadClass* load_class = check->GetLoadClass(); + bool can_merge_with_load_class = CanMoveClinitCheck(load_class, check); + + check->ReplaceWith(load_class); + + if (implicit_clinit != nullptr) { + // Remove the check from the graph. It has been merged into the invoke or new-instance. + check->GetBlock()->RemoveInstruction(check); + // Check if we can merge the load class as well. + if (can_merge_with_load_class && !load_class->HasUses()) { + load_class->GetBlock()->RemoveInstruction(load_class); + } + } else if (can_merge_with_load_class) { // Pass the initialization duty to the `HLoadClass` instruction, // and remove the instruction from the graph. - cls->SetMustGenerateClinitCheck(true); + load_class->SetMustGenerateClinitCheck(true); check->GetBlock()->RemoveInstruction(check); } } +void PrepareForRegisterAllocation::VisitNewInstance(HNewInstance* instruction) { + HLoadClass* load_class = instruction->InputAt(0)->AsLoadClass(); + bool has_only_one_use = load_class->HasOnlyOneNonEnvironmentUse(); + // Change the entrypoint to kQuickAllocObject if either: + // - the class is finalizable (only kQuickAllocObject handles finalizable classes), + // - the class needs access checks (we do not know if it's finalizable), + // - or the load class has only one use. 
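+  // (Sketch of the resulting behaviour: any of the three conditions selects
+  // kQuickAllocObject and passes the type index directly; if the load class
+  // additionally has a single use and the allocation is not from an inlined
+  // invoke, the HLoadClass is removed and, when an access check is still
+  // required, the entrypoint becomes kQuickAllocObjectWithAccessCheck.)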
+ if (instruction->IsFinalizable() || has_only_one_use || load_class->NeedsAccessCheck()) { + instruction->SetEntrypoint(kQuickAllocObject); + instruction->ReplaceInput(GetGraph()->GetIntConstant(load_class->GetTypeIndex()), 0); + // The allocation entry point that deals with access checks does not work with inlined + // methods, so we need to check whether this allocation comes from an inlined method. + if (has_only_one_use && !instruction->GetEnvironment()->IsFromInlinedInvoke()) { + // We can remove the load class from the graph. If it needed access checks, we delegate + // the access check to the allocation. + if (load_class->NeedsAccessCheck()) { + instruction->SetEntrypoint(kQuickAllocObjectWithAccessCheck); + } + load_class->GetBlock()->RemoveInstruction(load_class); + } + } +} + void PrepareForRegisterAllocation::VisitCondition(HCondition* condition) { bool needs_materialization = false; if (!condition->GetUses().HasOnlyOneUse() || !condition->GetEnvUses().IsEmpty()) { @@ -86,30 +155,60 @@ void PrepareForRegisterAllocation::VisitInvokeStaticOrDirect(HInvokeStaticOrDire DCHECK(last_input != nullptr) << "Last input is not HLoadClass. It is " << last_input->DebugName(); - // Remove a load class instruction as last input of a static - // invoke, which has been added (along with a clinit check, - // removed by PrepareForRegisterAllocation::VisitClinitCheck - // previously) by the graph builder during the creation of the - // static invoke instruction, but is no longer required at this - // stage (i.e., after inlining has been performed). - invoke->RemoveLoadClassAsLastInput(); - - // The static call will initialize the class so there's no need for a clinit check if - // it's the first user. - // There is one special case where we still need the clinit check, when inlining. Because - // currently the callee is responsible for reporting parameters to the GC, the code - // that walks the stack during `artQuickResolutionTrampoline` cannot be interrupted for GC. - // Therefore we cannot allocate any object in that code, including loading a new class. - if (last_input == invoke->GetPrevious() && !invoke->IsFromInlinedInvoke()) { - last_input->SetMustGenerateClinitCheck(false); - - // If the load class instruction is no longer used, remove it from - // the graph. - if (!last_input->HasUses()) { - last_input->GetBlock()->RemoveInstruction(last_input); - } + // Detach the explicit class initialization check from the invoke. + // Keeping track of the initializing instruction is no longer required + // at this stage (i.e., after inlining has been performed). + invoke->RemoveExplicitClinitCheck(HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + + // Merging with load class should have happened in VisitClinitCheck(). + DCHECK(!CanMoveClinitCheck(last_input, invoke)); + } +} + +bool PrepareForRegisterAllocation::CanMoveClinitCheck(HInstruction* input, HInstruction* user) { + // Determine if input and user come from the same dex instruction, so that we can move + // the clinit check responsibility from one to the other, i.e. from HClinitCheck (user) + // to HLoadClass (input), or from HClinitCheck (input) to HInvokeStaticOrDirect (user). + + // Start with a quick dex pc check. + if (user->GetDexPc() != input->GetDexPc()) { + return false; + } + + // Now do a thorough environment check that this is really coming from the same instruction in + // the same inlined graph. Unfortunately, we have to go through the whole environment chain. 
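The environment check described in the comment above has to compare the whole chain of inlining environments, level by level. As a standalone illustration (with an invented Env struct rather than ART's HEnvironment), the chain walk amounts to the following sketch.

#include <cstdint>

// Hypothetical stand-in for HEnvironment: one node per level of inlining.
struct Env {
  uint32_t dex_pc;
  uint32_t method_idx;
  const void* dex_file;   // identity comparison stands in for IsSameDexFile()
  const Env* parent;      // caller's environment; nullptr at the outermost level
};

// True only if both chains have the same length and agree at every level.
bool SameInliningContext(const Env* a, const Env* b) {
  while (a != nullptr || b != nullptr) {
    if (a == nullptr || b == nullptr) {
      return false;  // different chain lengths: reached through different inlining paths
    }
    if (a->dex_pc != b->dex_pc || a->method_idx != b->method_idx || a->dex_file != b->dex_file) {
      return false;
    }
    a = a->parent;
    b = b->parent;
  }
  return true;
}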
+ HEnvironment* user_environment = user->GetEnvironment(); + HEnvironment* input_environment = input->GetEnvironment(); + while (user_environment != nullptr || input_environment != nullptr) { + if (user_environment == nullptr || input_environment == nullptr) { + // Different environment chain length. This happens when a method is called + // once directly and once indirectly through another inlined method. + return false; + } + if (user_environment->GetDexPc() != input_environment->GetDexPc() || + user_environment->GetMethodIdx() != input_environment->GetMethodIdx() || + !IsSameDexFile(user_environment->GetDexFile(), input_environment->GetDexFile())) { + return false; + } + user_environment = user_environment->GetParent(); + input_environment = input_environment->GetParent(); + } + + // Check for code motion taking the input to a different block. + if (user->GetBlock() != input->GetBlock()) { + return false; + } + + // In debug mode, check that we have not inserted a throwing instruction + // or an instruction with side effects between input and user. + if (kIsDebugBuild) { + for (HInstruction* between = input->GetNext(); between != user; between = between->GetNext()) { + CHECK(between != nullptr); // User must be after input in the same block. + CHECK(!between->CanThrow()); + CHECK(!between->HasSideEffects()); } } + return true; } } // namespace art diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h index d7f277fa0d..9b2434250d 100644 --- a/compiler/optimizing/prepare_for_register_allocation.h +++ b/compiler/optimizing/prepare_for_register_allocation.h @@ -40,6 +40,9 @@ class PrepareForRegisterAllocation : public HGraphDelegateVisitor { void VisitClinitCheck(HClinitCheck* check) OVERRIDE; void VisitCondition(HCondition* condition) OVERRIDE; void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE; + void VisitNewInstance(HNewInstance* instruction) OVERRIDE; + + bool CanMoveClinitCheck(HInstruction* input, HInstruction* user); DISALLOW_COPY_AND_ASSIGN(PrepareForRegisterAllocation); }; diff --git a/compiler/optimizing/primitive_type_propagation.cc b/compiler/optimizing/primitive_type_propagation.cc index c98f43e461..bde54ee977 100644 --- a/compiler/optimizing/primitive_type_propagation.cc +++ b/compiler/optimizing/primitive_type_propagation.cc @@ -63,7 +63,6 @@ bool PrimitiveTypePropagation::UpdateType(HPhi* phi) { : SsaBuilder::GetFloatOrDoubleEquivalent(phi, input, new_type); phi->ReplaceInput(equivalent, i); if (equivalent->IsPhi()) { - equivalent->AsPhi()->SetLive(); AddToWorklist(equivalent->AsPhi()); } else if (equivalent == input) { // The input has changed its type. It can be an input of other phis, diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index 5190eb3b26..9e6cfbe653 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -22,6 +22,13 @@ namespace art { +// Returns whether this is a loop header phi which was eagerly created but later +// found inconsistent due to the vreg being undefined in one of its predecessors. +// Such phi is marked dead and should be ignored until its removal in SsaPhiElimination. +static bool IsUndefinedLoopHeaderPhi(HPhi* phi) { + return phi->IsLoopHeaderPhi() && phi->InputCount() != phi->GetBlock()->GetPredecessors().size(); +} + /** * A debuggable application may require to reviving phis, to ensure their * associated DEX register is available to a debugger. 
This class implements @@ -165,17 +172,15 @@ bool DeadPhiHandling::UpdateType(HPhi* phi) { void DeadPhiHandling::VisitBasicBlock(HBasicBlock* block) { for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { HPhi* phi = it.Current()->AsPhi(); + if (IsUndefinedLoopHeaderPhi(phi)) { + DCHECK(phi->IsDead()); + continue; + } if (phi->IsDead() && phi->HasEnvironmentUses()) { phi->SetLive(); if (block->IsLoopHeader()) { - // Give a type to the loop phi to guarantee convergence of the algorithm. - // Note that the dead phi may already have a type if it is an equivalent - // generated for a typed LoadLocal. In that case we do not change the - // type because it could lead to an unsupported PrimNot/Float/Double -> - // PrimInt/Long transition and create same type equivalents. - if (phi->GetType() == Primitive::kPrimVoid) { - phi->SetType(phi->InputAt(0)->GetType()); - } + // Loop phis must have a type to guarantee convergence of the algorithm. + DCHECK_NE(phi->GetType(), Primitive::kPrimVoid); AddToWorklist(phi); } else { // Because we are doing a reverse post order visit, all inputs of @@ -220,6 +225,27 @@ void DeadPhiHandling::Run() { ProcessWorklist(); } +void SsaBuilder::SetLoopHeaderPhiInputs() { + for (size_t i = loop_headers_.size(); i > 0; --i) { + HBasicBlock* block = loop_headers_[i - 1]; + for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { + HPhi* phi = it.Current()->AsPhi(); + size_t vreg = phi->GetRegNumber(); + for (HBasicBlock* predecessor : block->GetPredecessors()) { + HInstruction* value = ValueOfLocal(predecessor, vreg); + if (value == nullptr) { + // Vreg is undefined at this predecessor. Mark it dead and leave with + // fewer inputs than predecessors. SsaChecker will fail if not removed. + phi->SetDead(); + break; + } else { + phi->AddInput(value); + } + } + } + } +} + void SsaBuilder::FixNullConstantType() { // The order doesn't matter here. for (HReversePostOrderIterator itb(*GetGraph()); !itb.Done(); itb.Advance()) { @@ -283,15 +309,7 @@ void SsaBuilder::BuildSsa() { } // 2) Set inputs of loop phis. - for (HBasicBlock* block : loop_headers_) { - for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { - HPhi* phi = it.Current()->AsPhi(); - for (HBasicBlock* predecessor : block->GetPredecessors()) { - HInstruction* input = ValueOfLocal(predecessor, phi->GetRegNumber()); - phi->AddInput(input); - } - } - } + SetLoopHeaderPhiInputs(); // 3) Mark dead phis. This will mark phis that are only used by environments: // at the DEX level, the type of these phis does not need to be consistent, but @@ -403,8 +421,13 @@ ArenaVector<HInstruction*>* SsaBuilder::GetLocalsFor(HBasicBlock* block) { for (size_t i = 0; i < vregs; ++i) { // No point in creating the catch phi if it is already undefined at // the first throwing instruction. 
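The SetLoopHeaderPhiInputs() hunk above leaves a loop-header phi with fewer inputs than the block has predecessors whenever the vreg is undefined on some incoming edge, and marks it dead; IsUndefinedLoopHeaderPhi() later recognizes exactly that shape. A minimal sketch of the rule, with invented Phi/Block types standing in for the real IR classes:

#include <cstddef>
#include <functional>
#include <vector>

struct Value;  // hypothetical SSA value
struct Block;  // hypothetical basic block

struct Phi {
  std::vector<Value*> inputs;
  bool dead = false;
};

// Fill a loop-header phi for vreg `vreg` from the loop header's predecessors.
// `value_of_local` plays the role of ValueOfLocal(): it returns the value of
// the vreg at the end of a predecessor, or nullptr if it is undefined there.
void FillLoopHeaderPhi(Phi* phi,
                       const std::vector<Block*>& predecessors,
                       size_t vreg,
                       const std::function<Value*(Block*, size_t)>& value_of_local) {
  for (Block* pred : predecessors) {
    Value* value = value_of_local(pred, vreg);
    if (value == nullptr) {
      phi->dead = true;  // undefined on this edge: mark dead, keep fewer inputs
      break;
    }
    phi->inputs.push_back(value);
  }
}

// The shape that IsUndefinedLoopHeaderPhi() detects afterwards.
bool IsUndefined(const Phi& phi, const std::vector<Block*>& predecessors) {
  return phi.inputs.size() != predecessors.size();
}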
- if ((*current_locals_)[i] != nullptr) { - HPhi* phi = new (arena) HPhi(arena, i, 0, Primitive::kPrimVoid); + HInstruction* current_local_value = (*current_locals_)[i]; + if (current_local_value != nullptr) { + HPhi* phi = new (arena) HPhi( + arena, + i, + 0, + current_local_value->GetType()); block->AddPhi(phi); (*locals)[i] = phi; } @@ -451,7 +474,10 @@ void SsaBuilder::VisitBasicBlock(HBasicBlock* block) { HInstruction* incoming = ValueOfLocal(block->GetLoopInformation()->GetPreHeader(), local); if (incoming != nullptr) { HPhi* phi = new (GetGraph()->GetArena()) HPhi( - GetGraph()->GetArena(), local, 0, Primitive::kPrimVoid); + GetGraph()->GetArena(), + local, + 0, + incoming->GetType()); block->AddPhi(phi); (*current_locals_)[local] = phi; } @@ -484,8 +510,12 @@ void SsaBuilder::VisitBasicBlock(HBasicBlock* block) { } if (is_different) { + HInstruction* first_input = ValueOfLocal(block->GetPredecessors()[0], local); HPhi* phi = new (GetGraph()->GetArena()) HPhi( - GetGraph()->GetArena(), local, block->GetPredecessors().size(), Primitive::kPrimVoid); + GetGraph()->GetArena(), + local, + block->GetPredecessors().size(), + first_input->GetType()); for (size_t i = 0; i < block->GetPredecessors().size(); i++) { HInstruction* pred_value = ValueOfLocal(block->GetPredecessors()[i], local); phi->SetRawInputAt(i, pred_value); @@ -583,8 +613,16 @@ HPhi* SsaBuilder::GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive: phi->GetBlock()->InsertPhiAfter(new_phi, phi); return new_phi; } else { - DCHECK_EQ(next->GetType(), type); - return next->AsPhi(); + HPhi* next_phi = next->AsPhi(); + DCHECK_EQ(next_phi->GetType(), type); + if (next_phi->IsDead()) { + // TODO(dbrazdil): Remove this SetLive (we should not need to revive phis) + // once we stop running MarkDeadPhis before PrimitiveTypePropagation. This + // cannot revive undefined loop header phis because they cannot have uses. + DCHECK(!IsUndefinedLoopHeaderPhi(next_phi)); + next_phi->SetLive(); + } + return next_phi; } } @@ -638,7 +676,36 @@ void SsaBuilder::VisitLoadLocal(HLoadLocal* load) { } void SsaBuilder::VisitStoreLocal(HStoreLocal* store) { - (*current_locals_)[store->GetLocal()->GetRegNumber()] = store->InputAt(1); + uint32_t reg_number = store->GetLocal()->GetRegNumber(); + HInstruction* stored_value = store->InputAt(1); + Primitive::Type stored_type = stored_value->GetType(); + DCHECK_NE(stored_type, Primitive::kPrimVoid); + + // Storing into vreg `reg_number` may implicitly invalidate the surrounding + // registers. Consider the following cases: + // (1) Storing a wide value must overwrite previous values in both `reg_number` + // and `reg_number+1`. We store `nullptr` in `reg_number+1`. + // (2) If vreg `reg_number-1` holds a wide value, writing into `reg_number` + // must invalidate it. We store `nullptr` in `reg_number-1`. + // Consequently, storing a wide value into the high vreg of another wide value + // will invalidate both `reg_number-1` and `reg_number+1`. + + if (reg_number != 0) { + HInstruction* local_low = (*current_locals_)[reg_number - 1]; + if (local_low != nullptr && Primitive::Is64BitType(local_low->GetType())) { + // The vreg we are storing into was previously the high vreg of a pair. + // We need to invalidate its low vreg. + DCHECK((*current_locals_)[reg_number] == nullptr); + (*current_locals_)[reg_number - 1] = nullptr; + } + } + + (*current_locals_)[reg_number] = stored_value; + if (Primitive::Is64BitType(stored_type)) { + // We are storing a pair. Invalidate the instruction in the high vreg. 
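The two invalidation cases spelled out in the VisitStoreLocal() comment above can be condensed into a small standalone sketch (invented Value type, not an HInstruction): writing vreg n kills any pair rooted at n-1, and a wide value written at n also occupies, and therefore clobbers, n+1.

#include <cstddef>
#include <vector>

struct Value { bool is_wide; };  // hypothetical stand-in for an SSA value

void StoreIntoVreg(std::vector<Value*>& locals, size_t n, Value* stored) {
  if (n != 0) {
    Value* below = locals[n - 1];
    if (below != nullptr && below->is_wide) {
      locals[n - 1] = nullptr;  // `n` was the high half of that pair; the pair is gone
    }
  }
  locals[n] = stored;
  if (stored->is_wide && n + 1 < locals.size()) {
    locals[n + 1] = nullptr;    // the new pair's high half lives in `n+1`
  }
}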
+ (*current_locals_)[reg_number + 1] = nullptr; + } + store->GetBlock()->RemoveInstruction(store); } diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h index 79f1a28ac8..dcce5e4c2c 100644 --- a/compiler/optimizing/ssa_builder.h +++ b/compiler/optimizing/ssa_builder.h @@ -81,6 +81,7 @@ class SsaBuilder : public HGraphVisitor { static constexpr const char* kSsaBuilderPassName = "ssa_builder"; private: + void SetLoopHeaderPhiInputs(); void FixNullConstantType(); void EquivalentPhisCleanup(); diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc index 72f9ddd506..a3219dcc38 100644 --- a/compiler/optimizing/ssa_phi_elimination.cc +++ b/compiler/optimizing/ssa_phi_elimination.cc @@ -16,6 +16,8 @@ #include "ssa_phi_elimination.h" +#include "base/arena_containers.h" + namespace art { void SsaDeadPhiElimination::Run() { @@ -24,22 +26,36 @@ void SsaDeadPhiElimination::Run() { } void SsaDeadPhiElimination::MarkDeadPhis() { + // Phis are constructed live and should not be revived if previously marked + // dead. This algorithm temporarily breaks that invariant but we DCHECK that + // only phis which were initially live are revived. + ArenaSet<HPhi*> initially_live(graph_->GetArena()->Adapter()); + // Add to the worklist phis referenced by non-phi instructions. for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) { HPhi* phi = inst_it.Current()->AsPhi(); - // Set dead ahead of running through uses. The phi may have no use. - phi->SetDead(); + if (phi->IsDead()) { + continue; + } + + bool has_non_phi_use = false; for (HUseIterator<HInstruction*> use_it(phi->GetUses()); !use_it.Done(); use_it.Advance()) { - HUseListNode<HInstruction*>* current = use_it.Current(); - HInstruction* user = current->GetUser(); - if (!user->IsPhi()) { - worklist_.push_back(phi); - phi->SetLive(); + if (!use_it.Current()->GetUser()->IsPhi()) { + has_non_phi_use = true; break; } } + + if (has_non_phi_use) { + worklist_.push_back(phi); + } else { + phi->SetDead(); + if (kIsDebugBuild) { + initially_live.insert(phi); + } + } } } @@ -48,10 +64,13 @@ void SsaDeadPhiElimination::MarkDeadPhis() { HPhi* phi = worklist_.back(); worklist_.pop_back(); for (HInputIterator it(phi); !it.Done(); it.Advance()) { - HInstruction* input = it.Current(); - if (input->IsPhi() && input->AsPhi()->IsDead()) { - worklist_.push_back(input->AsPhi()); - input->AsPhi()->SetLive(); + HPhi* input = it.Current()->AsPhi(); + if (input != nullptr && input->IsDead()) { + // Input is a dead phi. Revive it and add to the worklist. We make sure + // that the phi was not dead initially (see definition of `initially_live`). + DCHECK(ContainsElement(initially_live, input)); + input->SetLive(); + worklist_.push_back(input); } } } @@ -118,7 +137,6 @@ void SsaRedundantPhiElimination::Run() { } if (phi->InputCount() == 0) { - DCHECK(phi->IsCatchPhi()); DCHECK(phi->IsDead()); continue; } diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc index 68e39568bb..dead8fd9a8 100644 --- a/compiler/utils/arm/assembler_arm.cc +++ b/compiler/utils/arm/assembler_arm.cc @@ -342,9 +342,9 @@ bool Address::CanHoldLoadOffsetThumb(LoadOperandType type, int offset) { return IsAbsoluteUint<12>(offset); case kLoadSWord: case kLoadDWord: - return IsAbsoluteUint<10>(offset); // VFP addressing mode. 
+ return IsAbsoluteUint<10>(offset) && (offset & 3) == 0; // VFP addressing mode. case kLoadWordPair: - return IsAbsoluteUint<10>(offset); + return IsAbsoluteUint<10>(offset) && (offset & 3) == 0; default: LOG(FATAL) << "UNREACHABLE"; UNREACHABLE(); @@ -360,9 +360,9 @@ bool Address::CanHoldStoreOffsetThumb(StoreOperandType type, int offset) { return IsAbsoluteUint<12>(offset); case kStoreSWord: case kStoreDWord: - return IsAbsoluteUint<10>(offset); // VFP addressing mode. + return IsAbsoluteUint<10>(offset) && (offset & 3) == 0; // VFP addressing mode. case kStoreWordPair: - return IsAbsoluteUint<10>(offset); + return IsAbsoluteUint<10>(offset) && (offset & 3) == 0; default: LOG(FATAL) << "UNREACHABLE"; UNREACHABLE(); diff --git a/compiler/utils/arm/assembler_arm32.h b/compiler/utils/arm/assembler_arm32.h index 5233dcbbb0..ce3a87275d 100644 --- a/compiler/utils/arm/assembler_arm32.h +++ b/compiler/utils/arm/assembler_arm32.h @@ -389,8 +389,6 @@ class Arm32Assembler FINAL : public ArmAssembler { void EmitBranch(Condition cond, Label* label, bool link); static int32_t EncodeBranchOffset(int offset, int32_t inst); static int DecodeBranchOffset(int32_t inst); - int32_t EncodeTstOffset(int offset, int32_t inst); - int DecodeTstOffset(int32_t inst); bool ShifterOperandCanHoldArm32(uint32_t immediate, ShifterOperand* shifter_op); }; diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc index 297cc54e29..7ad5b440e0 100644 --- a/compiler/utils/arm/assembler_thumb2.cc +++ b/compiler/utils/arm/assembler_thumb2.cc @@ -1349,7 +1349,8 @@ void Thumb2Assembler::Emit32BitDataProcessing(Condition cond ATTRIBUTE_UNUSED, int32_t encoding = 0; if (so.IsImmediate()) { // Check special cases. - if ((opcode == SUB || opcode == ADD) && (so.GetImmediate() < (1u << 12))) { + if ((opcode == SUB || opcode == ADD) && (so.GetImmediate() < (1u << 12)) && + /* Prefer T3 encoding to T4. */ !ShifterOperandCanAlwaysHold(so.GetImmediate())) { if (set_cc != kCcSet) { if (opcode == SUB) { thumb_opcode = 5U; @@ -3220,7 +3221,7 @@ void Thumb2Assembler::Ror(Register rd, Register rm, uint32_t shift_imm, void Thumb2Assembler::Rrx(Register rd, Register rm, Condition cond, SetCc set_cc) { CheckCondition(cond); - EmitShift(rd, rm, RRX, rm, cond, set_cc); + EmitShift(rd, rm, RRX, 0, cond, set_cc); } @@ -3469,6 +3470,73 @@ void Thumb2Assembler::LoadImmediate(Register rd, int32_t value, Condition cond) } } +int32_t Thumb2Assembler::GetAllowedLoadOffsetBits(LoadOperandType type) { + switch (type) { + case kLoadSignedByte: + case kLoadSignedHalfword: + case kLoadUnsignedHalfword: + case kLoadUnsignedByte: + case kLoadWord: + // We can encode imm12 offset. + return 0xfffu; + case kLoadSWord: + case kLoadDWord: + case kLoadWordPair: + // We can encode imm8:'00' offset. + return 0xff << 2; + default: + LOG(FATAL) << "UNREACHABLE"; + UNREACHABLE(); + } +} + +int32_t Thumb2Assembler::GetAllowedStoreOffsetBits(StoreOperandType type) { + switch (type) { + case kStoreHalfword: + case kStoreByte: + case kStoreWord: + // We can encode imm12 offset. + return 0xfff; + case kStoreSWord: + case kStoreDWord: + case kStoreWordPair: + // We can encode imm8:'00' offset. 
+ return 0xff << 2; + default: + LOG(FATAL) << "UNREACHABLE"; + UNREACHABLE(); + } +} + +bool Thumb2Assembler::CanSplitLoadStoreOffset(int32_t allowed_offset_bits, + int32_t offset, + /*out*/ int32_t* add_to_base, + /*out*/ int32_t* offset_for_load_store) { + int32_t other_bits = offset & ~allowed_offset_bits; + if (ShifterOperandCanAlwaysHold(other_bits) || ShifterOperandCanAlwaysHold(-other_bits)) { + *add_to_base = offset & ~allowed_offset_bits; + *offset_for_load_store = offset & allowed_offset_bits; + return true; + } + return false; +} + +int32_t Thumb2Assembler::AdjustLoadStoreOffset(int32_t allowed_offset_bits, + Register temp, + Register base, + int32_t offset, + Condition cond) { + DCHECK_NE(offset & ~allowed_offset_bits, 0); + int32_t add_to_base, offset_for_load; + if (CanSplitLoadStoreOffset(allowed_offset_bits, offset, &add_to_base, &offset_for_load)) { + AddConstant(temp, base, add_to_base, cond, kCcKeep); + return offset_for_load; + } else { + LoadImmediate(temp, offset, cond); + add(temp, temp, ShifterOperand(base), cond, kCcKeep); + return 0; + } +} // Implementation note: this method must emit at most one instruction when // Address::CanHoldLoadOffsetThumb. @@ -3479,12 +3547,26 @@ void Thumb2Assembler::LoadFromOffset(LoadOperandType type, Condition cond) { if (!Address::CanHoldLoadOffsetThumb(type, offset)) { CHECK_NE(base, IP); - LoadImmediate(IP, offset, cond); - add(IP, IP, ShifterOperand(base), cond); - base = IP; - offset = 0; + // Inlined AdjustLoadStoreOffset() allows us to pull a few more tricks. + int32_t allowed_offset_bits = GetAllowedLoadOffsetBits(type); + DCHECK_NE(offset & ~allowed_offset_bits, 0); + int32_t add_to_base, offset_for_load; + if (CanSplitLoadStoreOffset(allowed_offset_bits, offset, &add_to_base, &offset_for_load)) { + // Use reg for the adjusted base. If it's low reg, we may end up using 16-bit load. + AddConstant(reg, base, add_to_base, cond, kCcKeep); + base = reg; + offset = offset_for_load; + } else { + Register temp = (reg == base) ? IP : reg; + LoadImmediate(temp, offset, cond); + // TODO: Implement indexed load (not available for LDRD) and use it here to avoid the ADD. + // Use reg for the adjusted base. If it's low reg, we may end up using 16-bit load. + add(reg, reg, ShifterOperand((reg == base) ? IP : base), cond, kCcKeep); + base = reg; + offset = 0; + } } - CHECK(Address::CanHoldLoadOffsetThumb(type, offset)); + DCHECK(Address::CanHoldLoadOffsetThumb(type, offset)); switch (type) { case kLoadSignedByte: ldrsb(reg, Address(base, offset), cond); @@ -3510,7 +3592,6 @@ void Thumb2Assembler::LoadFromOffset(LoadOperandType type, } } - // Implementation note: this method must emit at most one instruction when // Address::CanHoldLoadOffsetThumb, as expected by JIT::GuardedLoadFromOffset. 
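CanSplitLoadStoreOffset() above is the core trick: keep the encodable low bits of the offset in the memory instruction and fold the rest into a single ADD on the base register. A condensed standalone sketch follows; the immediate-range predicate is a deliberately conservative placeholder, not ShifterOperandCanAlwaysHold().

#include <cstdint>

// Placeholder for the "can a single ADD/SUB encode this constant?" test;
// accept only small non-negative constants so the sketch stays self-contained.
static bool CanEncodeAddImmediate(int32_t value) {
  return value >= 0 && value < 256;
}

// Keep the low bits that the load/store encoding can hold (e.g. 0xfff for LDR,
// 0xff << 2 for LDRD/VLDR) and move the rest into one ADD on the base.
bool SplitOffset(int32_t offset,
                 int32_t allowed_offset_bits,
                 int32_t* add_to_base,
                 int32_t* offset_for_mem) {
  int32_t high_part = offset & ~allowed_offset_bits;
  if (CanEncodeAddImmediate(high_part) || CanEncodeAddImmediate(-high_part)) {
    *add_to_base = high_part;
    *offset_for_mem = offset & allowed_offset_bits;
    return true;
  }
  return false;  // caller falls back to materializing the full offset in a register
}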
void Thumb2Assembler::LoadSFromOffset(SRegister reg, @@ -3519,12 +3600,10 @@ void Thumb2Assembler::LoadSFromOffset(SRegister reg, Condition cond) { if (!Address::CanHoldLoadOffsetThumb(kLoadSWord, offset)) { CHECK_NE(base, IP); - LoadImmediate(IP, offset, cond); - add(IP, IP, ShifterOperand(base), cond); + offset = AdjustLoadStoreOffset(GetAllowedLoadOffsetBits(kLoadSWord), IP, base, offset, cond); base = IP; - offset = 0; } - CHECK(Address::CanHoldLoadOffsetThumb(kLoadSWord, offset)); + DCHECK(Address::CanHoldLoadOffsetThumb(kLoadSWord, offset)); vldrs(reg, Address(base, offset), cond); } @@ -3537,12 +3616,10 @@ void Thumb2Assembler::LoadDFromOffset(DRegister reg, Condition cond) { if (!Address::CanHoldLoadOffsetThumb(kLoadDWord, offset)) { CHECK_NE(base, IP); - LoadImmediate(IP, offset, cond); - add(IP, IP, ShifterOperand(base), cond); + offset = AdjustLoadStoreOffset(GetAllowedLoadOffsetBits(kLoadDWord), IP, base, offset, cond); base = IP; - offset = 0; } - CHECK(Address::CanHoldLoadOffsetThumb(kLoadDWord, offset)); + DCHECK(Address::CanHoldLoadOffsetThumb(kLoadDWord, offset)); vldrd(reg, Address(base, offset), cond); } @@ -3573,12 +3650,12 @@ void Thumb2Assembler::StoreToOffset(StoreOperandType type, offset += kRegisterSize; } } - LoadImmediate(tmp_reg, offset, cond); - add(tmp_reg, tmp_reg, ShifterOperand(base), AL); + // TODO: Implement indexed store (not available for STRD), inline AdjustLoadStoreOffset() + // and in the "unsplittable" path get rid of the "add" by using the store indexed instead. + offset = AdjustLoadStoreOffset(GetAllowedStoreOffsetBits(type), tmp_reg, base, offset, cond); base = tmp_reg; - offset = 0; } - CHECK(Address::CanHoldStoreOffsetThumb(type, offset)); + DCHECK(Address::CanHoldStoreOffsetThumb(type, offset)); switch (type) { case kStoreByte: strb(reg, Address(base, offset), cond); @@ -3611,12 +3688,10 @@ void Thumb2Assembler::StoreSToOffset(SRegister reg, Condition cond) { if (!Address::CanHoldStoreOffsetThumb(kStoreSWord, offset)) { CHECK_NE(base, IP); - LoadImmediate(IP, offset, cond); - add(IP, IP, ShifterOperand(base), cond); + offset = AdjustLoadStoreOffset(GetAllowedStoreOffsetBits(kStoreSWord), IP, base, offset, cond); base = IP; - offset = 0; } - CHECK(Address::CanHoldStoreOffsetThumb(kStoreSWord, offset)); + DCHECK(Address::CanHoldStoreOffsetThumb(kStoreSWord, offset)); vstrs(reg, Address(base, offset), cond); } @@ -3629,12 +3704,10 @@ void Thumb2Assembler::StoreDToOffset(DRegister reg, Condition cond) { if (!Address::CanHoldStoreOffsetThumb(kStoreDWord, offset)) { CHECK_NE(base, IP); - LoadImmediate(IP, offset, cond); - add(IP, IP, ShifterOperand(base), cond); + offset = AdjustLoadStoreOffset(GetAllowedStoreOffsetBits(kStoreDWord), IP, base, offset, cond); base = IP; - offset = 0; } - CHECK(Address::CanHoldStoreOffsetThumb(kStoreDWord, offset)); + DCHECK(Address::CanHoldStoreOffsetThumb(kStoreDWord, offset)); vstrd(reg, Address(base, offset), cond); } diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h index e18361300a..9aeece8e57 100644 --- a/compiler/utils/arm/assembler_thumb2.h +++ b/compiler/utils/arm/assembler_thumb2.h @@ -729,13 +729,23 @@ class Thumb2Assembler FINAL : public ArmAssembler { void EmitBranch(Condition cond, Label* label, bool link, bool x); static int32_t EncodeBranchOffset(int32_t offset, int32_t inst); static int DecodeBranchOffset(int32_t inst); - int32_t EncodeTstOffset(int offset, int32_t inst); - int DecodeTstOffset(int32_t inst); void EmitShift(Register rd, Register rm, Shift 
shift, uint8_t amount, Condition cond = AL, SetCc set_cc = kCcDontCare); void EmitShift(Register rd, Register rn, Shift shift, Register rm, Condition cond = AL, SetCc set_cc = kCcDontCare); + static int32_t GetAllowedLoadOffsetBits(LoadOperandType type); + static int32_t GetAllowedStoreOffsetBits(StoreOperandType type); + bool CanSplitLoadStoreOffset(int32_t allowed_offset_bits, + int32_t offset, + /*out*/ int32_t* add_to_base, + /*out*/ int32_t* offset_for_load_store); + int32_t AdjustLoadStoreOffset(int32_t allowed_offset_bits, + Register temp, + Register base, + int32_t offset, + Condition cond); + // Whether the assembler can relocate branches. If false, unresolved branches will be // emitted on 32bits. bool can_relocate_branches_; diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc index cb4b20b5ba..7b32b0fd26 100644 --- a/compiler/utils/arm/assembler_thumb2_test.cc +++ b/compiler/utils/arm/assembler_thumb2_test.cc @@ -243,7 +243,7 @@ TEST_F(AssemblerThumb2Test, sub) { const char* expected = "subs r1, r0, #42\n" - "subw r1, r0, #42\n" + "sub.w r1, r0, #42\n" "subs r1, r0, r2, asr #31\n" "sub r1, r0, r2, asr #31\n"; DriverStr(expected, "sub"); @@ -257,7 +257,7 @@ TEST_F(AssemblerThumb2Test, add) { const char* expected = "adds r1, r0, #42\n" - "addw r1, r0, #42\n" + "add.w r1, r0, #42\n" "adds r1, r0, r2, asr #31\n" "add r1, r0, r2, asr #31\n"; DriverStr(expected, "add"); @@ -305,21 +305,18 @@ TEST_F(AssemblerThumb2Test, StoreWordToNonThumbOffset) { __ StoreToOffset(type, arm::IP, arm::R5, offset); const char* expected = - "mov ip, #4096\n" // LoadImmediate(ip, 4096) - "add ip, ip, sp\n" + "add.w ip, sp, #4096\n" // AddConstant(ip, sp, 4096) "str r0, [ip, #0]\n" - "str r5, [sp, #-4]!\n" // Push(r5) - "movw r5, #4100\n" // LoadImmediate(r5, 4096 + kRegisterSize) - "add r5, r5, sp\n" - "str ip, [r5, #0]\n" - "ldr r5, [sp], #4\n" // Pop(r5) - - "str r6, [sp, #-4]!\n" // Push(r6) - "mov r6, #4096\n" // LoadImmediate(r6, 4096) - "add r6, r6, r5\n" - "str ip, [r6, #0]\n" - "ldr r6, [sp], #4\n"; // Pop(r6) + "str r5, [sp, #-4]!\n" // Push(r5) + "add.w r5, sp, #4096\n" // AddConstant(r5, 4100 & ~0xfff) + "str ip, [r5, #4]\n" // StoreToOffset(type, ip, r5, 4100 & 0xfff) + "ldr r5, [sp], #4\n" // Pop(r5) + + "str r6, [sp, #-4]!\n" // Push(r6) + "add.w r6, r5, #4096\n" // AddConstant(r6, r5, 4096 & ~0xfff) + "str ip, [r6, #0]\n" // StoreToOffset(type, ip, r6, 4096 & 0xfff) + "ldr r6, [sp], #4\n"; // Pop(r6) DriverStr(expected, "StoreWordToNonThumbOffset"); } @@ -360,20 +357,17 @@ TEST_F(AssemblerThumb2Test, StoreWordPairToNonThumbOffset) { __ StoreToOffset(type, arm::R11, arm::R5, offset); const char* expected = - "mov ip, #1024\n" // LoadImmediate(ip, 1024) - "add ip, ip, sp\n" + "add.w ip, sp, #1024\n" // AddConstant(ip, sp, 1024) "strd r0, r1, [ip, #0]\n" "str r5, [sp, #-4]!\n" // Push(r5) - "movw r5, #1028\n" // LoadImmediate(r5, 1024 + kRegisterSize) - "add r5, r5, sp\n" - "strd r11, ip, [r5, #0]\n" + "add.w r5, sp, #1024\n" // AddConstant(r5, sp, (1024 + kRegisterSize) & ~0x3fc) + "strd r11, ip, [r5, #4]\n" // StoreToOffset(type, r11, sp, (1024 + kRegisterSize) & 0x3fc) "ldr r5, [sp], #4\n" // Pop(r5) "str r6, [sp, #-4]!\n" // Push(r6) - "mov r6, #1024\n" // LoadImmediate(r6, 1024) - "add r6, r6, r5\n" - "strd r11, ip, [r6, #0]\n" + "add.w r6, r5, #1024\n" // AddConstant(r6, r5, 1024 & ~0x3fc) + "strd r11, ip, [r6, #0]\n" // StoreToOffset(type, r11, r6, 1024 & 0x3fc) "ldr r6, [sp], #4\n"; // Pop(r6) DriverStr(expected, 
"StoreWordPairToNonThumbOffset"); } diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h index f1233ca457..9457da1c36 100644 --- a/compiler/utils/assembler_test.h +++ b/compiler/utils/assembler_test.h @@ -840,12 +840,17 @@ class AssemblerTest : public testing::Test { return str; } + // Override this to pad the code with NOPs to a certain size if needed. + virtual void Pad(std::vector<uint8_t>& data ATTRIBUTE_UNUSED) { + } + void DriverWrapper(std::string assembly_text, std::string test_name) { assembler_->FinalizeCode(); size_t cs = assembler_->CodeSize(); std::unique_ptr<std::vector<uint8_t>> data(new std::vector<uint8_t>(cs)); MemoryRegion code(&(*data)[0], data->size()); assembler_->FinalizeInstructions(code); + Pad(*data); test_helper_->Driver(*data, assembly_text, test_name); } diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc index 2ae88413e7..1de51a2dc8 100644 --- a/compiler/utils/assembler_thumb_test.cc +++ b/compiler/utils/assembler_thumb_test.cc @@ -466,6 +466,38 @@ TEST(Thumb2AssemblerTest, DataProcessingShiftedRegister) { EmitAndCheck(&assembler, "DataProcessingShiftedRegister"); } +TEST(Thumb2AssemblerTest, ShiftImmediate) { + // Note: This test produces the same results as DataProcessingShiftedRegister + // but it does so using shift functions instead of mov(). + arm::Thumb2Assembler assembler; + + // 16-bit variants. + __ Lsl(R3, R4, 4); + __ Lsr(R3, R4, 5); + __ Asr(R3, R4, 6); + + // 32-bit ROR because ROR immediate doesn't have the same 16-bit version as other shifts. + __ Ror(R3, R4, 7); + + // 32-bit RRX because RRX has no 16-bit version. + __ Rrx(R3, R4); + + // 32 bit variants (not setting condition codes). + __ Lsl(R3, R4, 4, AL, kCcKeep); + __ Lsr(R3, R4, 5, AL, kCcKeep); + __ Asr(R3, R4, 6, AL, kCcKeep); + __ Ror(R3, R4, 7, AL, kCcKeep); + __ Rrx(R3, R4, AL, kCcKeep); + + // 32 bit variants (high registers). + __ Lsls(R8, R4, 4); + __ Lsrs(R8, R4, 5); + __ Asrs(R8, R4, 6); + __ Rors(R8, R4, 7); + __ Rrxs(R8, R4); + + EmitAndCheck(&assembler, "ShiftImmediate"); +} TEST(Thumb2AssemblerTest, BasicLoad) { arm::Thumb2Assembler assembler; @@ -823,29 +855,80 @@ TEST(Thumb2AssemblerTest, SpecialAddSub) { __ add(R2, SP, ShifterOperand(0xf00)); // 32 bit due to imm size. __ add(SP, SP, ShifterOperand(0xf00)); // 32 bit due to imm size. + __ add(SP, SP, ShifterOperand(0xffc)); // 32 bit due to imm size; encoding T4. - __ sub(SP, SP, ShifterOperand(0x50)); // 16 bit - __ sub(R0, SP, ShifterOperand(0x50)); // 32 bit - __ sub(R8, SP, ShifterOperand(0x50)); // 32 bit. + __ sub(SP, SP, ShifterOperand(0x50)); // 16 bit + __ sub(R0, SP, ShifterOperand(0x50)); // 32 bit + __ sub(R8, SP, ShifterOperand(0x50)); // 32 bit. - __ sub(SP, SP, ShifterOperand(0xf00)); // 32 bit due to imm size + __ sub(SP, SP, ShifterOperand(0xf00)); // 32 bit due to imm size + __ sub(SP, SP, ShifterOperand(0xffc)); // 32 bit due to imm size; encoding T4. 
EmitAndCheck(&assembler, "SpecialAddSub"); } +TEST(Thumb2AssemblerTest, LoadFromOffset) { + arm::Thumb2Assembler assembler; + + __ LoadFromOffset(kLoadWord, R2, R4, 12); + __ LoadFromOffset(kLoadWord, R2, R4, 0xfff); + __ LoadFromOffset(kLoadWord, R2, R4, 0x1000); + __ LoadFromOffset(kLoadWord, R2, R4, 0x1000a4); + __ LoadFromOffset(kLoadWord, R2, R4, 0x101000); + __ LoadFromOffset(kLoadWord, R4, R4, 0x101000); + __ LoadFromOffset(kLoadUnsignedHalfword, R2, R4, 12); + __ LoadFromOffset(kLoadUnsignedHalfword, R2, R4, 0xfff); + __ LoadFromOffset(kLoadUnsignedHalfword, R2, R4, 0x1000); + __ LoadFromOffset(kLoadUnsignedHalfword, R2, R4, 0x1000a4); + __ LoadFromOffset(kLoadUnsignedHalfword, R2, R4, 0x101000); + __ LoadFromOffset(kLoadUnsignedHalfword, R4, R4, 0x101000); + __ LoadFromOffset(kLoadWordPair, R2, R4, 12); + __ LoadFromOffset(kLoadWordPair, R2, R4, 0x3fc); + __ LoadFromOffset(kLoadWordPair, R2, R4, 0x400); + __ LoadFromOffset(kLoadWordPair, R2, R4, 0x400a4); + __ LoadFromOffset(kLoadWordPair, R2, R4, 0x40400); + __ LoadFromOffset(kLoadWordPair, R4, R4, 0x40400); + + __ LoadFromOffset(kLoadWord, R0, R12, 12); // 32-bit because of R12. + __ LoadFromOffset(kLoadWord, R2, R4, 0xa4 - 0x100000); + + __ LoadFromOffset(kLoadSignedByte, R2, R4, 12); + __ LoadFromOffset(kLoadUnsignedByte, R2, R4, 12); + __ LoadFromOffset(kLoadSignedHalfword, R2, R4, 12); + + EmitAndCheck(&assembler, "LoadFromOffset"); +} + TEST(Thumb2AssemblerTest, StoreToOffset) { arm::Thumb2Assembler assembler; - __ StoreToOffset(kStoreWord, R2, R4, 12); // Simple - __ StoreToOffset(kStoreWord, R2, R4, 0x2000); // Offset too big. - __ StoreToOffset(kStoreWord, R0, R12, 12); - __ StoreToOffset(kStoreHalfword, R0, R12, 12); - __ StoreToOffset(kStoreByte, R2, R12, 12); + __ StoreToOffset(kStoreWord, R2, R4, 12); + __ StoreToOffset(kStoreWord, R2, R4, 0xfff); + __ StoreToOffset(kStoreWord, R2, R4, 0x1000); + __ StoreToOffset(kStoreWord, R2, R4, 0x1000a4); + __ StoreToOffset(kStoreWord, R2, R4, 0x101000); + __ StoreToOffset(kStoreWord, R4, R4, 0x101000); + __ StoreToOffset(kStoreHalfword, R2, R4, 12); + __ StoreToOffset(kStoreHalfword, R2, R4, 0xfff); + __ StoreToOffset(kStoreHalfword, R2, R4, 0x1000); + __ StoreToOffset(kStoreHalfword, R2, R4, 0x1000a4); + __ StoreToOffset(kStoreHalfword, R2, R4, 0x101000); + __ StoreToOffset(kStoreHalfword, R4, R4, 0x101000); + __ StoreToOffset(kStoreWordPair, R2, R4, 12); + __ StoreToOffset(kStoreWordPair, R2, R4, 0x3fc); + __ StoreToOffset(kStoreWordPair, R2, R4, 0x400); + __ StoreToOffset(kStoreWordPair, R2, R4, 0x400a4); + __ StoreToOffset(kStoreWordPair, R2, R4, 0x40400); + __ StoreToOffset(kStoreWordPair, R4, R4, 0x40400); + + __ StoreToOffset(kStoreWord, R0, R12, 12); // 32-bit because of R12. 
+ __ StoreToOffset(kStoreWord, R2, R4, 0xa4 - 0x100000); + + __ StoreToOffset(kStoreByte, R2, R4, 12); EmitAndCheck(&assembler, "StoreToOffset"); } - TEST(Thumb2AssemblerTest, IfThen) { arm::Thumb2Assembler assembler; diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc index b79c2e46f0..9246c827a7 100644 --- a/compiler/utils/assembler_thumb_test_expected.cc.inc +++ b/compiler/utils/assembler_thumb_test_expected.cc.inc @@ -132,8 +132,8 @@ const char* DataProcessingRegisterResults[] = { const char* DataProcessingImmediateResults[] = { " 0: 2055 movs r0, #85 ; 0x55\n", " 2: f06f 0055 mvn.w r0, #85 ; 0x55\n", - " 6: f201 0055 addw r0, r1, #85 ; 0x55\n", - " a: f2a1 0055 subw r0, r1, #85 ; 0x55\n", + " 6: f101 0055 add.w r0, r1, #85 ; 0x55\n", + " a: f1a1 0055 sub.w r0, r1, #85 ; 0x55\n", " e: f001 0055 and.w r0, r1, #85 ; 0x55\n", " 12: f041 0055 orr.w r0, r1, #85 ; 0x55\n", " 16: f061 0055 orn r0, r1, #85 ; 0x55\n", @@ -201,6 +201,24 @@ const char* DataProcessingShiftedRegisterResults[] = { " 32: ea5f 0834 movs.w r8, r4, rrx\n", nullptr }; +const char* ShiftImmediateResults[] = { + " 0: 0123 lsls r3, r4, #4\n", + " 2: 0963 lsrs r3, r4, #5\n", + " 4: 11a3 asrs r3, r4, #6\n", + " 6: ea4f 13f4 mov.w r3, r4, ror #7\n", + " a: ea4f 0334 mov.w r3, r4, rrx\n", + " e: ea4f 1304 mov.w r3, r4, lsl #4\n", + " 12: ea4f 1354 mov.w r3, r4, lsr #5\n", + " 16: ea4f 13a4 mov.w r3, r4, asr #6\n", + " 1a: ea4f 13f4 mov.w r3, r4, ror #7\n", + " 1e: ea4f 0334 mov.w r3, r4, rrx\n", + " 22: ea5f 1804 movs.w r8, r4, lsl #4\n", + " 26: ea5f 1854 movs.w r8, r4, lsr #5\n", + " 2a: ea5f 18a4 movs.w r8, r4, asr #6\n", + " 2e: ea5f 18f4 movs.w r8, r4, ror #7\n", + " 32: ea5f 0834 movs.w r8, r4, rrx\n", + nullptr +}; const char* BasicLoadResults[] = { " 0: 69a3 ldr r3, [r4, #24]\n", " 2: 7e23 ldrb r3, [r4, #24]\n", @@ -434,23 +452,115 @@ const char* MovWMovTResults[] = { const char* SpecialAddSubResults[] = { " 0: aa14 add r2, sp, #80 ; 0x50\n", " 2: b014 add sp, #80 ; 0x50\n", - " 4: f20d 0850 addw r8, sp, #80 ; 0x50\n", - " 8: f60d 7200 addw r2, sp, #3840 ; 0xf00\n", - " c: f60d 7d00 addw sp, sp, #3840 ; 0xf00\n", - " 10: b094 sub sp, #80 ; 0x50\n", - " 12: f2ad 0050 subw r0, sp, #80 ; 0x50\n", - " 16: f2ad 0850 subw r8, sp, #80 ; 0x50\n", - " 1a: f6ad 7d00 subw sp, sp, #3840 ; 0xf00\n", + " 4: f10d 0850 add.w r8, sp, #80 ; 0x50\n", + " 8: f50d 6270 add.w r2, sp, #3840 ; 0xf00\n", + " c: f50d 6d70 add.w sp, sp, #3840 ; 0xf00\n", + " 10: f60d 7dfc addw sp, sp, #4092 ; 0xffc\n", + " 14: b094 sub sp, #80 ; 0x50\n", + " 16: f1ad 0050 sub.w r0, sp, #80 ; 0x50\n", + " 1a: f1ad 0850 sub.w r8, sp, #80 ; 0x50\n", + " 1e: f5ad 6d70 sub.w sp, sp, #3840 ; 0xf00\n", + " 22: f6ad 7dfc subw sp, sp, #4092 ; 0xffc\n", + nullptr +}; +const char* LoadFromOffsetResults[] = { + " 0: 68e2 ldr r2, [r4, #12]\n", + " 2: f8d4 2fff ldr.w r2, [r4, #4095] ; 0xfff\n", + " 6: f504 5280 add.w r2, r4, #4096 ; 0x1000\n", + " a: 6812 ldr r2, [r2, #0]\n", + " c: f504 1280 add.w r2, r4, #1048576 ; 0x100000\n", + " 10: f8d2 20a4 ldr.w r2, [r2, #164] ; 0xa4\n", + " 14: f241 0200 movw r2, #4096 ; 0x1000\n", + " 18: f2c0 0210 movt r2, #16\n", + " 1c: 4422 add r2, r4\n", + " 1e: 6812 ldr r2, [r2, #0]\n", + " 20: f241 0c00 movw ip, #4096 ; 0x1000\n", + " 24: f2c0 0c10 movt ip, #16\n", + " 28: 4464 add r4, ip\n", + " 2a: 6824 ldr r4, [r4, #0]\n", + " 2c: 89a2 ldrh r2, [r4, #12]\n", + " 2e: f8b4 2fff ldrh.w r2, [r4, #4095] ; 0xfff\n", + " 32: f504 5280 add.w r2, r4, #4096 ; 0x1000\n", + " 36: 8812 
ldrh r2, [r2, #0]\n", + " 38: f504 1280 add.w r2, r4, #1048576 ; 0x100000\n", + " 3c: f8b2 20a4 ldrh.w r2, [r2, #164] ; 0xa4\n", + " 40: f241 0200 movw r2, #4096 ; 0x1000\n", + " 44: f2c0 0210 movt r2, #16\n", + " 48: 4422 add r2, r4\n", + " 4a: 8812 ldrh r2, [r2, #0]\n", + " 4c: f241 0c00 movw ip, #4096 ; 0x1000\n", + " 50: f2c0 0c10 movt ip, #16\n", + " 54: 4464 add r4, ip\n", + " 56: 8824 ldrh r4, [r4, #0]\n", + " 58: e9d4 2303 ldrd r2, r3, [r4, #12]\n", + " 5c: e9d4 23ff ldrd r2, r3, [r4, #1020] ; 0x3fc\n", + " 60: f504 6280 add.w r2, r4, #1024 ; 0x400\n", + " 64: e9d2 2300 ldrd r2, r3, [r2]\n", + " 68: f504 2280 add.w r2, r4, #262144 ; 0x40000\n", + " 6c: e9d2 2329 ldrd r2, r3, [r2, #164]; 0xa4\n", + " 70: f240 4200 movw r2, #1024 ; 0x400\n", + " 74: f2c0 0204 movt r2, #4\n", + " 78: 4422 add r2, r4\n", + " 7a: e9d2 2300 ldrd r2, r3, [r2]\n", + " 7e: f240 4c00 movw ip, #1024 ; 0x400\n", + " 82: f2c0 0c04 movt ip, #4\n", + " 86: 4464 add r4, ip\n", + " 88: e9d4 4500 ldrd r4, r5, [r4]\n", + " 8c: f8dc 000c ldr.w r0, [ip, #12]\n", + " 90: f5a4 1280 sub.w r2, r4, #1048576 ; 0x100000\n", + " 94: f8d2 20a4 ldr.w r2, [r2, #164] ; 0xa4\n", + " 98: f994 200c ldrsb.w r2, [r4, #12]\n", + " 9c: 7b22 ldrb r2, [r4, #12]\n", + " 9e: f9b4 200c ldrsh.w r2, [r4, #12]\n", nullptr }; const char* StoreToOffsetResults[] = { " 0: 60e2 str r2, [r4, #12]\n", - " 2: f44f 5c00 mov.w ip, #8192 ; 0x2000\n", - " 6: 44a4 add ip, r4\n", - " 8: f8cc 2000 str.w r2, [ip]\n", - " c: f8cc 000c str.w r0, [ip, #12]\n", - " 10: f8ac 000c strh.w r0, [ip, #12]\n", - " 14: f88c 200c strb.w r2, [ip, #12]\n", + " 2: f8c4 2fff str.w r2, [r4, #4095] ; 0xfff\n", + " 6: f504 5c80 add.w ip, r4, #4096 ; 0x1000\n", + " a: f8cc 2000 str.w r2, [ip]\n", + " e: f504 1c80 add.w ip, r4, #1048576 ; 0x100000\n", + " 12: f8cc 20a4 str.w r2, [ip, #164] ; 0xa4\n", + " 16: f241 0c00 movw ip, #4096 ; 0x1000\n", + " 1a: f2c0 0c10 movt ip, #16\n", + " 1e: 44a4 add ip, r4\n", + " 20: f8cc 2000 str.w r2, [ip]\n", + " 24: f241 0c00 movw ip, #4096 ; 0x1000\n", + " 28: f2c0 0c10 movt ip, #16\n", + " 2c: 44a4 add ip, r4\n", + " 2e: f8cc 4000 str.w r4, [ip]\n", + " 32: 81a2 strh r2, [r4, #12]\n", + " 34: f8a4 2fff strh.w r2, [r4, #4095] ; 0xfff\n", + " 38: f504 5c80 add.w ip, r4, #4096 ; 0x1000\n", + " 3c: f8ac 2000 strh.w r2, [ip]\n", + " 40: f504 1c80 add.w ip, r4, #1048576 ; 0x100000\n", + " 44: f8ac 20a4 strh.w r2, [ip, #164] ; 0xa4\n", + " 48: f241 0c00 movw ip, #4096 ; 0x1000\n", + " 4c: f2c0 0c10 movt ip, #16\n", + " 50: 44a4 add ip, r4\n", + " 52: f8ac 2000 strh.w r2, [ip]\n", + " 56: f241 0c00 movw ip, #4096 ; 0x1000\n", + " 5a: f2c0 0c10 movt ip, #16\n", + " 5e: 44a4 add ip, r4\n", + " 60: f8ac 4000 strh.w r4, [ip]\n", + " 64: e9c4 2303 strd r2, r3, [r4, #12]\n", + " 68: e9c4 23ff strd r2, r3, [r4, #1020] ; 0x3fc\n", + " 6c: f504 6c80 add.w ip, r4, #1024 ; 0x400\n", + " 70: e9cc 2300 strd r2, r3, [ip]\n", + " 74: f504 2c80 add.w ip, r4, #262144 ; 0x40000\n", + " 78: e9cc 2329 strd r2, r3, [ip, #164]; 0xa4\n", + " 7c: f240 4c00 movw ip, #1024 ; 0x400\n", + " 80: f2c0 0c04 movt ip, #4\n", + " 84: 44a4 add ip, r4\n", + " 86: e9cc 2300 strd r2, r3, [ip]\n", + " 8a: f240 4c00 movw ip, #1024 ; 0x400\n", + " 8e: f2c0 0c04 movt ip, #4\n", + " 92: 44a4 add ip, r4\n", + " 94: e9cc 4500 strd r4, r5, [ip]\n", + " 98: f8cc 000c str.w r0, [ip, #12]\n", + " 9c: f5a4 1c80 sub.w ip, r4, #1048576 ; 0x100000\n", + " a0: f8cc 20a4 str.w r2, [ip, #164] ; 0xa4\n", + " a4: 7322 strb r2, [r4, #12]\n", nullptr }; const char* IfThenResults[] = { @@ -4952,6 +5062,7 @@ void 
setup_results() { test_results["DataProcessingModifiedImmediate"] = DataProcessingModifiedImmediateResults; test_results["DataProcessingModifiedImmediates"] = DataProcessingModifiedImmediatesResults; test_results["DataProcessingShiftedRegister"] = DataProcessingShiftedRegisterResults; + test_results["ShiftImmediate"] = ShiftImmediateResults; test_results["BasicLoad"] = BasicLoadResults; test_results["BasicStore"] = BasicStoreResults; test_results["ComplexLoad"] = ComplexLoadResults; @@ -4966,6 +5077,7 @@ void setup_results() { test_results["StoreMultiple"] = StoreMultipleResults; test_results["MovWMovT"] = MovWMovTResults; test_results["SpecialAddSub"] = SpecialAddSubResults; + test_results["LoadFromOffset"] = LoadFromOffsetResults; test_results["StoreToOffset"] = StoreToOffsetResults; test_results["IfThen"] = IfThenResults; test_results["CbzCbnz"] = CbzCbnzResults; diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc index aee64120a8..fc7ac7061a 100644 --- a/compiler/utils/mips/assembler_mips.cc +++ b/compiler/utils/mips/assembler_mips.cc @@ -310,15 +310,27 @@ void MipsAssembler::Seh(Register rd, Register rt) { EmitR(0x1f, static_cast<Register>(0), rt, rd, 0x18, 0x20); } +void MipsAssembler::Wsbh(Register rd, Register rt) { + EmitR(0x1f, static_cast<Register>(0), rt, rd, 2, 0x20); +} + void MipsAssembler::Sll(Register rd, Register rt, int shamt) { + CHECK(IsUint<5>(shamt)) << shamt; EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x00); } void MipsAssembler::Srl(Register rd, Register rt, int shamt) { + CHECK(IsUint<5>(shamt)) << shamt; EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x02); } +void MipsAssembler::Rotr(Register rd, Register rt, int shamt) { + CHECK(IsUint<5>(shamt)) << shamt; + EmitR(0, static_cast<Register>(1), rt, rd, shamt, 0x02); +} + void MipsAssembler::Sra(Register rd, Register rt, int shamt) { + CHECK(IsUint<5>(shamt)) << shamt; EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x03); } diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h index 4038c1f1c4..1ef0992dac 100644 --- a/compiler/utils/mips/assembler_mips.h +++ b/compiler/utils/mips/assembler_mips.h @@ -135,9 +135,11 @@ class MipsAssembler FINAL : public Assembler { void Seb(Register rd, Register rt); // R2+ void Seh(Register rd, Register rt); // R2+ + void Wsbh(Register rd, Register rt); // R2+ void Sll(Register rd, Register rt, int shamt); void Srl(Register rd, Register rt, int shamt); + void Rotr(Register rd, Register rt, int shamt); // R2+ void Sra(Register rd, Register rt, int shamt); void Sllv(Register rd, Register rt, Register rs); void Srlv(Register rd, Register rt, Register rs); diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc index ba2525e555..107d5bb572 100644 --- a/compiler/utils/mips64/assembler_mips64.cc +++ b/compiler/utils/mips64/assembler_mips64.cc @@ -19,15 +19,73 @@ #include "base/bit_utils.h" #include "base/casts.h" #include "entrypoints/quick/quick_entrypoints.h" +#include "entrypoints/quick/quick_entrypoints_enum.h" #include "memory_region.h" #include "thread.h" namespace art { namespace mips64 { +void Mips64Assembler::FinalizeCode() { + for (auto& exception_block : exception_blocks_) { + EmitExceptionPoll(&exception_block); + } + PromoteBranches(); +} + +void Mips64Assembler::FinalizeInstructions(const MemoryRegion& region) { + EmitBranches(); + Assembler::FinalizeInstructions(region); + PatchCFI(); +} + +void Mips64Assembler::PatchCFI() { + if 
(cfi().NumberOfDelayedAdvancePCs() == 0u) { + return; + } + + typedef DebugFrameOpCodeWriterForAssembler::DelayedAdvancePC DelayedAdvancePC; + const auto data = cfi().ReleaseStreamAndPrepareForDelayedAdvancePC(); + const std::vector<uint8_t>& old_stream = data.first; + const std::vector<DelayedAdvancePC>& advances = data.second; + + // Refill our data buffer with patched opcodes. + cfi().ReserveCFIStream(old_stream.size() + advances.size() + 16); + size_t stream_pos = 0; + for (const DelayedAdvancePC& advance : advances) { + DCHECK_GE(advance.stream_pos, stream_pos); + // Copy old data up to the point where advance was issued. + cfi().AppendRawData(old_stream, stream_pos, advance.stream_pos); + stream_pos = advance.stream_pos; + // Insert the advance command with its final offset. + size_t final_pc = GetAdjustedPosition(advance.pc); + cfi().AdvancePC(final_pc); + } + // Copy the final segment if any. + cfi().AppendRawData(old_stream, stream_pos, old_stream.size()); +} + +void Mips64Assembler::EmitBranches() { + CHECK(!overwriting_); + // Switch from appending instructions at the end of the buffer to overwriting + // existing instructions (branch placeholders) in the buffer. + overwriting_ = true; + for (auto& branch : branches_) { + EmitBranch(&branch); + } + overwriting_ = false; +} + void Mips64Assembler::Emit(uint32_t value) { - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - buffer_.Emit<uint32_t>(value); + if (overwriting_) { + // Branches to labels are emitted into their placeholders here. + buffer_.Store<uint32_t>(overwrite_location_, value); + overwrite_location_ += sizeof(uint32_t); + } else { + // Other instructions are simply appended at the end here. + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + buffer_.Emit<uint32_t>(value); + } } void Mips64Assembler::EmitR(int opcode, GpuRegister rs, GpuRegister rt, GpuRegister rd, @@ -82,15 +140,16 @@ void Mips64Assembler::EmitI(int opcode, GpuRegister rs, GpuRegister rt, uint16_t void Mips64Assembler::EmitI21(int opcode, GpuRegister rs, uint32_t imm21) { CHECK_NE(rs, kNoGpuRegister); + CHECK(IsUint<21>(imm21)) << imm21; uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift | static_cast<uint32_t>(rs) << kRsShift | - (imm21 & 0x1FFFFF); + imm21; Emit(encoding); } -void Mips64Assembler::EmitJ(int opcode, uint32_t addr26) { - uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift | - (addr26 & 0x3FFFFFF); +void Mips64Assembler::EmitI26(int opcode, uint32_t imm26) { + CHECK(IsUint<26>(imm26)) << imm26; + uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift | imm26; Emit(encoding); } @@ -428,26 +487,6 @@ void Mips64Assembler::Sltiu(GpuRegister rt, GpuRegister rs, uint16_t imm16) { EmitI(0xb, rs, rt, imm16); } -void Mips64Assembler::Beq(GpuRegister rs, GpuRegister rt, uint16_t imm16) { - EmitI(0x4, rs, rt, imm16); - Nop(); -} - -void Mips64Assembler::Bne(GpuRegister rs, GpuRegister rt, uint16_t imm16) { - EmitI(0x5, rs, rt, imm16); - Nop(); -} - -void Mips64Assembler::J(uint32_t addr26) { - EmitJ(0x2, addr26); - Nop(); -} - -void Mips64Assembler::Jal(uint32_t addr26) { - EmitJ(0x3, addr26); - Nop(); -} - void Mips64Assembler::Seleqz(GpuRegister rd, GpuRegister rs, GpuRegister rt) { EmitR(0, rs, rt, rd, 0, 0x35); } @@ -474,7 +513,6 @@ void Mips64Assembler::Dclo(GpuRegister rd, GpuRegister rs) { void Mips64Assembler::Jalr(GpuRegister rd, GpuRegister rs) { EmitR(0, rs, static_cast<GpuRegister>(0), rd, 0, 0x09); - Nop(); } void Mips64Assembler::Jalr(GpuRegister rs) { @@ -489,6 +527,15 @@ void 
Mips64Assembler::Auipc(GpuRegister rs, uint16_t imm16) { EmitI(0x3B, rs, static_cast<GpuRegister>(0x1E), imm16); } +void Mips64Assembler::Addiupc(GpuRegister rs, uint32_t imm19) { + CHECK(IsUint<19>(imm19)) << imm19; + EmitI21(0x3B, rs, imm19); +} + +void Mips64Assembler::Bc(uint32_t imm26) { + EmitI26(0x32, imm26); +} + void Mips64Assembler::Jic(GpuRegister rt, uint16_t imm16) { EmitI(0x36, static_cast<GpuRegister>(0), rt, imm16); } @@ -549,14 +596,14 @@ void Mips64Assembler::Beqc(GpuRegister rs, GpuRegister rt, uint16_t imm16) { CHECK_NE(rs, ZERO); CHECK_NE(rt, ZERO); CHECK_NE(rs, rt); - EmitI(0x8, (rs < rt) ? rs : rt, (rs < rt) ? rt : rs, imm16); + EmitI(0x8, std::min(rs, rt), std::max(rs, rt), imm16); } void Mips64Assembler::Bnec(GpuRegister rs, GpuRegister rt, uint16_t imm16) { CHECK_NE(rs, ZERO); CHECK_NE(rt, ZERO); CHECK_NE(rs, rt); - EmitI(0x18, (rs < rt) ? rs : rt, (rs < rt) ? rt : rs, imm16); + EmitI(0x18, std::min(rs, rt), std::max(rs, rt), imm16); } void Mips64Assembler::Beqzc(GpuRegister rs, uint32_t imm21) { @@ -569,6 +616,65 @@ void Mips64Assembler::Bnezc(GpuRegister rs, uint32_t imm21) { EmitI21(0x3E, rs, imm21); } +void Mips64Assembler::EmitBcondc(BranchCondition cond, + GpuRegister rs, + GpuRegister rt, + uint32_t imm16_21) { + switch (cond) { + case kCondLT: + Bltc(rs, rt, imm16_21); + break; + case kCondGE: + Bgec(rs, rt, imm16_21); + break; + case kCondLE: + Bgec(rt, rs, imm16_21); + break; + case kCondGT: + Bltc(rt, rs, imm16_21); + break; + case kCondLTZ: + CHECK_EQ(rt, ZERO); + Bltzc(rs, imm16_21); + break; + case kCondGEZ: + CHECK_EQ(rt, ZERO); + Bgezc(rs, imm16_21); + break; + case kCondLEZ: + CHECK_EQ(rt, ZERO); + Blezc(rs, imm16_21); + break; + case kCondGTZ: + CHECK_EQ(rt, ZERO); + Bgtzc(rs, imm16_21); + break; + case kCondEQ: + Beqc(rs, rt, imm16_21); + break; + case kCondNE: + Bnec(rs, rt, imm16_21); + break; + case kCondEQZ: + CHECK_EQ(rt, ZERO); + Beqzc(rs, imm16_21); + break; + case kCondNEZ: + CHECK_EQ(rt, ZERO); + Bnezc(rs, imm16_21); + break; + case kCondLTU: + Bltuc(rs, rt, imm16_21); + break; + case kCondGEU: + Bgeuc(rs, rt, imm16_21); + break; + case kUncond: + LOG(FATAL) << "Unexpected branch condition " << cond; + UNREACHABLE(); + } +} + void Mips64Assembler::AddS(FpuRegister fd, FpuRegister fs, FpuRegister ft) { EmitFR(0x11, 0x10, ft, fs, fd, 0x0); } @@ -925,15 +1031,6 @@ void Mips64Assembler::LoadConst64(GpuRegister rd, int64_t value) { } } -void Mips64Assembler::Addiu32(GpuRegister rt, GpuRegister rs, int32_t value, GpuRegister rtmp) { - if (IsInt<16>(value)) { - Addiu(rt, rs, value); - } else { - LoadConst32(rtmp, value); - Addu(rt, rs, rtmp); - } -} - void Mips64Assembler::Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, GpuRegister rtmp) { if (IsInt<16>(value)) { Daddiu(rt, rs, value); @@ -943,177 +1040,621 @@ void Mips64Assembler::Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, Gp } } -// -// MIPS64R6 branches -// -// -// Unconditional (pc + 32-bit signed offset): -// -// auipc at, ofs_high -// jic at, ofs_low -// // no delay/forbidden slot -// -// -// Conditional (pc + 32-bit signed offset): -// -// b<cond>c reg, +2 // skip next 2 instructions -// auipc at, ofs_high -// jic at, ofs_low -// // no delay/forbidden slot -// -// -// Unconditional (pc + 32-bit signed offset) and link: -// -// auipc reg, ofs_high -// daddiu reg, ofs_low -// jialc reg, 0 -// // no delay/forbidden slot -// -// -// TODO: use shorter instruction sequences whenever possible. 
-// - -void Mips64Assembler::Bind(Label* label) { - CHECK(!label->IsBound()); - int32_t bound_pc = buffer_.Size(); +void Mips64Assembler::Branch::InitShortOrLong(Mips64Assembler::Branch::OffsetBits offset_size, + Mips64Assembler::Branch::Type short_type, + Mips64Assembler::Branch::Type long_type) { + type_ = (offset_size <= branch_info_[short_type].offset_size) ? short_type : long_type; +} - // Walk the list of the branches (auipc + jic pairs) referring to and preceding this label. - // Embed the previously unknown pc-relative addresses in them. - while (label->IsLinked()) { - int32_t position = label->Position(); - // Extract the branch (instruction pair) - uint32_t auipc = buffer_.Load<uint32_t>(position); - uint32_t jic = buffer_.Load<uint32_t>(position + 4); // actually, jic or daddiu +void Mips64Assembler::Branch::InitializeType(bool is_call) { + OffsetBits offset_size = GetOffsetSizeNeeded(location_, target_); + if (is_call) { + InitShortOrLong(offset_size, kCall, kLongCall); + } else if (condition_ == kUncond) { + InitShortOrLong(offset_size, kUncondBranch, kLongUncondBranch); + } else { + if (condition_ == kCondEQZ || condition_ == kCondNEZ) { + // Special case for beqzc/bnezc with longer offset than in other b<cond>c instructions. + type_ = (offset_size <= kOffset23) ? kCondBranch : kLongCondBranch; + } else { + InitShortOrLong(offset_size, kCondBranch, kLongCondBranch); + } + } + old_type_ = type_; +} + +bool Mips64Assembler::Branch::IsNop(BranchCondition condition, GpuRegister lhs, GpuRegister rhs) { + switch (condition) { + case kCondLT: + case kCondGT: + case kCondNE: + case kCondLTU: + return lhs == rhs; + default: + return false; + } +} + +bool Mips64Assembler::Branch::IsUncond(BranchCondition condition, + GpuRegister lhs, + GpuRegister rhs) { + switch (condition) { + case kUncond: + return true; + case kCondGE: + case kCondLE: + case kCondEQ: + case kCondGEU: + return lhs == rhs; + default: + return false; + } +} + +Mips64Assembler::Branch::Branch(uint32_t location, uint32_t target) + : old_location_(location), + location_(location), + target_(target), + lhs_reg_(ZERO), + rhs_reg_(ZERO), + condition_(kUncond) { + InitializeType(false); +} + +Mips64Assembler::Branch::Branch(uint32_t location, + uint32_t target, + Mips64Assembler::BranchCondition condition, + GpuRegister lhs_reg, + GpuRegister rhs_reg) + : old_location_(location), + location_(location), + target_(target), + lhs_reg_(lhs_reg), + rhs_reg_(rhs_reg), + condition_(condition) { + CHECK_NE(condition, kUncond); + switch (condition) { + case kCondEQ: + case kCondNE: + case kCondLT: + case kCondGE: + case kCondLE: + case kCondGT: + case kCondLTU: + case kCondGEU: + CHECK_NE(lhs_reg, ZERO); + CHECK_NE(rhs_reg, ZERO); + break; + case kCondLTZ: + case kCondGEZ: + case kCondLEZ: + case kCondGTZ: + case kCondEQZ: + case kCondNEZ: + CHECK_NE(lhs_reg, ZERO); + CHECK_EQ(rhs_reg, ZERO); + break; + case kUncond: + UNREACHABLE(); + } + CHECK(!IsNop(condition, lhs_reg, rhs_reg)); + if (IsUncond(condition, lhs_reg, rhs_reg)) { + // Branch condition is always true, make the branch unconditional. 
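IsNop() and IsUncond() above canonicalize degenerate compare-and-branch conditions when both operands are the same register. In isolation the idea is just this:

// With identical operands, a two-register compare-and-branch is either never
// taken (lt, gt, ne, ltu) or always taken (ge, le, eq, geu).
enum class Cond { kEQ, kNE, kLT, kGE, kLE, kGT, kLTU, kGEU };

bool NeverTaken(Cond c, int lhs_reg, int rhs_reg) {
  if (lhs_reg != rhs_reg) return false;
  return c == Cond::kLT || c == Cond::kGT || c == Cond::kNE || c == Cond::kLTU;
}

bool AlwaysTaken(Cond c, int lhs_reg, int rhs_reg) {
  if (lhs_reg != rhs_reg) return false;
  return c == Cond::kGE || c == Cond::kLE || c == Cond::kEQ || c == Cond::kGEU;
}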
+ condition_ = kUncond; + } + InitializeType(false); +} + +Mips64Assembler::Branch::Branch(uint32_t location, uint32_t target, GpuRegister indirect_reg) + : old_location_(location), + location_(location), + target_(target), + lhs_reg_(indirect_reg), + rhs_reg_(ZERO), + condition_(kUncond) { + CHECK_NE(indirect_reg, ZERO); + CHECK_NE(indirect_reg, AT); + InitializeType(true); +} + +Mips64Assembler::BranchCondition Mips64Assembler::Branch::OppositeCondition( + Mips64Assembler::BranchCondition cond) { + switch (cond) { + case kCondLT: + return kCondGE; + case kCondGE: + return kCondLT; + case kCondLE: + return kCondGT; + case kCondGT: + return kCondLE; + case kCondLTZ: + return kCondGEZ; + case kCondGEZ: + return kCondLTZ; + case kCondLEZ: + return kCondGTZ; + case kCondGTZ: + return kCondLEZ; + case kCondEQ: + return kCondNE; + case kCondNE: + return kCondEQ; + case kCondEQZ: + return kCondNEZ; + case kCondNEZ: + return kCondEQZ; + case kCondLTU: + return kCondGEU; + case kCondGEU: + return kCondLTU; + case kUncond: + LOG(FATAL) << "Unexpected branch condition " << cond; + } + UNREACHABLE(); +} + +Mips64Assembler::Branch::Type Mips64Assembler::Branch::GetType() const { + return type_; +} + +Mips64Assembler::BranchCondition Mips64Assembler::Branch::GetCondition() const { + return condition_; +} + +GpuRegister Mips64Assembler::Branch::GetLeftRegister() const { + return lhs_reg_; +} + +GpuRegister Mips64Assembler::Branch::GetRightRegister() const { + return rhs_reg_; +} + +uint32_t Mips64Assembler::Branch::GetTarget() const { + return target_; +} - // Extract the location of the previous pair in the list (walking the list backwards; - // the previous pair location was stored in the immediate operands of the instructions) - int32_t prev = (auipc << 16) | (jic & 0xFFFF); +uint32_t Mips64Assembler::Branch::GetLocation() const { + return location_; +} + +uint32_t Mips64Assembler::Branch::GetOldLocation() const { + return old_location_; +} + +uint32_t Mips64Assembler::Branch::GetLength() const { + return branch_info_[type_].length; +} + +uint32_t Mips64Assembler::Branch::GetOldLength() const { + return branch_info_[old_type_].length; +} + +uint32_t Mips64Assembler::Branch::GetSize() const { + return GetLength() * sizeof(uint32_t); +} + +uint32_t Mips64Assembler::Branch::GetOldSize() const { + return GetOldLength() * sizeof(uint32_t); +} + +uint32_t Mips64Assembler::Branch::GetEndLocation() const { + return GetLocation() + GetSize(); +} + +uint32_t Mips64Assembler::Branch::GetOldEndLocation() const { + return GetOldLocation() + GetOldSize(); +} + +bool Mips64Assembler::Branch::IsLong() const { + switch (type_) { + // Short branches. + case kUncondBranch: + case kCondBranch: + case kCall: + return false; + // Long branches. + case kLongUncondBranch: + case kLongCondBranch: + case kLongCall: + return true; + } + UNREACHABLE(); +} + +bool Mips64Assembler::Branch::IsResolved() const { + return target_ != kUnresolved; +} + +Mips64Assembler::Branch::OffsetBits Mips64Assembler::Branch::GetOffsetSize() const { + OffsetBits offset_size = + (type_ == kCondBranch && (condition_ == kCondEQZ || condition_ == kCondNEZ)) + ? kOffset23 + : branch_info_[type_].offset_size; + return offset_size; +} + +Mips64Assembler::Branch::OffsetBits Mips64Assembler::Branch::GetOffsetSizeNeeded(uint32_t location, + uint32_t target) { + // For unresolved targets assume the shortest encoding + // (later it will be made longer if needed). 
+ if (target == kUnresolved) + return kOffset16; + int64_t distance = static_cast<int64_t>(target) - location; + // To simplify calculations in composite branches consisting of multiple instructions + // bump up the distance by a value larger than the max byte size of a composite branch. + distance += (distance >= 0) ? kMaxBranchSize : -kMaxBranchSize; + if (IsInt<kOffset16>(distance)) + return kOffset16; + else if (IsInt<kOffset18>(distance)) + return kOffset18; + else if (IsInt<kOffset21>(distance)) + return kOffset21; + else if (IsInt<kOffset23>(distance)) + return kOffset23; + else if (IsInt<kOffset28>(distance)) + return kOffset28; + return kOffset32; +} + +void Mips64Assembler::Branch::Resolve(uint32_t target) { + target_ = target; +} + +void Mips64Assembler::Branch::Relocate(uint32_t expand_location, uint32_t delta) { + if (location_ > expand_location) { + location_ += delta; + } + if (!IsResolved()) { + return; // Don't know the target yet. + } + if (target_ > expand_location) { + target_ += delta; + } +} + +void Mips64Assembler::Branch::PromoteToLong() { + switch (type_) { + // Short branches. + case kUncondBranch: + type_ = kLongUncondBranch; + break; + case kCondBranch: + type_ = kLongCondBranch; + break; + case kCall: + type_ = kLongCall; + break; + default: + // Note: 'type_' is already long. + break; + } + CHECK(IsLong()); +} + +uint32_t Mips64Assembler::Branch::PromoteIfNeeded(uint32_t max_short_distance) { + // If the branch is still unresolved or already long, nothing to do. + if (IsLong() || !IsResolved()) { + return 0; + } + // Promote the short branch to long if the offset size is too small + // to hold the distance between location_ and target_. + if (GetOffsetSizeNeeded(location_, target_) > GetOffsetSize()) { + PromoteToLong(); + uint32_t old_size = GetOldSize(); + uint32_t new_size = GetSize(); + CHECK_GT(new_size, old_size); + return new_size - old_size; + } + // The following logic is for debugging/testing purposes. + // Promote some short branches to long when it's not really required. + if (UNLIKELY(max_short_distance != std::numeric_limits<uint32_t>::max())) { + int64_t distance = static_cast<int64_t>(target_) - location_; + distance = (distance >= 0) ? distance : -distance; + if (distance >= max_short_distance) { + PromoteToLong(); + uint32_t old_size = GetOldSize(); + uint32_t new_size = GetSize(); + CHECK_GT(new_size, old_size); + return new_size - old_size; + } + } + return 0; +} + +uint32_t Mips64Assembler::Branch::GetOffsetLocation() const { + return location_ + branch_info_[type_].instr_offset * sizeof(uint32_t); +} + +uint32_t Mips64Assembler::Branch::GetOffset() const { + CHECK(IsResolved()); + uint32_t ofs_mask = 0xFFFFFFFF >> (32 - GetOffsetSize()); + // Calculate the byte distance between instructions and also account for + // different PC-relative origins. + uint32_t offset = target_ - GetOffsetLocation() - branch_info_[type_].pc_org * sizeof(uint32_t); + // Prepare the offset for encoding into the instruction(s). 
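(Illustrative aside, not part of the patch. The offset-width selection in GetOffsetSizeNeeded() above can be modelled in a few lines; the helper below is a hypothetical stand-in that mirrors the trick of bumping the distance by more than the worst-case composite-branch size so that the width estimate stays conservative.)

    #include <cstdint>
    #include <cstdio>
    #include <initializer_list>

    // Hypothetical stand-in for Branch::GetOffsetSizeNeeded(): number of signed bits
    // needed for (target - location), padded by the maximum composite-branch size
    // (32 instructions of 4 bytes each, matching kMaxBranchSize in the patch).
    static int OffsetBitsNeeded(uint32_t location, uint32_t target) {
      constexpr int64_t kMaxBranchSize = 32 * sizeof(uint32_t);
      int64_t distance = static_cast<int64_t>(target) - location;
      distance += (distance >= 0) ? kMaxBranchSize : -kMaxBranchSize;
      for (int bits : {16, 18, 21, 23, 28}) {
        int64_t limit = int64_t{1} << (bits - 1);
        if (-limit <= distance && distance < limit) {
          return bits;
        }
      }
      return 32;
    }

    int main() {
      std::printf("%d\n", OffsetBitsNeeded(0, 100));        // nearby forward target -> 16
      std::printf("%d\n", OffsetBitsNeeded(0, 1u << 19));   // too far for 18 bits -> 21
      std::printf("%d\n", OffsetBitsNeeded(1u << 30, 0));   // far backward target -> 32
      return 0;
    }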
+ offset = (offset & ofs_mask) >> branch_info_[type_].offset_shift; + return offset; +} - // Get the pc-relative address - uint32_t offset = bound_pc - position; - offset += (offset & 0x8000) << 1; // account for sign extension in jic/daddiu +Mips64Assembler::Branch* Mips64Assembler::GetBranch(uint32_t branch_id) { + CHECK_LT(branch_id, branches_.size()); + return &branches_[branch_id]; +} + +const Mips64Assembler::Branch* Mips64Assembler::GetBranch(uint32_t branch_id) const { + CHECK_LT(branch_id, branches_.size()); + return &branches_[branch_id]; +} + +void Mips64Assembler::Bind(Mips64Label* label) { + CHECK(!label->IsBound()); + uint32_t bound_pc = buffer_.Size(); - // Embed it in the two instructions - auipc = (auipc & 0xFFFF0000) | (offset >> 16); - jic = (jic & 0xFFFF0000) | (offset & 0xFFFF); + // Walk the list of branches referring to and preceding this label. + // Store the previously unknown target addresses in them. + while (label->IsLinked()) { + uint32_t branch_id = label->Position(); + Branch* branch = GetBranch(branch_id); + branch->Resolve(bound_pc); - // Save the adjusted instructions - buffer_.Store<uint32_t>(position, auipc); - buffer_.Store<uint32_t>(position + 4, jic); + uint32_t branch_location = branch->GetLocation(); + // Extract the location of the previous branch in the list (walking the list backwards; + // the previous branch ID was stored in the space reserved for this branch). + uint32_t prev = buffer_.Load<uint32_t>(branch_location); // On to the previous branch in the list... label->position_ = prev; } - // Now make the label object contain its own location - // (it will be used by the branches referring to and following this label) + // Now make the label object contain its own location (relative to the end of the preceding + // branch, if any; it will be used by the branches referring to and following this label). + label->prev_branch_id_plus_one_ = branches_.size(); + if (label->prev_branch_id_plus_one_) { + uint32_t branch_id = label->prev_branch_id_plus_one_ - 1; + const Branch* branch = GetBranch(branch_id); + bound_pc -= branch->GetEndLocation(); + } label->BindTo(bound_pc); } -void Mips64Assembler::B(Label* label) { - if (label->IsBound()) { - // Branch backwards (to a preceding label), distance is known - uint32_t offset = label->Position() - buffer_.Size(); - CHECK_LE(static_cast<int32_t>(offset), 0); - offset += (offset & 0x8000) << 1; // account for sign extension in jic - Auipc(AT, offset >> 16); - Jic(AT, offset); - } else { - // Branch forward (to a following label), distance is unknown - int32_t position = buffer_.Size(); - // The first branch forward will have 0 in its pc-relative address (copied from label's - // position). It will be the terminator of the list of forward-reaching branches. - uint32_t prev = label->position_; - Auipc(AT, prev >> 16); - Jic(AT, prev); - // Now make the link object point to the location of this branch - // (this forms a linked list of branches preceding this label) - label->LinkTo(position); +uint32_t Mips64Assembler::GetLabelLocation(Mips64Label* label) const { + CHECK(label->IsBound()); + uint32_t target = label->Position(); + if (label->prev_branch_id_plus_one_) { + // Get label location based on the branch preceding it. 
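(Illustrative aside, not part of the patch. Bind() above resolves forward references by unwinding a backward-linked list of branch ids stored through the label; a minimal, self-contained model of that bookkeeping, with hypothetical names, looks like this.)

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    constexpr uint32_t kUnresolvedId = 0xffffffffu;

    struct PendingBranch {
      uint32_t prev_id;  // id of the previous branch that used the same label
      uint32_t target;   // resolved target, or kUnresolvedId
    };

    int main() {
      std::vector<PendingBranch> branches;
      uint32_t label_head = kUnresolvedId;  // id of the most recent unresolved use

      auto use_label = [&]() {  // emit a forward branch to the not-yet-bound label
        branches.push_back({label_head, kUnresolvedId});
        label_head = static_cast<uint32_t>(branches.size() - 1);
      };

      use_label();
      use_label();
      use_label();

      // Bind(): the label's position is now known; walk the list backwards and
      // resolve every branch that referred to it (cf. Branch::Resolve()).
      uint32_t bound_pc = 400;
      for (uint32_t id = label_head; id != kUnresolvedId; ) {
        uint32_t prev = branches[id].prev_id;
        branches[id].target = bound_pc;
        id = prev;
      }

      for (size_t i = 0; i != branches.size(); ++i) {
        std::printf("branch %zu -> %u\n", i, branches[i].target);
      }
      return 0;
    }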
+ uint32_t branch_id = label->prev_branch_id_plus_one_ - 1; + const Branch* branch = GetBranch(branch_id); + target += branch->GetEndLocation(); + } + return target; +} + +uint32_t Mips64Assembler::GetAdjustedPosition(uint32_t old_position) { + // We can reconstruct the adjustment by going through all the branches from the beginning + // up to the old_position. Since we expect AdjustedPosition() to be called in a loop + // with increasing old_position, we can use the data from last AdjustedPosition() to + // continue where we left off and the whole loop should be O(m+n) where m is the number + // of positions to adjust and n is the number of branches. + if (old_position < last_old_position_) { + last_position_adjustment_ = 0; + last_old_position_ = 0; + last_branch_id_ = 0; + } + while (last_branch_id_ != branches_.size()) { + const Branch* branch = GetBranch(last_branch_id_); + if (branch->GetLocation() >= old_position + last_position_adjustment_) { + break; + } + last_position_adjustment_ += branch->GetSize() - branch->GetOldSize(); + ++last_branch_id_; + } + last_old_position_ = old_position; + return old_position + last_position_adjustment_; +} + +void Mips64Assembler::FinalizeLabeledBranch(Mips64Label* label) { + uint32_t length = branches_.back().GetLength(); + if (!label->IsBound()) { + // Branch forward (to a following label), distance is unknown. + // The first branch forward will contain 0, serving as the terminator of + // the list of forward-reaching branches. + Emit(label->position_); + length--; + // Now make the label object point to this branch + // (this forms a linked list of branches preceding this label). + uint32_t branch_id = branches_.size() - 1; + label->LinkTo(branch_id); + } + // Reserve space for the branch. + while (length--) { + Nop(); } } -void Mips64Assembler::Jalr(Label* label, GpuRegister indirect_reg) { - if (label->IsBound()) { - // Branch backwards (to a preceding label), distance is known - uint32_t offset = label->Position() - buffer_.Size(); - CHECK_LE(static_cast<int32_t>(offset), 0); - offset += (offset & 0x8000) << 1; // account for sign extension in daddiu - Auipc(indirect_reg, offset >> 16); - Daddiu(indirect_reg, indirect_reg, offset); - Jialc(indirect_reg, 0); - } else { - // Branch forward (to a following label), distance is unknown - int32_t position = buffer_.Size(); - // The first branch forward will have 0 in its pc-relative address (copied from label's - // position). It will be the terminator of the list of forward-reaching branches. - uint32_t prev = label->position_; - Auipc(indirect_reg, prev >> 16); - Daddiu(indirect_reg, indirect_reg, prev); - Jialc(indirect_reg, 0); - // Now make the link object point to the location of this branch - // (this forms a linked list of branches preceding this label) - label->LinkTo(position); +void Mips64Assembler::Buncond(Mips64Label* label) { + uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved; + branches_.emplace_back(buffer_.Size(), target); + FinalizeLabeledBranch(label); +} + +void Mips64Assembler::Bcond(Mips64Label* label, + BranchCondition condition, + GpuRegister lhs, + GpuRegister rhs) { + // If lhs = rhs, this can be a NOP. + if (Branch::IsNop(condition, lhs, rhs)) { + return; + } + uint32_t target = label->IsBound() ? 
GetLabelLocation(label) : Branch::kUnresolved; + branches_.emplace_back(buffer_.Size(), target, condition, lhs, rhs); + FinalizeLabeledBranch(label); +} + +void Mips64Assembler::Call(Mips64Label* label, GpuRegister indirect_reg) { + uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved; + branches_.emplace_back(buffer_.Size(), target, indirect_reg); + FinalizeLabeledBranch(label); +} + +void Mips64Assembler::PromoteBranches() { + // Promote short branches to long as necessary. + bool changed; + do { + changed = false; + for (auto& branch : branches_) { + CHECK(branch.IsResolved()); + uint32_t delta = branch.PromoteIfNeeded(); + // If this branch has been promoted and needs to expand in size, + // relocate all branches by the expansion size. + if (delta) { + changed = true; + uint32_t expand_location = branch.GetLocation(); + for (auto& branch2 : branches_) { + branch2.Relocate(expand_location, delta); + } + } + } + } while (changed); + + // Account for branch expansion by resizing the code buffer + // and moving the code in it to its final location. + size_t branch_count = branches_.size(); + if (branch_count > 0) { + // Resize. + Branch& last_branch = branches_[branch_count - 1]; + uint32_t size_delta = last_branch.GetEndLocation() - last_branch.GetOldEndLocation(); + uint32_t old_size = buffer_.Size(); + buffer_.Resize(old_size + size_delta); + // Move the code residing between branch placeholders. + uint32_t end = old_size; + for (size_t i = branch_count; i > 0; ) { + Branch& branch = branches_[--i]; + uint32_t size = end - branch.GetOldEndLocation(); + buffer_.Move(branch.GetEndLocation(), branch.GetOldEndLocation(), size); + end = branch.GetOldLocation(); + } + } +} + +// Note: make sure branch_info_[] and EmitBranch() are kept synchronized. +const Mips64Assembler::Branch::BranchInfo Mips64Assembler::Branch::branch_info_[] = { + // Short branches. + { 1, 0, 1, Mips64Assembler::Branch::kOffset28, 2 }, // kUncondBranch + { 2, 0, 1, Mips64Assembler::Branch::kOffset18, 2 }, // kCondBranch + // Exception: kOffset23 for beqzc/bnezc + { 2, 0, 0, Mips64Assembler::Branch::kOffset21, 2 }, // kCall + // Long branches. + { 2, 0, 0, Mips64Assembler::Branch::kOffset32, 0 }, // kLongUncondBranch + { 3, 1, 0, Mips64Assembler::Branch::kOffset32, 0 }, // kLongCondBranch + { 3, 0, 0, Mips64Assembler::Branch::kOffset32, 0 }, // kLongCall +}; + +// Note: make sure branch_info_[] and EmitBranch() are kept synchronized. +void Mips64Assembler::EmitBranch(Mips64Assembler::Branch* branch) { + CHECK(overwriting_); + overwrite_location_ = branch->GetLocation(); + uint32_t offset = branch->GetOffset(); + BranchCondition condition = branch->GetCondition(); + GpuRegister lhs = branch->GetLeftRegister(); + GpuRegister rhs = branch->GetRightRegister(); + switch (branch->GetType()) { + // Short branches. + case Branch::kUncondBranch: + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Bc(offset); + break; + case Branch::kCondBranch: + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + EmitBcondc(condition, lhs, rhs, offset); + Nop(); // TODO: improve by filling the forbidden slot. + break; + case Branch::kCall: + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Addiupc(lhs, offset); + Jialc(lhs, 0); + break; + + // Long branches. + case Branch::kLongUncondBranch: + offset += (offset & 0x8000) << 1; // Account for sign extension in jic. 
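(Illustrative aside, not part of the patch. The line above pre-adjusts the 32-bit offset because jic sign-extends its 16-bit immediate: when bit 15 of the low half is set, the sign extension subtracts 0x10000, so the high half handed to auipc must be one larger. A small self-contained check, using a hypothetical model of the auipc/jic pair:)

    #include <cassert>
    #include <cstdint>

    // Hypothetical model of what the hardware does with the pair:
    //   auipc at, hi16   ->  at = pc + (sign_extend(hi16) << 16)
    //   jic   at, lo16   ->  pc = at + sign_extend(lo16)
    static uint32_t ApplyAuipcJic(uint32_t pc, uint16_t hi16, uint16_t lo16) {
      int32_t hi = static_cast<int16_t>(hi16);
      int32_t lo = static_cast<int16_t>(lo16);
      return pc + (static_cast<uint32_t>(hi) << 16) + static_cast<uint32_t>(lo);
    }

    int main() {
      uint32_t pc = 0x1000;
      uint32_t raw_offset = 0x0001A000;                               // bit 15 of the low half is set
      uint32_t adjusted = raw_offset + ((raw_offset & 0x8000) << 1);  // the compensation above
      uint16_t hi = static_cast<uint16_t>(adjusted >> 16);
      uint16_t lo = static_cast<uint16_t>(adjusted);
      // With the compensation the pair reaches exactly pc + raw_offset; without it,
      // the sign extension of 0xA000 would leave the branch 0x10000 bytes short.
      assert(ApplyAuipcJic(pc, hi, lo) == pc + raw_offset);
      return 0;
    }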
+ CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Auipc(AT, High16Bits(offset)); + Jic(AT, Low16Bits(offset)); + break; + case Branch::kLongCondBranch: + EmitBcondc(Branch::OppositeCondition(condition), lhs, rhs, 2); + offset += (offset & 0x8000) << 1; // Account for sign extension in jic. + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Auipc(AT, High16Bits(offset)); + Jic(AT, Low16Bits(offset)); + break; + case Branch::kLongCall: + offset += (offset & 0x8000) << 1; // Account for sign extension in daddiu. + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + Auipc(lhs, High16Bits(offset)); + Daddiu(lhs, lhs, Low16Bits(offset)); + Jialc(lhs, 0); + break; } + CHECK_EQ(overwrite_location_, branch->GetEndLocation()); + CHECK_LT(branch->GetSize(), static_cast<uint32_t>(Branch::kMaxBranchSize)); } -void Mips64Assembler::Bltc(GpuRegister rs, GpuRegister rt, Label* label) { - Bgec(rs, rt, 2); - B(label); +void Mips64Assembler::Bc(Mips64Label* label) { + Buncond(label); } -void Mips64Assembler::Bltzc(GpuRegister rt, Label* label) { - Bgezc(rt, 2); - B(label); +void Mips64Assembler::Jialc(Mips64Label* label, GpuRegister indirect_reg) { + Call(label, indirect_reg); } -void Mips64Assembler::Bgtzc(GpuRegister rt, Label* label) { - Blezc(rt, 2); - B(label); +void Mips64Assembler::Bltc(GpuRegister rs, GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondLT, rs, rt); } -void Mips64Assembler::Bgec(GpuRegister rs, GpuRegister rt, Label* label) { - Bltc(rs, rt, 2); - B(label); +void Mips64Assembler::Bltzc(GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondLTZ, rt); } -void Mips64Assembler::Bgezc(GpuRegister rt, Label* label) { - Bltzc(rt, 2); - B(label); +void Mips64Assembler::Bgtzc(GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondGTZ, rt); } -void Mips64Assembler::Blezc(GpuRegister rt, Label* label) { - Bgtzc(rt, 2); - B(label); +void Mips64Assembler::Bgec(GpuRegister rs, GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondGE, rs, rt); } -void Mips64Assembler::Bltuc(GpuRegister rs, GpuRegister rt, Label* label) { - Bgeuc(rs, rt, 2); - B(label); +void Mips64Assembler::Bgezc(GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondGEZ, rt); } -void Mips64Assembler::Bgeuc(GpuRegister rs, GpuRegister rt, Label* label) { - Bltuc(rs, rt, 2); - B(label); +void Mips64Assembler::Blezc(GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondLEZ, rt); } -void Mips64Assembler::Beqc(GpuRegister rs, GpuRegister rt, Label* label) { - Bnec(rs, rt, 2); - B(label); +void Mips64Assembler::Bltuc(GpuRegister rs, GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondLTU, rs, rt); } -void Mips64Assembler::Bnec(GpuRegister rs, GpuRegister rt, Label* label) { - Beqc(rs, rt, 2); - B(label); +void Mips64Assembler::Bgeuc(GpuRegister rs, GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondGEU, rs, rt); } -void Mips64Assembler::Beqzc(GpuRegister rs, Label* label) { - Bnezc(rs, 2); - B(label); +void Mips64Assembler::Beqc(GpuRegister rs, GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondEQ, rs, rt); } -void Mips64Assembler::Bnezc(GpuRegister rs, Label* label) { - Beqzc(rs, 2); - B(label); +void Mips64Assembler::Bnec(GpuRegister rs, GpuRegister rt, Mips64Label* label) { + Bcond(label, kCondNE, rs, rt); +} + +void Mips64Assembler::Beqzc(GpuRegister rs, Mips64Label* label) { + Bcond(label, kCondEQZ, rs); +} + +void Mips64Assembler::Bnezc(GpuRegister rs, Mips64Label* label) { + Bcond(label, kCondNEZ, rs); } void 
Mips64Assembler::LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base, @@ -1256,6 +1797,7 @@ void Mips64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, const std::vector<ManagedRegister>& callee_save_regs, const ManagedRegisterEntrySpills& entry_spills) { CHECK_ALIGNED(frame_size, kStackAlignment); + DCHECK(!overwriting_); // Increase frame to required size. IncreaseFrameSize(frame_size); @@ -1298,6 +1840,7 @@ void Mips64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, void Mips64Assembler::RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs) { CHECK_ALIGNED(frame_size, kStackAlignment); + DCHECK(!overwriting_); cfi_.RememberState(); // Pop callee saves and return address @@ -1316,6 +1859,7 @@ void Mips64Assembler::RemoveFrame(size_t frame_size, // Then jump to the return address. Jr(RA); + Nop(); // The CFI should be restored for any code that follows the exit block. cfi_.RestoreState(); @@ -1324,12 +1868,14 @@ void Mips64Assembler::RemoveFrame(size_t frame_size, void Mips64Assembler::IncreaseFrameSize(size_t adjust) { CHECK_ALIGNED(adjust, kFramePointerSize); + DCHECK(!overwriting_); Daddiu64(SP, SP, static_cast<int32_t>(-adjust)); cfi_.AdjustCFAOffset(adjust); } void Mips64Assembler::DecreaseFrameSize(size_t adjust) { CHECK_ALIGNED(adjust, kFramePointerSize); + DCHECK(!overwriting_); Daddiu64(SP, SP, static_cast<int32_t>(adjust)); cfi_.AdjustCFAOffset(-adjust); } @@ -1379,17 +1925,7 @@ void Mips64Assembler::StoreImmediateToFrame(FrameOffset dest, uint32_t imm, StoreToOffset(kStoreWord, scratch.AsGpuRegister(), SP, dest.Int32Value()); } -void Mips64Assembler::StoreImmediateToThread64(ThreadOffset<8> dest, uint32_t imm, - ManagedRegister mscratch) { - Mips64ManagedRegister scratch = mscratch.AsMips64(); - CHECK(scratch.IsGpuRegister()) << scratch; - // TODO: it's unclear wether 32 or 64 bits need to be stored (Arm64 and x86/x64 disagree?). - // Is this function even referenced anywhere else in the code? 
- LoadConst32(scratch.AsGpuRegister(), imm); - StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), S1, dest.Int32Value()); -} - -void Mips64Assembler::StoreStackOffsetToThread64(ThreadOffset<8> thr_offs, +void Mips64Assembler::StoreStackOffsetToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs, FrameOffset fr_offs, ManagedRegister mscratch) { Mips64ManagedRegister scratch = mscratch.AsMips64(); @@ -1398,7 +1934,7 @@ void Mips64Assembler::StoreStackOffsetToThread64(ThreadOffset<8> thr_offs, StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), S1, thr_offs.Int32Value()); } -void Mips64Assembler::StoreStackPointerToThread64(ThreadOffset<8> thr_offs) { +void Mips64Assembler::StoreStackPointerToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs) { StoreToOffset(kStoreDoubleword, SP, S1, thr_offs.Int32Value()); } @@ -1415,7 +1951,9 @@ void Mips64Assembler::Load(ManagedRegister mdest, FrameOffset src, size_t size) return EmitLoad(mdest, SP, src.Int32Value(), size); } -void Mips64Assembler::LoadFromThread64(ManagedRegister mdest, ThreadOffset<8> src, size_t size) { +void Mips64Assembler::LoadFromThread64(ManagedRegister mdest, + ThreadOffset<kMipsDoublewordSize> src, + size_t size) { return EmitLoad(mdest, S1, src.Int32Value(), size); } @@ -1449,18 +1987,20 @@ void Mips64Assembler::LoadRawPtr(ManagedRegister mdest, ManagedRegister base, } void Mips64Assembler::LoadRawPtrFromThread64(ManagedRegister mdest, - ThreadOffset<8> offs) { + ThreadOffset<kMipsDoublewordSize> offs) { Mips64ManagedRegister dest = mdest.AsMips64(); CHECK(dest.IsGpuRegister()); LoadFromOffset(kLoadDoubleword, dest.AsGpuRegister(), S1, offs.Int32Value()); } -void Mips64Assembler::SignExtend(ManagedRegister /*mreg*/, size_t /*size*/) { - UNIMPLEMENTED(FATAL) << "no sign extension necessary for mips"; +void Mips64Assembler::SignExtend(ManagedRegister mreg ATTRIBUTE_UNUSED, + size_t size ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL) << "No sign extension necessary for MIPS64"; } -void Mips64Assembler::ZeroExtend(ManagedRegister /*mreg*/, size_t /*size*/) { - UNIMPLEMENTED(FATAL) << "no zero extension necessary for mips"; +void Mips64Assembler::ZeroExtend(ManagedRegister mreg ATTRIBUTE_UNUSED, + size_t size ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL) << "No zero extension necessary for MIPS64"; } void Mips64Assembler::Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) { @@ -1492,7 +2032,7 @@ void Mips64Assembler::CopyRef(FrameOffset dest, FrameOffset src, } void Mips64Assembler::CopyRawPtrFromThread64(FrameOffset fr_offs, - ThreadOffset<8> thr_offs, + ThreadOffset<kMipsDoublewordSize> thr_offs, ManagedRegister mscratch) { Mips64ManagedRegister scratch = mscratch.AsMips64(); CHECK(scratch.IsGpuRegister()) << scratch; @@ -1500,7 +2040,7 @@ void Mips64Assembler::CopyRawPtrFromThread64(FrameOffset fr_offs, StoreToOffset(kStoreDoubleword, scratch.AsGpuRegister(), SP, fr_offs.Int32Value()); } -void Mips64Assembler::CopyRawPtrToThread64(ThreadOffset<8> thr_offs, +void Mips64Assembler::CopyRawPtrToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs, FrameOffset fr_offs, ManagedRegister mscratch) { Mips64ManagedRegister scratch = mscratch.AsMips64(); @@ -1561,9 +2101,12 @@ void Mips64Assembler::Copy(ManagedRegister dest_base, Offset dest_offset, FrameO } } -void Mips64Assembler::Copy(FrameOffset /*dest*/, FrameOffset /*src_base*/, Offset /*src_offset*/, - ManagedRegister /*mscratch*/, size_t /*size*/) { - UNIMPLEMENTED(FATAL) << "no mips64 implementation"; +void Mips64Assembler::Copy(FrameOffset dest ATTRIBUTE_UNUSED, + 
FrameOffset src_base ATTRIBUTE_UNUSED, + Offset src_offset ATTRIBUTE_UNUSED, + ManagedRegister mscratch ATTRIBUTE_UNUSED, + size_t size ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL) << "No MIPS64 implementation"; } void Mips64Assembler::Copy(ManagedRegister dest, Offset dest_offset, @@ -1584,15 +2127,18 @@ void Mips64Assembler::Copy(ManagedRegister dest, Offset dest_offset, } } -void Mips64Assembler::Copy(FrameOffset /*dest*/, Offset /*dest_offset*/, FrameOffset /*src*/, Offset -/*src_offset*/, - ManagedRegister /*mscratch*/, size_t /*size*/) { - UNIMPLEMENTED(FATAL) << "no mips64 implementation"; +void Mips64Assembler::Copy(FrameOffset dest ATTRIBUTE_UNUSED, + Offset dest_offset ATTRIBUTE_UNUSED, + FrameOffset src ATTRIBUTE_UNUSED, + Offset src_offset ATTRIBUTE_UNUSED, + ManagedRegister mscratch ATTRIBUTE_UNUSED, + size_t size ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL) << "No MIPS64 implementation"; } -void Mips64Assembler::MemoryBarrier(ManagedRegister) { +void Mips64Assembler::MemoryBarrier(ManagedRegister mreg ATTRIBUTE_UNUSED) { // TODO: sync? - UNIMPLEMENTED(FATAL) << "no mips64 implementation"; + UNIMPLEMENTED(FATAL) << "No MIPS64 implementation"; } void Mips64Assembler::CreateHandleScopeEntry(ManagedRegister mout_reg, @@ -1604,7 +2150,7 @@ void Mips64Assembler::CreateHandleScopeEntry(ManagedRegister mout_reg, CHECK(in_reg.IsNoRegister() || in_reg.IsGpuRegister()) << in_reg; CHECK(out_reg.IsGpuRegister()) << out_reg; if (null_allowed) { - Label null_arg; + Mips64Label null_arg; // Null values get a handle scope entry value of 0. Otherwise, the handle scope entry is // the address in the handle scope holding the reference. // e.g. out_reg = (handle == 0) ? 0 : (SP+handle_offset) @@ -1631,7 +2177,7 @@ void Mips64Assembler::CreateHandleScopeEntry(FrameOffset out_off, Mips64ManagedRegister scratch = mscratch.AsMips64(); CHECK(scratch.IsGpuRegister()) << scratch; if (null_allowed) { - Label null_arg; + Mips64Label null_arg; LoadFromOffset(kLoadUnsignedWord, scratch.AsGpuRegister(), SP, handle_scope_offset.Int32Value()); // Null values get a handle scope entry value of 0. 
Otherwise, the handle scope entry is @@ -1653,7 +2199,7 @@ void Mips64Assembler::LoadReferenceFromHandleScope(ManagedRegister mout_reg, Mips64ManagedRegister in_reg = min_reg.AsMips64(); CHECK(out_reg.IsGpuRegister()) << out_reg; CHECK(in_reg.IsGpuRegister()) << in_reg; - Label null_arg; + Mips64Label null_arg; if (!out_reg.Equals(in_reg)) { LoadConst32(out_reg.AsGpuRegister(), 0); } @@ -1663,11 +2209,13 @@ void Mips64Assembler::LoadReferenceFromHandleScope(ManagedRegister mout_reg, Bind(&null_arg); } -void Mips64Assembler::VerifyObject(ManagedRegister /*src*/, bool /*could_be_null*/) { +void Mips64Assembler::VerifyObject(ManagedRegister src ATTRIBUTE_UNUSED, + bool could_be_null ATTRIBUTE_UNUSED) { // TODO: not validating references } -void Mips64Assembler::VerifyObject(FrameOffset /*src*/, bool /*could_be_null*/) { +void Mips64Assembler::VerifyObject(FrameOffset src ATTRIBUTE_UNUSED, + bool could_be_null ATTRIBUTE_UNUSED) { // TODO: not validating references } @@ -1679,6 +2227,7 @@ void Mips64Assembler::Call(ManagedRegister mbase, Offset offset, ManagedRegister LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(), base.AsGpuRegister(), offset.Int32Value()); Jalr(scratch.AsGpuRegister()); + Nop(); // TODO: place reference map on call } @@ -1691,11 +2240,13 @@ void Mips64Assembler::Call(FrameOffset base, Offset offset, ManagedRegister mscr LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(), scratch.AsGpuRegister(), offset.Int32Value()); Jalr(scratch.AsGpuRegister()); + Nop(); // TODO: place reference map on call } -void Mips64Assembler::CallFromThread64(ThreadOffset<8> /*offset*/, ManagedRegister /*mscratch*/) { - UNIMPLEMENTED(FATAL) << "no mips64 implementation"; +void Mips64Assembler::CallFromThread64(ThreadOffset<kMipsDoublewordSize> offset ATTRIBUTE_UNUSED, + ManagedRegister mscratch ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL) << "No MIPS64 implementation"; } void Mips64Assembler::GetCurrentThread(ManagedRegister tr) { @@ -1703,37 +2254,39 @@ void Mips64Assembler::GetCurrentThread(ManagedRegister tr) { } void Mips64Assembler::GetCurrentThread(FrameOffset offset, - ManagedRegister /*mscratch*/) { + ManagedRegister mscratch ATTRIBUTE_UNUSED) { StoreToOffset(kStoreDoubleword, S1, SP, offset.Int32Value()); } void Mips64Assembler::ExceptionPoll(ManagedRegister mscratch, size_t stack_adjust) { Mips64ManagedRegister scratch = mscratch.AsMips64(); - Mips64ExceptionSlowPath* slow = new Mips64ExceptionSlowPath(scratch, stack_adjust); - buffer_.EnqueueSlowPath(slow); - LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(), - S1, Thread::ExceptionOffset<8>().Int32Value()); - Bnezc(scratch.AsGpuRegister(), slow->Entry()); -} - -void Mips64ExceptionSlowPath::Emit(Assembler* sasm) { - Mips64Assembler* sp_asm = down_cast<Mips64Assembler*>(sasm); -#define __ sp_asm-> - __ Bind(&entry_); - if (stack_adjust_ != 0) { // Fix up the frame. - __ DecreaseFrameSize(stack_adjust_); + exception_blocks_.emplace_back(scratch, stack_adjust); + LoadFromOffset(kLoadDoubleword, + scratch.AsGpuRegister(), + S1, + Thread::ExceptionOffset<kMipsDoublewordSize>().Int32Value()); + Bnezc(scratch.AsGpuRegister(), exception_blocks_.back().Entry()); +} + +void Mips64Assembler::EmitExceptionPoll(Mips64ExceptionSlowPath* exception) { + Bind(exception->Entry()); + if (exception->stack_adjust_ != 0) { // Fix up the frame. 
+ DecreaseFrameSize(exception->stack_adjust_); } - // Pass exception object as argument - // Don't care about preserving A0 as this call won't return - __ Move(A0, scratch_.AsGpuRegister()); + // Pass exception object as argument. + // Don't care about preserving A0 as this call won't return. + CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); + Move(A0, exception->scratch_.AsGpuRegister()); // Set up call to Thread::Current()->pDeliverException - __ LoadFromOffset(kLoadDoubleword, T9, S1, - QUICK_ENTRYPOINT_OFFSET(8, pDeliverException).Int32Value()); - // TODO: check T9 usage - __ Jr(T9); + LoadFromOffset(kLoadDoubleword, + T9, + S1, + QUICK_ENTRYPOINT_OFFSET(kMipsDoublewordSize, pDeliverException).Int32Value()); + Jr(T9); + Nop(); + // Call never returns - __ Break(); -#undef __ + Break(); } } // namespace mips64 diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h index 42962bca20..57fc19a6e9 100644 --- a/compiler/utils/mips64/assembler_mips64.h +++ b/compiler/utils/mips64/assembler_mips64.h @@ -17,18 +17,22 @@ #ifndef ART_COMPILER_UTILS_MIPS64_ASSEMBLER_MIPS64_H_ #define ART_COMPILER_UTILS_MIPS64_ASSEMBLER_MIPS64_H_ +#include <utility> #include <vector> #include "base/macros.h" #include "constants_mips64.h" #include "globals.h" #include "managed_register_mips64.h" -#include "utils/assembler.h" #include "offsets.h" +#include "utils/assembler.h" +#include "utils/label.h" namespace art { namespace mips64 { +static constexpr size_t kMipsDoublewordSize = 8; + enum LoadOperandType { kLoadSignedByte, kLoadUnsignedByte, @@ -60,10 +64,57 @@ enum FPClassMaskType { kPositiveZero = 0x200, }; +class Mips64Label : public Label { + public: + Mips64Label() : prev_branch_id_plus_one_(0) {} + + Mips64Label(Mips64Label&& src) + : Label(std::move(src)), prev_branch_id_plus_one_(src.prev_branch_id_plus_one_) {} + + private: + uint32_t prev_branch_id_plus_one_; // To get distance from preceding branch, if any. + + friend class Mips64Assembler; + DISALLOW_COPY_AND_ASSIGN(Mips64Label); +}; + +// Slowpath entered when Thread::Current()->_exception is non-null. +class Mips64ExceptionSlowPath { + public: + explicit Mips64ExceptionSlowPath(Mips64ManagedRegister scratch, size_t stack_adjust) + : scratch_(scratch), stack_adjust_(stack_adjust) {} + + Mips64ExceptionSlowPath(Mips64ExceptionSlowPath&& src) + : scratch_(src.scratch_), + stack_adjust_(src.stack_adjust_), + exception_entry_(std::move(src.exception_entry_)) {} + + private: + Mips64Label* Entry() { return &exception_entry_; } + const Mips64ManagedRegister scratch_; + const size_t stack_adjust_; + Mips64Label exception_entry_; + + friend class Mips64Assembler; + DISALLOW_COPY_AND_ASSIGN(Mips64ExceptionSlowPath); +}; + class Mips64Assembler FINAL : public Assembler { public: - Mips64Assembler() {} - virtual ~Mips64Assembler() {} + Mips64Assembler() + : overwriting_(false), + overwrite_location_(0), + last_position_adjustment_(0), + last_old_position_(0), + last_branch_id_(0) { + cfi().DelayEmittingAdvancePCs(); + } + + virtual ~Mips64Assembler() { + for (auto& branch : branches_) { + CHECK(branch.IsResolved()); + } + } // Emit Machine Instructions. 
void Addu(GpuRegister rd, GpuRegister rs, GpuRegister rt); @@ -156,14 +207,12 @@ class Mips64Assembler FINAL : public Assembler { void Dclz(GpuRegister rd, GpuRegister rs); void Dclo(GpuRegister rd, GpuRegister rs); - void Beq(GpuRegister rs, GpuRegister rt, uint16_t imm16); - void Bne(GpuRegister rs, GpuRegister rt, uint16_t imm16); - void J(uint32_t addr26); - void Jal(uint32_t addr26); void Jalr(GpuRegister rd, GpuRegister rs); void Jalr(GpuRegister rs); void Jr(GpuRegister rs); void Auipc(GpuRegister rs, uint16_t imm16); + void Addiupc(GpuRegister rs, uint32_t imm19); + void Bc(uint32_t imm26); void Jic(GpuRegister rt, uint16_t imm16); void Jialc(GpuRegister rt, uint16_t imm16); void Bltc(GpuRegister rs, GpuRegister rt, uint16_t imm16); @@ -240,32 +289,34 @@ class Mips64Assembler FINAL : public Assembler { void Clear(GpuRegister rd); void Not(GpuRegister rd, GpuRegister rs); - // Higher level composite instructions + // Higher level composite instructions. void LoadConst32(GpuRegister rd, int32_t value); void LoadConst64(GpuRegister rd, int64_t value); // MIPS64 - void Addiu32(GpuRegister rt, GpuRegister rs, int32_t value, GpuRegister rtmp = AT); void Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, GpuRegister rtmp = AT); // MIPS64 - void Bind(Label* label) OVERRIDE; - void Jump(Label* label) OVERRIDE { - B(label); + void Bind(Label* label) OVERRIDE { + Bind(down_cast<Mips64Label*>(label)); + } + void Jump(Label* label ATTRIBUTE_UNUSED) OVERRIDE { + UNIMPLEMENTED(FATAL) << "Do not use Jump for MIPS64"; } - void B(Label* label); - void Jalr(Label* label, GpuRegister indirect_reg = RA); - // TODO: implement common for R6 and non-R6 interface for conditional branches? - void Bltc(GpuRegister rs, GpuRegister rt, Label* label); - void Bltzc(GpuRegister rt, Label* label); - void Bgtzc(GpuRegister rt, Label* label); - void Bgec(GpuRegister rs, GpuRegister rt, Label* label); - void Bgezc(GpuRegister rt, Label* label); - void Blezc(GpuRegister rt, Label* label); - void Bltuc(GpuRegister rs, GpuRegister rt, Label* label); - void Bgeuc(GpuRegister rs, GpuRegister rt, Label* label); - void Beqc(GpuRegister rs, GpuRegister rt, Label* label); - void Bnec(GpuRegister rs, GpuRegister rt, Label* label); - void Beqzc(GpuRegister rs, Label* label); - void Bnezc(GpuRegister rs, Label* label); + + void Bind(Mips64Label* label); + void Bc(Mips64Label* label); + void Jialc(Mips64Label* label, GpuRegister indirect_reg); + void Bltc(GpuRegister rs, GpuRegister rt, Mips64Label* label); + void Bltzc(GpuRegister rt, Mips64Label* label); + void Bgtzc(GpuRegister rt, Mips64Label* label); + void Bgec(GpuRegister rs, GpuRegister rt, Mips64Label* label); + void Bgezc(GpuRegister rt, Mips64Label* label); + void Blezc(GpuRegister rt, Mips64Label* label); + void Bltuc(GpuRegister rs, GpuRegister rt, Mips64Label* label); + void Bgeuc(GpuRegister rs, GpuRegister rt, Mips64Label* label); + void Beqc(GpuRegister rs, GpuRegister rt, Mips64Label* label); + void Bnec(GpuRegister rs, GpuRegister rt, Mips64Label* label); + void Beqzc(GpuRegister rs, Mips64Label* label); + void Bnezc(GpuRegister rs, Mips64Label* label); void EmitLoad(ManagedRegister m_dst, GpuRegister src_register, int32_t src_offset, size_t size); void LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base, int32_t offset); @@ -277,43 +328,42 @@ class Mips64Assembler FINAL : public Assembler { void Emit(uint32_t value); // - // Overridden common assembler high-level functionality + // Overridden common assembler high-level functionality. 
// - // Emit code that will create an activation on the stack + // Emit code that will create an activation on the stack. void BuildFrame(size_t frame_size, ManagedRegister method_reg, const std::vector<ManagedRegister>& callee_save_regs, const ManagedRegisterEntrySpills& entry_spills) OVERRIDE; - // Emit code that will remove an activation from the stack + // Emit code that will remove an activation from the stack. void RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs) OVERRIDE; void IncreaseFrameSize(size_t adjust) OVERRIDE; void DecreaseFrameSize(size_t adjust) OVERRIDE; - // Store routines + // Store routines. void Store(FrameOffset offs, ManagedRegister msrc, size_t size) OVERRIDE; void StoreRef(FrameOffset dest, ManagedRegister msrc) OVERRIDE; void StoreRawPtr(FrameOffset dest, ManagedRegister msrc) OVERRIDE; void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister mscratch) OVERRIDE; - void StoreImmediateToThread64(ThreadOffset<8> dest, uint32_t imm, - ManagedRegister mscratch) OVERRIDE; - - void StoreStackOffsetToThread64(ThreadOffset<8> thr_offs, FrameOffset fr_offs, + void StoreStackOffsetToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs, FrameOffset fr_offs, ManagedRegister mscratch) OVERRIDE; - void StoreStackPointerToThread64(ThreadOffset<8> thr_offs) OVERRIDE; + void StoreStackPointerToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs) OVERRIDE; void StoreSpanning(FrameOffset dest, ManagedRegister msrc, FrameOffset in_off, ManagedRegister mscratch) OVERRIDE; - // Load routines + // Load routines. void Load(ManagedRegister mdest, FrameOffset src, size_t size) OVERRIDE; - void LoadFromThread64(ManagedRegister mdest, ThreadOffset<8> src, size_t size) OVERRIDE; + void LoadFromThread64(ManagedRegister mdest, + ThreadOffset<kMipsDoublewordSize> src, + size_t size) OVERRIDE; void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE; @@ -322,15 +372,16 @@ class Mips64Assembler FINAL : public Assembler { void LoadRawPtr(ManagedRegister mdest, ManagedRegister base, Offset offs) OVERRIDE; - void LoadRawPtrFromThread64(ManagedRegister mdest, ThreadOffset<8> offs) OVERRIDE; + void LoadRawPtrFromThread64(ManagedRegister mdest, + ThreadOffset<kMipsDoublewordSize> offs) OVERRIDE; - // Copying routines + // Copying routines. void Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) OVERRIDE; - void CopyRawPtrFromThread64(FrameOffset fr_offs, ThreadOffset<8> thr_offs, + void CopyRawPtrFromThread64(FrameOffset fr_offs, ThreadOffset<kMipsDoublewordSize> thr_offs, ManagedRegister mscratch) OVERRIDE; - void CopyRawPtrToThread64(ThreadOffset<8> thr_offs, FrameOffset fr_offs, + void CopyRawPtrToThread64(ThreadOffset<kMipsDoublewordSize> thr_offs, FrameOffset fr_offs, ManagedRegister mscratch) OVERRIDE; void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) OVERRIDE; @@ -354,13 +405,13 @@ class Mips64Assembler FINAL : public Assembler { void MemoryBarrier(ManagedRegister) OVERRIDE; - // Sign extension + // Sign extension. void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE; - // Zero extension + // Zero extension. void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE; - // Exploit fast access in managed code to Thread::Current() + // Exploit fast access in managed code to Thread::Current(). 
void GetCurrentThread(ManagedRegister tr) OVERRIDE; void GetCurrentThread(FrameOffset dest_offset, ManagedRegister mscratch) OVERRIDE; @@ -376,7 +427,7 @@ class Mips64Assembler FINAL : public Assembler { void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset, ManagedRegister mscratch, bool null_allowed) OVERRIDE; - // src holds a handle scope entry (Object**) load this into dst + // src holds a handle scope entry (Object**) load this into dst. void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE; // Heap::VerifyObject on src. In some cases (such as a reference to this) we @@ -384,37 +435,253 @@ class Mips64Assembler FINAL : public Assembler { void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE; void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE; - // Call to address held at [base+offset] + // Call to address held at [base+offset]. void Call(ManagedRegister base, Offset offset, ManagedRegister mscratch) OVERRIDE; void Call(FrameOffset base, Offset offset, ManagedRegister mscratch) OVERRIDE; - void CallFromThread64(ThreadOffset<8> offset, ManagedRegister mscratch) OVERRIDE; + void CallFromThread64(ThreadOffset<kMipsDoublewordSize> offset, + ManagedRegister mscratch) OVERRIDE; // Generate code to check if Thread::Current()->exception_ is non-null // and branch to a ExceptionSlowPath if it is. void ExceptionPoll(ManagedRegister mscratch, size_t stack_adjust) OVERRIDE; + // Emit slow paths queued during assembly and promote short branches to long if needed. + void FinalizeCode() OVERRIDE; + + // Emit branches and finalize all instructions. + void FinalizeInstructions(const MemoryRegion& region); + + // Returns the (always-)current location of a label (can be used in class CodeGeneratorMIPS64, + // must be used instead of Mips64Label::GetPosition()). + uint32_t GetLabelLocation(Mips64Label* label) const; + + // Get the final position of a label after local fixup based on the old position + // recorded before FinalizeCode(). + uint32_t GetAdjustedPosition(uint32_t old_position); + + enum BranchCondition { + kCondLT, + kCondGE, + kCondLE, + kCondGT, + kCondLTZ, + kCondGEZ, + kCondLEZ, + kCondGTZ, + kCondEQ, + kCondNE, + kCondEQZ, + kCondNEZ, + kCondLTU, + kCondGEU, + kUncond, + }; + friend std::ostream& operator<<(std::ostream& os, const BranchCondition& rhs); + private: + class Branch { + public: + enum Type { + // Short branches. + kUncondBranch, + kCondBranch, + kCall, + // Long branches. + kLongUncondBranch, + kLongCondBranch, + kLongCall, + }; + + // Bit sizes of offsets defined as enums to minimize chance of typos. + enum OffsetBits { + kOffset16 = 16, + kOffset18 = 18, + kOffset21 = 21, + kOffset23 = 23, + kOffset28 = 28, + kOffset32 = 32, + }; + + static constexpr uint32_t kUnresolved = 0xffffffff; // Unresolved target_ + static constexpr int32_t kMaxBranchLength = 32; + static constexpr int32_t kMaxBranchSize = kMaxBranchLength * sizeof(uint32_t); + + struct BranchInfo { + // Branch length as a number of 4-byte-long instructions. + uint32_t length; + // Ordinal number (0-based) of the first (or the only) instruction that contains the branch's + // PC-relative offset (or its most significant 16-bit half, which goes first). + uint32_t instr_offset; + // Different MIPS instructions with PC-relative offsets apply said offsets to slightly + // different origins, e.g. to PC or PC+4. Encode the origin distance (as a number of 4-byte + // instructions) from the instruction containing the offset. 
+ uint32_t pc_org; + // How large (in bits) a PC-relative offset can be for a given type of branch (kCondBranch is + // an exception: use kOffset23 for beqzc/bnezc). + OffsetBits offset_size; + // Some MIPS instructions with PC-relative offsets shift the offset by 2. Encode the shift + // count. + int offset_shift; + }; + static const BranchInfo branch_info_[/* Type */]; + + // Unconditional branch. + Branch(uint32_t location, uint32_t target); + // Conditional branch. + Branch(uint32_t location, + uint32_t target, + BranchCondition condition, + GpuRegister lhs_reg, + GpuRegister rhs_reg = ZERO); + // Call (branch and link) that stores the target address in a given register (i.e. T9). + Branch(uint32_t location, uint32_t target, GpuRegister indirect_reg); + + // Some conditional branches with lhs = rhs are effectively NOPs, while some + // others are effectively unconditional. MIPSR6 conditional branches require lhs != rhs. + // So, we need a way to identify such branches in order to emit no instructions for them + // or change them to unconditional. + static bool IsNop(BranchCondition condition, GpuRegister lhs, GpuRegister rhs); + static bool IsUncond(BranchCondition condition, GpuRegister lhs, GpuRegister rhs); + + static BranchCondition OppositeCondition(BranchCondition cond); + + Type GetType() const; + BranchCondition GetCondition() const; + GpuRegister GetLeftRegister() const; + GpuRegister GetRightRegister() const; + uint32_t GetTarget() const; + uint32_t GetLocation() const; + uint32_t GetOldLocation() const; + uint32_t GetLength() const; + uint32_t GetOldLength() const; + uint32_t GetSize() const; + uint32_t GetOldSize() const; + uint32_t GetEndLocation() const; + uint32_t GetOldEndLocation() const; + bool IsLong() const; + bool IsResolved() const; + + // Returns the bit size of the signed offset that the branch instruction can handle. + OffsetBits GetOffsetSize() const; + + // Calculates the distance between two byte locations in the assembler buffer and + // returns the number of bits needed to represent the distance as a signed integer. + // + // Branch instructions have signed offsets of 16, 19 (addiupc), 21 (beqzc/bnezc), + // and 26 (bc) bits, which are additionally shifted left 2 positions at run time. + // + // Composite branches (made of several instructions) with longer reach have 32-bit + // offsets encoded as 2 16-bit "halves" in two instructions (high half goes first). + // The composite branches cover the range of PC + ~+/-2GB. The range is not end-to-end, + // however. Consider the following implementation of a long unconditional branch, for + // example: + // + // auipc at, offset_31_16 // at = pc + sign_extend(offset_31_16) << 16 + // jic at, offset_15_0 // pc = at + sign_extend(offset_15_0) + // + // Both of the above instructions take 16-bit signed offsets as immediate operands. + // When bit 15 of offset_15_0 is 1, it effectively causes subtraction of 0x10000 + // due to sign extension. This must be compensated for by incrementing offset_31_16 + // by 1. offset_31_16 can only be incremented by 1 if it's not 0x7FFF. If it is + // 0x7FFF, adding 1 will overflow the positive offset into the negative range. + // Therefore, the long branch range is something like from PC - 0x80000000 to + // PC + 0x7FFF7FFF, IOW, shorter by 32KB on one side. + // + // The returned values are therefore: 18, 21, 23, 28 and 32. There's also a special + // case with the addiu instruction and a 16 bit offset. 
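(Illustrative aside, not part of the patch. The asymmetric range described above, roughly PC - 0x80000000 to PC + 0x7FFF7FFF, follows directly from the two signed 16-bit halves; a quick standalone check of the extremes:)

    #include <cstdint>
    #include <cstdio>

    // Reach of an auipc/jic pair for given signed 16-bit halves (illustrative model).
    static int64_t Reach(int16_t hi, int16_t lo) {
      return (static_cast<int64_t>(hi) << 16) + lo;
    }

    int main() {
      // Most negative: hi = -0x8000, lo = 0  ->  PC - 0x80000000.
      std::printf("min = %lld\n", static_cast<long long>(Reach(-0x8000, 0)));
      // Most positive: hi = 0x7FFF, lo = 0x7FFF  ->  PC + 0x7FFF7FFF. Reaching further
      // would require incrementing hi past 0x7FFF to compensate for a negative low
      // half, which overflows, hence the ~32KB loss on the positive side.
      std::printf("max = %lld\n", static_cast<long long>(Reach(0x7FFF, 0x7FFF)));
      return 0;
    }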
+ static OffsetBits GetOffsetSizeNeeded(uint32_t location, uint32_t target); + + // Resolve a branch when the target is known. + void Resolve(uint32_t target); + + // Relocate a branch by a given delta if needed due to expansion of this or another + // branch at a given location by this delta (just changes location_ and target_). + void Relocate(uint32_t expand_location, uint32_t delta); + + // If the branch is short, changes its type to long. + void PromoteToLong(); + + // If necessary, updates the type by promoting a short branch to a long branch + // based on the branch location and target. Returns the amount (in bytes) by + // which the branch size has increased. + // max_short_distance caps the maximum distance between location_ and target_ + // that is allowed for short branches. This is for debugging/testing purposes. + // max_short_distance = 0 forces all short branches to become long. + // Use the implicit default argument when not debugging/testing. + uint32_t PromoteIfNeeded(uint32_t max_short_distance = std::numeric_limits<uint32_t>::max()); + + // Returns the location of the instruction(s) containing the offset. + uint32_t GetOffsetLocation() const; + + // Calculates and returns the offset ready for encoding in the branch instruction(s). + uint32_t GetOffset() const; + + private: + // Completes branch construction by determining and recording its type. + void InitializeType(bool is_call); + // Helper for the above. + void InitShortOrLong(OffsetBits ofs_size, Type short_type, Type long_type); + + uint32_t old_location_; // Offset into assembler buffer in bytes. + uint32_t location_; // Offset into assembler buffer in bytes. + uint32_t target_; // Offset into assembler buffer in bytes. + + GpuRegister lhs_reg_; // Left-hand side register in conditional branches or + // indirect call register. + GpuRegister rhs_reg_; // Right-hand side register in conditional branches. + BranchCondition condition_; // Condition for conditional branches. + + Type type_; // Current type of the branch. + Type old_type_; // Initial type of the branch. 
+ }; + friend std::ostream& operator<<(std::ostream& os, const Branch::Type& rhs); + friend std::ostream& operator<<(std::ostream& os, const Branch::OffsetBits& rhs); + void EmitR(int opcode, GpuRegister rs, GpuRegister rt, GpuRegister rd, int shamt, int funct); void EmitRsd(int opcode, GpuRegister rs, GpuRegister rd, int shamt, int funct); void EmitRtd(int opcode, GpuRegister rt, GpuRegister rd, int shamt, int funct); void EmitI(int opcode, GpuRegister rs, GpuRegister rt, uint16_t imm); void EmitI21(int opcode, GpuRegister rs, uint32_t imm21); - void EmitJ(int opcode, uint32_t addr26); + void EmitI26(int opcode, uint32_t imm26); void EmitFR(int opcode, int fmt, FpuRegister ft, FpuRegister fs, FpuRegister fd, int funct); void EmitFI(int opcode, int fmt, FpuRegister rt, uint16_t imm); + void EmitBcondc(BranchCondition cond, GpuRegister rs, GpuRegister rt, uint32_t imm16_21); - DISALLOW_COPY_AND_ASSIGN(Mips64Assembler); -}; + void Buncond(Mips64Label* label); + void Bcond(Mips64Label* label, + BranchCondition condition, + GpuRegister lhs, + GpuRegister rhs = ZERO); + void Call(Mips64Label* label, GpuRegister indirect_reg); + void FinalizeLabeledBranch(Mips64Label* label); -// Slowpath entered when Thread::Current()->_exception is non-null -class Mips64ExceptionSlowPath FINAL : public SlowPath { - public: - Mips64ExceptionSlowPath(Mips64ManagedRegister scratch, size_t stack_adjust) - : scratch_(scratch), stack_adjust_(stack_adjust) {} - virtual void Emit(Assembler *sp_asm) OVERRIDE; - private: - const Mips64ManagedRegister scratch_; - const size_t stack_adjust_; + Branch* GetBranch(uint32_t branch_id); + const Branch* GetBranch(uint32_t branch_id) const; + + void PromoteBranches(); + void EmitBranch(Branch* branch); + void EmitBranches(); + void PatchCFI(); + + // Emits exception block. + void EmitExceptionPoll(Mips64ExceptionSlowPath* exception); + + // List of exception blocks to generate at the end of the code cache. + std::vector<Mips64ExceptionSlowPath> exception_blocks_; + + std::vector<Branch> branches_; + + // Whether appending instructions at the end of the buffer or overwriting the existing ones. + bool overwriting_; + // The current overwrite location. + uint32_t overwrite_location_; + + // Data for AdjustedPosition(), see the description there. + uint32_t last_position_adjustment_; + uint32_t last_old_position_; + uint32_t last_branch_id_; + + DISALLOW_COPY_AND_ASSIGN(Mips64Assembler); }; } // namespace mips64 diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc index 4413906fd7..29a5a88316 100644 --- a/compiler/utils/mips64/assembler_mips64_test.cc +++ b/compiler/utils/mips64/assembler_mips64_test.cc @@ -24,6 +24,8 @@ #include "base/stl_util.h" #include "utils/assembler_test.h" +#define __ GetAssembler()-> + namespace art { struct MIPS64CpuRegisterCompare { @@ -48,8 +50,26 @@ class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler, return "mips64"; } + std::string GetAssemblerCmdName() OVERRIDE { + // We assemble and link for MIPS64R6. See GetAssemblerParameters() for details. + return "gcc"; + } + std::string GetAssemblerParameters() OVERRIDE { - return " --no-warn -march=mips64r6"; + // We assemble and link for MIPS64R6. 
The reason is that object files produced for MIPS64R6 + // (and MIPS32R6) with the GNU assembler don't have correct final offsets in PC-relative + // branches in the .text section and so they require a relocation pass (there's a relocation + // section, .rela.text, that has the needed info to fix up the branches). + return " -march=mips64r6 -Wa,--no-warn -Wl,-Ttext=0 -Wl,-e0 -nostdlib"; + } + + void Pad(std::vector<uint8_t>& data) OVERRIDE { + // The GNU linker unconditionally pads the code segment with NOPs to a size that is a multiple + // of 16 and there doesn't appear to be a way to suppress this padding. Our assembler doesn't + // pad, so, in order for two assembler outputs to match, we need to match the padding as well. + // NOP is encoded as four zero bytes on MIPS. + size_t pad_size = RoundUp(data.size(), 16u) - data.size(); + data.insert(data.end(), pad_size, 0); } std::string GetDisassembleParameters() OVERRIDE { @@ -182,6 +202,71 @@ class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler, return secondary_register_names_[reg]; } + std::string RepeatInsn(size_t count, const std::string& insn) { + std::string result; + for (; count != 0u; --count) { + result += insn; + } + return result; + } + + void BranchCondOneRegHelper(void (mips64::Mips64Assembler::*f)(mips64::GpuRegister, + mips64::Mips64Label*), + std::string instr_name) { + mips64::Mips64Label label; + (Base::GetAssembler()->*f)(mips64::A0, &label); + constexpr size_t kAdduCount1 = 63; + for (size_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label); + constexpr size_t kAdduCount2 = 64; + for (size_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + (Base::GetAssembler()->*f)(mips64::A1, &label); + + std::string expected = + ".set noreorder\n" + + instr_name + " $a0, 1f\n" + "nop\n" + + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + + "1:\n" + + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + + instr_name + " $a1, 1b\n" + "nop\n"; + DriverStr(expected, instr_name); + } + + void BranchCondTwoRegsHelper(void (mips64::Mips64Assembler::*f)(mips64::GpuRegister, + mips64::GpuRegister, + mips64::Mips64Label*), + std::string instr_name) { + mips64::Mips64Label label; + (Base::GetAssembler()->*f)(mips64::A0, mips64::A1, &label); + constexpr size_t kAdduCount1 = 63; + for (size_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label); + constexpr size_t kAdduCount2 = 64; + for (size_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + (Base::GetAssembler()->*f)(mips64::A2, mips64::A3, &label); + + std::string expected = + ".set noreorder\n" + + instr_name + " $a0, $a1, 1f\n" + "nop\n" + + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + + "1:\n" + + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + + instr_name + " $a2, $a3, 1b\n" + "nop\n"; + DriverStr(expected, instr_name); + } + private: std::vector<mips64::GpuRegister*> registers_; std::map<mips64::GpuRegister, std::string, MIPS64CpuRegisterCompare> secondary_register_names_; @@ -194,7 +279,6 @@ TEST_F(AssemblerMIPS64Test, Toolchain) { EXPECT_TRUE(CheckTools()); } - /////////////////// // FP Operations // /////////////////// @@ -348,7 +432,203 @@ TEST_F(AssemblerMIPS64Test, CvtSW) { //////////////// TEST_F(AssemblerMIPS64Test, Jalr) { - DriverStr(RepeatRRNoDupes(&mips64::Mips64Assembler::Jalr, "jalr ${reg1}, ${reg2}"), "jalr"); + DriverStr(".set 
noreorder\n" + + RepeatRRNoDupes(&mips64::Mips64Assembler::Jalr, "jalr ${reg1}, ${reg2}"), "jalr"); +} + +TEST_F(AssemblerMIPS64Test, Jialc) { + mips64::Mips64Label label1, label2; + __ Jialc(&label1, mips64::T9); + constexpr size_t kAdduCount1 = 63; + for (size_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label1); + __ Jialc(&label2, mips64::T9); + constexpr size_t kAdduCount2 = 64; + for (size_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label2); + __ Jialc(&label1, mips64::T9); + + std::string expected = + ".set noreorder\n" + "lapc $t9, 1f\n" + "jialc $t9, 0\n" + + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + + "1:\n" + "lapc $t9, 2f\n" + "jialc $t9, 0\n" + + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + + "2:\n" + "lapc $t9, 1b\n" + "jialc $t9, 0\n"; + DriverStr(expected, "Jialc"); +} + +TEST_F(AssemblerMIPS64Test, LongJialc) { + mips64::Mips64Label label1, label2; + __ Jialc(&label1, mips64::T9); + constexpr uint32_t kAdduCount1 = (1u << 18) + 1; + for (uint32_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label1); + __ Jialc(&label2, mips64::T9); + constexpr uint32_t kAdduCount2 = (1u << 18) + 1; + for (uint32_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label2); + __ Jialc(&label1, mips64::T9); + + uint32_t offset_forward1 = 3 + kAdduCount1; // 3: account for auipc, daddiu and jic. + offset_forward1 <<= 2; + offset_forward1 += (offset_forward1 & 0x8000) << 1; // Account for sign extension in daddiu. + + uint32_t offset_forward2 = 3 + kAdduCount2; // 3: account for auipc, daddiu and jic. + offset_forward2 <<= 2; + offset_forward2 += (offset_forward2 & 0x8000) << 1; // Account for sign extension in daddiu. + + uint32_t offset_back = -(3 + kAdduCount2); // 3: account for auipc, daddiu and jic. + offset_back <<= 2; + offset_back += (offset_back & 0x8000) << 1; // Account for sign extension in daddiu. 
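(Illustrative aside, not part of the patch. For this backward branch the low half does have bit 15 set, so the compensation just computed actually changes the emitted auipc immediate; evaluating the same arithmetic standalone:)

    #include <cstdint>
    #include <cstdio>

    int main() {
      constexpr uint32_t kAdduCount2 = (1u << 18) + 1;  // same constant as the test
      uint32_t offset_back = -(3 + kAdduCount2);        // 3: the auipc/daddiu/jialc at the target label
      offset_back <<= 2;                                // instructions are 4 bytes each
      offset_back += (offset_back & 0x8000) << 1;       // daddiu sign-extension compensation
      std::printf("hi=0x%x lo=0x%x\n", offset_back >> 16, offset_back & 0xFFFF);
      // Prints hi=0xfff0 lo=0xfff0; without the compensation the high half would be 0xffef.
      return 0;
    }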
+ + std::ostringstream oss; + oss << + ".set noreorder\n" + "auipc $t9, 0x" << std::hex << High16Bits(offset_forward1) << "\n" + "daddiu $t9, 0x" << std::hex << Low16Bits(offset_forward1) << "\n" + "jialc $t9, 0\n" << + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") << + "1:\n" + "auipc $t9, 0x" << std::hex << High16Bits(offset_forward2) << "\n" + "daddiu $t9, 0x" << std::hex << Low16Bits(offset_forward2) << "\n" + "jialc $t9, 0\n" << + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") << + "2:\n" + "auipc $t9, 0x" << std::hex << High16Bits(offset_back) << "\n" + "daddiu $t9, 0x" << std::hex << Low16Bits(offset_back) << "\n" + "jialc $t9, 0\n"; + std::string expected = oss.str(); + DriverStr(expected, "LongJialc"); +} + +TEST_F(AssemblerMIPS64Test, Bc) { + mips64::Mips64Label label1, label2; + __ Bc(&label1); + constexpr size_t kAdduCount1 = 63; + for (size_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label1); + __ Bc(&label2); + constexpr size_t kAdduCount2 = 64; + for (size_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label2); + __ Bc(&label1); + + std::string expected = + ".set noreorder\n" + "bc 1f\n" + + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + + "1:\n" + "bc 2f\n" + + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + + "2:\n" + "bc 1b\n"; + DriverStr(expected, "Bc"); +} + +TEST_F(AssemblerMIPS64Test, Beqzc) { + BranchCondOneRegHelper(&mips64::Mips64Assembler::Beqzc, "Beqzc"); +} + +TEST_F(AssemblerMIPS64Test, Bnezc) { + BranchCondOneRegHelper(&mips64::Mips64Assembler::Bnezc, "Bnezc"); +} + +TEST_F(AssemblerMIPS64Test, Bltzc) { + BranchCondOneRegHelper(&mips64::Mips64Assembler::Bltzc, "Bltzc"); +} + +TEST_F(AssemblerMIPS64Test, Bgezc) { + BranchCondOneRegHelper(&mips64::Mips64Assembler::Bgezc, "Bgezc"); +} + +TEST_F(AssemblerMIPS64Test, Blezc) { + BranchCondOneRegHelper(&mips64::Mips64Assembler::Blezc, "Blezc"); +} + +TEST_F(AssemblerMIPS64Test, Bgtzc) { + BranchCondOneRegHelper(&mips64::Mips64Assembler::Bgtzc, "Bgtzc"); +} + +TEST_F(AssemblerMIPS64Test, Beqc) { + BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Beqc, "Beqc"); +} + +TEST_F(AssemblerMIPS64Test, Bnec) { + BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bnec, "Bnec"); +} + +TEST_F(AssemblerMIPS64Test, Bltc) { + BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bltc, "Bltc"); +} + +TEST_F(AssemblerMIPS64Test, Bgec) { + BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bgec, "Bgec"); +} + +TEST_F(AssemblerMIPS64Test, Bltuc) { + BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bltuc, "Bltuc"); +} + +TEST_F(AssemblerMIPS64Test, Bgeuc) { + BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bgeuc, "Bgeuc"); +} + +TEST_F(AssemblerMIPS64Test, LongBeqc) { + mips64::Mips64Label label; + __ Beqc(mips64::A0, mips64::A1, &label); + constexpr uint32_t kAdduCount1 = (1u << 15) + 1; + for (uint32_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label); + constexpr uint32_t kAdduCount2 = (1u << 15) + 1; + for (uint32_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Beqc(mips64::A2, mips64::A3, &label); + + uint32_t offset_forward = 2 + kAdduCount1; // 2: account for auipc and jic. + offset_forward <<= 2; + offset_forward += (offset_forward & 0x8000) << 1; // Account for sign extension in jic. + + uint32_t offset_back = -(kAdduCount2 + 1); // 1: account for bnec. 
+ offset_back <<= 2; + offset_back += (offset_back & 0x8000) << 1; // Account for sign extension in jic. + + std::ostringstream oss; + oss << + ".set noreorder\n" + "bnec $a0, $a1, 1f\n" + "auipc $at, 0x" << std::hex << High16Bits(offset_forward) << "\n" + "jic $at, 0x" << std::hex << Low16Bits(offset_forward) << "\n" + "1:\n" << + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") << + "2:\n" << + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") << + "bnec $a2, $a3, 3f\n" + "auipc $at, 0x" << std::hex << High16Bits(offset_back) << "\n" + "jic $at, 0x" << std::hex << Low16Bits(offset_back) << "\n" + "3:\n"; + std::string expected = oss.str(); + DriverStr(expected, "LongBeqc"); } ////////// diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc index 68cf6d9233..89c2a7cbdf 100644 --- a/dex2oat/dex2oat.cc +++ b/dex2oat/dex2oat.cc @@ -1231,6 +1231,7 @@ class Dex2Oat FINAL { // Handle and ClassLoader creation needs to come after Runtime::Create jobject class_loader = nullptr; + jobject class_path_class_loader = nullptr; Thread* self = Thread::Current(); if (!boot_image_option_.empty()) { @@ -1248,10 +1249,12 @@ class Dex2Oat FINAL { key_value_store_->Put(OatHeader::kClassPathKey, OatFile::EncodeDexFileDependencies(class_path_files)); - // Then the dex files we'll compile. Thus we'll resolve the class-path first. - class_path_files.insert(class_path_files.end(), dex_files_.begin(), dex_files_.end()); + class_path_class_loader = class_linker->CreatePathClassLoader(self, + class_path_files, + nullptr); - class_loader = class_linker->CreatePathClassLoader(self, class_path_files); + // Class path loader as parent so that we'll resolve there first. + class_loader = class_linker->CreatePathClassLoader(self, dex_files_, class_path_class_loader); } driver_.reset(new CompilerDriver(compiler_options_.get(), diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc index cd83de6265..94eb82b054 100644 --- a/oatdump/oatdump.cc +++ b/oatdump/oatdump.cc @@ -2412,7 +2412,7 @@ static int DumpOatWithRuntime(Runtime* runtime, OatFile* oat_file, OatDumperOpti // Need a class loader. // Fake that we're a compiler. - jobject class_loader = class_linker->CreatePathClassLoader(self, class_path); + jobject class_loader = class_linker->CreatePathClassLoader(self, class_path, /*parent*/nullptr); // Use the class loader while dumping. 
StackHandleScope<1> scope(self); diff --git a/runtime/Android.mk b/runtime/Android.mk index 1fdffe3e17..571a2f5d64 100644 --- a/runtime/Android.mk +++ b/runtime/Android.mk @@ -60,6 +60,7 @@ LIBART_COMMON_SRC_FILES := \ gc/collector/concurrent_copying.cc \ gc/collector/garbage_collector.cc \ gc/collector/immune_region.cc \ + gc/collector/immune_spaces.cc \ gc/collector/mark_compact.cc \ gc/collector/mark_sweep.cc \ gc/collector/partial_mark_sweep.cc \ @@ -102,6 +103,7 @@ LIBART_COMMON_SRC_FILES := \ jit/jit.cc \ jit/jit_code_cache.cc \ jit/jit_instrumentation.cc \ + jit/offline_profiling_info.cc \ jit/profiling_info.cc \ lambda/art_lambda_method.cc \ lambda/box_table.cc \ diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc index afa48cde34..8746badf19 100644 --- a/runtime/arch/stub_test.cc +++ b/runtime/arch/stub_test.cc @@ -431,9 +431,9 @@ class StubTest : public CommonRuntimeTest { [referrer] "r"(referrer), [hidden] "r"(hidden) : "at", "v0", "v1", "t0", "t1", "t2", "t3", "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "t8", "t9", "k0", "k1", "fp", "ra", - "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9", "f10", "f11", "f12", "f13", - "f14", "f15", "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", "f24", "f25", "f26", - "f27", "f28", "f29", "f30", "f31", + "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7", "$f8", "$f9", "$f10", "$f11", + "$f12", "$f13", "$f14", "$f15", "$f16", "$f17", "$f18", "$f19", "$f20", "$f21", "$f22", + "$f23", "$f24", "$f25", "$f26", "$f27", "$f28", "$f29", "$f30", "$f31", "memory"); // clobber. #elif defined(__x86_64__) && !defined(__APPLE__) #define PUSH(reg) "pushq " # reg "\n\t .cfi_adjust_cfa_offset 8\n\t" diff --git a/runtime/base/stl_util.h b/runtime/base/stl_util.h index 0949619640..324ab218d2 100644 --- a/runtime/base/stl_util.h +++ b/runtime/base/stl_util.h @@ -149,6 +149,13 @@ bool ContainsElement(const Container& container, const T& value, size_t start_po return it != container.end(); } +// const char* compare function suitable for std::map or std::set. +struct CStringLess { + bool operator()(const char* lhs, const char* rhs) const { + return strcmp(lhs, rhs) < 0; + } +}; + } // namespace art #endif // ART_RUNTIME_BASE_STL_UTIL_H_ diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc index dde100125a..2dd2a83888 100644 --- a/runtime/class_linker.cc +++ b/runtime/class_linker.cc @@ -322,7 +322,8 @@ ClassLinker::ClassLinker(InternTable* intern_table) std::fill_n(find_array_class_cache_, kFindArrayCacheSize, GcRoot<mirror::Class>(nullptr)); } -void ClassLinker::InitWithoutImage(std::vector<std::unique_ptr<const DexFile>> boot_class_path) { +bool ClassLinker::InitWithoutImage(std::vector<std::unique_ptr<const DexFile>> boot_class_path, + std::string* error_msg) { VLOG(startup) << "ClassLinker::Init"; Thread* const self = Thread::Current(); @@ -477,9 +478,15 @@ void ClassLinker::InitWithoutImage(std::vector<std::unique_ptr<const DexFile>> b // Setup boot_class_path_ and register class_path now that we can use AllocObjectArray to create // DexCache instances. Needs to be after String, Field, Method arrays since AllocDexCache uses // these roots. 
- CHECK_NE(0U, boot_class_path.size()); + if (boot_class_path.empty()) { + *error_msg = "Boot classpath is empty."; + return false; + } for (auto& dex_file : boot_class_path) { - CHECK(dex_file.get() != nullptr); + if (dex_file.get() == nullptr) { + *error_msg = "Null dex file."; + return false; + } AppendToBootClassPath(self, *dex_file); opened_dex_files_.push_back(std::move(dex_file)); } @@ -660,6 +667,8 @@ void ClassLinker::InitWithoutImage(std::vector<std::unique_ptr<const DexFile>> b FinishInit(self); VLOG(startup) << "ClassLinker::InitFromCompiler exiting"; + + return true; } void ClassLinker::FinishInit(Thread* self) { @@ -850,7 +859,7 @@ class SetInterpreterEntrypointArtMethodVisitor : public ArtMethodVisitor { DISALLOW_COPY_AND_ASSIGN(SetInterpreterEntrypointArtMethodVisitor); }; -void ClassLinker::InitFromImage() { +bool ClassLinker::InitFromImage(std::string* error_msg) { VLOG(startup) << "ClassLinker::InitFromImage entering"; CHECK(!init_done_); @@ -895,22 +904,32 @@ void ClassLinker::InitFromImage() { java_lang_Object->GetObjectSize(), VoidFunctor())); - CHECK_EQ(oat_file->GetOatHeader().GetDexFileCount(), - static_cast<uint32_t>(dex_caches->GetLength())); + if (oat_file->GetOatHeader().GetDexFileCount() != + static_cast<uint32_t>(dex_caches->GetLength())) { + *error_msg = "Dex cache count and dex file count mismatch while trying to initialize from " + "image"; + return false; + } for (int32_t i = 0; i < dex_caches->GetLength(); i++) { StackHandleScope<1> hs2(self); Handle<mirror::DexCache> dex_cache(hs2.NewHandle(dex_caches->Get(i))); const std::string& dex_file_location(dex_cache->GetLocation()->ToModifiedUtf8()); const OatFile::OatDexFile* oat_dex_file = oat_file->GetOatDexFile(dex_file_location.c_str(), nullptr); - CHECK(oat_dex_file != nullptr) << oat_file->GetLocation() << " " << dex_file_location; - std::string error_msg; - std::unique_ptr<const DexFile> dex_file = oat_dex_file->OpenDexFile(&error_msg); + if (oat_dex_file == nullptr) { + *error_msg = StringPrintf("Failed finding oat dex file for %s %s", + oat_file->GetLocation().c_str(), + dex_file_location.c_str()); + return false; + } + std::string inner_error_msg; + std::unique_ptr<const DexFile> dex_file = oat_dex_file->OpenDexFile(&inner_error_msg); if (dex_file == nullptr) { - LOG(FATAL) << "Failed to open dex file " << dex_file_location - << " from within oat file " << oat_file->GetLocation() - << " error '" << error_msg << "'"; - UNREACHABLE(); + *error_msg = StringPrintf("Failed to open dex file %s from within oat file %s error '%s'", + dex_file_location.c_str(), + oat_file->GetLocation().c_str(), + inner_error_msg.c_str()); + return false; } if (kSanityCheckObjects) { @@ -920,13 +939,22 @@ void ClassLinker::InitFromImage() { space); } - CHECK_EQ(dex_file->GetLocationChecksum(), oat_dex_file->GetDexFileLocationChecksum()); + if (dex_file->GetLocationChecksum() != oat_dex_file->GetDexFileLocationChecksum()) { + *error_msg = StringPrintf("Checksums do not match for %s: %x vs %x", + dex_file_location.c_str(), + dex_file->GetLocationChecksum(), + oat_dex_file->GetDexFileLocationChecksum()); + return false; + } AppendToBootClassPath(*dex_file.get(), dex_cache); opened_dex_files_.push_back(std::move(dex_file)); } - CHECK(ValidPointerSize(image_pointer_size_)) << image_pointer_size_; + if (!ValidPointerSize(image_pointer_size_)) { + *error_msg = StringPrintf("Invalid image pointer size: %zu", image_pointer_size_); + return false; + } // Set classes on AbstractMethod early so that IsMethod tests can be performed during 
the live // bitmap walk. @@ -934,7 +962,12 @@ void ClassLinker::InitFromImage() { // Only the Aot compiler supports having an image with a different pointer size than the // runtime. This happens on the host for compile 32 bit tests since we use a 64 bit libart // compiler. We may also use 32 bit dex2oat on a system with 64 bit apps. - CHECK_EQ(image_pointer_size_, sizeof(void*)); + if (image_pointer_size_ != sizeof(void*)) { + *error_msg = StringPrintf("Runtime must use current image pointer size: %zu vs %zu", + image_pointer_size_ , + sizeof(void*)); + return false; + } } if (kSanityCheckObjects) { @@ -987,6 +1020,8 @@ void ClassLinker::InitFromImage() { FinishInit(self); VLOG(startup) << "ClassLinker::InitFromImage exiting"; + + return true; } bool ClassLinker::ClassInClassTable(mirror::Class* klass) { @@ -6594,7 +6629,9 @@ bool ClassLinker::MayBeCalledWithDirectCodePointer(ArtMethod* m) { } } -jobject ClassLinker::CreatePathClassLoader(Thread* self, std::vector<const DexFile*>& dex_files) { +jobject ClassLinker::CreatePathClassLoader(Thread* self, + std::vector<const DexFile*>& dex_files, + jobject parent_loader) { // SOAAlreadyRunnable is protected, and we need something to add a global reference. // We could move the jobject to the callers, but all call-sites do this... ScopedObjectAccessUnchecked soa(self); @@ -6625,8 +6662,8 @@ jobject ClassLinker::CreatePathClassLoader(Thread* self, std::vector<const DexFi for (const DexFile* dex_file : dex_files) { StackHandleScope<3> hs2(self); - // CreatePathClassLoader is only used by gtests. Index 0 of h_long_array is supposed to be the - // oat file but we can leave it null. + // CreatePathClassLoader is only used by gtests and dex2oat. Index 0 of h_long_array is + // supposed to be the oat file but we can leave it null. Handle<mirror::LongArray> h_long_array = hs2.NewHandle(mirror::LongArray::Alloc( self, kDexFileIndexStart + 1)); @@ -6672,9 +6709,10 @@ jobject ClassLinker::CreatePathClassLoader(Thread* self, std::vector<const DexFi mirror::Class::FindField(self, hs.NewHandle(h_path_class_loader->GetClass()), "parent", "Ljava/lang/ClassLoader;"); DCHECK(parent_field != nullptr); - mirror::Object* boot_cl = - soa.Decode<mirror::Class*>(WellKnownClasses::java_lang_BootClassLoader)->AllocObject(self); - parent_field->SetObject<false>(h_path_class_loader.Get(), boot_cl); + mirror::Object* parent = (parent_loader != nullptr) + ? soa.Decode<mirror::ClassLoader*>(parent_loader) + : soa.Decode<mirror::Class*>(WellKnownClasses::java_lang_BootClassLoader)->AllocObject(self); + parent_field->SetObject<false>(h_path_class_loader.Get(), parent); // Make it a global ref and return. ScopedLocalRef<jobject> local_ref( diff --git a/runtime/class_linker.h b/runtime/class_linker.h index a72b58602f..29aac312c1 100644 --- a/runtime/class_linker.h +++ b/runtime/class_linker.h @@ -115,12 +115,15 @@ class ClassLinker { ~ClassLinker(); // Initialize class linker by bootstraping from dex files. - void InitWithoutImage(std::vector<std::unique_ptr<const DexFile>> boot_class_path) + bool InitWithoutImage(std::vector<std::unique_ptr<const DexFile>> boot_class_path, + std::string* error_msg) SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!dex_lock_); // Initialize class linker from one or more images. - void InitFromImage() SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!dex_lock_); + bool InitFromImage(std::string* error_msg) + SHARED_REQUIRES(Locks::mutator_lock_) + REQUIRES(!dex_lock_); // Finds a class by its descriptor, loading it if necessary. 
// If class_loader is null, searches boot_class_path_. @@ -511,7 +514,10 @@ class ClassLinker { // Creates a GlobalRef PathClassLoader that can be used to load classes from the given dex files. // Note: the objects are not completely set up. Do not use this outside of tests and the compiler. - jobject CreatePathClassLoader(Thread* self, std::vector<const DexFile*>& dex_files) + // If parent_loader is null then we use the boot class loader. + jobject CreatePathClassLoader(Thread* self, + std::vector<const DexFile*>& dex_files, + jobject parent_loader) SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!dex_lock_); diff --git a/runtime/common_runtime_test.cc b/runtime/common_runtime_test.cc index b6b514177a..f705a50d55 100644 --- a/runtime/common_runtime_test.cc +++ b/runtime/common_runtime_test.cc @@ -553,7 +553,8 @@ jobject CommonRuntimeTest::LoadDex(const char* dex_name) { Thread* self = Thread::Current(); jobject class_loader = Runtime::Current()->GetClassLinker()->CreatePathClassLoader(self, - class_path); + class_path, + nullptr); self->SetClassLoaderOverride(class_loader); return class_loader; } diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc index 70096f5627..4163e2efdf 100644 --- a/runtime/dex_file.cc +++ b/runtime/dex_file.cc @@ -1870,10 +1870,10 @@ bool DexFile::ProcessAnnotationValue(Handle<mirror::Class> klass, const uint8_t* Handle<mirror::ClassLoader> class_loader(hs.NewHandle(klass->GetClassLoader())); ArtField* enum_field = Runtime::Current()->GetClassLinker()->ResolveField( klass->GetDexFile(), index, dex_cache, class_loader, true); - Handle<mirror::Class> field_class(hs.NewHandle(enum_field->GetDeclaringClass())); if (enum_field == nullptr) { return false; } else { + Handle<mirror::Class> field_class(hs.NewHandle(enum_field->GetDeclaringClass())); Runtime::Current()->GetClassLinker()->EnsureInitialized(self, field_class, true, true); element_object = enum_field->GetObject(field_class.Get()); set_object = true; diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc index f4cf3ae260..bcfcb89e62 100644 --- a/runtime/gc/collector/concurrent_copying.cc +++ b/runtime/gc/collector/concurrent_copying.cc @@ -134,10 +134,10 @@ void ConcurrentCopying::BindBitmaps() { WriterMutexLock mu(self, *Locks::heap_bitmap_lock_); // Mark all of the spaces we never collect as immune. for (const auto& space : heap_->GetContinuousSpaces()) { - if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyNeverCollect - || space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect) { + if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyNeverCollect || + space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect) { CHECK(space->IsZygoteSpace() || space->IsImageSpace()); - CHECK(immune_region_.AddContinuousSpace(space)) << "Failed to add space " << *space; + immune_spaces_.AddSpace(space); const char* bitmap_name = space->IsImageSpace() ? "cc image space bitmap" : "cc zygote space bitmap"; // TODO: try avoiding using bitmaps for image/zygote to save space. 
@@ -164,7 +164,7 @@ void ConcurrentCopying::InitializePhase() { << reinterpret_cast<void*>(region_space_->Limit()); } CheckEmptyMarkStack(); - immune_region_.Reset(); + immune_spaces_.Reset(); bytes_moved_.StoreRelaxed(0); objects_moved_.StoreRelaxed(0); if (GetCurrentIteration()->GetGcCause() == kGcCauseExplicit || @@ -177,7 +177,11 @@ void ConcurrentCopying::InitializePhase() { BindBitmaps(); if (kVerboseMode) { LOG(INFO) << "force_evacuate_all=" << force_evacuate_all_; - LOG(INFO) << "Immune region: " << immune_region_.Begin() << "-" << immune_region_.End(); + LOG(INFO) << "Largest immune region: " << immune_spaces_.GetLargestImmuneRegion().Begin() + << "-" << immune_spaces_.GetLargestImmuneRegion().End(); + for (space::ContinuousSpace* space : immune_spaces_.GetSpaces()) { + LOG(INFO) << "Immune space: " << *space; + } LOG(INFO) << "GC end of InitializePhase"; } } @@ -300,7 +304,7 @@ class ConcurrentCopyingImmuneSpaceObjVisitor { void operator()(mirror::Object* obj) const SHARED_REQUIRES(Locks::mutator_lock_) SHARED_REQUIRES(Locks::heap_bitmap_lock_) { DCHECK(obj != nullptr); - DCHECK(collector_->immune_region_.ContainsObject(obj)); + DCHECK(collector_->immune_spaces_.ContainsObject(obj)); accounting::ContinuousSpaceBitmap* cc_bitmap = collector_->cc_heap_bitmap_->GetContinuousSpaceBitmap(obj); DCHECK(cc_bitmap != nullptr) @@ -383,15 +387,13 @@ void ConcurrentCopying::MarkingPhase() { } // Immune spaces. - for (auto& space : heap_->GetContinuousSpaces()) { - if (immune_region_.ContainsSpace(space)) { - DCHECK(space->IsImageSpace() || space->IsZygoteSpace()); - accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap(); - ConcurrentCopyingImmuneSpaceObjVisitor visitor(this); - live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()), - reinterpret_cast<uintptr_t>(space->Limit()), - visitor); - } + for (auto& space : immune_spaces_.GetSpaces()) { + DCHECK(space->IsImageSpace() || space->IsZygoteSpace()); + accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap(); + ConcurrentCopyingImmuneSpaceObjVisitor visitor(this); + live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()), + reinterpret_cast<uintptr_t>(space->Limit()), + visitor); } Thread* self = Thread::Current(); @@ -1078,7 +1080,7 @@ inline void ConcurrentCopying::ProcessMarkStackRef(mirror::Object* to_ref) { !IsInToSpace(to_ref->AsReference()->GetReferent<kWithoutReadBarrier>())))) { // Leave this Reference gray in the queue so that GetReferent() will trigger a read barrier. We // will change it to black or white later in ReferenceQueue::DequeuePendingReference(). - CHECK(to_ref->AsReference()->IsEnqueued()) << "Left unenqueued ref gray " << to_ref; + DCHECK(to_ref->AsReference()->IsEnqueued()) << "Left unenqueued ref gray " << to_ref; } else { // We may occasionally leave a Reference black or white in the queue if its referent happens to // be concurrently marked after the Scan() call above has enqueued the Reference, in which case @@ -1087,9 +1089,10 @@ inline void ConcurrentCopying::ProcessMarkStackRef(mirror::Object* to_ref) { if (kUseBakerReadBarrier) { if (region_space_->IsInToSpace(to_ref)) { // If to-space, change from gray to white. 
- bool success = to_ref->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(), - ReadBarrier::WhitePtr()); - CHECK(success) << "Must succeed as we won the race."; + bool success = to_ref->AtomicSetReadBarrierPointer</*kCasRelease*/true>( + ReadBarrier::GrayPtr(), + ReadBarrier::WhitePtr()); + DCHECK(success) << "Must succeed as we won the race."; DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr()); } else { // If non-moving space/unevac from space, change from gray @@ -1099,9 +1102,10 @@ inline void ConcurrentCopying::ProcessMarkStackRef(mirror::Object* to_ref) { // indicate non-moving objects that have been marked // through. Note we'd need to change from black to white // later (concurrently). - bool success = to_ref->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(), - ReadBarrier::BlackPtr()); - CHECK(success) << "Must succeed as we won the race."; + bool success = to_ref->AtomicSetReadBarrierPointer</*kCasRelease*/true>( + ReadBarrier::GrayPtr(), + ReadBarrier::BlackPtr()); + DCHECK(success) << "Must succeed as we won the race."; DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::BlackPtr()); } } @@ -1205,7 +1209,7 @@ void ConcurrentCopying::Sweep(bool swap_bitmaps) { for (const auto& space : GetHeap()->GetContinuousSpaces()) { if (space->IsContinuousMemMapAllocSpace()) { space::ContinuousMemMapAllocSpace* alloc_space = space->AsContinuousMemMapAllocSpace(); - if (space == region_space_ || immune_region_.ContainsSpace(space)) { + if (space == region_space_ || immune_spaces_.ContainsSpace(space)) { continue; } TimingLogger::ScopedTiming split2( @@ -1225,9 +1229,6 @@ class ConcurrentCopyingClearBlackPtrsVisitor { public: explicit ConcurrentCopyingClearBlackPtrsVisitor(ConcurrentCopying* cc) : collector_(cc) {} -#ifndef USE_BAKER_OR_BROOKS_READ_BARRIER - NO_RETURN -#endif void operator()(mirror::Object* obj) const SHARED_REQUIRES(Locks::mutator_lock_) SHARED_REQUIRES(Locks::heap_bitmap_lock_) { DCHECK(obj != nullptr); @@ -1507,8 +1508,8 @@ void ConcurrentCopying::LogFromSpaceRefHolder(mirror::Object* obj, MemberOffset } } else { // In a non-moving space. - if (immune_region_.ContainsObject(obj)) { - LOG(INFO) << "holder is in the image or the zygote space."; + if (immune_spaces_.ContainsObject(obj)) { + LOG(INFO) << "holder is in an immune image or the zygote space."; accounting::ContinuousSpaceBitmap* cc_bitmap = cc_heap_bitmap_->GetContinuousSpaceBitmap(obj); CHECK(cc_bitmap != nullptr) @@ -1519,7 +1520,7 @@ void ConcurrentCopying::LogFromSpaceRefHolder(mirror::Object* obj, MemberOffset LOG(INFO) << "holder is NOT marked in the bit map."; } } else { - LOG(INFO) << "holder is in a non-moving (or main) space."; + LOG(INFO) << "holder is in a non-immune, non-moving (or main) space."; accounting::ContinuousSpaceBitmap* mark_bitmap = heap_mark_bitmap_->GetContinuousSpaceBitmap(obj); accounting::LargeObjectBitmap* los_bitmap = @@ -1547,7 +1548,7 @@ void ConcurrentCopying::LogFromSpaceRefHolder(mirror::Object* obj, MemberOffset void ConcurrentCopying::AssertToSpaceInvariantInNonMovingSpace(mirror::Object* obj, mirror::Object* ref) { // In a non-moving spaces. Check that the ref is marked. - if (immune_region_.ContainsObject(ref)) { + if (immune_spaces_.ContainsObject(ref)) { accounting::ContinuousSpaceBitmap* cc_bitmap = cc_heap_bitmap_->GetContinuousSpaceBitmap(ref); CHECK(cc_bitmap != nullptr) @@ -1932,7 +1933,7 @@ mirror::Object* ConcurrentCopying::IsMarked(mirror::Object* from_ref) { } } else { // from_ref is in a non-moving space. 
- if (immune_region_.ContainsObject(from_ref)) { + if (immune_spaces_.ContainsObject(from_ref)) { accounting::ContinuousSpaceBitmap* cc_bitmap = cc_heap_bitmap_->GetContinuousSpaceBitmap(from_ref); DCHECK(cc_bitmap != nullptr) @@ -1986,7 +1987,7 @@ bool ConcurrentCopying::IsOnAllocStack(mirror::Object* ref) { mirror::Object* ConcurrentCopying::MarkNonMoving(mirror::Object* ref) { // ref is in a non-moving space (from_ref == to_ref). DCHECK(!region_space_->HasAddress(ref)) << ref; - if (immune_region_.ContainsObject(ref)) { + if (immune_spaces_.ContainsObject(ref)) { accounting::ContinuousSpaceBitmap* cc_bitmap = cc_heap_bitmap_->GetContinuousSpaceBitmap(ref); DCHECK(cc_bitmap != nullptr) diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h index 27726e23c1..5d21c599e4 100644 --- a/runtime/gc/collector/concurrent_copying.h +++ b/runtime/gc/collector/concurrent_copying.h @@ -19,7 +19,7 @@ #include "barrier.h" #include "garbage_collector.h" -#include "immune_region.h" +#include "immune_spaces.h" #include "jni.h" #include "object_callbacks.h" #include "offsets.h" @@ -200,7 +200,7 @@ class ConcurrentCopying : public GarbageCollector { bool is_marking_; // True while marking is ongoing. bool is_active_; // True while the collection is ongoing. bool is_asserting_to_space_invariant_; // True while asserting the to-space invariant. - ImmuneRegion immune_region_; + ImmuneSpaces immune_spaces_; std::unique_ptr<accounting::HeapBitmap> cc_heap_bitmap_; std::vector<accounting::SpaceBitmap<kObjectAlignment>*> cc_bitmaps_; accounting::SpaceBitmap<kObjectAlignment>* region_space_bitmap_; diff --git a/runtime/gc/collector/immune_region.cc b/runtime/gc/collector/immune_region.cc index 3e1c944302..8a04c178b5 100644 --- a/runtime/gc/collector/immune_region.cc +++ b/runtime/gc/collector/immune_region.cc @@ -32,39 +32,6 @@ void ImmuneRegion::Reset() { SetEnd(nullptr); } -bool ImmuneRegion::AddContinuousSpace(space::ContinuousSpace* space) { - // Bind live to mark bitmap if necessary. - if (space->GetLiveBitmap() != space->GetMarkBitmap()) { - CHECK(space->IsContinuousMemMapAllocSpace()); - space->AsContinuousMemMapAllocSpace()->BindLiveToMarkBitmap(); - } - mirror::Object* space_begin = reinterpret_cast<mirror::Object*>(space->Begin()); - mirror::Object* space_limit = reinterpret_cast<mirror::Object*>(space->Limit()); - if (IsEmpty()) { - SetBegin(space_begin); - SetEnd(space_limit); - } else { - if (space_limit <= begin_) { // Space is before the immune region. - SetBegin(space_begin); - } else if (space_begin >= end_) { // Space is after the immune region. - SetEnd(space_limit); - } else { - return false; - } - } - return true; -} - -bool ImmuneRegion::ContainsSpace(const space::ContinuousSpace* space) const { - bool contains = - begin_ <= reinterpret_cast<mirror::Object*>(space->Begin()) && - end_ >= reinterpret_cast<mirror::Object*>(space->Limit()); - if (kIsDebugBuild && contains) { - // A bump pointer space shoult not be in the immune region. 
- DCHECK(space->GetType() != space::kSpaceTypeBumpPointerSpace); - } - return contains; -} } // namespace collector } // namespace gc diff --git a/runtime/gc/collector/immune_region.h b/runtime/gc/collector/immune_region.h index 3ead501046..b60426daf0 100644 --- a/runtime/gc/collector/immune_region.h +++ b/runtime/gc/collector/immune_region.h @@ -39,35 +39,34 @@ namespace collector { class ImmuneRegion { public: ImmuneRegion(); + void Reset(); - bool AddContinuousSpace(space::ContinuousSpace* space) - REQUIRES(Locks::heap_bitmap_lock_); - bool ContainsSpace(const space::ContinuousSpace* space) const; + // Returns true if an object is inside of the immune region (assumed to be marked). - bool ContainsObject(const mirror::Object* obj) const ALWAYS_INLINE { + ALWAYS_INLINE bool ContainsObject(const mirror::Object* obj) const { // Note: Relies on integer underflow behavior. return reinterpret_cast<uintptr_t>(obj) - reinterpret_cast<uintptr_t>(begin_) < size_; } + void SetBegin(mirror::Object* begin) { begin_ = begin; UpdateSize(); } + void SetEnd(mirror::Object* end) { end_ = end; UpdateSize(); } - mirror::Object* Begin() { + mirror::Object* Begin() const { return begin_; } - mirror::Object* End() { + + mirror::Object* End() const { return end_; } private: - bool IsEmpty() const { - return size_ == 0; - } void UpdateSize() { size_ = reinterpret_cast<uintptr_t>(end_) - reinterpret_cast<uintptr_t>(begin_); } diff --git a/runtime/gc/collector/immune_spaces.cc b/runtime/gc/collector/immune_spaces.cc new file mode 100644 index 0000000000..8f9a9e294f --- /dev/null +++ b/runtime/gc/collector/immune_spaces.cc @@ -0,0 +1,98 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "immune_spaces.h" + +#include "gc/space/space-inl.h" +#include "mirror/object.h" + +namespace art { +namespace gc { +namespace collector { + +void ImmuneSpaces::Reset() { + spaces_.clear(); + largest_immune_region_.Reset(); +} + +void ImmuneSpaces::CreateLargestImmuneRegion() { + uintptr_t best_begin = 0u; + uintptr_t best_end = 0u; + uintptr_t cur_begin = 0u; + uintptr_t cur_end = 0u; + // TODO: If the last space is an image space, we may include its oat file in the immune region. + // This could potentially hide heap corruption bugs if there is invalid pointers that point into + // the boot oat code + for (space::ContinuousSpace* space : GetSpaces()) { + uintptr_t space_begin = reinterpret_cast<uintptr_t>(space->Begin()); + uintptr_t space_end = reinterpret_cast<uintptr_t>(space->Limit()); + if (space->IsImageSpace()) { + // For the boot image, the boot oat file is always directly after. For app images it may not + // be if the app image was mapped at a random address. + space::ImageSpace* image_space = space->AsImageSpace(); + // Update the end to include the other non-heap sections. 
+ space_end = RoundUp(reinterpret_cast<uintptr_t>(image_space->GetImageEnd()), kPageSize); + uintptr_t oat_begin = reinterpret_cast<uintptr_t>(image_space->GetOatFileBegin()); + uintptr_t oat_end = reinterpret_cast<uintptr_t>(image_space->GetOatFileEnd()); + if (space_end == oat_begin) { + DCHECK_GE(oat_end, oat_begin); + space_end = oat_end; + } + } + if (cur_begin == 0u) { + cur_begin = space_begin; + cur_end = space_end; + } else if (cur_end == space_begin) { + // Extend current region. + cur_end = space_end; + } else { + // Reset. + cur_begin = 0; + cur_end = 0; + } + if (cur_end - cur_begin > best_end - best_begin) { + // Improvement, update the best range. + best_begin = cur_begin; + best_end = cur_end; + } + } + largest_immune_region_.SetBegin(reinterpret_cast<mirror::Object*>(best_begin)); + largest_immune_region_.SetEnd(reinterpret_cast<mirror::Object*>(best_end)); +} + +void ImmuneSpaces::AddSpace(space::ContinuousSpace* space) { + DCHECK(spaces_.find(space) == spaces_.end()) << *space; + // Bind live to mark bitmap if necessary. + if (space->GetLiveBitmap() != space->GetMarkBitmap()) { + CHECK(space->IsContinuousMemMapAllocSpace()); + space->AsContinuousMemMapAllocSpace()->BindLiveToMarkBitmap(); + } + spaces_.insert(space); + CreateLargestImmuneRegion(); +} + +bool ImmuneSpaces::CompareByBegin::operator()(space::ContinuousSpace* a, space::ContinuousSpace* b) + const { + return a->Begin() < b->Begin(); +} + +bool ImmuneSpaces::ContainsSpace(space::ContinuousSpace* space) const { + return spaces_.find(space) != spaces_.end(); +} + +} // namespace collector +} // namespace gc +} // namespace art diff --git a/runtime/gc/collector/immune_spaces.h b/runtime/gc/collector/immune_spaces.h new file mode 100644 index 0000000000..72cb60d465 --- /dev/null +++ b/runtime/gc/collector/immune_spaces.h @@ -0,0 +1,95 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_RUNTIME_GC_COLLECTOR_IMMUNE_SPACES_H_ +#define ART_RUNTIME_GC_COLLECTOR_IMMUNE_SPACES_H_ + +#include "base/macros.h" +#include "base/mutex.h" +#include "gc/space/space.h" +#include "immune_region.h" + +#include <set> + +namespace art { +namespace gc { +namespace space { +class ContinuousSpace; +} // namespace space + +namespace collector { + +// ImmuneSpaces is a set of spaces which are not going to have any objects become marked during the +// GC. +class ImmuneSpaces { + class CompareByBegin { + public: + bool operator()(space::ContinuousSpace* a, space::ContinuousSpace* b) const; + }; + + public: + ImmuneSpaces() {} + void Reset(); + + // Add a continuous space to the immune spaces set. + void AddSpace(space::ContinuousSpace* space) REQUIRES(Locks::heap_bitmap_lock_); + + // Returns true if an object is inside of the immune region (assumed to be marked). Only returns + // true for the largest immune region. The object can still be inside of an immune space. 
+  ALWAYS_INLINE bool IsInImmuneRegion(const mirror::Object* obj) const { + return largest_immune_region_.ContainsObject(obj); + } + + // Return true if the space is contained. + bool ContainsSpace(space::ContinuousSpace* space) const; + + // Return the set of spaces in the immune region. + const std::set<space::ContinuousSpace*, CompareByBegin>& GetSpaces() { + return spaces_; + } + + // Return the associated largest immune region. + const ImmuneRegion& GetLargestImmuneRegion() const { + return largest_immune_region_; + } + + // Return true if the object is contained by any of the immune spaces. + ALWAYS_INLINE bool ContainsObject(const mirror::Object* obj) const { + if (largest_immune_region_.ContainsObject(obj)) { + return true; + } + for (space::ContinuousSpace* space : spaces_) { + if (space->HasAddress(obj)) { + return true; + } + } + return false; + } + + private: + // Set up the immune region to the largest continuous set of immune spaces. The immune region is + // just for the fast path lookup. + void CreateLargestImmuneRegion(); + + std::set<space::ContinuousSpace*, CompareByBegin> spaces_; + ImmuneRegion largest_immune_region_; +}; + +} // namespace collector +} // namespace gc +} // namespace art + +#endif // ART_RUNTIME_GC_COLLECTOR_IMMUNE_SPACES_H_ diff --git a/runtime/gc/collector/immune_spaces_test.cc b/runtime/gc/collector/immune_spaces_test.cc new file mode 100644 index 0000000000..f741117bc1 --- /dev/null +++ b/runtime/gc/collector/immune_spaces_test.cc @@ -0,0 +1,150 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#include "common_runtime_test.h" +#include "gc/collector/immune_spaces.h" +#include "gc/space/image_space.h" +#include "gc/space/space-inl.h" +#include "oat_file.h" +#include "thread-inl.h" + +namespace art { +namespace mirror { +class Object; +} // namespace mirror +namespace gc { +namespace collector { + +class ImmuneSpacesTest : public CommonRuntimeTest {}; + +class DummySpace : public space::ContinuousSpace { + public: + DummySpace(uint8_t* begin, uint8_t* end) + : ContinuousSpace("DummySpace", + space::kGcRetentionPolicyNeverCollect, + begin, + end, + /*limit*/end) {} + + space::SpaceType GetType() const OVERRIDE { + return space::kSpaceTypeMallocSpace; + } + + bool CanMoveObjects() const OVERRIDE { + return false; + } + + accounting::ContinuousSpaceBitmap* GetLiveBitmap() const OVERRIDE { + return nullptr; + } + + accounting::ContinuousSpaceBitmap* GetMarkBitmap() const OVERRIDE { + return nullptr; + } +}; + +TEST_F(ImmuneSpacesTest, AppendBasic) { + ImmuneSpaces spaces; + uint8_t* const base = reinterpret_cast<uint8_t*>(0x1000); + DummySpace a(base, base + 45 * KB); + DummySpace b(a.Limit(), a.Limit() + 813 * KB); + { + WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_); + spaces.AddSpace(&a); + spaces.AddSpace(&b); + } + EXPECT_TRUE(spaces.ContainsSpace(&a)); + EXPECT_TRUE(spaces.ContainsSpace(&b)); + EXPECT_EQ(reinterpret_cast<uint8_t*>(spaces.GetLargestImmuneRegion().Begin()), a.Begin()); + EXPECT_EQ(reinterpret_cast<uint8_t*>(spaces.GetLargestImmuneRegion().End()), b.Limit()); +} + +class DummyImageSpace : public space::ImageSpace { + public: + DummyImageSpace(MemMap* map, accounting::ContinuousSpaceBitmap* live_bitmap) + : ImageSpace("DummyImageSpace", + /*image_location*/"", + map, + live_bitmap, + map->End()) {} + + // OatSize is how large the oat file is after the image. + static DummyImageSpace* Create(size_t size, size_t oat_size) { + std::string error_str; + std::unique_ptr<MemMap> map(MemMap::MapAnonymous("DummyImageSpace", + nullptr, + size, + PROT_READ | PROT_WRITE, + /*low_4gb*/true, + /*reuse*/false, + &error_str)); + if (map == nullptr) { + LOG(ERROR) << error_str; + return nullptr; + } + std::unique_ptr<accounting::ContinuousSpaceBitmap> live_bitmap( + accounting::ContinuousSpaceBitmap::Create("bitmap", map->Begin(), map->Size())); + if (live_bitmap == nullptr) { + return nullptr; + } + // Create image header. 
+ ImageSection sections[ImageHeader::kSectionCount]; + new (map->Begin()) ImageHeader( + /*image_begin*/PointerToLowMemUInt32(map->Begin()), + /*image_size*/map->Size(), + sections, + /*image_roots*/PointerToLowMemUInt32(map->Begin()) + 1, + /*oat_checksum*/0u, + /*oat_file_begin*/PointerToLowMemUInt32(map->End()), + /*oat_data_begin*/PointerToLowMemUInt32(map->End()), + /*oat_data_end*/PointerToLowMemUInt32(map->End() + oat_size), + /*oat_file_end*/PointerToLowMemUInt32(map->End() + oat_size), + /*pointer_size*/sizeof(void*), + /*compile_pic*/false); + return new DummyImageSpace(map.release(), live_bitmap.release()); + } +}; + +TEST_F(ImmuneSpacesTest, AppendAfterImage) { + ImmuneSpaces spaces; + constexpr size_t image_size = 123 * kPageSize; + constexpr size_t image_oat_size = 321 * kPageSize; + std::unique_ptr<DummyImageSpace> image_space(DummyImageSpace::Create(image_size, image_oat_size)); + ASSERT_TRUE(image_space != nullptr); + const ImageHeader& image_header = image_space->GetImageHeader(); + EXPECT_EQ(image_header.GetImageSize(), image_size); + EXPECT_EQ(static_cast<size_t>(image_header.GetOatFileEnd() - image_header.GetOatFileBegin()), + image_oat_size); + DummySpace space(image_header.GetOatFileEnd(), image_header.GetOatFileEnd() + 813 * kPageSize); + EXPECT_NE(image_space->Limit(), space.Begin()); + { + WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_); + spaces.AddSpace(image_space.get()); + spaces.AddSpace(&space); + } + EXPECT_TRUE(spaces.ContainsSpace(image_space.get())); + EXPECT_TRUE(spaces.ContainsSpace(&space)); + // CreateLargestImmuneRegion should have coalesced the two spaces since the oat code after the + // image prevents gaps. + // Check that we have a continuous region. + EXPECT_EQ(reinterpret_cast<uint8_t*>(spaces.GetLargestImmuneRegion().Begin()), + image_space->Begin()); + EXPECT_EQ(reinterpret_cast<uint8_t*>(spaces.GetLargestImmuneRegion().End()), space.Limit()); +} + +} // namespace collector +} // namespace gc +} // namespace art diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc index f561764ce4..ce6467a6cf 100644 --- a/runtime/gc/collector/mark_compact.cc +++ b/runtime/gc/collector/mark_compact.cc @@ -45,7 +45,7 @@ void MarkCompact::BindBitmaps() { for (const auto& space : GetHeap()->GetContinuousSpaces()) { if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyNeverCollect || space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect) { - CHECK(immune_region_.AddContinuousSpace(space)) << "Failed to add space " << *space; + immune_spaces_.AddSpace(space); } } } @@ -115,7 +115,7 @@ void MarkCompact::InitializePhase() { TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); mark_stack_ = heap_->GetMarkStack(); DCHECK(mark_stack_ != nullptr); - immune_region_.Reset(); + immune_spaces_.Reset(); CHECK(space_->CanMoveObjects()) << "Attempting compact non-movable space from " << *space_; // TODO: I don't think we should need heap bitmap lock to Get the mark bitmap. ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_); @@ -148,7 +148,7 @@ inline mirror::Object* MarkCompact::MarkObject(mirror::Object* obj) { // Verify all the objects have the correct forward pointer installed. obj->AssertReadBarrierPointer(); } - if (!immune_region_.ContainsObject(obj)) { + if (!immune_spaces_.IsInImmuneRegion(obj)) { if (objects_before_forwarding_->HasAddress(obj)) { if (!objects_before_forwarding_->Set(obj)) { MarkStackPush(obj); // This object was not previously marked. 
@@ -218,7 +218,7 @@ void MarkCompact::UpdateAndMarkModUnion() { TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); for (auto& space : heap_->GetContinuousSpaces()) { // If the space is immune then we need to mark the references to other spaces. - if (immune_region_.ContainsSpace(space)) { + if (immune_spaces_.ContainsSpace(space)) { accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space); if (table != nullptr) { // TODO: Improve naming. @@ -475,7 +475,7 @@ inline mirror::Object* MarkCompact::GetMarkedForwardAddress(mirror::Object* obj) } mirror::Object* MarkCompact::IsMarked(mirror::Object* object) { - if (immune_region_.ContainsObject(object)) { + if (immune_spaces_.IsInImmuneRegion(object)) { return object; } if (updating_references_) { @@ -498,7 +498,7 @@ void MarkCompact::SweepSystemWeaks() { } bool MarkCompact::ShouldSweepSpace(space::ContinuousSpace* space) const { - return space != space_ && !immune_region_.ContainsSpace(space); + return space != space_ && !immune_spaces_.ContainsSpace(space); } class MoveObjectVisitor { diff --git a/runtime/gc/collector/mark_compact.h b/runtime/gc/collector/mark_compact.h index 8d91939057..8a12094168 100644 --- a/runtime/gc/collector/mark_compact.h +++ b/runtime/gc/collector/mark_compact.h @@ -26,7 +26,7 @@ #include "garbage_collector.h" #include "gc_root.h" #include "gc/accounting/heap_bitmap.h" -#include "immune_region.h" +#include "immune_spaces.h" #include "lock_word.h" #include "object_callbacks.h" #include "offsets.h" @@ -194,8 +194,8 @@ class MarkCompact : public GarbageCollector { accounting::ObjectStack* mark_stack_; - // Immune region, every object inside the immune region is assumed to be marked. - ImmuneRegion immune_region_; + // Every object inside the immune spaces is assumed to be marked. + ImmuneSpaces immune_spaces_; // Bump pointer space which we are collecting. space::BumpPointerSpace* space_; diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc index db516a0a87..5427f88563 100644 --- a/runtime/gc/collector/mark_sweep.cc +++ b/runtime/gc/collector/mark_sweep.cc @@ -86,7 +86,7 @@ void MarkSweep::BindBitmaps() { // Mark all of the spaces we never collect as immune. 
for (const auto& space : GetHeap()->GetContinuousSpaces()) { if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyNeverCollect) { - CHECK(immune_region_.AddContinuousSpace(space)) << "Failed to add space " << *space; + immune_spaces_.AddSpace(space); } } } @@ -115,7 +115,7 @@ void MarkSweep::InitializePhase() { TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); mark_stack_ = heap_->GetMarkStack(); DCHECK(mark_stack_ != nullptr); - immune_region_.Reset(); + immune_spaces_.Reset(); no_reference_class_count_.StoreRelaxed(0); normal_count_.StoreRelaxed(0); class_count_.StoreRelaxed(0); @@ -268,16 +268,41 @@ void MarkSweep::MarkingPhase() { PreCleanCards(); } +class ScanObjectVisitor { + public: + explicit ScanObjectVisitor(MarkSweep* const mark_sweep) ALWAYS_INLINE + : mark_sweep_(mark_sweep) {} + + void operator()(mirror::Object* obj) const + ALWAYS_INLINE + REQUIRES(Locks::heap_bitmap_lock_) + SHARED_REQUIRES(Locks::mutator_lock_) { + if (kCheckLocks) { + Locks::mutator_lock_->AssertSharedHeld(Thread::Current()); + Locks::heap_bitmap_lock_->AssertExclusiveHeld(Thread::Current()); + } + mark_sweep_->ScanObject(obj); + } + + private: + MarkSweep* const mark_sweep_; +}; + void MarkSweep::UpdateAndMarkModUnion() { - for (const auto& space : heap_->GetContinuousSpaces()) { - if (immune_region_.ContainsSpace(space)) { - const char* name = space->IsZygoteSpace() - ? "UpdateAndMarkZygoteModUnionTable" - : "UpdateAndMarkImageModUnionTable"; - TimingLogger::ScopedTiming t(name, GetTimings()); - accounting::ModUnionTable* mod_union_table = heap_->FindModUnionTableFromSpace(space); - CHECK(mod_union_table != nullptr); + for (const auto& space : immune_spaces_.GetSpaces()) { + const char* name = space->IsZygoteSpace() + ? "UpdateAndMarkZygoteModUnionTable" + : "UpdateAndMarkImageModUnionTable"; + DCHECK(space->IsZygoteSpace() || space->IsImageSpace()) << *space; + TimingLogger::ScopedTiming t(name, GetTimings()); + accounting::ModUnionTable* mod_union_table = heap_->FindModUnionTableFromSpace(space); + if (mod_union_table != nullptr) { mod_union_table->UpdateAndMarkReferences(this); + } else { + // No mod-union table, scan all the live bits. This can only occur for app images. + space->GetLiveBitmap()->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()), + reinterpret_cast<uintptr_t>(space->End()), + ScanObjectVisitor(this)); } } } @@ -460,7 +485,7 @@ inline void MarkSweep::MarkObjectNonNull(mirror::Object* obj, // Verify all the objects have the correct pointer installed. obj->AssertReadBarrierPointer(); } - if (immune_region_.ContainsObject(obj)) { + if (immune_spaces_.IsInImmuneRegion(obj)) { if (kCountMarkedObjects) { ++mark_immune_count_; } @@ -501,7 +526,7 @@ inline bool MarkSweep::MarkObjectParallel(mirror::Object* obj) { // Verify all the objects have the correct pointer installed. 
obj->AssertReadBarrierPointer(); } - if (immune_region_.ContainsObject(obj)) { + if (immune_spaces_.IsInImmuneRegion(obj)) { DCHECK(IsMarked(obj) != nullptr); return false; } @@ -606,26 +631,6 @@ void MarkSweep::MarkConcurrentRoots(VisitRootFlags flags) { this, static_cast<VisitRootFlags>(flags | kVisitRootFlagNonMoving)); } -class ScanObjectVisitor { - public: - explicit ScanObjectVisitor(MarkSweep* const mark_sweep) ALWAYS_INLINE - : mark_sweep_(mark_sweep) {} - - void operator()(mirror::Object* obj) const - ALWAYS_INLINE - REQUIRES(Locks::heap_bitmap_lock_) - SHARED_REQUIRES(Locks::mutator_lock_) { - if (kCheckLocks) { - Locks::mutator_lock_->AssertSharedHeld(Thread::Current()); - Locks::heap_bitmap_lock_->AssertExclusiveHeld(Thread::Current()); - } - mark_sweep_->ScanObject(obj); - } - - private: - MarkSweep* const mark_sweep_; -}; - class DelayReferenceReferentVisitor { public: explicit DelayReferenceReferentVisitor(MarkSweep* collector) : collector_(collector) {} @@ -1193,7 +1198,8 @@ void MarkSweep::SweepArray(accounting::ObjectStack* allocations, bool swap_bitma std::vector<space::ContinuousSpace*> sweep_spaces; space::ContinuousSpace* non_moving_space = nullptr; for (space::ContinuousSpace* space : heap_->GetContinuousSpaces()) { - if (space->IsAllocSpace() && !immune_region_.ContainsSpace(space) && + if (space->IsAllocSpace() && + !immune_spaces_.ContainsSpace(space) && space->GetLiveBitmap() != nullptr) { if (space == heap_->GetNonMovingSpace()) { non_moving_space = space; @@ -1422,7 +1428,7 @@ void MarkSweep::ProcessMarkStack(bool paused) { } inline mirror::Object* MarkSweep::IsMarked(mirror::Object* object) { - if (immune_region_.ContainsObject(object)) { + if (immune_spaces_.IsInImmuneRegion(object)) { return object; } if (current_space_bitmap_->HasAddress(object)) { diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h index 8f7df78d53..245f96bdb3 100644 --- a/runtime/gc/collector/mark_sweep.h +++ b/runtime/gc/collector/mark_sweep.h @@ -26,7 +26,7 @@ #include "garbage_collector.h" #include "gc_root.h" #include "gc/accounting/heap_bitmap.h" -#include "immune_region.h" +#include "immune_spaces.h" #include "object_callbacks.h" #include "offsets.h" @@ -314,8 +314,9 @@ class MarkSweep : public GarbageCollector { accounting::ObjectStack* mark_stack_; - // Immune region, every object inside the immune range is assumed to be marked. - ImmuneRegion immune_region_; + // Every object inside the immune spaces is assumed to be marked. Immune spaces that aren't in the + // immune region are handled by the normal marking logic. + ImmuneSpaces immune_spaces_; // Parallel finger. 
AtomicInteger atomic_finger_; diff --git a/runtime/gc/collector/partial_mark_sweep.cc b/runtime/gc/collector/partial_mark_sweep.cc index 15f782aea8..984779484e 100644 --- a/runtime/gc/collector/partial_mark_sweep.cc +++ b/runtime/gc/collector/partial_mark_sweep.cc @@ -39,7 +39,7 @@ void PartialMarkSweep::BindBitmaps() { for (const auto& space : GetHeap()->GetContinuousSpaces()) { if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect) { CHECK(space->IsZygoteSpace()); - CHECK(immune_region_.AddContinuousSpace(space)) << "Failed to add space " << *space; + immune_spaces_.AddSpace(space); } } } diff --git a/runtime/gc/collector/semi_space-inl.h b/runtime/gc/collector/semi_space-inl.h index 06d20f583a..12cf3dbf98 100644 --- a/runtime/gc/collector/semi_space-inl.h +++ b/runtime/gc/collector/semi_space-inl.h @@ -74,7 +74,7 @@ inline void SemiSpace::MarkObject( MarkStackPush(forward_address); } obj_ptr->Assign(forward_address); - } else if (!collect_from_space_only_ && !immune_region_.ContainsObject(obj)) { + } else if (!collect_from_space_only_ && !immune_spaces_.IsInImmuneRegion(obj)) { BitmapSetSlowPathVisitor visitor(this); if (!mark_bitmap_->Set(obj, visitor)) { // This object was not previously marked. diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc index 7f57f30b27..e9497a2223 100644 --- a/runtime/gc/collector/semi_space.cc +++ b/runtime/gc/collector/semi_space.cc @@ -66,8 +66,9 @@ void SemiSpace::BindBitmaps() { for (const auto& space : GetHeap()->GetContinuousSpaces()) { if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyNeverCollect || space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect) { - CHECK(immune_region_.AddContinuousSpace(space)) << "Failed to add space " << *space; + immune_spaces_.AddSpace(space); } else if (space->GetLiveBitmap() != nullptr) { + // TODO: We can probably also add this space to the immune region. if (space == to_space_ || collect_from_space_only_) { if (collect_from_space_only_) { // Bind the bitmaps of the main free list space and the non-moving space we are doing a @@ -144,7 +145,7 @@ void SemiSpace::InitializePhase() { TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); mark_stack_ = heap_->GetMarkStack(); DCHECK(mark_stack_ != nullptr); - immune_region_.Reset(); + immune_spaces_.Reset(); is_large_object_space_immune_ = false; saved_bytes_ = 0; bytes_moved_ = 0; @@ -376,7 +377,13 @@ void SemiSpace::MarkReachableObjects() { << "generational_=" << generational_ << " " << "collect_from_space_only_=" << collect_from_space_only_; accounting::RememberedSet* rem_set = GetHeap()->FindRememberedSetFromSpace(space); - CHECK_EQ(rem_set != nullptr, kUseRememberedSet); + if (kUseRememberedSet) { + // App images currently do not have remembered sets. + DCHECK((space->IsImageSpace() && space != heap_->GetBootImageSpace()) || + rem_set != nullptr); + } else { + DCHECK(rem_set == nullptr); + } if (rem_set != nullptr) { TimingLogger::ScopedTiming t2("UpdateAndMarkRememberedSet", GetTimings()); rem_set->UpdateAndMarkReferences(from_space_, this); @@ -767,7 +774,8 @@ mirror::Object* SemiSpace::IsMarked(mirror::Object* obj) { if (from_space_->HasAddress(obj)) { // Returns either the forwarding address or null. 
return GetForwardingAddressInFromSpace(obj); - } else if (collect_from_space_only_ || immune_region_.ContainsObject(obj) || + } else if (collect_from_space_only_ || + immune_spaces_.IsInImmuneRegion(obj) || to_space_->HasAddress(obj)) { return obj; // Already forwarded, must be marked. } diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h index b9246ca2fc..a905904115 100644 --- a/runtime/gc/collector/semi_space.h +++ b/runtime/gc/collector/semi_space.h @@ -25,7 +25,7 @@ #include "garbage_collector.h" #include "gc_root.h" #include "gc/accounting/heap_bitmap.h" -#include "immune_region.h" +#include "immune_spaces.h" #include "mirror/object_reference.h" #include "object_callbacks.h" #include "offsets.h" @@ -201,8 +201,8 @@ class SemiSpace : public GarbageCollector { // object. accounting::ObjectStack* mark_stack_; - // Immune region, every object inside the immune region is assumed to be marked. - ImmuneRegion immune_region_; + // Every object inside the immune spaces is assumed to be marked. + ImmuneSpaces immune_spaces_; // If true, the large object space is immune. bool is_large_object_space_immune_; diff --git a/runtime/gc/space/dlmalloc_space.cc b/runtime/gc/space/dlmalloc_space.cc index 77f606ddce..e754a52e7e 100644 --- a/runtime/gc/space/dlmalloc_space.cc +++ b/runtime/gc/space/dlmalloc_space.cc @@ -20,6 +20,8 @@ #include "gc/accounting/card_table.h" #include "gc/accounting/space_bitmap-inl.h" #include "gc/heap.h" +#include "jit/jit.h" +#include "jit/jit_code_cache.h" #include "memory_tool_malloc_space-inl.h" #include "mirror/class-inl.h" #include "mirror/object-inl.h" @@ -318,10 +320,17 @@ namespace allocator { // Implement the dlmalloc morecore callback. void* ArtDlMallocMoreCore(void* mspace, intptr_t increment) { - Heap* heap = Runtime::Current()->GetHeap(); + Runtime* runtime = Runtime::Current(); + Heap* heap = runtime->GetHeap(); ::art::gc::space::DlMallocSpace* dlmalloc_space = heap->GetDlMallocSpace(); // Support for multiple DlMalloc provided by a slow path. if (UNLIKELY(dlmalloc_space == nullptr || dlmalloc_space->GetMspace() != mspace)) { + if (LIKELY(runtime->GetJit() != nullptr)) { + jit::JitCodeCache* code_cache = runtime->GetJit()->GetCodeCache(); + if (code_cache->OwnsSpace(mspace)) { + return code_cache->MoreCore(mspace, increment); + } + } dlmalloc_space = nullptr; for (space::ContinuousSpace* space : heap->GetContinuousSpaces()) { if (space->IsDlMallocSpace()) { diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc index 1fe9a03159..e2b2431054 100644 --- a/runtime/gc/space/image_space.cc +++ b/runtime/gc/space/image_space.cc @@ -58,10 +58,7 @@ static int32_t ChooseRelocationOffsetDelta(int32_t min_delta, int32_t max_delta) CHECK_ALIGNED(max_delta, kPageSize); CHECK_LT(min_delta, max_delta); - std::default_random_engine generator; - generator.seed(NanoTime() * getpid()); - std::uniform_int_distribution<int32_t> distribution(min_delta, max_delta); - int32_t r = distribution(generator); + int32_t r = GetRandomNumber<int32_t>(min_delta, max_delta); if (r % 2 == 0) { r = RoundUp(r, kPageSize); } else { diff --git a/runtime/gc/space/image_space.h b/runtime/gc/space/image_space.h index 99207426a0..babd672cee 100644 --- a/runtime/gc/space/image_space.h +++ b/runtime/gc/space/image_space.h @@ -119,7 +119,22 @@ class ImageSpace : public MemMapSpace { bool* has_data, bool *is_global_cache); - private: + // Return the end of the image which includes non-heap objects such as ArtMethods and ArtFields. 
+ uint8_t* GetImageEnd() const { + return Begin() + GetImageHeader().GetImageSize(); + } + + // Return the start of the associated oat file. + uint8_t* GetOatFileBegin() const { + return GetImageHeader().GetOatFileBegin(); + } + + // Return the end of the associated oat file. + uint8_t* GetOatFileEnd() const { + return GetImageHeader().GetOatFileEnd(); + } + + protected: // Tries to initialize an ImageSpace from the given image path, // returning null on error. // @@ -157,6 +172,7 @@ class ImageSpace : public MemMapSpace { const std::string image_location_; + private: DISALLOW_COPY_AND_ASSIGN(ImageSpace); }; diff --git a/runtime/image.h b/runtime/image.h index 20e4159b09..555cf5ddb7 100644 --- a/runtime/image.h +++ b/runtime/image.h @@ -84,7 +84,7 @@ class PACKED(4) ImageHeader { image_roots_(0U), pointer_size_(0U), compile_pic_(0) {} ImageHeader(uint32_t image_begin, - uint32_t image_size_, + uint32_t image_size, ImageSection* sections, uint32_t image_roots, uint32_t oat_checksum, @@ -93,7 +93,7 @@ class PACKED(4) ImageHeader { uint32_t oat_data_end, uint32_t oat_file_end, uint32_t pointer_size, - bool compile_pic_); + bool compile_pic); bool IsValid() const; const char* GetMagic() const; diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc index bf95a0e46f..c9831e67aa 100644 --- a/runtime/interpreter/interpreter_switch_impl.cc +++ b/runtime/interpreter/interpreter_switch_impl.cc @@ -66,6 +66,11 @@ namespace interpreter { } \ } while (false) +#define BACKWARD_BRANCH_INSTRUMENTATION(offset) \ + do { \ + instrumentation->BackwardBranch(self, shadow_frame.GetMethod(), offset); \ + } while (false) + static bool IsExperimentalInstructionEnabled(const Instruction *inst) { DCHECK(inst->IsExperimental()); return Runtime::Current()->AreExperimentalFlagsEnabled(ExperimentalFlags::kLambdas); @@ -542,6 +547,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, PREAMBLE(); int8_t offset = inst->VRegA_10t(inst_data); if (IsBackwardBranch(offset)) { + BACKWARD_BRANCH_INSTRUMENTATION(offset); self->AllowThreadSuspension(); } inst = inst->RelativeAt(offset); @@ -551,6 +557,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, PREAMBLE(); int16_t offset = inst->VRegA_20t(); if (IsBackwardBranch(offset)) { + BACKWARD_BRANCH_INSTRUMENTATION(offset); self->AllowThreadSuspension(); } inst = inst->RelativeAt(offset); @@ -560,6 +567,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, PREAMBLE(); int32_t offset = inst->VRegA_30t(); if (IsBackwardBranch(offset)) { + BACKWARD_BRANCH_INSTRUMENTATION(offset); self->AllowThreadSuspension(); } inst = inst->RelativeAt(offset); @@ -569,6 +577,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, PREAMBLE(); int32_t offset = DoPackedSwitch(inst, shadow_frame, inst_data); if (IsBackwardBranch(offset)) { + BACKWARD_BRANCH_INSTRUMENTATION(offset); self->AllowThreadSuspension(); } inst = inst->RelativeAt(offset); @@ -578,6 +587,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, PREAMBLE(); int32_t offset = DoSparseSwitch(inst, shadow_frame, inst_data); if (IsBackwardBranch(offset)) { + BACKWARD_BRANCH_INSTRUMENTATION(offset); self->AllowThreadSuspension(); } inst = inst->RelativeAt(offset); @@ -681,6 +691,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) { int16_t offset = inst->VRegC_22t(); if 
(IsBackwardBranch(offset)) { + BACKWARD_BRANCH_INSTRUMENTATION(offset); self->AllowThreadSuspension(); } inst = inst->RelativeAt(offset); @@ -695,6 +706,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) { int16_t offset = inst->VRegC_22t(); if (IsBackwardBranch(offset)) { + BACKWARD_BRANCH_INSTRUMENTATION(offset); self->AllowThreadSuspension(); } inst = inst->RelativeAt(offset); @@ -709,6 +721,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) { int16_t offset = inst->VRegC_22t(); if (IsBackwardBranch(offset)) { + BACKWARD_BRANCH_INSTRUMENTATION(offset); self->AllowThreadSuspension(); } inst = inst->RelativeAt(offset); @@ -723,6 +736,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) { int16_t offset = inst->VRegC_22t(); if (IsBackwardBranch(offset)) { + BACKWARD_BRANCH_INSTRUMENTATION(offset); self->AllowThreadSuspension(); } inst = inst->RelativeAt(offset); @@ -737,6 +751,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) { int16_t offset = inst->VRegC_22t(); if (IsBackwardBranch(offset)) { + BACKWARD_BRANCH_INSTRUMENTATION(offset); self->AllowThreadSuspension(); } inst = inst->RelativeAt(offset); @@ -751,6 +766,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, shadow_frame.GetVReg(inst->VRegB_22t(inst_data))) { int16_t offset = inst->VRegC_22t(); if (IsBackwardBranch(offset)) { + BACKWARD_BRANCH_INSTRUMENTATION(offset); self->AllowThreadSuspension(); } inst = inst->RelativeAt(offset); @@ -764,6 +780,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) == 0) { int16_t offset = inst->VRegB_21t(); if (IsBackwardBranch(offset)) { + BACKWARD_BRANCH_INSTRUMENTATION(offset); self->AllowThreadSuspension(); } inst = inst->RelativeAt(offset); @@ -777,6 +794,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) != 0) { int16_t offset = inst->VRegB_21t(); if (IsBackwardBranch(offset)) { + BACKWARD_BRANCH_INSTRUMENTATION(offset); self->AllowThreadSuspension(); } inst = inst->RelativeAt(offset); @@ -790,6 +808,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) < 0) { int16_t offset = inst->VRegB_21t(); if (IsBackwardBranch(offset)) { + BACKWARD_BRANCH_INSTRUMENTATION(offset); self->AllowThreadSuspension(); } inst = inst->RelativeAt(offset); @@ -803,6 +822,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) >= 0) { int16_t offset = inst->VRegB_21t(); if (IsBackwardBranch(offset)) { + BACKWARD_BRANCH_INSTRUMENTATION(offset); self->AllowThreadSuspension(); } inst = inst->RelativeAt(offset); @@ -816,6 +836,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, if (shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) > 0) { int16_t offset = inst->VRegB_21t(); if (IsBackwardBranch(offset)) { + BACKWARD_BRANCH_INSTRUMENTATION(offset); self->AllowThreadSuspension(); } inst = inst->RelativeAt(offset); @@ -829,6 +850,7 @@ JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, if 
(shadow_frame.GetVReg(inst->VRegA_21t(inst_data)) <= 0) { int16_t offset = inst->VRegB_21t(); if (IsBackwardBranch(offset)) { + BACKWARD_BRANCH_INSTRUMENTATION(offset); self->AllowThreadSuspension(); } inst = inst->RelativeAt(offset); diff --git a/runtime/java_vm_ext.cc b/runtime/java_vm_ext.cc index 7cc05f7cd4..58da7f285a 100644 --- a/runtime/java_vm_ext.cc +++ b/runtime/java_vm_ext.cc @@ -736,7 +736,7 @@ bool JavaVMExt::LoadNativeLibrary(JNIEnv* env, const std::string& path, jobject // it's okay to decode it without worrying about unexpectedly marking it alive. mirror::ClassLoader* loader = soa.Decode<mirror::ClassLoader*>(class_loader); class_loader_allocator = - Runtime::Current()->GetClassLinker()->GetAllocatorForClassLoader(loader); + Runtime::Current()->GetClassLinker()->GetOrCreateAllocatorForClassLoader(loader); CHECK(class_loader_allocator != nullptr); } if (library != nullptr) { diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc index f69115159f..27a0e2d1af 100644 --- a/runtime/jit/jit.cc +++ b/runtime/jit/jit.cc @@ -24,6 +24,8 @@ #include "interpreter/interpreter.h" #include "jit_code_cache.h" #include "jit_instrumentation.h" +#include "oat_file_manager.h" +#include "offline_profiling_info.h" #include "runtime.h" #include "runtime_options.h" #include "utils.h" @@ -34,14 +36,18 @@ namespace jit { JitOptions* JitOptions::CreateFromRuntimeArguments(const RuntimeArgumentMap& options) { auto* jit_options = new JitOptions; jit_options->use_jit_ = options.GetOrDefault(RuntimeArgumentMap::UseJIT); - jit_options->code_cache_capacity_ = - options.GetOrDefault(RuntimeArgumentMap::JITCodeCacheCapacity); + jit_options->code_cache_initial_capacity_ = + options.GetOrDefault(RuntimeArgumentMap::JITCodeCacheInitialCapacity); + jit_options->code_cache_max_capacity_ = + options.GetOrDefault(RuntimeArgumentMap::JITCodeCacheMaxCapacity); jit_options->compile_threshold_ = options.GetOrDefault(RuntimeArgumentMap::JITCompileThreshold); jit_options->warmup_threshold_ = options.GetOrDefault(RuntimeArgumentMap::JITWarmupThreshold); jit_options->dump_info_on_shutdown_ = options.Exists(RuntimeArgumentMap::DumpJITInfoOnShutdown); + jit_options->save_profiling_info_ = + options.GetOrDefault(RuntimeArgumentMap::JITSaveProfilingInfo);; return jit_options; } @@ -69,13 +75,19 @@ Jit* Jit::Create(JitOptions* options, std::string* error_msg) { if (!jit->LoadCompiler(error_msg)) { return nullptr; } - jit->code_cache_.reset(JitCodeCache::Create(options->GetCodeCacheCapacity(), error_msg)); + jit->code_cache_.reset(JitCodeCache::Create( + options->GetCodeCacheInitialCapacity(), options->GetCodeCacheMaxCapacity(), error_msg)); if (jit->GetCodeCache() == nullptr) { return nullptr; } - LOG(INFO) << "JIT created with code_cache_capacity=" - << PrettySize(options->GetCodeCacheCapacity()) - << " compile_threshold=" << options->GetCompileThreshold(); + jit->offline_profile_info_.reset(nullptr); + if (options->GetSaveProfilingInfo()) { + jit->offline_profile_info_.reset(new OfflineProfilingInfo()); + } + LOG(INFO) << "JIT created with initial_capacity=" + << PrettySize(options->GetCodeCacheInitialCapacity()) + << ", max_capacity=" << PrettySize(options->GetCodeCacheMaxCapacity()) + << ", compile_threshold=" << options->GetCompileThreshold(); return jit.release(); } @@ -148,6 +160,33 @@ void Jit::DeleteThreadPool() { } } +void Jit::SaveProfilingInfo(const std::string& filename) { + if (offline_profile_info_ == nullptr) { + return; + } + // Note that we can't check the PrimaryOatFile when constructing the 
offline_profilie_info_ + // because it becomes known to the Runtime after we create and initialize the JIT. + const OatFile* primary_oat_file = Runtime::Current()->GetOatFileManager().GetPrimaryOatFile(); + if (primary_oat_file == nullptr) { + LOG(WARNING) << "Couldn't find a primary oat file when trying to save profile info to " + << filename; + return; + } + + uint64_t last_update_ns = code_cache_->GetLastUpdateTimeNs(); + if (offline_profile_info_->NeedsSaving(last_update_ns)) { + VLOG(profiler) << "Iniate save profiling information to: " << filename; + std::set<ArtMethod*> methods; + { + ScopedObjectAccess soa(Thread::Current()); + code_cache_->GetCompiledArtMethods(primary_oat_file, methods); + } + offline_profile_info_->SaveProfilingInfo(filename, last_update_ns, methods); + } else { + VLOG(profiler) << "No need to save profiling information to: " << filename; + } +} + Jit::~Jit() { if (dump_info_on_shutdown_) { DumpInfo(LOG(INFO)); diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h index 1f89f9b1b7..630eba34af 100644 --- a/runtime/jit/jit.h +++ b/runtime/jit/jit.h @@ -26,6 +26,7 @@ #include "gc_root.h" #include "jni.h" #include "object_callbacks.h" +#include "offline_profiling_info.h" #include "thread_pool.h" namespace art { @@ -71,6 +72,8 @@ class Jit { return instrumentation_cache_.get(); } + void SaveProfilingInfo(const std::string& filename); + private: Jit(); bool LoadCompiler(std::string* error_msg); @@ -90,6 +93,7 @@ class Jit { std::unique_ptr<jit::JitCodeCache> code_cache_; CompilerCallbacks* compiler_callbacks_; // Owned by the jit compiler. + std::unique_ptr<OfflineProfilingInfo> offline_profile_info_; DISALLOW_COPY_AND_ASSIGN(Jit); }; @@ -102,28 +106,44 @@ class JitOptions { size_t GetWarmupThreshold() const { return warmup_threshold_; } - size_t GetCodeCacheCapacity() const { - return code_cache_capacity_; + size_t GetCodeCacheInitialCapacity() const { + return code_cache_initial_capacity_; + } + size_t GetCodeCacheMaxCapacity() const { + return code_cache_max_capacity_; } bool DumpJitInfoOnShutdown() const { return dump_info_on_shutdown_; } + bool GetSaveProfilingInfo() const { + return save_profiling_info_; + } bool UseJIT() const { return use_jit_; } void SetUseJIT(bool b) { use_jit_ = b; } + void SetSaveProfilingInfo(bool b) { + save_profiling_info_ = b; + } private: bool use_jit_; - size_t code_cache_capacity_; + size_t code_cache_initial_capacity_; + size_t code_cache_max_capacity_; size_t compile_threshold_; size_t warmup_threshold_; bool dump_info_on_shutdown_; - - JitOptions() : use_jit_(false), code_cache_capacity_(0), compile_threshold_(0), - dump_info_on_shutdown_(false) { } + bool save_profiling_info_; + + JitOptions() + : use_jit_(false), + code_cache_initial_capacity_(0), + code_cache_max_capacity_(0), + compile_threshold_(0), + dump_info_on_shutdown_(false), + save_profiling_info_(false) { } DISALLOW_COPY_AND_ASSIGN(JitOptions); }; diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc index a291a09430..804d69fbf8 100644 --- a/runtime/jit/jit_code_cache.cc +++ b/runtime/jit/jit_code_cache.cc @@ -19,6 +19,7 @@ #include <sstream> #include "art_method-inl.h" +#include "base/time_utils.h" #include "entrypoints/runtime_asm_entrypoints.h" #include "gc/accounting/bitmap-inl.h" #include "jit/profiling_info.h" @@ -44,73 +45,90 @@ static constexpr int kProtCode = PROT_READ | PROT_EXEC; } \ } while (false) \ -JitCodeCache* JitCodeCache::Create(size_t capacity, std::string* error_msg) { - CHECK_GT(capacity, 0U); - CHECK_LT(capacity, 
kMaxCapacity); +JitCodeCache* JitCodeCache::Create(size_t initial_capacity, + size_t max_capacity, + std::string* error_msg) { + CHECK_GE(max_capacity, initial_capacity); + // We need to have 32 bit offsets from method headers in code cache which point to things + // in the data cache. If the maps are more than 4G apart, having multiple maps wouldn't work. + // Ensure we're below 1 GB to be safe. + if (max_capacity > 1 * GB) { + std::ostringstream oss; + oss << "Maxium code cache capacity is limited to 1 GB, " + << PrettySize(max_capacity) << " is too big"; + *error_msg = oss.str(); + return nullptr; + } + std::string error_str; // Map name specific for android_os_Debug.cpp accounting. MemMap* data_map = MemMap::MapAnonymous( - "data-code-cache", nullptr, capacity, kProtAll, false, false, &error_str); + "data-code-cache", nullptr, max_capacity, kProtAll, false, false, &error_str); if (data_map == nullptr) { std::ostringstream oss; - oss << "Failed to create read write execute cache: " << error_str << " size=" << capacity; + oss << "Failed to create read write execute cache: " << error_str << " size=" << max_capacity; *error_msg = oss.str(); return nullptr; } + // Align both capacities to page size, as that's the unit mspaces use. + initial_capacity = RoundDown(initial_capacity, 2 * kPageSize); + max_capacity = RoundDown(max_capacity, 2 * kPageSize); + // Data cache is 1 / 2 of the map. // TODO: Make this variable? - size_t data_size = RoundUp(data_map->Size() / 2, kPageSize); - size_t code_size = data_map->Size() - data_size; + size_t data_size = max_capacity / 2; + size_t code_size = max_capacity - data_size; + DCHECK_EQ(code_size + data_size, max_capacity); uint8_t* divider = data_map->Begin() + data_size; - // We need to have 32 bit offsets from method headers in code cache which point to things - // in the data cache. If the maps are more than 4G apart, having multiple maps wouldn't work. 
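To make the capacity splitting above concrete, here is a small worked sketch of the arithmetic, assuming kPageSize is 4096 and the release-build defaults of 64 KB initial / 64 MB maximum capacity; the values are only illustrative.

    #include <cassert>
    #include <cstddef>

    int main() {
      const size_t kPageSize = 4096;
      size_t initial_capacity = 64 * 1024;        // kInitialCapacity on release builds
      size_t max_capacity     = 64 * 1024 * 1024; // kMaxCapacity

      // Both capacities are rounded down to a multiple of 2 * kPageSize so that
      // each half (code and data) stays page aligned.
      initial_capacity = (initial_capacity / (2 * kPageSize)) * (2 * kPageSize);
      max_capacity     = (max_capacity / (2 * kPageSize)) * (2 * kPageSize);

      // The single anonymous map is reserved at max_capacity and split in half...
      size_t data_size = max_capacity / 2;          // 32 MB reserved for data
      size_t code_size = max_capacity - data_size;  // 32 MB reserved for code
      assert(code_size + data_size == max_capacity);

      // ...but the mspaces only start out with the initial footprint.
      size_t initial_data = initial_capacity / 2;             // 32 KB
      size_t initial_code = initial_capacity - initial_data;  // 32 KB
      assert(initial_code + initial_data == initial_capacity);
      return 0;
    }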
MemMap* code_map = data_map->RemapAtEnd(divider, "jit-code-cache", kProtAll, &error_str); if (code_map == nullptr) { std::ostringstream oss; - oss << "Failed to create read write execute cache: " << error_str << " size=" << capacity; + oss << "Failed to create read write execute cache: " << error_str << " size=" << max_capacity; *error_msg = oss.str(); return nullptr; } - DCHECK_EQ(code_map->Size(), code_size); DCHECK_EQ(code_map->Begin(), divider); - return new JitCodeCache(code_map, data_map); + data_size = initial_capacity / 2; + code_size = initial_capacity - data_size; + DCHECK_EQ(code_size + data_size, initial_capacity); + return new JitCodeCache(code_map, data_map, code_size, data_size, max_capacity); } -JitCodeCache::JitCodeCache(MemMap* code_map, MemMap* data_map) +JitCodeCache::JitCodeCache(MemMap* code_map, + MemMap* data_map, + size_t initial_code_capacity, + size_t initial_data_capacity, + size_t max_capacity) : lock_("Jit code cache", kJitCodeCacheLock), lock_cond_("Jit code cache variable", lock_), collection_in_progress_(false), code_map_(code_map), - data_map_(data_map) { + data_map_(data_map), + max_capacity_(max_capacity), + current_capacity_(initial_code_capacity + initial_data_capacity), + code_end_(initial_code_capacity), + data_end_(initial_data_capacity), + has_done_one_collection_(false), + last_update_time_ns_(0) { - code_mspace_ = create_mspace_with_base(code_map_->Begin(), code_map_->Size(), false /*locked*/); - data_mspace_ = create_mspace_with_base(data_map_->Begin(), data_map_->Size(), false /*locked*/); + code_mspace_ = create_mspace_with_base(code_map_->Begin(), code_end_, false /*locked*/); + data_mspace_ = create_mspace_with_base(data_map_->Begin(), data_end_, false /*locked*/); if (code_mspace_ == nullptr || data_mspace_ == nullptr) { PLOG(FATAL) << "create_mspace_with_base failed"; } - // Prevent morecore requests from the mspace. - mspace_set_footprint_limit(code_mspace_, code_map_->Size()); - mspace_set_footprint_limit(data_mspace_, data_map_->Size()); + SetFootprintLimit(current_capacity_); CHECKED_MPROTECT(code_map_->Begin(), code_map_->Size(), kProtCode); CHECKED_MPROTECT(data_map_->Begin(), data_map_->Size(), kProtData); - live_bitmap_.reset(CodeCacheBitmap::Create("code-cache-bitmap", - reinterpret_cast<uintptr_t>(code_map_->Begin()), - reinterpret_cast<uintptr_t>(code_map_->End()))); - - if (live_bitmap_.get() == nullptr) { - PLOG(FATAL) << "creating bitmaps for the JIT code cache failed"; - } - - VLOG(jit) << "Created jit code cache: data size=" - << PrettySize(data_map_->Size()) - << ", code size=" - << PrettySize(code_map_->Size()); + VLOG(jit) << "Created jit code cache: initial data size=" + << PrettySize(initial_data_capacity) + << ", initial code size=" + << PrettySize(initial_code_capacity); } bool JitCodeCache::ContainsPc(const void* ptr) const { @@ -298,6 +316,7 @@ uint8_t* JitCodeCache::CommitCodeInternal(Thread* self, // code. 
GetLiveBitmap()->AtomicTestAndSet(FromCodeToAllocation(code_ptr)); } + last_update_time_ns_ = NanoTime(); VLOG(jit) << "JIT added " << PrettyMethod(method) << "@" << method @@ -433,13 +452,48 @@ class MarkCodeClosure FINAL : public Closure { Barrier* const barrier_; }; -void JitCodeCache::GarbageCollectCache(Thread* self) { +void JitCodeCache::NotifyCollectionDone(Thread* self) { + collection_in_progress_ = false; + lock_cond_.Broadcast(self); +} + +void JitCodeCache::SetFootprintLimit(size_t new_footprint) { + size_t per_space_footprint = new_footprint / 2; + DCHECK(IsAlignedParam(per_space_footprint, kPageSize)); + DCHECK_EQ(per_space_footprint * 2, new_footprint); + mspace_set_footprint_limit(data_mspace_, per_space_footprint); + { + ScopedCodeCacheWrite scc(code_map_.get()); + mspace_set_footprint_limit(code_mspace_, per_space_footprint); + } +} + +bool JitCodeCache::IncreaseCodeCacheCapacity() { + if (current_capacity_ == max_capacity_) { + return false; + } + + // Double the capacity if we're below 1MB, or increase it by 1MB if + // we're above. + if (current_capacity_ < 1 * MB) { + current_capacity_ *= 2; + } else { + current_capacity_ += 1 * MB; + } + if (current_capacity_ > max_capacity_) { + current_capacity_ = max_capacity_; + } + if (!kIsDebugBuild || VLOG_IS_ON(jit)) { - LOG(INFO) << "Clearing code cache, code=" - << PrettySize(CodeCacheSize()) - << ", data=" << PrettySize(DataCacheSize()); + LOG(INFO) << "Increasing code cache capacity to " << PrettySize(current_capacity_); } + SetFootprintLimit(current_capacity_); + + return true; +} + +void JitCodeCache::GarbageCollectCache(Thread* self) { instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation(); // Wait for an existing collection, or let everyone know we are starting one. @@ -452,6 +506,28 @@ void JitCodeCache::GarbageCollectCache(Thread* self) { collection_in_progress_ = true; } } + + // Check if we just need to grow the capacity. If we don't, allocate the bitmap while + // we hold the lock. + { + MutexLock mu(self, lock_); + if (has_done_one_collection_ && IncreaseCodeCacheCapacity()) { + has_done_one_collection_ = false; + NotifyCollectionDone(self); + return; + } else { + live_bitmap_.reset(CodeCacheBitmap::Create( + "code-cache-bitmap", + reinterpret_cast<uintptr_t>(code_map_->Begin()), + reinterpret_cast<uintptr_t>(code_map_->Begin() + current_capacity_ / 2))); + } + } + + if (!kIsDebugBuild || VLOG_IS_ON(jit)) { + LOG(INFO) << "Clearing code cache, code=" + << PrettySize(CodeCacheSize()) + << ", data=" << PrettySize(DataCacheSize()); + } // Walk over all compiled methods and set the entry points of these // methods to interpreter. { @@ -500,7 +576,6 @@ void JitCodeCache::GarbageCollectCache(Thread* self) { } } } - GetLiveBitmap()->Bitmap::Clear(); // Free all profiling info. for (ProfilingInfo* info : profiling_infos_) { @@ -509,8 +584,9 @@ void JitCodeCache::GarbageCollectCache(Thread* self) { } profiling_infos_.clear(); - collection_in_progress_ = false; - lock_cond_.Broadcast(self); + live_bitmap_.reset(nullptr); + has_done_one_collection_ = true; + NotifyCollectionDone(self); } if (!kIsDebugBuild || VLOG_IS_ON(jit)) { @@ -589,5 +665,34 @@ ProfilingInfo* JitCodeCache::AddProfilingInfoInternal(Thread* self, return info; } +// NO_THREAD_SAFETY_ANALYSIS as this is called from mspace code, at which point the lock +// is already held. 
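The MoreCore callback defined next effectively treats code_end_ and data_end_ as bump pointers into the already-reserved mapping: dlmalloc asks for "more core" and is handed the next chunk of the map. A minimal stand-alone sketch of that idea, with an invented BumpRegion type rather than the real JitCodeCache:

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    struct BumpRegion {
      uint8_t* base;      // beginning of the reserved map
      size_t end_offset;  // current footprint, analogous to code_end_/data_end_

      void* MoreCore(intptr_t increment) {
        size_t old_offset = end_offset;
        end_offset += increment;      // grow (or shrink) the footprint
        return base + old_offset;     // hand dlmalloc the next chunk
      }
    };

    int main() {
      static uint8_t backing[1 << 16];
      BumpRegion region{backing, 4096};        // start with a 4 KB footprint
      void* extra = region.MoreCore(4096);     // dlmalloc asks for 4 KB more
      std::printf("new chunk at offset %zu\n",
                  static_cast<size_t>(static_cast<uint8_t*>(extra) - region.base));
      return 0;
    }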
+void* JitCodeCache::MoreCore(const void* mspace, intptr_t increment) NO_THREAD_SAFETY_ANALYSIS { + if (code_mspace_ == mspace) { + size_t result = code_end_; + code_end_ += increment; + return reinterpret_cast<void*>(result + code_map_->Begin()); + } else { + DCHECK_EQ(data_mspace_, mspace); + size_t result = data_end_; + data_end_ += increment; + return reinterpret_cast<void*>(result + data_map_->Begin()); + } +} + +void JitCodeCache::GetCompiledArtMethods(const OatFile* oat_file, + std::set<ArtMethod*>& methods) { + MutexLock mu(Thread::Current(), lock_); + for (auto it : method_code_map_) { + if (it.second->GetDexFile()->GetOatDexFile()->GetOatFile() == oat_file) { + methods.insert(it.second); + } + } +} + +uint64_t JitCodeCache::GetLastUpdateTimeNs() { + MutexLock mu(Thread::Current(), lock_); + return last_update_time_ns_; +} } // namespace jit } // namespace art diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h index 131446c484..acd7c62940 100644 --- a/runtime/jit/jit_code_cache.h +++ b/runtime/jit/jit_code_cache.h @@ -41,20 +41,20 @@ namespace jit { class JitInstrumentationCache; -// Alignment that will suit all architectures. +// Alignment in bits that will suit all architectures. static constexpr int kJitCodeAlignment = 16; using CodeCacheBitmap = gc::accounting::MemoryRangeBitmap<kJitCodeAlignment>; class JitCodeCache { public: - static constexpr size_t kMaxCapacity = 1 * GB; + static constexpr size_t kMaxCapacity = 64 * MB; // Put the default to a very low amount for debug builds to stress the code cache // collection. - static constexpr size_t kDefaultCapacity = kIsDebugBuild ? 20 * KB : 2 * MB; + static constexpr size_t kInitialCapacity = kIsDebugBuild ? 16 * KB : 64 * KB; // Create the code cache with a code + data capacity equal to "capacity", error message is passed // in the out arg error_msg. - static JitCodeCache* Create(size_t capacity, std::string* error_msg); + static JitCodeCache* Create(size_t initial_capacity, size_t max_capacity, std::string* error_msg); // Number of bytes allocated in the code cache. size_t CodeCacheSize() REQUIRES(!lock_); @@ -133,9 +133,26 @@ class JitCodeCache { REQUIRES(!lock_) SHARED_REQUIRES(Locks::mutator_lock_); + bool OwnsSpace(const void* mspace) const NO_THREAD_SAFETY_ANALYSIS { + return mspace == code_mspace_ || mspace == data_mspace_; + } + + void* MoreCore(const void* mspace, intptr_t increment); + + // Adds to `methods` all the compiled ArtMethods which are part of the given `oat_file`. + void GetCompiledArtMethods(const OatFile* oat_file, std::set<ArtMethod*>& methods) + REQUIRES(!lock_) + SHARED_REQUIRES(Locks::mutator_lock_); + + uint64_t GetLastUpdateTimeNs() REQUIRES(!lock_); + private: - // Take ownership of code_mem_map. - JitCodeCache(MemMap* code_map, MemMap* data_map); + // Take ownership of maps. + JitCodeCache(MemMap* code_map, + MemMap* data_map, + size_t initial_code_capacity, + size_t initial_data_capacity, + size_t max_capacity); // Internal version of 'CommitCode' that will not retry if the // allocation fails. Return null if the allocation fails. @@ -172,6 +189,16 @@ class JitCodeCache { // Number of bytes allocated in the data cache. size_t DataCacheSizeLocked() REQUIRES(lock_); + // Notify all waiting threads that a collection is done. + void NotifyCollectionDone(Thread* self) REQUIRES(lock_); + + // Try to increase the current capacity of the code cache. Return whether we + // succeeded at doing so. 
+ bool IncreaseCodeCacheCapacity() REQUIRES(lock_); + + // Set the footprint limit of the code cache. + void SetFootprintLimit(size_t new_footprint) REQUIRES(lock_); + // Lock for guarding allocations, collections, and the method_code_map_. Mutex lock_; // Condition to wait on during collection. @@ -193,6 +220,24 @@ class JitCodeCache { // ProfilingInfo objects we have allocated. std::vector<ProfilingInfo*> profiling_infos_ GUARDED_BY(lock_); + // The maximum capacity in bytes this code cache can go to. + size_t max_capacity_ GUARDED_BY(lock_); + + // The current capacity in bytes of the code cache. + size_t current_capacity_ GUARDED_BY(lock_); + + // The current footprint in bytes of the code portion of the code cache. + size_t code_end_ GUARDED_BY(lock_); + + // The current footprint in bytes of the data portion of the code cache. + size_t data_end_ GUARDED_BY(lock_); + + // Whether a collection has already been done on the current capacity. + bool has_done_one_collection_ GUARDED_BY(lock_); + + // Last time the the code_cache was updated. + uint64_t last_update_time_ns_ GUARDED_BY(lock_); + DISALLOW_IMPLICIT_CONSTRUCTORS(JitCodeCache); }; diff --git a/runtime/jit/offline_profiling_info.cc b/runtime/jit/offline_profiling_info.cc new file mode 100644 index 0000000000..4450653a90 --- /dev/null +++ b/runtime/jit/offline_profiling_info.cc @@ -0,0 +1,171 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "offline_profiling_info.h" + +#include <fstream> +#include <set> +#include <sys/file.h> +#include <sys/stat.h> +#include <sys/uio.h> + +#include "art_method-inl.h" +#include "base/mutex.h" +#include "jit/profiling_info.h" +#include "safe_map.h" +#include "utils.h" + +namespace art { + +// An arbitrary value to throttle save requests. Set to 500ms for now. +static constexpr const uint64_t kMilisecondsToNano = 1000000; +static constexpr const uint64_t kMinimumTimeBetweenSavesNs = 500 * kMilisecondsToNano; + +bool OfflineProfilingInfo::NeedsSaving(uint64_t last_update_time_ns) const { + return last_update_time_ns - last_update_time_ns_.LoadRelaxed() > kMinimumTimeBetweenSavesNs; +} + +void OfflineProfilingInfo::SaveProfilingInfo(const std::string& filename, + uint64_t last_update_time_ns, + const std::set<ArtMethod*>& methods) { + if (!NeedsSaving(last_update_time_ns)) { + VLOG(profiler) << "No need to saved profile info to " << filename; + return; + } + + if (methods.empty()) { + VLOG(profiler) << "No info to save to " << filename; + return; + } + + DexFileToMethodsMap info; + { + ScopedObjectAccess soa(Thread::Current()); + for (auto it = methods.begin(); it != methods.end(); it++) { + AddMethodInfo(*it, &info); + } + } + + // This doesn't need locking because we are trying to lock the file for exclusive + // access and fail immediately if we can't. 
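For reference, a toy sketch of one line of the text format documented in Serialize() below (multidex suffix, dex location checksum, then method ids). The suffix, checksum, and method ids are the illustrative values from that comment; none of the real DexFile or SafeMap types are used.

    #include <cstdint>
    #include <iostream>
    #include <set>
    #include <sstream>
    #include <string>

    int main() {
      const char kFieldSeparator = ',';
      std::string multidex_suffix = ":classes5.dex";  // hypothetical fifth dex file
      uint32_t checksum = 218490184u;
      std::set<uint32_t> method_ids = {13, 39, 49, 1};

      std::ostringstream os;
      os << multidex_suffix << kFieldSeparator << checksum;
      for (uint32_t id : method_ids) {
        os << kFieldSeparator << id;
      }
      std::cout << os.str() << '\n';  // ":classes5.dex,218490184,1,13,39,49"
      return 0;
    }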
+ if (Serialize(filename, info)) { + last_update_time_ns_.StoreRelaxed(last_update_time_ns); + VLOG(profiler) << "Successfully saved profile info to " + << filename << " with time stamp: " << last_update_time_ns; + } +} + + +void OfflineProfilingInfo::AddMethodInfo(ArtMethod* method, DexFileToMethodsMap* info) { + DCHECK(method != nullptr); + const DexFile* dex_file = method->GetDexFile(); + + auto info_it = info->find(dex_file); + if (info_it == info->end()) { + info_it = info->Put(dex_file, std::set<uint32_t>()); + } + info_it->second.insert(method->GetDexMethodIndex()); +} + +static int OpenOrCreateFile(const std::string& filename) { + // TODO(calin) allow the shared uid of the app to access the file. + int fd = open(filename.c_str(), + O_CREAT | O_WRONLY | O_TRUNC | O_NOFOLLOW | O_CLOEXEC, + S_IRUSR | S_IWUSR); + if (fd < 0) { + PLOG(WARNING) << "Failed to open profile file " << filename; + return -1; + } + + // Lock the file for exclusive access but don't wait if we can't lock it. + int err = flock(fd, LOCK_EX | LOCK_NB); + if (err < 0) { + PLOG(WARNING) << "Failed to lock profile file " << filename; + return -1; + } + + return fd; +} + +static bool CloseDescriptorForFile(int fd, const std::string& filename) { + // Now unlock the file, allowing another process in. + int err = flock(fd, LOCK_UN); + if (err < 0) { + PLOG(WARNING) << "Failed to unlock profile file " << filename; + return false; + } + + // Done, close the file. + err = ::close(fd); + if (err < 0) { + PLOG(WARNING) << "Failed to close descriptor for profile file" << filename; + return false; + } + + return true; +} + +static void WriteToFile(int fd, const std::ostringstream& os) { + std::string data(os.str()); + const char *p = data.c_str(); + size_t length = data.length(); + do { + int n = ::write(fd, p, length); + p += n; + length -= n; + } while (length > 0); +} + +static constexpr char kFieldSeparator = ','; +static constexpr char kLineSeparator = '\n'; + +/** + * Serialization format: + * multidex_suffix1,dex_location_checksum1,method_id11,method_id12... + * multidex_suffix2,dex_location_checksum2,method_id21,method_id22... + * e.g. + * ,131232145,11,23,454,54 -> this is the first dex file, it has no multidex suffix + * :classes5.dex,218490184,39,13,49,1 -> this is the fifth dex file. + **/ +bool OfflineProfilingInfo::Serialize(const std::string& filename, + const DexFileToMethodsMap& info) const { + int fd = OpenOrCreateFile(filename); + if (fd == -1) { + return false; + } + + // TODO(calin): Merge with a previous existing profile. + // TODO(calin): Profile this and see how much memory it takes. If too much, + // write to file directly. + std::ostringstream os; + for (auto it : info) { + const DexFile* dex_file = it.first; + const std::set<uint32_t>& method_dex_ids = it.second; + + os << DexFile::GetMultiDexSuffix(dex_file->GetLocation()) + << kFieldSeparator + << dex_file->GetLocationChecksum(); + for (auto method_it : method_dex_ids) { + os << kFieldSeparator << method_it; + } + os << kLineSeparator; + } + + WriteToFile(fd, os); + + return CloseDescriptorForFile(fd, filename); +} +} // namespace art diff --git a/runtime/jit/offline_profiling_info.h b/runtime/jit/offline_profiling_info.h new file mode 100644 index 0000000000..e3117eb5ee --- /dev/null +++ b/runtime/jit/offline_profiling_info.h @@ -0,0 +1,59 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_RUNTIME_JIT_OFFLINE_PROFILING_INFO_H_ +#define ART_RUNTIME_JIT_OFFLINE_PROFILING_INFO_H_ + +#include <set> + +#include "atomic.h" +#include "dex_file.h" +#include "safe_map.h" + +namespace art { + +class ArtMethod; + +/** + * Profiling information in a format that can be serialized to disk. + * It is a serialize-friendly format based on information collected + * by the interpreter (ProfileInfo). + * Currently it stores only the hot compiled methods. + */ +class OfflineProfilingInfo { + public: + bool NeedsSaving(uint64_t last_update_time_ns) const; + void SaveProfilingInfo(const std::string& filename, + uint64_t last_update_time_ns, + const std::set<ArtMethod*>& methods); + + private: + // Map identifying the location of the profiled methods. + // dex_file_ -> [dex_method_index]+ + using DexFileToMethodsMap = SafeMap<const DexFile*, std::set<uint32_t>>; + + void AddMethodInfo(ArtMethod* method, DexFileToMethodsMap* info) + SHARED_REQUIRES(Locks::mutator_lock_); + bool Serialize(const std::string& filename, const DexFileToMethodsMap& info) const; + + // TODO(calin): Verify if Atomic is really needed (are we sure to be called from a + // singe thread?) + Atomic<uint64_t> last_update_time_ns_; +}; + +} // namespace art + +#endif // ART_RUNTIME_JIT_OFFLINE_PROFILING_INFO_H_ diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc index 415109fb06..5e3fa199e5 100644 --- a/runtime/jni_internal.cc +++ b/runtime/jni_internal.cc @@ -1689,7 +1689,8 @@ class JNI { } else { CHECK_NON_NULL_MEMCPY_ARGUMENT(length, buf); const jchar* chars = s->GetValue(); - ConvertUtf16ToModifiedUtf8(buf, chars + start, length); + size_t bytes = CountUtf8Bytes(chars + start, length); + ConvertUtf16ToModifiedUtf8(buf, bytes, chars + start, length); } } @@ -1772,7 +1773,7 @@ class JNI { char* bytes = new char[byte_count + 1]; CHECK(bytes != nullptr); // bionic aborts anyway. const uint16_t* chars = s->GetValue(); - ConvertUtf16ToModifiedUtf8(bytes, chars, s->GetLength()); + ConvertUtf16ToModifiedUtf8(bytes, byte_count, chars, s->GetLength()); bytes[byte_count] = '\0'; return bytes; } diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h index 5c12091ecb..460342807a 100644 --- a/runtime/mirror/object-inl.h +++ b/runtime/mirror/object-inl.h @@ -163,6 +163,7 @@ inline void Object::SetReadBarrierPointer(Object* rb_ptr) { #endif } +template<bool kCasRelease> inline bool Object::AtomicSetReadBarrierPointer(Object* expected_rb_ptr, Object* rb_ptr) { #ifdef USE_BAKER_READ_BARRIER DCHECK(kUseBakerReadBarrier); @@ -181,10 +182,13 @@ inline bool Object::AtomicSetReadBarrierPointer(Object* expected_rb_ptr, Object* static_cast<uint32_t>(reinterpret_cast<uintptr_t>(expected_rb_ptr))); new_lw = lw; new_lw.SetReadBarrierState(static_cast<uint32_t>(reinterpret_cast<uintptr_t>(rb_ptr))); - // This CAS is a CAS release so that when GC updates all the fields of an object and then - // changes the object from gray to black, the field updates (stores) will be visible (won't be - // reordered after this CAS.) 
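A hedged sketch of the kCasRelease idea introduced in object-inl.h below: select the CAS memory order at compile time through a template parameter. It only mirrors the shape of the change using std::atomic; it is not the Object/LockWord code.

    #include <atomic>
    #include <cstdint>

    template <bool kCasRelease = false>
    bool CasWord(std::atomic<uint32_t>& word, uint32_t expected, uint32_t desired) {
      return word.compare_exchange_strong(
          expected, desired,
          kCasRelease ? std::memory_order_release : std::memory_order_relaxed,
          std::memory_order_relaxed);
    }

    int main() {
      std::atomic<uint32_t> lock_word{0};
      bool relaxed_ok = CasWord(lock_word, 0u, 1u);        // default: relaxed CAS
      bool release_ok = CasWord<true>(lock_word, 1u, 2u);  // GC gray-to-black: release CAS
      return (relaxed_ok && release_ok) ? 0 : 1;
    }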
- } while (!CasLockWordWeakRelease(expected_lw, new_lw)); + // ConcurrentCopying::ProcessMarkStackRef uses this with kCasRelease == true. + // If kCasRelease == true, use a CAS release so that when GC updates all the fields of + // an object and then changes the object from gray to black, the field updates (stores) will be + // visible (won't be reordered after this CAS.) + } while (!(kCasRelease ? + CasLockWordWeakRelease(expected_lw, new_lw) : + CasLockWordWeakRelaxed(expected_lw, new_lw))); return true; #elif USE_BROOKS_READ_BARRIER DCHECK(kUseBrooksReadBarrier); diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h index 5c6520fcab..71e704e704 100644 --- a/runtime/mirror/object.h +++ b/runtime/mirror/object.h @@ -92,13 +92,13 @@ class MANAGED LOCKABLE Object { void SetClass(Class* new_klass) SHARED_REQUIRES(Locks::mutator_lock_); Object* GetReadBarrierPointer() SHARED_REQUIRES(Locks::mutator_lock_); + #ifndef USE_BAKER_OR_BROOKS_READ_BARRIER NO_RETURN #endif void SetReadBarrierPointer(Object* rb_ptr) SHARED_REQUIRES(Locks::mutator_lock_); -#ifndef USE_BAKER_OR_BROOKS_READ_BARRIER - NO_RETURN -#endif + + template<bool kCasRelease = false> ALWAYS_INLINE bool AtomicSetReadBarrierPointer(Object* expected_rb_ptr, Object* rb_ptr) SHARED_REQUIRES(Locks::mutator_lock_); void AssertReadBarrierPointer() const SHARED_REQUIRES(Locks::mutator_lock_); diff --git a/runtime/mirror/string.cc b/runtime/mirror/string.cc index be869d4e6a..33aca0304c 100644 --- a/runtime/mirror/string.cc +++ b/runtime/mirror/string.cc @@ -109,12 +109,17 @@ String* String::AllocFromUtf16(Thread* self, int32_t utf16_length, const uint16_ String* String::AllocFromModifiedUtf8(Thread* self, const char* utf) { DCHECK(utf != nullptr); - size_t char_count = CountModifiedUtf8Chars(utf); - return AllocFromModifiedUtf8(self, char_count, utf); + size_t byte_count = strlen(utf); + size_t char_count = CountModifiedUtf8Chars(utf, byte_count); + return AllocFromModifiedUtf8(self, char_count, utf, byte_count); +} + +String* String::AllocFromModifiedUtf8(Thread* self, int32_t utf16_length, const char* utf8_data_in) { + return AllocFromModifiedUtf8(self, utf16_length, utf8_data_in, strlen(utf8_data_in)); } String* String::AllocFromModifiedUtf8(Thread* self, int32_t utf16_length, - const char* utf8_data_in) { + const char* utf8_data_in, int32_t utf8_length) { gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator(); SetStringCountVisitor visitor(utf16_length); String* string = Alloc<true>(self, utf16_length, allocator_type, visitor); @@ -122,7 +127,7 @@ String* String::AllocFromModifiedUtf8(Thread* self, int32_t utf16_length, return nullptr; } uint16_t* utf16_data_out = string->GetValue(); - ConvertModifiedUtf8ToUtf16(utf16_data_out, utf8_data_in); + ConvertModifiedUtf8ToUtf16(utf16_data_out, utf16_length, utf8_data_in, utf8_length); return string; } @@ -217,7 +222,7 @@ std::string String::ToModifiedUtf8() { const uint16_t* chars = GetValue(); size_t byte_count = GetUtfLength(); std::string result(byte_count, static_cast<char>(0)); - ConvertUtf16ToModifiedUtf8(&result[0], chars, GetLength()); + ConvertUtf16ToModifiedUtf8(&result[0], byte_count, chars, GetLength()); return result; } diff --git a/runtime/mirror/string.h b/runtime/mirror/string.h index 80ebd2cf0f..e2cfb8d5ad 100644 --- a/runtime/mirror/string.h +++ b/runtime/mirror/string.h @@ -116,6 +116,10 @@ class MANAGED String FINAL : public Object { static String* AllocFromModifiedUtf8(Thread* self, const char* utf) 
SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_); + static String* AllocFromModifiedUtf8(Thread* self, int32_t utf16_length, + const char* utf8_data_in, int32_t utf8_length) + SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_); + static String* AllocFromModifiedUtf8(Thread* self, int32_t utf16_length, const char* utf8_data_in) SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_); diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc index 4cd3c3d730..da6cf1f198 100644 --- a/runtime/native/dalvik_system_DexFile.cc +++ b/runtime/native/dalvik_system_DexFile.cc @@ -155,7 +155,9 @@ static jobject DexFile_openDexFileNative(JNIEnv* env, jstring javaOutputName, jint flags ATTRIBUTE_UNUSED, // class_loader will be used for app images. - jobject class_loader ATTRIBUTE_UNUSED) { + jobject class_loader ATTRIBUTE_UNUSED, + // dex_elements will be used for app images. + jobject dex_elements ATTRIBUTE_UNUSED) { ScopedUtfChars sourceName(env, javaSourceName); if (sourceName.c_str() == nullptr) { return 0; @@ -445,7 +447,12 @@ static JNINativeMethod gMethods[] = { NATIVE_METHOD(DexFile, getDexOptNeeded, "(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Z)I"), NATIVE_METHOD(DexFile, openDexFileNative, - "(Ljava/lang/String;Ljava/lang/String;ILjava/lang/ClassLoader;)Ljava/lang/Object;"), + "(Ljava/lang/String;" + "Ljava/lang/String;" + "I" + "Ljava/lang/ClassLoader;" + "[Ldalvik/system/DexPathList$Element;" + ")Ljava/lang/Object;"), }; void register_dalvik_system_DexFile(JNIEnv* env) { diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc index 4c5dc3ad25..b49d68f6ce 100644 --- a/runtime/native/dalvik_system_VMRuntime.cc +++ b/runtime/native/dalvik_system_VMRuntime.cc @@ -562,17 +562,20 @@ static void VMRuntime_preloadDexCaches(JNIEnv* env, jobject) { /* * This is called by the framework when it knows the application directory and - * process name. We use this information to start up the sampling profiler for - * for ART. + * process name. 
*/ -static void VMRuntime_registerAppInfo(JNIEnv* env, jclass, jstring pkgName, - jstring appDir ATTRIBUTE_UNUSED, +static void VMRuntime_registerAppInfo(JNIEnv* env, + jclass clazz ATTRIBUTE_UNUSED, + jstring pkgName, + jstring appDir, jstring procName ATTRIBUTE_UNUSED) { - const char *pkgNameChars = env->GetStringUTFChars(pkgName, nullptr); - std::string profileFile = StringPrintf("/data/dalvik-cache/profiles/%s", pkgNameChars); + const char* appDirChars = env->GetStringUTFChars(appDir, nullptr); + const char* pkgNameChars = env->GetStringUTFChars(pkgName, nullptr); + std::string profileFile = StringPrintf("%s/code_cache/%s.prof", appDirChars, pkgNameChars); - Runtime::Current()->StartProfiler(profileFile.c_str()); + Runtime::Current()->SetJitProfilingFilename(profileFile.c_str()); + env->ReleaseStringUTFChars(appDir, appDirChars); env->ReleaseStringUTFChars(pkgName, pkgNameChars); } diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc index ae16c7f373..585c7c4596 100644 --- a/runtime/parsed_options.cc +++ b/runtime/parsed_options.cc @@ -152,15 +152,21 @@ std::unique_ptr<RuntimeParser> ParsedOptions::MakeParser(bool ignore_unrecognize .WithType<bool>() .WithValueMap({{"false", false}, {"true", true}}) .IntoKey(M::UseJIT) - .Define("-Xjitcodecachesize:_") + .Define("-Xjitinitialsize:_") .WithType<MemoryKiB>() - .IntoKey(M::JITCodeCacheCapacity) + .IntoKey(M::JITCodeCacheInitialCapacity) + .Define("-Xjitmaxsize:_") + .WithType<MemoryKiB>() + .IntoKey(M::JITCodeCacheMaxCapacity) .Define("-Xjitthreshold:_") .WithType<unsigned int>() .IntoKey(M::JITCompileThreshold) .Define("-Xjitwarmupthreshold:_") .WithType<unsigned int>() .IntoKey(M::JITWarmupThreshold) + .Define("-Xjitsaveprofilinginfo") + .WithValue(true) + .IntoKey(M::JITSaveProfilingInfo) .Define("-XX:HspaceCompactForOOMMinIntervalMs=_") // in ms .WithType<MillisecondsToNanoseconds>() // store as ns .IntoKey(M::HSpaceCompactForOOMMinIntervalsMs) @@ -640,7 +646,6 @@ void ParsedOptions::Usage(const char* fmt, ...) { UsageMessage(stream, " -XX:ForegroundHeapGrowthMultiplier=doublevalue\n"); UsageMessage(stream, " -XX:LowMemoryMode\n"); UsageMessage(stream, " -Xprofile:{threadcpuclock,wallclock,dualclock}\n"); - UsageMessage(stream, " -Xjitcodecachesize:N\n"); UsageMessage(stream, " -Xjitthreshold:integervalue\n"); UsageMessage(stream, "\n"); @@ -684,6 +689,8 @@ void ParsedOptions::Usage(const char* fmt, ...) { UsageMessage(stream, " -Ximage-compiler-option dex2oat-option\n"); UsageMessage(stream, " -Xpatchoat:filename\n"); UsageMessage(stream, " -Xusejit:booleanvalue\n"); + UsageMessage(stream, " -Xjitinitialsize:N\n"); + UsageMessage(stream, " -Xjitmaxsize:N\n"); UsageMessage(stream, " -X[no]relocate\n"); UsageMessage(stream, " -X[no]dex2oat (Whether to invoke dex2oat on the application)\n"); UsageMessage(stream, " -X[no]image-dex2oat (Whether to create and use a boot image)\n"); @@ -718,6 +725,7 @@ void ParsedOptions::Usage(const char* fmt, ...) 
{ UsageMessage(stream, " -Xjitblocking\n"); UsageMessage(stream, " -Xjitmethod:signature[,signature]* (eg Ljava/lang/String\\;replace)\n"); UsageMessage(stream, " -Xjitclass:classname[,classname]*\n"); + UsageMessage(stream, " -Xjitcodecachesize:N\n"); UsageMessage(stream, " -Xjitoffset:offset[,offset]\n"); UsageMessage(stream, " -Xjitconfig:filename\n"); UsageMessage(stream, " -Xjitcheckcg\n"); diff --git a/runtime/runtime.cc b/runtime/runtime.cc index 0077389801..931e581ce3 100644 --- a/runtime/runtime.cc +++ b/runtime/runtime.cc @@ -218,6 +218,9 @@ Runtime::~Runtime() { if (is_native_bridge_loaded_) { UnloadNativeBridge(); } + + MaybeSaveJitProfilingInfo(); + if (dump_gc_performance_on_shutdown_) { // This can't be called from the Heap destructor below because it // could call RosAlloc::InspectAll() which needs the thread_list @@ -601,7 +604,6 @@ bool Runtime::Start() { LOG(INFO) << "Failed to access the profile file. Profiler disabled."; return true; } - StartProfiler(profile_output_filename_.c_str()); } if (trace_config_.get() != nullptr && trace_config_->trace_file != "") { @@ -1044,8 +1046,13 @@ bool Runtime::Init(const RuntimeOptions& raw_options, bool ignore_unrecognized) class_linker_ = new ClassLinker(intern_table_); if (GetHeap()->HasImageSpace()) { ATRACE_BEGIN("InitFromImage"); - class_linker_->InitFromImage(); + std::string error_msg; + bool result = class_linker_->InitFromImage(&error_msg); ATRACE_END(); + if (!result) { + LOG(ERROR) << "Could not initialize from image: " << error_msg; + return false; + } if (kIsDebugBuild) { GetHeap()->GetBootImageSpace()->VerifyImageAllocations(); } @@ -1077,7 +1084,11 @@ bool Runtime::Init(const RuntimeOptions& raw_options, bool ignore_unrecognized) runtime_options.GetOrDefault(Opt::Image), &boot_class_path); instruction_set_ = runtime_options.GetOrDefault(Opt::ImageInstructionSet); - class_linker_->InitWithoutImage(std::move(boot_class_path)); + std::string error_msg; + if (!class_linker_->InitWithoutImage(std::move(boot_class_path), &error_msg)) { + LOG(ERROR) << "Could not initialize without image: " << error_msg; + return false; + } // TODO: Should we move the following to InitWithoutImage? SetInstructionSet(instruction_set_); @@ -1609,10 +1620,8 @@ void Runtime::SetCalleeSaveMethod(ArtMethod* method, CalleeSaveType type) { callee_save_methods_[type] = reinterpret_cast<uintptr_t>(method); } -void Runtime::StartProfiler(const char* profile_output_filename) { +void Runtime::SetJitProfilingFilename(const char* profile_output_filename) { profile_output_filename_ = profile_output_filename; - profiler_started_ = - BackgroundMethodSamplingProfiler::Start(profile_output_filename_, profiler_options_); } // Transaction support. 
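Putting the runtime pieces of this change together: the framework registers a per-app profile path (built as appDir/code_cache/package.prof in registerAppInfo), and moving the process to the background or shutting down triggers a save. A simplified, hypothetical sketch of that flow with stand-in types; the path and FakeJit/FakeRuntime names are invented for illustration.

    #include <iostream>
    #include <string>

    struct FakeJit {
      void SaveProfilingInfo(const std::string& filename) {
        std::cout << "saving JIT profile to " << filename << '\n';
      }
    };

    struct FakeRuntime {
      static constexpr int kProfileBackground = 1;  // matches gc::ProcessState
      FakeJit* jit = nullptr;
      std::string profile_output_filename;

      void SetJitProfilingFilename(const char* f) { profile_output_filename = f; }

      void MaybeSaveJitProfilingInfo() {
        if (jit != nullptr && !profile_output_filename.empty()) {
          jit->SaveProfilingInfo(profile_output_filename);
        }
      }

      void UpdateProfilerState(int state) {
        if (state == kProfileBackground) {
          MaybeSaveJitProfilingInfo();
        }
      }
    };

    int main() {
      FakeJit jit;
      FakeRuntime runtime;
      runtime.jit = &jit;
      runtime.SetJitProfilingFilename("/data/app/example/code_cache/example.prof");
      runtime.UpdateProfilerState(FakeRuntime::kProfileBackground);
      return 0;
    }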
@@ -1758,8 +1767,16 @@ void Runtime::AddCurrentRuntimeFeaturesAsDex2OatArguments(std::vector<std::strin argv->push_back(feature_string); } +void Runtime::MaybeSaveJitProfilingInfo() { + if (jit_.get() != nullptr && !profile_output_filename_.empty()) { + jit_->SaveProfilingInfo(profile_output_filename_); + } +} + void Runtime::UpdateProfilerState(int state) { - VLOG(profiler) << "Profiler state updated to " << state; + if (state == kProfileBackground) { + MaybeSaveJitProfilingInfo(); + } } void Runtime::CreateJit() { diff --git a/runtime/runtime.h b/runtime/runtime.h index d61663cd10..bd3641405d 100644 --- a/runtime/runtime.h +++ b/runtime/runtime.h @@ -457,7 +457,7 @@ class Runtime { return &instrumentation_; } - void StartProfiler(const char* profile_output_filename); + void SetJitProfilingFilename(const char* profile_output_filename); void UpdateProfilerState(int state); // Transaction support. @@ -608,12 +608,14 @@ class Runtime { void StartDaemonThreads(); void StartSignalCatcher(); + void MaybeSaveJitProfilingInfo(); + // A pointer to the active runtime or null. static Runtime* instance_; // NOTE: these must match the gc::ProcessState values as they come directly from the framework. static constexpr int kProfileForground = 0; - static constexpr int kProfileBackgrouud = 1; + static constexpr int kProfileBackground = 1; // 64 bit so that we can share the same asm offsets for both 32 and 64 bits. uint64_t callee_save_methods_[kLastCalleeSaveType]; diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def index 3489834f30..5624285b09 100644 --- a/runtime/runtime_options.def +++ b/runtime/runtime_options.def @@ -69,7 +69,9 @@ RUNTIME_OPTIONS_KEY (bool, EnableHSpaceCompactForOOM, true) RUNTIME_OPTIONS_KEY (bool, UseJIT, false) RUNTIME_OPTIONS_KEY (unsigned int, JITCompileThreshold, jit::Jit::kDefaultCompileThreshold) RUNTIME_OPTIONS_KEY (unsigned int, JITWarmupThreshold, jit::Jit::kDefaultWarmupThreshold) -RUNTIME_OPTIONS_KEY (MemoryKiB, JITCodeCacheCapacity, jit::JitCodeCache::kDefaultCapacity) +RUNTIME_OPTIONS_KEY (MemoryKiB, JITCodeCacheInitialCapacity, jit::JitCodeCache::kInitialCapacity) +RUNTIME_OPTIONS_KEY (MemoryKiB, JITCodeCacheMaxCapacity, jit::JitCodeCache::kMaxCapacity) +RUNTIME_OPTIONS_KEY (bool, JITSaveProfilingInfo, false) RUNTIME_OPTIONS_KEY (MillisecondsToNanoseconds, \ HSpaceCompactForOOMMinIntervalsMs,\ MsToNs(100 * 1000)) // 100s diff --git a/runtime/safe_map.h b/runtime/safe_map.h index 7ac17b60d6..4e62dda8dd 100644 --- a/runtime/safe_map.h +++ b/runtime/safe_map.h @@ -92,7 +92,7 @@ class SafeMap { DCHECK(result.second); // Check we didn't accidentally overwrite an existing value. return result.first; } - iterator Put(const K& k, const V&& v) { + iterator Put(const K& k, V&& v) { std::pair<iterator, bool> result = map_.emplace(k, std::move(v)); DCHECK(result.second); // Check we didn't accidentally overwrite an existing value. return result.first; @@ -105,7 +105,7 @@ class SafeMap { DCHECK(pos == map_.begin() || map_.key_comp()((--iterator(pos))->first, k)); return map_.emplace_hint(pos, k, v); } - iterator PutBefore(iterator pos, const K& k, const V&& v) { + iterator PutBefore(iterator pos, const K& k, V&& v) { // Check that we're using the correct position and the key is not in the map. 
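The SafeMap signature change in this hunk (const V&& to V&&) matters because a const rvalue reference cannot be moved from. A small stand-alone illustration with an invented TinyMap type; with the old signature the call below would either silently copy or, for move-only types, fail to compile.

    #include <map>
    #include <memory>
    #include <utility>

    template <typename K, typename V>
    struct TinyMap {                    // stand-in for SafeMap
      std::map<K, V> map_;
      void Put(const K& k, V&& v) {     // non-const rvalue ref: the move is real
        map_.emplace(k, std::move(v));
      }
    };

    int main() {
      TinyMap<int, std::unique_ptr<int>> m;
      m.Put(1, std::make_unique<int>(42));  // compiles and genuinely moves
      // With 'const V&& v', std::move(v) would be 'const unique_ptr&&', which
      // cannot bind to unique_ptr's move constructor, so this would not compile.
      return 0;
    }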
DCHECK(pos == map_.end() || map_.key_comp()(k, pos->first)); DCHECK(pos == map_.begin() || map_.key_comp()((--iterator(pos))->first, k)); diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc index b09b87fb58..a390908635 100644 --- a/runtime/thread_list.cc +++ b/runtime/thread_list.cc @@ -948,7 +948,12 @@ void ThreadList::SuspendAllForDebugger() { Locks::mutator_lock_->ExclusiveLock(self); Locks::mutator_lock_->ExclusiveUnlock(self); #endif - AssertThreadsAreSuspended(self, self, debug_thread); + // Disabled for the following race condition: + // Thread 1 calls SuspendAllForDebugger, gets preempted after pulsing the mutator lock. + // Thread 2 calls SuspendAll and SetStateUnsafe (perhaps from Dbg::Disconnected). + // Thread 1 fails assertion that all threads are suspended due to thread 2 being in a runnable + // state (from SetStateUnsafe). + // AssertThreadsAreSuspended(self, self, debug_thread); VLOG(threads) << *self << " SuspendAllForDebugger complete"; } diff --git a/runtime/utf.cc b/runtime/utf.cc index 10600e2153..5a116980c9 100644 --- a/runtime/utf.cc +++ b/runtime/utf.cc @@ -23,28 +23,50 @@ namespace art { +// This is used only from debugger and test code. size_t CountModifiedUtf8Chars(const char* utf8) { + return CountModifiedUtf8Chars(utf8, strlen(utf8)); +} + +/* + * This does not validate UTF8 rules (nor did older code). But it gets the right answer + * for valid UTF-8 and that's fine because it's used only to size a buffer for later + * conversion. + * + * Modified UTF-8 consists of a series of bytes up to 21 bit Unicode code points as follows: + * U+0001 - U+007F 0xxxxxxx + * U+0080 - U+07FF 110xxxxx 10xxxxxx + * U+0800 - U+FFFF 1110xxxx 10xxxxxx 10xxxxxx + * U+10000 - U+1FFFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + * + * U+0000 is encoded using the 2nd form to avoid nulls inside strings (this differs from + * standard UTF-8). + * The four byte encoding converts to two utf16 characters. + */ +size_t CountModifiedUtf8Chars(const char* utf8, size_t byte_count) { + DCHECK_LE(byte_count, strlen(utf8)); size_t len = 0; - int ic; - while ((ic = *utf8++) != '\0') { + const char* end = utf8 + byte_count; + for (; utf8 < end; ++utf8) { + int ic = *utf8; len++; - if ((ic & 0x80) == 0) { - // one-byte encoding + if (LIKELY((ic & 0x80) == 0)) { + // One-byte encoding. continue; } - // two- or three-byte encoding + // Two- or three-byte encoding. utf8++; if ((ic & 0x20) == 0) { - // two-byte encoding + // Two-byte encoding. continue; } utf8++; if ((ic & 0x10) == 0) { - // three-byte encoding + // Three-byte encoding. continue; } - // four-byte encoding: needs to be converted into a surrogate + // Four-byte encoding: needs to be converted into a surrogate // pair. utf8++; len++; @@ -52,6 +74,7 @@ size_t CountModifiedUtf8Chars(const char* utf8) { return len; } +// This is used only from debugger and test code. 
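A self-contained sketch of the counting rule spelled out in the comment above: the lead-byte pattern decides how many bytes each sequence occupies, and a 4-byte sequence contributes two UTF-16 code units (a surrogate pair). It mirrors the documented table, not the ART implementation.

    #include <cassert>
    #include <cstddef>

    size_t CountUtf16Units(const unsigned char* utf8, size_t byte_count) {
      size_t len = 0;
      for (size_t i = 0; i < byte_count; ) {
        unsigned char b = utf8[i];
        if ((b & 0x80) == 0)         { i += 1; len += 1; }  // U+0001..U+007F
        else if ((b & 0xe0) == 0xc0) { i += 2; len += 1; }  // 2-byte (incl. U+0000)
        else if ((b & 0xf0) == 0xe0) { i += 3; len += 1; }  // 3-byte
        else                         { i += 4; len += 2; }  // 4-byte -> surrogate pair
      }
      return len;
    }

    int main() {
      // "A" + U+00A2 (2 bytes) + U+20AC (3 bytes) + U+10400 (4 bytes)
      const unsigned char s[] = {0x41, 0xc2, 0xa2, 0xe2, 0x82, 0xac,
                                 0xf0, 0x90, 0x90, 0x80};
      assert(CountUtf16Units(s, sizeof(s)) == 5);  // 1 + 1 + 1 + 2
      return 0;
    }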
void ConvertModifiedUtf8ToUtf16(uint16_t* utf16_data_out, const char* utf8_data_in) { while (*utf8_data_in != '\0') { const uint32_t ch = GetUtf16FromUtf8(&utf8_data_in); @@ -65,13 +88,53 @@ void ConvertModifiedUtf8ToUtf16(uint16_t* utf16_data_out, const char* utf8_data_ } } -void ConvertUtf16ToModifiedUtf8(char* utf8_out, const uint16_t* utf16_in, size_t char_count) { +void ConvertModifiedUtf8ToUtf16(uint16_t* utf16_data_out, size_t out_chars, + const char* utf8_data_in, size_t in_bytes) { + const char *in_start = utf8_data_in; + const char *in_end = utf8_data_in + in_bytes; + uint16_t *out_p = utf16_data_out; + + if (LIKELY(out_chars == in_bytes)) { + // Common case where all characters are ASCII. + for (const char *p = in_start; p < in_end;) { + // Safe even if char is signed because ASCII characters always have + // the high bit cleared. + *out_p++ = dchecked_integral_cast<uint16_t>(*p++); + } + return; + } + + // String contains non-ASCII characters. + for (const char *p = in_start; p < in_end;) { + const uint32_t ch = GetUtf16FromUtf8(&p); + const uint16_t leading = GetLeadingUtf16Char(ch); + const uint16_t trailing = GetTrailingUtf16Char(ch); + + *out_p++ = leading; + if (trailing != 0) { + *out_p++ = trailing; + } + } +} + +void ConvertUtf16ToModifiedUtf8(char* utf8_out, size_t byte_count, + const uint16_t* utf16_in, size_t char_count) { + if (LIKELY(byte_count == char_count)) { + // Common case where all characters are ASCII. + const uint16_t *utf16_end = utf16_in + char_count; + for (const uint16_t *p = utf16_in; p < utf16_end;) { + *utf8_out++ = dchecked_integral_cast<char>(*p++); + } + return; + } + + // String contains non-ASCII characters. while (char_count--) { const uint16_t ch = *utf16_in++; if (ch > 0 && ch <= 0x7f) { *utf8_out++ = ch; } else { - // char_count == 0 here implies we've encountered an unpaired + // Char_count == 0 here implies we've encountered an unpaired // surrogate and we have no choice but to encode it as 3-byte UTF // sequence. Note that unpaired surrogates can occur as a part of // "normal" operation. @@ -161,34 +224,31 @@ int CompareModifiedUtf8ToUtf16AsCodePointValues(const char* utf8, const uint16_t size_t CountUtf8Bytes(const uint16_t* chars, size_t char_count) { size_t result = 0; - while (char_count--) { + const uint16_t *end = chars + char_count; + while (chars < end) { const uint16_t ch = *chars++; - if (ch > 0 && ch <= 0x7f) { - ++result; - } else if (ch >= 0xd800 && ch <= 0xdbff) { - if (char_count > 0) { + if (LIKELY(ch != 0 && ch < 0x80)) { + result++; + continue; + } + if (ch < 0x800) { + result += 2; + continue; + } + if (ch >= 0xd800 && ch < 0xdc00) { + if (chars < end) { const uint16_t ch2 = *chars; // If we find a properly paired surrogate, we emit it as a 4 byte // UTF sequence. If we find an unpaired leading or trailing surrogate, // we emit it as a 3 byte sequence like would have done earlier. - if (ch2 >= 0xdc00 && ch2 <= 0xdfff) { + if (ch2 >= 0xdc00 && ch2 < 0xe000) { chars++; - char_count--; - result += 4; - } else { - result += 3; + continue; } - } else { - // This implies we found an unpaired trailing surrogate at the end - // of a string. - result += 3; } - } else if (ch > 0x7ff) { - result += 3; - } else { - result += 2; } + result += 3; } return result; } diff --git a/runtime/utf.h b/runtime/utf.h index 1193d29c7d..03158c492d 100644 --- a/runtime/utf.h +++ b/runtime/utf.h @@ -40,6 +40,7 @@ namespace mirror { * Returns the number of UTF-16 characters in the given modified UTF-8 string. 
*/ size_t CountModifiedUtf8Chars(const char* utf8); +size_t CountModifiedUtf8Chars(const char* utf8, size_t byte_count); /* * Returns the number of modified UTF-8 bytes needed to represent the given @@ -51,6 +52,8 @@ size_t CountUtf8Bytes(const uint16_t* chars, size_t char_count); * Convert from Modified UTF-8 to UTF-16. */ void ConvertModifiedUtf8ToUtf16(uint16_t* utf16_out, const char* utf8_in); +void ConvertModifiedUtf8ToUtf16(uint16_t* utf16_out, size_t out_chars, + const char* utf8_in, size_t in_bytes); /* * Compare two modified UTF-8 strings as UTF-16 code point values in a non-locale sensitive manner @@ -71,7 +74,8 @@ int CompareModifiedUtf8ToUtf16AsCodePointValues(const char* utf8, const uint16_t * this anyway, so if you want a NUL-terminated string, you know where to * put the NUL byte. */ -void ConvertUtf16ToModifiedUtf8(char* utf8_out, const uint16_t* utf16_in, size_t char_count); +void ConvertUtf16ToModifiedUtf8(char* utf8_out, size_t byte_count, + const uint16_t* utf16_in, size_t char_count); /* * The java.lang.String hashCode() algorithm. diff --git a/runtime/utf_test.cc b/runtime/utf_test.cc index 94a6ea57e2..5239e40540 100644 --- a/runtime/utf_test.cc +++ b/runtime/utf_test.cc @@ -19,6 +19,7 @@ #include "common_runtime_test.h" #include "utf-inl.h" +#include <map> #include <vector> namespace art { @@ -48,7 +49,7 @@ static const uint8_t kAllSequences[] = { }; // A test string that contains a UTF-8 encoding of a surrogate pair -// (code point = U+10400) +// (code point = U+10400). static const uint8_t kSurrogateEncoding[] = { 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x80, @@ -66,13 +67,13 @@ TEST_F(UtfTest, GetUtf16FromUtf8) { EXPECT_EQ(0, GetTrailingUtf16Char(pair)); EXPECT_ARRAY_POSITION(1, ptr, start); - // Two byte sequence + // Two byte sequence. pair = GetUtf16FromUtf8(&ptr); EXPECT_EQ(0xa2, GetLeadingUtf16Char(pair)); EXPECT_EQ(0, GetTrailingUtf16Char(pair)); EXPECT_ARRAY_POSITION(3, ptr, start); - // Three byte sequence + // Three byte sequence. pair = GetUtf16FromUtf8(&ptr); EXPECT_EQ(0x20ac, GetLeadingUtf16Char(pair)); EXPECT_EQ(0, GetTrailingUtf16Char(pair)); @@ -84,7 +85,7 @@ TEST_F(UtfTest, GetUtf16FromUtf8) { EXPECT_EQ(0xdfe0, GetTrailingUtf16Char(pair)); EXPECT_ARRAY_POSITION(10, ptr, start); - // Null terminator + // Null terminator. pair = GetUtf16FromUtf8(&ptr); EXPECT_EQ(0, GetLeadingUtf16Char(pair)); EXPECT_EQ(0, GetTrailingUtf16Char(pair)); @@ -117,7 +118,8 @@ static void AssertConversion(const std::vector<uint16_t> input, ASSERT_EQ(expected.size(), CountUtf8Bytes(&input[0], input.size())); std::vector<uint8_t> output(expected.size()); - ConvertUtf16ToModifiedUtf8(reinterpret_cast<char*>(&output[0]), &input[0], input.size()); + ConvertUtf16ToModifiedUtf8(reinterpret_cast<char*>(&output[0]), expected.size(), + &input[0], input.size()); EXPECT_EQ(expected, output); } @@ -139,10 +141,10 @@ TEST_F(UtfTest, CountAndConvertUtf8Bytes) { AssertConversion({ 'h', 'e', 'l', 'l', 'o' }, { 0x68, 0x65, 0x6c, 0x6c, 0x6f }); AssertConversion({ - 0xd802, 0xdc02, // Surrogate pair - 0xdef0, 0xdcff, // Three byte encodings - 0x0101, 0x0000, // Two byte encodings - 'p' , 'p' // One byte encoding + 0xd802, 0xdc02, // Surrogate pair. + 0xdef0, 0xdcff, // Three byte encodings. + 0x0101, 0x0000, // Two byte encodings. + 'p' , 'p' // One byte encoding. }, { 0xf0, 0x90, 0xa0, 0x82, 0xed, 0xbb, 0xb0, 0xed, 0xb3, 0xbf, @@ -155,9 +157,225 @@ TEST_F(UtfTest, CountAndConvertUtf8Bytes_UnpairedSurrogate) { // Unpaired trailing surrogate at the end of input. 
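As a sanity check on the expected bytes in the assertions that follow, the 3-byte fallback for an unpaired surrogate such as U+D801 works out to ED A0 81 under the plain 1110xxxx 10xxxxxx 10xxxxxx rule. Stand-alone arithmetic only:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint16_t ch = 0xd801;  // unpaired lead surrogate
      uint8_t b0 = static_cast<uint8_t>((ch >> 12) | 0xe0);          // 0xed
      uint8_t b1 = static_cast<uint8_t>(((ch >> 6) & 0x3f) | 0x80);  // 0xa0
      uint8_t b2 = static_cast<uint8_t>((ch & 0x3f) | 0x80);         // 0x81
      assert(b0 == 0xed && b1 == 0xa0 && b2 == 0x81);
      return 0;
    }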
AssertConversion({ 'h', 'e', 0xd801 }, { 'h', 'e', 0xed, 0xa0, 0x81 }); // Unpaired (or incorrectly paired) surrogates in the middle of the input. - AssertConversion({ 'h', 0xd801, 'e' }, { 'h', 0xed, 0xa0, 0x81, 'e' }); - AssertConversion({ 'h', 0xd801, 0xd801, 'e' }, { 'h', 0xed, 0xa0, 0x81, 0xed, 0xa0, 0x81, 'e' }); - AssertConversion({ 'h', 0xdc00, 0xdc00, 'e' }, { 'h', 0xed, 0xb0, 0x80, 0xed, 0xb0, 0x80, 'e' }); + const std::map<std::vector<uint16_t>, std::vector<uint8_t>> prefixes { + {{ 'h' }, { 'h' }}, + {{ 0 }, { 0xc0, 0x80 }}, + {{ 0x81 }, { 0xc2, 0x81 }}, + {{ 0x801 }, { 0xe0, 0xa0, 0x81 }}, + }; + const std::map<std::vector<uint16_t>, std::vector<uint8_t>> suffixes { + {{ 'e' }, { 'e' }}, + {{ 0 }, { 0xc0, 0x80 }}, + {{ 0x7ff }, { 0xdf, 0xbf }}, + {{ 0xffff }, { 0xef, 0xbf, 0xbf }}, + }; + const std::map<std::vector<uint16_t>, std::vector<uint8_t>> tests { + {{ 0xd801 }, { 0xed, 0xa0, 0x81 }}, + {{ 0xdc00 }, { 0xed, 0xb0, 0x80 }}, + {{ 0xd801, 0xd801 }, { 0xed, 0xa0, 0x81, 0xed, 0xa0, 0x81 }}, + {{ 0xdc00, 0xdc00 }, { 0xed, 0xb0, 0x80, 0xed, 0xb0, 0x80 }}, + }; + for (const auto& prefix : prefixes) { + const std::vector<uint16_t>& prefix_in = prefix.first; + const std::vector<uint8_t>& prefix_out = prefix.second; + for (const auto& test : tests) { + const std::vector<uint16_t>& test_in = test.first; + const std::vector<uint8_t>& test_out = test.second; + for (const auto& suffix : suffixes) { + const std::vector<uint16_t>& suffix_in = suffix.first; + const std::vector<uint8_t>& suffix_out = suffix.second; + std::vector<uint16_t> in = prefix_in; + in.insert(in.end(), test_in.begin(), test_in.end()); + in.insert(in.end(), suffix_in.begin(), suffix_in.end()); + std::vector<uint8_t> out = prefix_out; + out.insert(out.end(), test_out.begin(), test_out.end()); + out.insert(out.end(), suffix_out.begin(), suffix_out.end()); + AssertConversion(in, out); + } + } + } +} + +// Old versions of functions, here to compare answers with optimized versions. + +size_t CountModifiedUtf8Chars_reference(const char* utf8) { + size_t len = 0; + int ic; + while ((ic = *utf8++) != '\0') { + len++; + if ((ic & 0x80) == 0) { + // one-byte encoding + continue; + } + // two- or three-byte encoding + utf8++; + if ((ic & 0x20) == 0) { + // two-byte encoding + continue; + } + utf8++; + if ((ic & 0x10) == 0) { + // three-byte encoding + continue; + } + + // four-byte encoding: needs to be converted into a surrogate + // pair. + utf8++; + len++; + } + return len; +} + +static size_t CountUtf8Bytes_reference(const uint16_t* chars, size_t char_count) { + size_t result = 0; + while (char_count--) { + const uint16_t ch = *chars++; + if (ch > 0 && ch <= 0x7f) { + ++result; + } else if (ch >= 0xd800 && ch <= 0xdbff) { + if (char_count > 0) { + const uint16_t ch2 = *chars; + // If we find a properly paired surrogate, we emit it as a 4 byte + // UTF sequence. If we find an unpaired leading or trailing surrogate, + // we emit it as a 3 byte sequence like would have done earlier. + if (ch2 >= 0xdc00 && ch2 <= 0xdfff) { + chars++; + char_count--; + + result += 4; + } else { + result += 3; + } + } else { + // This implies we found an unpaired trailing surrogate at the end + // of a string. 
+ result += 3; + } + } else if (ch > 0x7ff) { + result += 3; + } else { + result += 2; + } + } + return result; +} + +static void ConvertUtf16ToModifiedUtf8_reference(char* utf8_out, const uint16_t* utf16_in, + size_t char_count) { + while (char_count--) { + const uint16_t ch = *utf16_in++; + if (ch > 0 && ch <= 0x7f) { + *utf8_out++ = ch; + } else { + // Char_count == 0 here implies we've encountered an unpaired + // surrogate and we have no choice but to encode it as 3-byte UTF + // sequence. Note that unpaired surrogates can occur as a part of + // "normal" operation. + if ((ch >= 0xd800 && ch <= 0xdbff) && (char_count > 0)) { + const uint16_t ch2 = *utf16_in; + + // Check if the other half of the pair is within the expected + // range. If it isn't, we will have to emit both "halves" as + // separate 3 byte sequences. + if (ch2 >= 0xdc00 && ch2 <= 0xdfff) { + utf16_in++; + char_count--; + const uint32_t code_point = (ch << 10) + ch2 - 0x035fdc00; + *utf8_out++ = (code_point >> 18) | 0xf0; + *utf8_out++ = ((code_point >> 12) & 0x3f) | 0x80; + *utf8_out++ = ((code_point >> 6) & 0x3f) | 0x80; + *utf8_out++ = (code_point & 0x3f) | 0x80; + continue; + } + } + + if (ch > 0x07ff) { + // Three byte encoding. + *utf8_out++ = (ch >> 12) | 0xe0; + *utf8_out++ = ((ch >> 6) & 0x3f) | 0x80; + *utf8_out++ = (ch & 0x3f) | 0x80; + } else /*(ch > 0x7f || ch == 0)*/ { + // Two byte encoding. + *utf8_out++ = (ch >> 6) | 0xc0; + *utf8_out++ = (ch & 0x3f) | 0x80; + } + } + } +} + +// Exhaustive test of converting a single code point to UTF-16, then UTF-8, and back again. + +static void codePointToSurrogatePair(uint32_t code_point, uint16_t &first, uint16_t &second) { + first = (code_point >> 10) + 0xd7c0; + second = (code_point & 0x03ff) + 0xdc00; +} + +static void testConversions(uint16_t *buf, int char_count) { + char bytes_test[8], bytes_reference[8]; + uint16_t out_buf_test[4], out_buf_reference[4]; + int byte_count_test, byte_count_reference; + int char_count_test, char_count_reference; + + // Calculate the number of utf-8 bytes for the utf-16 chars. + byte_count_reference = CountUtf8Bytes_reference(buf, char_count); + byte_count_test = CountUtf8Bytes(buf, char_count); + EXPECT_EQ(byte_count_reference, byte_count_test); + + // Convert the utf-16 string to utf-8 bytes. + ConvertUtf16ToModifiedUtf8_reference(bytes_reference, buf, char_count); + ConvertUtf16ToModifiedUtf8(bytes_test, byte_count_test, buf, char_count); + for (int i = 0; i < byte_count_test; ++i) { + EXPECT_EQ(bytes_reference[i], bytes_test[i]); + } + + // Calculate the number of utf-16 chars from the utf-8 bytes. + bytes_reference[byte_count_reference] = 0; // Reference function needs null termination. + char_count_reference = CountModifiedUtf8Chars_reference(bytes_reference); + char_count_test = CountModifiedUtf8Chars(bytes_test, byte_count_test); + EXPECT_EQ(char_count, char_count_reference); + EXPECT_EQ(char_count, char_count_test); + + // Convert the utf-8 bytes back to utf-16 chars. + // Does not need copied _reference version of the function because the original + // function with the old API is retained for debug/testing code. 
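The magic constants in codePointToSurrogatePair and in the reference converter above are the usual UTF-16 surrogate offsets folded together: 0xd7c0 == 0xd800 - (0x10000 >> 10), and 0x035fdc00 == (0xd800 << 10) + 0xdc00 - 0x10000. The self-check below is not part of the patch; the function names mirror the test helpers and are only illustrative. It uses U+10400, the same code point the kSurrogateEncoding test string encodes (lead 0xD801, trail 0xDC00).

    #include <cassert>
    #include <cstdint>

    // Encode a supplementary code point (U+10000..U+10FFFF) as a UTF-16 pair,
    // using the same folded constants as the test helper.
    static void CodePointToSurrogatePair(uint32_t cp, uint16_t* lead, uint16_t* trail) {
      *lead = static_cast<uint16_t>((cp >> 10) + 0xd7c0);    // 0xd7c0 == 0xd800 - (0x10000 >> 10)
      *trail = static_cast<uint16_t>((cp & 0x03ff) + 0xdc00);
    }

    // Inverse, matching the reference converter's expression.
    static uint32_t SurrogatePairToCodePoint(uint16_t lead, uint16_t trail) {
      return (static_cast<uint32_t>(lead) << 10) + trail - 0x035fdc00;
    }

    int main() {
      uint16_t lead = 0;
      uint16_t trail = 0;
      CodePointToSurrogatePair(0x10400, &lead, &trail);
      assert(lead == 0xd801 && trail == 0xdc00);  // Matches kSurrogateEncoding (U+10400).
      assert(SurrogatePairToCodePoint(lead, trail) == 0x10400);
      return 0;
    }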
+ ConvertModifiedUtf8ToUtf16(out_buf_reference, bytes_reference); + ConvertModifiedUtf8ToUtf16(out_buf_test, char_count_test, bytes_test, byte_count_test); + for (int i = 0; i < char_count_test; ++i) { + EXPECT_EQ(buf[i], out_buf_reference[i]); + EXPECT_EQ(buf[i], out_buf_test[i]); + } +} + +TEST_F(UtfTest, ExhaustiveBidirectionalCodePointCheck) { + for (int codePoint = 0; codePoint <= 0x10ffff; ++codePoint) { + uint16_t buf[4]; + if (codePoint <= 0xffff) { + if (codePoint >= 0xd800 && codePoint <= 0xdfff) { + // According to the Unicode standard, no character will ever + // be assigned to these code points, and they can not be encoded + // into either utf-16 or utf-8. + continue; + } + buf[0] = 'h'; + buf[1] = codePoint; + buf[2] = 'e'; + testConversions(buf, 2); + testConversions(buf, 3); + testConversions(buf + 1, 1); + testConversions(buf + 1, 2); + } else { + buf[0] = 'h'; + codePointToSurrogatePair(codePoint, buf[1], buf[2]); + buf[3] = 'e'; + testConversions(buf, 2); + testConversions(buf, 3); + testConversions(buf, 4); + testConversions(buf + 1, 1); + testConversions(buf + 1, 2); + testConversions(buf + 1, 3); + } + } } } // namespace art diff --git a/runtime/utils.h b/runtime/utils.h index 3690f86a80..8b7941a1b2 100644 --- a/runtime/utils.h +++ b/runtime/utils.h @@ -18,9 +18,11 @@ #define ART_RUNTIME_UTILS_H_ #include <pthread.h> +#include <stdlib.h> #include <limits> #include <memory> +#include <random> #include <string> #include <type_traits> #include <vector> @@ -350,6 +352,26 @@ void ParseDouble(const std::string& option, double* parsed_value, UsageFn Usage); +#if defined(__BIONIC__) +struct Arc4RandomGenerator { + typedef uint32_t result_type; + static constexpr uint32_t min() { return std::numeric_limits<uint32_t>::min(); } + static constexpr uint32_t max() { return std::numeric_limits<uint32_t>::max(); } + uint32_t operator() () { return arc4random(); } +}; +using RNG = Arc4RandomGenerator; +#else +using RNG = std::random_device; +#endif + +template <typename T> +T GetRandomNumber(T min, T max) { + CHECK_LT(min, max); + std::uniform_int_distribution<T> dist(min, max); + RNG rng; + return dist(rng); +} + } // namespace art #endif // ART_RUNTIME_UTILS_H_ diff --git a/test/005-annotations/build b/test/005-annotations/build index 3f00a1a3cd..057b351dab 100644 --- a/test/005-annotations/build +++ b/test/005-annotations/build @@ -21,6 +21,8 @@ mkdir classes # android.test.anno.MissingAnnotation is available at compile time... ${JAVAC} -d classes `find src -name '*.java'` +# overwrite RenamedEnum +${JAVAC} -d classes `find src2 -name '*.java'` # ...but not at run time. rm 'classes/android/test/anno/MissingAnnotation.class' diff --git a/test/005-annotations/expected.txt b/test/005-annotations/expected.txt index e1c3dad2af..180adf8687 100644 --- a/test/005-annotations/expected.txt +++ b/test/005-annotations/expected.txt @@ -108,3 +108,4 @@ Canonical:null Simple: Get annotation with missing class should not throw Got expected TypeNotPresentException +Got expected NoSuchFieldError diff --git a/test/005-annotations/src/android/test/anno/AnnoRenamedEnumMethod.java b/test/005-annotations/src/android/test/anno/AnnoRenamedEnumMethod.java new file mode 100644 index 0000000000..7a15c6404f --- /dev/null +++ b/test/005-annotations/src/android/test/anno/AnnoRenamedEnumMethod.java @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package android.test.anno; + +import java.lang.annotation.*; + +@Target(ElementType.METHOD) +@Retention(RetentionPolicy.RUNTIME) + +public @interface AnnoRenamedEnumMethod { + RenamedEnumClass.RenamedEnum renamed() default RenamedEnumClass.RenamedEnum.FOO; +} diff --git a/test/005-annotations/src/android/test/anno/RenamedEnumClass.java b/test/005-annotations/src/android/test/anno/RenamedEnumClass.java new file mode 100644 index 0000000000..cfba8197ef --- /dev/null +++ b/test/005-annotations/src/android/test/anno/RenamedEnumClass.java @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package android.test.anno; + +import java.lang.annotation.*; + +@Target(ElementType.METHOD) +@Retention(RetentionPolicy.RUNTIME) + +public @interface RenamedEnumClass { + enum RenamedEnum { FOO, BAR }; +} diff --git a/test/005-annotations/src/android/test/anno/RenamedNoted.java b/test/005-annotations/src/android/test/anno/RenamedNoted.java new file mode 100644 index 0000000000..aae3a3fd58 --- /dev/null +++ b/test/005-annotations/src/android/test/anno/RenamedNoted.java @@ -0,0 +1,24 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package android.test.anno; + +public class RenamedNoted { + @AnnoRenamedEnumMethod(renamed=RenamedEnumClass.RenamedEnum.BAR) + public int bar() { + return 0; + } +} diff --git a/test/005-annotations/src/android/test/anno/TestAnnotations.java b/test/005-annotations/src/android/test/anno/TestAnnotations.java index 7b74a73082..2f0a8d31c3 100644 --- a/test/005-annotations/src/android/test/anno/TestAnnotations.java +++ b/test/005-annotations/src/android/test/anno/TestAnnotations.java @@ -1,3 +1,19 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package android.test.anno; import java.lang.annotation.Annotation; @@ -199,5 +215,15 @@ public class TestAnnotations { } catch (TypeNotPresentException expected) { System.out.println("Got expected TypeNotPresentException"); } + + // Test renamed enums. + try { + for (Method m: RenamedNoted.class.getDeclaredMethods()) { + Annotation[] annos = m.getDeclaredAnnotations(); + System.out.println(" annotations on METH " + m + ":"); + } + } catch (NoSuchFieldError expected) { + System.out.println("Got expected NoSuchFieldError"); + } } } diff --git a/test/005-annotations/src2/android/test/anno/RenamedEnumClass.java b/test/005-annotations/src2/android/test/anno/RenamedEnumClass.java new file mode 100644 index 0000000000..5a2fe36e88 --- /dev/null +++ b/test/005-annotations/src2/android/test/anno/RenamedEnumClass.java @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package android.test.anno; + +import java.lang.annotation.*; + +@Target(ElementType.METHOD) +@Retention(RetentionPolicy.RUNTIME) + +public @interface RenamedEnumClass { + enum RenamedEnum { FOOBAR }; +} diff --git a/test/068-classloader/expected.txt b/test/068-classloader/expected.txt index 8725799fe1..36e4f4872c 100644 --- a/test/068-classloader/expected.txt +++ b/test/068-classloader/expected.txt @@ -13,3 +13,4 @@ Got LinkageError on DI (early) Got LinkageError on IDI (early) class Main Got expected ClassNotFoundException +JNI_OnLoad called diff --git a/test/068-classloader/src/FancyLoader.java b/test/068-classloader/src/FancyLoader.java index 6a153cc9e2..b8eac7b52f 100644 --- a/test/068-classloader/src/FancyLoader.java +++ b/test/068-classloader/src/FancyLoader.java @@ -38,7 +38,7 @@ public class FancyLoader extends ClassLoader { static final String CLASS_PATH = "classes-ex/"; /* this is the "alternate" DEX/Jar file */ - static final String DEX_FILE = System.getenv("DEX_LOCATION") + "/068-classloader-ex.jar"; + public static final String DEX_FILE = System.getenv("DEX_LOCATION") + "/068-classloader-ex.jar"; /* on Dalvik, this is a DexFile; otherwise, it's null */ private Class mDexClass; diff --git a/test/068-classloader/src/Main.java b/test/068-classloader/src/Main.java index 361e2938e3..e3bf82c8c3 100644 --- a/test/068-classloader/src/Main.java +++ b/test/068-classloader/src/Main.java @@ -14,6 +14,8 @@ * limitations under the License. */ +import java.lang.reflect.Constructor; +import java.lang.reflect.Method; /** * Class loader test. 
*/ @@ -62,6 +64,28 @@ public class Main { testSeparation(); testClassForName(); + + // Attempt to load without a class table, regression test for b/25866849. + testLoadNativeLibrary(args[0]); + } + + static void testLoadNativeLibrary(String libName) throws Exception { + Class pathClassLoader = Class.forName("dalvik.system.PathClassLoader"); + if (pathClassLoader == null) { + throw new AssertionError("Couldn't find path class loader class"); + } + Constructor constructor = + pathClassLoader.getDeclaredConstructor(String.class, ClassLoader.class); + ClassLoader loader = (ClassLoader) constructor.newInstance( + FancyLoader.DEX_FILE, ClassLoader.getSystemClassLoader()); + Runtime runtime = Runtime.getRuntime(); + Method method = runtime.getClass().getDeclaredMethod("loadLibrary", String.class, + ClassLoader.class); + if (method == null) { + throw new RuntimeException("loadLibrary not found"); + } + method.setAccessible(true); + method.invoke(runtime, libName, loader); } static void testSeparation() { diff --git a/test/458-checker-instruction-simplification/src/Main.java b/test/458-checker-instruction-simplification/src/Main.java index c32d34aa6f..6151fc10f2 100644 --- a/test/458-checker-instruction-simplification/src/Main.java +++ b/test/458-checker-instruction-simplification/src/Main.java @@ -389,24 +389,6 @@ public class Main { return arg << 0; } - /// CHECK-START: int Main.Shl1(int) instruction_simplifier (before) - /// CHECK-DAG: <<Arg:i\d+>> ParameterValue - /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 - /// CHECK-DAG: <<Shl:i\d+>> Shl [<<Arg>>,<<Const1>>] - /// CHECK-DAG: Return [<<Shl>>] - - /// CHECK-START: int Main.Shl1(int) instruction_simplifier (after) - /// CHECK-DAG: <<Arg:i\d+>> ParameterValue - /// CHECK-DAG: <<Add:i\d+>> Add [<<Arg>>,<<Arg>>] - /// CHECK-DAG: Return [<<Add>>] - - /// CHECK-START: int Main.Shl1(int) instruction_simplifier (after) - /// CHECK-NOT: Shl - - public static int Shl1(int arg) { - return arg << 1; - } - /// CHECK-START: long Main.Shr0(long) instruction_simplifier (before) /// CHECK-DAG: <<Arg:j\d+>> ParameterValue /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 @@ -1226,6 +1208,130 @@ public class Main { return arg / -0.25f; } + /** + * Test strength reduction of factors of the form (2^n + 1). + */ + + /// CHECK-START: int Main.mulPow2Plus1(int) instruction_simplifier (before) + /// CHECK-DAG: <<Arg:i\d+>> ParameterValue + /// CHECK-DAG: <<Const9:i\d+>> IntConstant 9 + /// CHECK: Mul [<<Arg>>,<<Const9>>] + + /// CHECK-START: int Main.mulPow2Plus1(int) instruction_simplifier (after) + /// CHECK-DAG: <<Arg:i\d+>> ParameterValue + /// CHECK-DAG: <<Const3:i\d+>> IntConstant 3 + /// CHECK: <<Shift:i\d+>> Shl [<<Arg>>,<<Const3>>] + /// CHECK-NEXT: Add [<<Arg>>,<<Shift>>] + + public static int mulPow2Plus1(int arg) { + return arg * 9; + } + + /** + * Test strength reduction of factors of the form (2^n - 1). 
+ */ + + /// CHECK-START: long Main.mulPow2Minus1(long) instruction_simplifier (before) + /// CHECK-DAG: <<Arg:j\d+>> ParameterValue + /// CHECK-DAG: <<Const31:j\d+>> LongConstant 31 + /// CHECK: Mul [<<Arg>>,<<Const31>>] + + /// CHECK-START: long Main.mulPow2Minus1(long) instruction_simplifier (after) + /// CHECK-DAG: <<Arg:j\d+>> ParameterValue + /// CHECK-DAG: <<Const5:i\d+>> IntConstant 5 + /// CHECK: <<Shift:j\d+>> Shl [<<Arg>>,<<Const5>>] + /// CHECK-NEXT: Sub [<<Shift>>,<<Arg>>] + + public static long mulPow2Minus1(long arg) { + return arg * 31; + } + + /// CHECK-START: int Main.booleanFieldNotEqualOne() instruction_simplifier (before) + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 + /// CHECK-DAG: <<Field:z\d+>> StaticFieldGet + /// CHECK-DAG: <<NE:z\d+>> NotEqual [<<Field>>,<<Const1>>] + /// CHECK-DAG: If [<<NE>>] + + /// CHECK-START: int Main.booleanFieldNotEqualOne() instruction_simplifier (after) + /// CHECK-DAG: <<Field:z\d+>> StaticFieldGet + /// CHECK-DAG: <<Not:z\d+>> BooleanNot [<<Field>>] + /// CHECK-DAG: If [<<Not>>] + + public static int booleanFieldNotEqualOne() { + return (booleanField == true) ? 13 : 54; + } + + /// CHECK-START: int Main.booleanFieldEqualZero() instruction_simplifier (before) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 + /// CHECK-DAG: <<Field:z\d+>> StaticFieldGet + /// CHECK-DAG: <<EQ:z\d+>> Equal [<<Field>>,<<Const0>>] + /// CHECK-DAG: If [<<EQ>>] + + /// CHECK-START: int Main.booleanFieldEqualZero() instruction_simplifier (after) + /// CHECK-DAG: <<Field:z\d+>> StaticFieldGet + /// CHECK-DAG: <<Not:z\d+>> BooleanNot [<<Field>>] + /// CHECK-DAG: If [<<Not>>] + + public static int booleanFieldEqualZero() { + return (booleanField != false) ? 13 : 54; + } + + /// CHECK-START: int Main.intConditionNotEqualOne(int) instruction_simplifier_after_bce (before) + /// CHECK-DAG: <<Arg:i\d+>> ParameterValue + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 + /// CHECK-DAG: <<Const42:i\d+>> IntConstant 42 + /// CHECK-DAG: <<GT:z\d+>> GreaterThan [<<Arg>>,<<Const42>>] + /// CHECK-DAG: <<NE:z\d+>> NotEqual [<<GT>>,<<Const1>>] + /// CHECK-DAG: If [<<NE>>] + + /// CHECK-START: int Main.intConditionNotEqualOne(int) instruction_simplifier_after_bce (after) + /// CHECK-DAG: <<Arg:i\d+>> ParameterValue + /// CHECK-DAG: <<Const42:i\d+>> IntConstant 42 + /// CHECK-DAG: If [<<LE:z\d+>>] + /// CHECK-DAG: <<LE>> LessThanOrEqual [<<Arg>>,<<Const42>>] + // Note that we match `LE` from If because there are two identical LessThanOrEqual instructions. + + public static int intConditionNotEqualOne(int i) { + return ((i > 42) == true) ? 13 : 54; + } + + /// CHECK-START: int Main.intConditionEqualZero(int) instruction_simplifier_after_bce (before) + /// CHECK-DAG: <<Arg:i\d+>> ParameterValue + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 + /// CHECK-DAG: <<Const42:i\d+>> IntConstant 42 + /// CHECK-DAG: <<GT:z\d+>> GreaterThan [<<Arg>>,<<Const42>>] + /// CHECK-DAG: <<EQ:z\d+>> Equal [<<GT>>,<<Const0>>] + /// CHECK-DAG: If [<<EQ>>] + + /// CHECK-START: int Main.intConditionEqualZero(int) instruction_simplifier_after_bce (after) + /// CHECK-DAG: <<Arg:i\d+>> ParameterValue + /// CHECK-DAG: <<Const42:i\d+>> IntConstant 42 + /// CHECK-DAG: If [<<LE:z\d+>>] + /// CHECK-DAG: <<LE>> LessThanOrEqual [<<Arg>>,<<Const42>>] + // Note that we match `LE` from If because there are two identical LessThanOrEqual instructions. + + public static int intConditionEqualZero(int i) { + return ((i > 42) != false) ? 13 : 54; + } + + // Test that conditions on float/double are not flipped. 
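The mulPow2Plus1/mulPow2Minus1 checker tests earlier in this file pin the simplifier's rewrite of multiplications by constants of the form 2^n + 1 and 2^n - 1 into a shift plus an add or sub. The snippet below only restates the arithmetic identities with the same constants (9 == 2^3 + 1, 31 == 2^5 - 1); it is not the simplifier code, and it sticks to small non-negative inputs to stay clear of signed-overflow concerns in C++.

    #include <cassert>
    #include <cstdint>

    // x * (2^n + 1) == (x << n) + x, and x * (2^n - 1) == (x << n) - x.
    static int32_t MulPow2Plus1(int32_t x) { return (x << 3) + x; }   // x * 9
    static int64_t MulPow2Minus1(int64_t x) { return (x << 5) - x; }  // x * 31

    int main() {
      // Same expectations as the test's main():
      assert(MulPow2Plus1(100) == 900);
      assert(MulPow2Plus1(12345) == 111105);     // 12345 * 9
      assert(MulPow2Minus1(100) == 3100);
      assert(MulPow2Minus1(12345) == 382695);    // 12345 * 31
      return 0;
    }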
+ + /// CHECK-START: int Main.floatConditionNotEqualOne(float) register (before) + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 + /// CHECK-DAG: NotEqual [{{i\d+}},<<Const1>>] + + public static int floatConditionNotEqualOne(float f) { + return ((f > 42.0f) == true) ? 13 : 54; + } + + /// CHECK-START: int Main.doubleConditionEqualZero(double) register (before) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 + /// CHECK-DAG: Equal [{{i\d+}},<<Const0>>] + + public static int doubleConditionEqualZero(double d) { + return ((d > 42.0) != false) ? 13 : 54; + } + public static void main(String[] args) { int arg = 123456; @@ -1274,7 +1380,6 @@ public class Main { assertDoubleEquals(Div2(150.0), 75.0); assertFloatEquals(DivMP25(100.0f), -400.0f); assertDoubleEquals(DivMP25(150.0), -600.0); - assertLongEquals(Shl1(100), 200); assertIntEquals(UShr28And15(0xc1234567), 0xc); assertLongEquals(UShr60And15(0xc123456787654321L), 0xcL); assertIntEquals(UShr28And7(0xc1234567), 0x4); @@ -1283,5 +1388,32 @@ public class Main { assertLongEquals(Shr56And255(0xc123456787654321L), 0xc1L); assertIntEquals(Shr24And127(0xc1234567), 0x41); assertLongEquals(Shr56And127(0xc123456787654321L), 0x41L); + assertIntEquals(0, mulPow2Plus1(0)); + assertIntEquals(9, mulPow2Plus1(1)); + assertIntEquals(18, mulPow2Plus1(2)); + assertIntEquals(900, mulPow2Plus1(100)); + assertIntEquals(111105, mulPow2Plus1(12345)); + assertLongEquals(0, mulPow2Minus1(0)); + assertLongEquals(31, mulPow2Minus1(1)); + assertLongEquals(62, mulPow2Minus1(2)); + assertLongEquals(3100, mulPow2Minus1(100)); + assertLongEquals(382695, mulPow2Minus1(12345)); + + booleanField = false; + assertIntEquals(booleanFieldNotEqualOne(), 54); + assertIntEquals(booleanFieldEqualZero(), 54); + booleanField = true; + assertIntEquals(booleanFieldNotEqualOne(), 13); + assertIntEquals(booleanFieldEqualZero(), 13); + assertIntEquals(intConditionNotEqualOne(6), 54); + assertIntEquals(intConditionNotEqualOne(43), 13); + assertIntEquals(intConditionEqualZero(6), 54); + assertIntEquals(intConditionEqualZero(43), 13); + assertIntEquals(floatConditionNotEqualOne(6.0f), 54); + assertIntEquals(floatConditionNotEqualOne(43.0f), 13); + assertIntEquals(doubleConditionEqualZero(6.0), 54); + assertIntEquals(doubleConditionEqualZero(43.0), 13); } + + public static boolean booleanField; } diff --git a/test/478-checker-clinit-check-pruning/expected.txt b/test/478-checker-clinit-check-pruning/expected.txt index 387e1a7cb1..7de097f666 100644 --- a/test/478-checker-clinit-check-pruning/expected.txt +++ b/test/478-checker-clinit-check-pruning/expected.txt @@ -4,3 +4,9 @@ Main$ClassWithClinit3's static initializer Main$ClassWithClinit4's static initializer Main$ClassWithClinit5's static initializer Main$ClassWithClinit6's static initializer +Main$ClassWithClinit7's static initializer +Main$ClassWithClinit8's static initializer +Main$ClassWithClinit9's static initializer +Main$ClassWithClinit10's static initializer +Main$ClassWithClinit11's static initializer +Main$ClassWithClinit12's static initializer diff --git a/test/478-checker-clinit-check-pruning/src/Main.java b/test/478-checker-clinit-check-pruning/src/Main.java index cff627373d..79935134b4 100644 --- a/test/478-checker-clinit-check-pruning/src/Main.java +++ b/test/478-checker-clinit-check-pruning/src/Main.java @@ -83,7 +83,7 @@ public class Main { // before the next pass (liveness analysis) instead. 
/// CHECK-START: void Main.invokeStaticNotInlined() liveness (before) - /// CHECK: InvokeStaticOrDirect + /// CHECK: InvokeStaticOrDirect clinit_check:implicit /// CHECK-START: void Main.invokeStaticNotInlined() liveness (before) /// CHECK-NOT: LoadClass @@ -269,7 +269,7 @@ public class Main { /// CHECK-START: void Main.noClinitBecauseOfInvokeStatic() liveness (before) /// CHECK-DAG: <<IntConstant:i\d+>> IntConstant 0 /// CHECK-DAG: <<LoadClass:l\d+>> LoadClass gen_clinit_check:false - /// CHECK-DAG: InvokeStaticOrDirect + /// CHECK-DAG: InvokeStaticOrDirect clinit_check:implicit /// CHECK-DAG: StaticFieldSet [<<LoadClass>>,<<IntConstant>>] /// CHECK-START: void Main.noClinitBecauseOfInvokeStatic() liveness (before) @@ -289,7 +289,7 @@ public class Main { /// CHECK-DAG: <<IntConstant:i\d+>> IntConstant 0 /// CHECK-DAG: <<LoadClass:l\d+>> LoadClass gen_clinit_check:true /// CHECK-DAG: StaticFieldSet [<<LoadClass>>,<<IntConstant>>] - /// CHECK-DAG: InvokeStaticOrDirect + /// CHECK-DAG: InvokeStaticOrDirect clinit_check:none /// CHECK-START: void Main.clinitBecauseOfFieldAccess() liveness (before) /// CHECK-NOT: ClinitCheck @@ -298,6 +298,206 @@ public class Main { ClassWithClinit2.$noinline$staticMethod(); } + /* + * Verify that LoadClass from const-class is not merged with + * later invoke-static (or it's ClinitCheck). + */ + + /// CHECK-START: void Main.constClassAndInvokeStatic(java.lang.Iterable) liveness (before) + /// CHECK: LoadClass gen_clinit_check:false + /// CHECK: InvokeStaticOrDirect clinit_check:implicit + + /// CHECK-START: void Main.constClassAndInvokeStatic(java.lang.Iterable) liveness (before) + /// CHECK-NOT: ClinitCheck + + static void constClassAndInvokeStatic(Iterable it) { + $opt$inline$ignoreClass(ClassWithClinit7.class); + ClassWithClinit7.someStaticMethod(it); + } + + static void $opt$inline$ignoreClass(Class c) { + } + + static class ClassWithClinit7 { + static { + System.out.println("Main$ClassWithClinit7's static initializer"); + } + + // Note: not inlined from constClassAndInvokeStatic() but fully inlined from main(). + static void someStaticMethod(Iterable it) { + // We're not inlining invoke-interface at the moment. + it.iterator(); + } + } + + /* + * Verify that LoadClass from sget is not merged with later invoke-static. + */ + + /// CHECK-START: void Main.sgetAndInvokeStatic(java.lang.Iterable) liveness (before) + /// CHECK: LoadClass gen_clinit_check:true + /// CHECK: InvokeStaticOrDirect clinit_check:none + + /// CHECK-START: void Main.sgetAndInvokeStatic(java.lang.Iterable) liveness (before) + /// CHECK-NOT: ClinitCheck + + static void sgetAndInvokeStatic(Iterable it) { + $opt$inline$ignoreInt(ClassWithClinit8.value); + ClassWithClinit8.someStaticMethod(it); + } + + static void $opt$inline$ignoreInt(int i) { + } + + static class ClassWithClinit8 { + public static int value = 0; + static { + System.out.println("Main$ClassWithClinit8's static initializer"); + } + + // Note: not inlined from sgetAndInvokeStatic() but fully inlined from main(). + static void someStaticMethod(Iterable it) { + // We're not inlining invoke-interface at the moment. + it.iterator(); + } + } + + /* + * Verify that LoadClass from const-class, ClinitCheck from sget and + * InvokeStaticOrDirect from invoke-static are not merged. 
+ */ + + /// CHECK-START: void Main.constClassSgetAndInvokeStatic(java.lang.Iterable) liveness (before) + /// CHECK: LoadClass gen_clinit_check:false + /// CHECK: ClinitCheck + /// CHECK: InvokeStaticOrDirect clinit_check:none + + static void constClassSgetAndInvokeStatic(Iterable it) { + $opt$inline$ignoreClass(ClassWithClinit9.class); + $opt$inline$ignoreInt(ClassWithClinit9.value); + ClassWithClinit9.someStaticMethod(it); + } + + static class ClassWithClinit9 { + public static int value = 0; + static { + System.out.println("Main$ClassWithClinit9's static initializer"); + } + + // Note: not inlined from constClassSgetAndInvokeStatic() but fully inlined from main(). + static void someStaticMethod(Iterable it) { + // We're not inlining invoke-interface at the moment. + it.iterator(); + } + } + + /* + * Verify that LoadClass from a fully-inlined invoke-static is not merged + * with InvokeStaticOrDirect from a later invoke-static to the same method. + */ + + /// CHECK-START: void Main.inlinedInvokeStaticViaNonStatic(java.lang.Iterable) liveness (before) + /// CHECK: LoadClass gen_clinit_check:true + /// CHECK: InvokeStaticOrDirect clinit_check:none + + /// CHECK-START: void Main.inlinedInvokeStaticViaNonStatic(java.lang.Iterable) liveness (before) + /// CHECK-NOT: ClinitCheck + + static void inlinedInvokeStaticViaNonStatic(Iterable it) { + inlinedInvokeStaticViaNonStaticHelper(null); + inlinedInvokeStaticViaNonStaticHelper(it); + } + + static void inlinedInvokeStaticViaNonStaticHelper(Iterable it) { + ClassWithClinit10.inlinedForNull(it); + } + + static class ClassWithClinit10 { + public static int value = 0; + static { + System.out.println("Main$ClassWithClinit10's static initializer"); + } + + static void inlinedForNull(Iterable it) { + if (it != null) { + // We're not inlining invoke-interface at the moment. + it.iterator(); + } + } + } + + /* + * Check that the LoadClass from an invoke-static C.foo() doesn't get merged with + * an invoke-static inside C.foo(). This would mess up the stack walk in the + * resolution trampoline where we would have to load C (if C isn't loaded yet) + * which is not permitted there. + * + * Note: In case of failure, we would get an failed assertion during compilation, + * so we wouldn't really get to the checker tests below. + */ + + /// CHECK-START: void Main.inlinedInvokeStaticViaStatic(java.lang.Iterable) liveness (before) + /// CHECK: LoadClass gen_clinit_check:true + /// CHECK: InvokeStaticOrDirect clinit_check:none + + /// CHECK-START: void Main.inlinedInvokeStaticViaStatic(java.lang.Iterable) liveness (before) + /// CHECK-NOT: ClinitCheck + + static void inlinedInvokeStaticViaStatic(Iterable it) { + ClassWithClinit11.callInlinedForNull(it); + } + + static class ClassWithClinit11 { + public static int value = 0; + static { + System.out.println("Main$ClassWithClinit11's static initializer"); + } + + static void callInlinedForNull(Iterable it) { + inlinedForNull(it); + } + + static void inlinedForNull(Iterable it) { + // We're not inlining invoke-interface at the moment. + it.iterator(); + } + } + + /* + * A test similar to inlinedInvokeStaticViaStatic() but doing the indirect invoke + * twice with the first one to be fully inlined. 
+ */ + + /// CHECK-START: void Main.inlinedInvokeStaticViaStaticTwice(java.lang.Iterable) liveness (before) + /// CHECK: LoadClass gen_clinit_check:true + /// CHECK: InvokeStaticOrDirect clinit_check:none + + /// CHECK-START: void Main.inlinedInvokeStaticViaStaticTwice(java.lang.Iterable) liveness (before) + /// CHECK-NOT: ClinitCheck + + static void inlinedInvokeStaticViaStaticTwice(Iterable it) { + ClassWithClinit12.callInlinedForNull(null); + ClassWithClinit12.callInlinedForNull(it); + } + + static class ClassWithClinit12 { + public static int value = 0; + static { + System.out.println("Main$ClassWithClinit12's static initializer"); + } + + static void callInlinedForNull(Iterable it) { + inlinedForNull(it); + } + + static void inlinedForNull(Iterable it) { + if (it != null) { + // We're not inlining invoke-interface at the moment. + it.iterator(); + } + } + } + // TODO: Add a test for the case of a static method whose declaring // class type index is not available (i.e. when `storage_index` // equals `DexFile::kDexNoIndex` in @@ -310,5 +510,12 @@ public class Main { ClassWithClinit4.invokeStaticNotInlined(); SubClassOfClassWithClinit5.invokeStaticInlined(); SubClassOfClassWithClinit6.invokeStaticNotInlined(); + Iterable it = new Iterable() { public java.util.Iterator iterator() { return null; } }; + constClassAndInvokeStatic(it); + sgetAndInvokeStatic(it); + constClassSgetAndInvokeStatic(it); + inlinedInvokeStaticViaNonStatic(it); + inlinedInvokeStaticViaStatic(it); + inlinedInvokeStaticViaStaticTwice(it); } } diff --git a/test/485-checker-dce-loop-update/smali/TestCase.smali b/test/485-checker-dce-loop-update/smali/TestCase.smali index ab4afdb547..1de0baeabd 100644 --- a/test/485-checker-dce-loop-update/smali/TestCase.smali +++ b/test/485-checker-dce-loop-update/smali/TestCase.smali @@ -136,11 +136,11 @@ ## CHECK-DAG: <<Cst1:i\d+>> IntConstant 1 ## CHECK-DAG: <<Cst5:i\d+>> IntConstant 5 ## CHECK-DAG: <<Cst7:i\d+>> IntConstant 7 -## CHECK-DAG: <<Cst9:i\d+>> IntConstant 9 +## CHECK-DAG: <<Cst11:i\d+>> IntConstant 11 ## CHECK-DAG: <<PhiX1:i\d+>> Phi [<<ArgX>>,<<Add5:i\d+>>,<<Add7:i\d+>>] loop:<<HeaderY:B\d+>> ## CHECK-DAG: If [<<ArgY>>] loop:<<HeaderY>> ## CHECK-DAG: If [<<ArgZ>>] loop:<<HeaderY>> -## CHECK-DAG: <<Mul9:i\d+>> Mul [<<PhiX1>>,<<Cst9>>] loop:<<HeaderY>> +## CHECK-DAG: <<Mul9:i\d+>> Mul [<<PhiX1>>,<<Cst11>>] loop:<<HeaderY>> ## CHECK-DAG: <<PhiX2:i\d+>> Phi [<<PhiX1>>,<<Mul9>>] loop:<<HeaderY>> ## CHECK-DAG: If [<<Cst1>>] loop:<<HeaderY>> ## CHECK-DAG: <<Add5>> Add [<<PhiX2>>,<<Cst5>>] loop:<<HeaderY>> @@ -152,12 +152,12 @@ ## CHECK-DAG: <<ArgY:z\d+>> ParameterValue ## CHECK-DAG: <<ArgZ:z\d+>> ParameterValue ## CHECK-DAG: <<Cst7:i\d+>> IntConstant 7 -## CHECK-DAG: <<Cst9:i\d+>> IntConstant 9 +## CHECK-DAG: <<Cst11:i\d+>> IntConstant 11 ## CHECK-DAG: <<PhiX1:i\d+>> Phi [<<ArgX>>,<<Add7:i\d+>>] loop:<<HeaderY:B\d+>> ## CHECK-DAG: If [<<ArgY>>] loop:<<HeaderY>> ## CHECK-DAG: <<Add7>> Add [<<PhiX1>>,<<Cst7>>] loop:<<HeaderY>> ## CHECK-DAG: If [<<ArgZ>>] loop:none -## CHECK-DAG: <<Mul9:i\d+>> Mul [<<PhiX1>>,<<Cst9>>] loop:none +## CHECK-DAG: <<Mul9:i\d+>> Mul [<<PhiX1>>,<<Cst11>>] loop:none ## CHECK-DAG: <<PhiX2:i\d+>> Phi [<<PhiX1>>,<<Mul9>>] loop:none ## CHECK-DAG: Return [<<PhiX2>>] loop:none @@ -177,7 +177,7 @@ # Additional logic which will end up outside the loop if-eqz p2, :skip_if - mul-int/lit8 p0, p0, 9 + mul-int/lit8 p0, p0, 11 :skip_if if-nez v0, :loop_end # will always take the branch diff --git a/test/530-checker-lse/src/Main.java b/test/530-checker-lse/src/Main.java 
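On the 485-checker-dce-loop-update change just above: the smali literal moves from 9 to 11, presumably because 9 (== 2^3 + 1) would now be strength-reduced into a shift-and-add, changing the IR shape, while the CHECK lines still expect a plain Mul and 11 is not of the form 2^n ± 1. The predicate below is an assumption-labelled illustration of which constants that rewrite targets, not the simplifier's own check.

    #include <cassert>
    #include <cstdint>

    // Illustrative only: true if c is one more or one less than a power of two,
    // i.e. the kind of multiplier the shift-and-add/sub rewrite covers.
    static bool IsPow2PlusMinusOne(uint32_t c) {
      auto is_pow2 = [](uint32_t v) { return v != 0 && (v & (v - 1)) == 0; };
      return is_pow2(c - 1) || is_pow2(c + 1);
    }

    int main() {
      assert(IsPow2PlusMinusOne(9));    // 8 + 1
      assert(IsPow2PlusMinusOne(31));   // 32 - 1
      assert(!IsPow2PlusMinusOne(11));  // Neither 10 nor 12 is a power of two.
      return 0;
    }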
index 13c4722bc4..17e88ceb21 100644 --- a/test/530-checker-lse/src/Main.java +++ b/test/530-checker-lse/src/Main.java @@ -136,6 +136,9 @@ public class Main { // A new allocation shouldn't alias with pre-existing values. static int test3(TestClass obj) { + // Do an allocation here to avoid the HLoadClass and HClinitCheck + // at the second allocation. + new TestClass(); obj.i = 1; obj.next.j = 2; TestClass obj2 = new TestClass(); diff --git a/test/538-checker-embed-constants/src/Main.java b/test/538-checker-embed-constants/src/Main.java index 12f0380df0..f791adfd9a 100644 --- a/test/538-checker-embed-constants/src/Main.java +++ b/test/538-checker-embed-constants/src/Main.java @@ -260,26 +260,43 @@ public class Main { return arg ^ 0xf00000000000000fL; } + /// CHECK-START-ARM: long Main.shl1(long) disassembly (after) + /// CHECK: lsls{{(\.w)?}} {{r\d+}}, {{r\d+}}, #1 + /// CHECK: adc{{(\.w)?}} {{r\d+}}, {{r\d+}}, {{r\d+}} + + /// CHECK-START-ARM: long Main.shl1(long) disassembly (after) + /// CHECK-NOT: lsl{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} + + /// CHECK-START-X86: long Main.shl1(long) disassembly (after) + /// CHECK: add + /// CHECK: adc + + /// CHECK-START-X86: long Main.shl1(long) disassembly (after) + /// CHECK-NOT: shl + + public static long shl1(long arg) { + return arg << 1; + } + /// CHECK-START-ARM: long Main.shl2(long) disassembly (after) - /// CHECK: lsl{{s?|.w}} <<oh:r\d+>>, {{r\d+}}, #2 + /// CHECK: lsl{{s?|\.w}} <<oh:r\d+>>, {{r\d+}}, #2 /// CHECK: orr.w <<oh>>, <<oh>>, <<low:r\d+>>, lsr #30 - /// CHECK-DAG: lsl{{s?|.w}} {{r\d+}}, <<low>>, #2 + /// CHECK: lsl{{s?|\.w}} {{r\d+}}, <<low>>, #2 /// CHECK-START-ARM: long Main.shl2(long) disassembly (after) - /// CHECK-NOT: lsl{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} + /// CHECK-NOT: lsl{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} public static long shl2(long arg) { - // Note: Shl(x, 1) is transformed to Add(x, x), so test Shl(x, 2). 
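The ARM expectations added to the long shift tests in 538-checker-embed-constants encode how a 64-bit shift is split across a 32-bit register pair: a general shift by n (1..31) shifts one half and ORs in the bits crossing over from the other half (the lsl/orr pattern), while a shift by exactly 1 rides the carry flag (lsls + adc for <<1, asrs/lsrs + rrx for >>1 and >>>1). The portable sketch below shows the half-word arithmetic with the test's own expected values; it is an illustration, not the code generator, and its function names are made up.

    #include <cassert>
    #include <cstdint>

    // 64-bit left shift by n (1..31) on 32-bit halves, mirroring the
    // lsl/orr/lsl pattern the ARM code generator emits.
    static uint64_t Shl64ViaHalves(uint64_t x, unsigned n) {
      uint32_t lo = static_cast<uint32_t>(x);
      uint32_t hi = static_cast<uint32_t>(x >> 32);
      uint32_t out_hi = (hi << n) | (lo >> (32 - n));  // High half plus bits crossing over.
      uint32_t out_lo = lo << n;
      return (static_cast<uint64_t>(out_hi) << 32) | out_lo;
    }

    // For n == 1 the crossing bit is exactly the carry out of `lo << 1`,
    // which is why lsls followed by adc is enough.
    static uint64_t Shl64ByOneViaCarry(uint64_t x) {
      uint32_t lo = static_cast<uint32_t>(x);
      uint32_t hi = static_cast<uint32_t>(x >> 32);
      uint32_t carry = lo >> 31;
      uint32_t out_lo = lo << 1;          // lsls r_lo, r_lo, #1  (sets carry)
      uint32_t out_hi = hi + hi + carry;  // adc  r_hi, r_hi, r_hi
      return (static_cast<uint64_t>(out_hi) << 32) | out_lo;
    }

    int main() {
      const uint64_t kArg = 0x1234567887654321ULL;  // Same longArg as the test.
      assert(Shl64ByOneViaCarry(kArg) == 0x2468acf10eca8642ULL);
      assert(Shl64ViaHalves(kArg, 2) == 0x48d159e21d950c84ULL);
      assert(Shl64ViaHalves(kArg, 31) == 0x43b2a19080000000ULL);
      return 0;
    }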
return arg << 2; } /// CHECK-START-ARM: long Main.shl31(long) disassembly (after) - /// CHECK: lsl{{s?|.w}} <<oh:r\d+>>, {{r\d+}}, #31 + /// CHECK: lsl{{s?|\.w}} <<oh:r\d+>>, {{r\d+}}, #31 /// CHECK: orr.w <<oh>>, <<oh>>, <<low:r\d+>>, lsr #1 - /// CHECK: lsl{{s?|.w}} {{r\d+}}, <<low>>, #31 + /// CHECK: lsl{{s?|\.w}} {{r\d+}}, <<low>>, #31 /// CHECK-START-ARM: long Main.shl31(long) disassembly (after) - /// CHECK-NOT: lsl{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} + /// CHECK-NOT: lsl{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} public static long shl31(long arg) { return arg << 31; @@ -287,114 +304,136 @@ public class Main { /// CHECK-START-ARM: long Main.shl32(long) disassembly (after) /// CHECK-DAG: mov {{r\d+}}, {{r\d+}} - /// CHECK-DAG: mov{{s?|.w}} {{r\d+}}, #0 + /// CHECK-DAG: mov{{s?|\.w}} {{r\d+}}, #0 /// CHECK-START-ARM: long Main.shl32(long) disassembly (after) - /// CHECK-NOT: lsl{{s?|.w}} + /// CHECK-NOT: lsl{{s?|\.w}} public static long shl32(long arg) { return arg << 32; } /// CHECK-START-ARM: long Main.shl33(long) disassembly (after) - /// CHECK-DAG: lsl{{s?|.w}} {{r\d+}}, <<high:r\d+>>, #1 - /// CHECK-DAG: mov{{s?|.w}} {{r\d+}}, #0 + /// CHECK-DAG: lsl{{s?|\.w}} {{r\d+}}, <<high:r\d+>>, #1 + /// CHECK-DAG: mov{{s?|\.w}} {{r\d+}}, #0 /// CHECK-START-ARM: long Main.shl33(long) disassembly (after) - /// CHECK-NOT: lsl{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} + /// CHECK-NOT: lsl{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} public static long shl33(long arg) { return arg << 33; } /// CHECK-START-ARM: long Main.shl63(long) disassembly (after) - /// CHECK-DAG: lsl{{s?|.w}} {{r\d+}}, <<high:r\d+>>, #31 - /// CHECK-DAG: mov{{s?|.w}} {{r\d+}}, #0 + /// CHECK-DAG: lsl{{s?|\.w}} {{r\d+}}, <<high:r\d+>>, #31 + /// CHECK-DAG: mov{{s?|\.w}} {{r\d+}}, #0 /// CHECK-START-ARM: long Main.shl63(long) disassembly (after) - /// CHECK-NOT: lsl{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} + /// CHECK-NOT: lsl{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} public static long shl63(long arg) { return arg << 63; } /// CHECK-START-ARM: long Main.shr1(long) disassembly (after) - /// CHECK: lsr{{s?|.w}} <<ol:r\d+>>, {{r\d+}}, #1 - /// CHECK: orr.w <<ol>>, <<ol>>, <<high:r\d+>>, lsl #31 - /// CHECK-DAG: asr{{s?|.w}} {{r\d+}}, <<high>>, #1 + /// CHECK: asrs{{(\.w)?}} {{r\d+}}, {{r\d+}}, #1 + /// CHECK: mov.w {{r\d+}}, {{r\d+}}, rrx /// CHECK-START-ARM: long Main.shr1(long) disassembly (after) - /// CHECK-NOT: asr{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} + /// CHECK-NOT: asr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} public static long shr1(long arg) { return arg >> 1; } + /// CHECK-START-ARM: long Main.shr2(long) disassembly (after) + /// CHECK: lsr{{s?|\.w}} <<ol:r\d+>>, {{r\d+}}, #2 + /// CHECK: orr.w <<ol>>, <<ol>>, <<high:r\d+>>, lsl #30 + /// CHECK-DAG: asr{{s?|\.w}} {{r\d+}}, <<high>>, #2 + + /// CHECK-START-ARM: long Main.shr2(long) disassembly (after) + /// CHECK-NOT: asr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} + + public static long shr2(long arg) { + return arg >> 2; + } + /// CHECK-START-ARM: long Main.shr31(long) disassembly (after) - /// CHECK: lsr{{s?|.w}} <<ol:r\d+>>, {{r\d+}}, #31 + /// CHECK: lsr{{s?|\.w}} <<ol:r\d+>>, {{r\d+}}, #31 /// CHECK: orr.w <<ol>>, <<ol>>, <<high:r\d+>>, lsl #1 - /// CHECK: asr{{s?|.w}} {{r\d+}}, <<high>>, #31 + /// CHECK: asr{{s?|\.w}} {{r\d+}}, <<high>>, #31 /// CHECK-START-ARM: long Main.shr31(long) disassembly (after) - /// CHECK-NOT: asr{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} + /// CHECK-NOT: asr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} public static long shr31(long arg) { return arg >> 31; } /// 
CHECK-START-ARM: long Main.shr32(long) disassembly (after) - /// CHECK-DAG: asr{{s?|.w}} {{r\d+}}, <<high:r\d+>>, #31 + /// CHECK-DAG: asr{{s?|\.w}} {{r\d+}}, <<high:r\d+>>, #31 /// CHECK-DAG: mov {{r\d+}}, <<high>> /// CHECK-START-ARM: long Main.shr32(long) disassembly (after) - /// CHECK-NOT: asr{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} - /// CHECK-NOT: lsr{{s?|.w}} + /// CHECK-NOT: asr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} + /// CHECK-NOT: lsr{{s?|\.w}} public static long shr32(long arg) { return arg >> 32; } /// CHECK-START-ARM: long Main.shr33(long) disassembly (after) - /// CHECK-DAG: asr{{s?|.w}} {{r\d+}}, <<high:r\d+>>, #1 - /// CHECK-DAG: asr{{s?|.w}} {{r\d+}}, <<high>>, #31 + /// CHECK-DAG: asr{{s?|\.w}} {{r\d+}}, <<high:r\d+>>, #1 + /// CHECK-DAG: asr{{s?|\.w}} {{r\d+}}, <<high>>, #31 /// CHECK-START-ARM: long Main.shr33(long) disassembly (after) - /// CHECK-NOT: asr{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} + /// CHECK-NOT: asr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} public static long shr33(long arg) { return arg >> 33; } /// CHECK-START-ARM: long Main.shr63(long) disassembly (after) - /// CHECK-DAG: asr{{s?|.w}} {{r\d+}}, <<high:r\d+>>, #31 - /// CHECK-DAG: asr{{s?|.w}} {{r\d+}}, <<high>>, #31 + /// CHECK-DAG: asr{{s?|\.w}} {{r\d+}}, <<high:r\d+>>, #31 + /// CHECK-DAG: asr{{s?|\.w}} {{r\d+}}, <<high>>, #31 /// CHECK-START-ARM: long Main.shr63(long) disassembly (after) - /// CHECK-NOT: asr{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} + /// CHECK-NOT: asr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} public static long shr63(long arg) { return arg >> 63; } /// CHECK-START-ARM: long Main.ushr1(long) disassembly (after) - /// CHECK: lsr{{s?|.w}} <<ol:r\d+>>, {{r\d+}}, #1 - /// CHECK: orr.w <<ol>>, <<ol>>, <<high:r\d+>>, lsl #31 - /// CHECK-DAG: lsr{{s?|.w}} {{r\d+}}, <<high>>, #1 + /// CHECK: lsrs{{|.w}} {{r\d+}}, {{r\d+}}, #1 + /// CHECK: mov.w {{r\d+}}, {{r\d+}}, rrx /// CHECK-START-ARM: long Main.ushr1(long) disassembly (after) - /// CHECK-NOT: lsr{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} + /// CHECK-NOT: lsr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} public static long ushr1(long arg) { return arg >>> 1; } + /// CHECK-START-ARM: long Main.ushr2(long) disassembly (after) + /// CHECK: lsr{{s?|\.w}} <<ol:r\d+>>, {{r\d+}}, #2 + /// CHECK: orr.w <<ol>>, <<ol>>, <<high:r\d+>>, lsl #30 + /// CHECK-DAG: lsr{{s?|\.w}} {{r\d+}}, <<high>>, #2 + + /// CHECK-START-ARM: long Main.ushr2(long) disassembly (after) + /// CHECK-NOT: lsr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} + + public static long ushr2(long arg) { + return arg >>> 2; + } + /// CHECK-START-ARM: long Main.ushr31(long) disassembly (after) - /// CHECK: lsr{{s?|.w}} <<ol:r\d+>>, {{r\d+}}, #31 + /// CHECK: lsr{{s?|\.w}} <<ol:r\d+>>, {{r\d+}}, #31 /// CHECK: orr.w <<ol>>, <<ol>>, <<high:r\d+>>, lsl #1 - /// CHECK: lsr{{s?|.w}} {{r\d+}}, <<high>>, #31 + /// CHECK: lsr{{s?|\.w}} {{r\d+}}, <<high>>, #31 /// CHECK-START-ARM: long Main.ushr31(long) disassembly (after) - /// CHECK-NOT: lsr{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} + /// CHECK-NOT: lsr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} public static long ushr31(long arg) { return arg >>> 31; @@ -402,32 +441,32 @@ public class Main { /// CHECK-START-ARM: long Main.ushr32(long) disassembly (after) /// CHECK-DAG: mov {{r\d+}}, {{r\d+}} - /// CHECK-DAG: mov{{s?|.w}} {{r\d+}}, #0 + /// CHECK-DAG: mov{{s?|\.w}} {{r\d+}}, #0 /// CHECK-START-ARM: long Main.ushr32(long) disassembly (after) - /// CHECK-NOT: lsr{{s?|.w}} + /// CHECK-NOT: lsr{{s?|\.w}} public static long ushr32(long arg) { return arg >>> 32; } /// 
CHECK-START-ARM: long Main.ushr33(long) disassembly (after) - /// CHECK-DAG: lsr{{s?|.w}} {{r\d+}}, {{r\d+}}, #1 - /// CHECK-DAG: mov{{s?|.w}} {{r\d+}}, #0 + /// CHECK-DAG: lsr{{s?|\.w}} {{r\d+}}, {{r\d+}}, #1 + /// CHECK-DAG: mov{{s?|\.w}} {{r\d+}}, #0 /// CHECK-START-ARM: long Main.ushr33(long) disassembly (after) - /// CHECK-NOT: lsr{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} + /// CHECK-NOT: lsr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} public static long ushr33(long arg) { return arg >>> 33; } /// CHECK-START-ARM: long Main.ushr63(long) disassembly (after) - /// CHECK-DAG: lsr{{s?|.w}} {{r\d+}}, {{r\d+}}, #31 - /// CHECK-DAG: mov{{s?|.w}} {{r\d+}}, #0 + /// CHECK-DAG: lsr{{s?|\.w}} {{r\d+}}, {{r\d+}}, #31 + /// CHECK-DAG: mov{{s?|\.w}} {{r\d+}}, #0 /// CHECK-START-ARM: long Main.ushr63(long) disassembly (after) - /// CHECK-NOT: lsr{{s?|.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} + /// CHECK-NOT: lsr{{s?|\.w}} {{r\d+}}, {{r\d+}}, {{r\d+}} public static long ushr63(long arg) { return arg >>> 63; @@ -485,11 +524,13 @@ public class Main { assertLongEquals(14, addM1(7)); + assertLongEquals(shl1(longArg), 0x2468acf10eca8642L); assertLongEquals(shl2(longArg), 0x48d159e21d950c84L); assertLongEquals(shl31(longArg), 0x43b2a19080000000L); assertLongEquals(shl32(longArg), 0x8765432100000000L); assertLongEquals(shl33(longArg), 0x0eca864200000000L); assertLongEquals(shl63(longArg), 0x8000000000000000L); + assertLongEquals(shl1(~longArg), 0xdb97530ef13579bcL); assertLongEquals(shl2(~longArg), 0xb72ea61de26af378L); assertLongEquals(shl31(~longArg), 0xbc4d5e6f00000000L); assertLongEquals(shl32(~longArg), 0x789abcde00000000L); @@ -497,22 +538,26 @@ public class Main { assertLongEquals(shl63(~longArg), 0x0000000000000000L); assertLongEquals(shr1(longArg), 0x091a2b3c43b2a190L); + assertLongEquals(shr2(longArg), 0x048d159e21d950c8L); assertLongEquals(shr31(longArg), 0x000000002468acf1L); assertLongEquals(shr32(longArg), 0x0000000012345678L); assertLongEquals(shr33(longArg), 0x00000000091a2b3cL); assertLongEquals(shr63(longArg), 0x0000000000000000L); assertLongEquals(shr1(~longArg), 0xf6e5d4c3bc4d5e6fL); + assertLongEquals(shr2(~longArg), 0xfb72ea61de26af37L); assertLongEquals(shr31(~longArg), 0xffffffffdb97530eL); assertLongEquals(shr32(~longArg), 0xffffffffedcba987L); assertLongEquals(shr33(~longArg), 0xfffffffff6e5d4c3L); assertLongEquals(shr63(~longArg), 0xffffffffffffffffL); assertLongEquals(ushr1(longArg), 0x091a2b3c43b2a190L); + assertLongEquals(ushr2(longArg), 0x048d159e21d950c8L); assertLongEquals(ushr31(longArg), 0x000000002468acf1L); assertLongEquals(ushr32(longArg), 0x0000000012345678L); assertLongEquals(ushr33(longArg), 0x00000000091a2b3cL); assertLongEquals(ushr63(longArg), 0x0000000000000000L); assertLongEquals(ushr1(~longArg), 0x76e5d4c3bc4d5e6fL); + assertLongEquals(ushr2(~longArg), 0x3b72ea61de26af37L); assertLongEquals(ushr31(~longArg), 0x00000001db97530eL); assertLongEquals(ushr32(~longArg), 0x00000000edcba987L); assertLongEquals(ushr33(~longArg), 0x0000000076e5d4c3L); diff --git a/test/543-env-long-ref/env_long_ref.cc b/test/543-env-long-ref/env_long_ref.cc new file mode 100644 index 0000000000..41083235d9 --- /dev/null +++ b/test/543-env-long-ref/env_long_ref.cc @@ -0,0 +1,66 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "arch/context.h" +#include "art_method-inl.h" +#include "jni.h" +#include "scoped_thread_state_change.h" +#include "stack.h" +#include "thread.h" + +namespace art { + +namespace { + +class TestVisitor : public StackVisitor { + public: + TestVisitor(const ScopedObjectAccess& soa, Context* context, jobject expected_value) + SHARED_REQUIRES(Locks::mutator_lock_) + : StackVisitor(soa.Self(), context, StackVisitor::StackWalkKind::kIncludeInlinedFrames), + expected_value_(expected_value), + found_(false), + soa_(soa) {} + + bool VisitFrame() SHARED_REQUIRES(Locks::mutator_lock_) { + ArtMethod* m = GetMethod(); + std::string m_name(m->GetName()); + + if (m_name == "testCase") { + found_ = true; + uint32_t value = 0; + CHECK(GetVReg(m, 1, kReferenceVReg, &value)); + CHECK_EQ(reinterpret_cast<mirror::Object*>(value), + soa_.Decode<mirror::Object*>(expected_value_)); + } + return true; + } + + jobject expected_value_; + bool found_; + const ScopedObjectAccess& soa_; +}; + +} // namespace + +extern "C" JNIEXPORT void JNICALL Java_Main_lookForMyRegisters(JNIEnv*, jclass, jobject value) { + ScopedObjectAccess soa(Thread::Current()); + std::unique_ptr<Context> context(Context::Create()); + TestVisitor visitor(soa, context.get(), value); + visitor.WalkStack(); + CHECK(visitor.found_); +} + +} // namespace art diff --git a/test/543-env-long-ref/expected.txt b/test/543-env-long-ref/expected.txt new file mode 100644 index 0000000000..89f155b8c9 --- /dev/null +++ b/test/543-env-long-ref/expected.txt @@ -0,0 +1,2 @@ +JNI_OnLoad called +42 diff --git a/test/543-env-long-ref/info.txt b/test/543-env-long-ref/info.txt new file mode 100644 index 0000000000..6a4253364e --- /dev/null +++ b/test/543-env-long-ref/info.txt @@ -0,0 +1,3 @@ +Regression test for optimizing that used to not return +the right dex register in debuggable when a new value +was overwriting the high dex register of a wide value. diff --git a/test/543-env-long-ref/smali/TestCase.smali b/test/543-env-long-ref/smali/TestCase.smali new file mode 100644 index 0000000000..608d6eb96a --- /dev/null +++ b/test/543-env-long-ref/smali/TestCase.smali @@ -0,0 +1,26 @@ +# Copyright (C) 2015 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +.class public LTestCase; +.super Ljava/lang/Object; + +.method public static testCase()I + .registers 5 + const-wide/16 v0, 0x1 + invoke-static {v0, v1}, LMain;->$noinline$allocate(J)LMain; + move-result-object v1 + invoke-static {v1}, LMain;->lookForMyRegisters(LMain;)V + iget v2, v1, LMain;->field:I + return v2 +.end method diff --git a/test/543-env-long-ref/src/Main.java b/test/543-env-long-ref/src/Main.java new file mode 100644 index 0000000000..e723789ce2 --- /dev/null +++ b/test/543-env-long-ref/src/Main.java @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.lang.reflect.Method; + +public class Main { + // Workaround for b/18051191. + class InnerClass {} + + public static void main(String[] args) throws Throwable { + System.loadLibrary(args[0]); + Class<?> c = Class.forName("TestCase"); + Method m = c.getMethod("testCase"); + Integer a = (Integer)m.invoke(null, (Object[]) null); + System.out.println(a); + } + + public static Main $noinline$allocate(long a) { + try { + return new Main(); + } catch (Exception e) { + throw new Error(e); + } + } + + public static native void lookForMyRegisters(Main m); + + int field = 42; +} diff --git a/test/550-checker-multiply-accumulate/expected.txt b/test/550-checker-multiply-accumulate/expected.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/550-checker-multiply-accumulate/expected.txt diff --git a/test/550-checker-multiply-accumulate/info.txt b/test/550-checker-multiply-accumulate/info.txt new file mode 100644 index 0000000000..10e998cb18 --- /dev/null +++ b/test/550-checker-multiply-accumulate/info.txt @@ -0,0 +1 @@ +Test the merging of instructions into the shifter operand on arm64. diff --git a/test/550-checker-multiply-accumulate/src/Main.java b/test/550-checker-multiply-accumulate/src/Main.java new file mode 100644 index 0000000000..2d0688d57e --- /dev/null +++ b/test/550-checker-multiply-accumulate/src/Main.java @@ -0,0 +1,234 @@ +/* +* Copyright (C) 2015 The Android Open Source Project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +public class Main { + + // A dummy value to defeat inlining of these routines. 
+ static boolean doThrow = false; + + public static void assertIntEquals(int expected, int result) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } + + public static void assertLongEquals(long expected, long result) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } + + /** + * Test basic merging of `MUL+ADD` into `MULADD`. + */ + + /// CHECK-START-ARM64: int Main.$opt$noinline$mulAdd(int, int, int) instruction_simplifier_arm64 (before) + /// CHECK: <<Acc:i\d+>> ParameterValue + /// CHECK: <<Left:i\d+>> ParameterValue + /// CHECK: <<Right:i\d+>> ParameterValue + /// CHECK: <<Mul:i\d+>> Mul [<<Left>>,<<Right>>] + /// CHECK: <<Add:i\d+>> Add [<<Acc>>,<<Mul>>] + /// CHECK: Return [<<Add>>] + + /// CHECK-START-ARM64: int Main.$opt$noinline$mulAdd(int, int, int) instruction_simplifier_arm64 (after) + /// CHECK: <<Acc:i\d+>> ParameterValue + /// CHECK: <<Left:i\d+>> ParameterValue + /// CHECK: <<Right:i\d+>> ParameterValue + /// CHECK: <<MulAdd:i\d+>> Arm64MultiplyAccumulate [<<Acc>>,<<Left>>,<<Right>>] kind:Add + /// CHECK: Return [<<MulAdd>>] + + /// CHECK-START-ARM64: int Main.$opt$noinline$mulAdd(int, int, int) instruction_simplifier_arm64 (after) + /// CHECK-NOT: Mul + /// CHECK-NOT: Add + + /// CHECK-START-ARM64: int Main.$opt$noinline$mulAdd(int, int, int) disassembly (after) + /// CHECK: madd w{{\d+}}, w{{\d+}}, w{{\d+}}, w{{\d+}} + + public static int $opt$noinline$mulAdd(int acc, int left, int right) { + if (doThrow) throw new Error(); + return acc + left * right; + } + + /** + * Test basic merging of `MUL+SUB` into `MULSUB`. + */ + + /// CHECK-START-ARM64: long Main.$opt$noinline$mulSub(long, long, long) instruction_simplifier_arm64 (before) + /// CHECK: <<Acc:j\d+>> ParameterValue + /// CHECK: <<Left:j\d+>> ParameterValue + /// CHECK: <<Right:j\d+>> ParameterValue + /// CHECK: <<Mul:j\d+>> Mul [<<Left>>,<<Right>>] + /// CHECK: <<Sub:j\d+>> Sub [<<Acc>>,<<Mul>>] + /// CHECK: Return [<<Sub>>] + + /// CHECK-START-ARM64: long Main.$opt$noinline$mulSub(long, long, long) instruction_simplifier_arm64 (after) + /// CHECK: <<Acc:j\d+>> ParameterValue + /// CHECK: <<Left:j\d+>> ParameterValue + /// CHECK: <<Right:j\d+>> ParameterValue + /// CHECK: <<MulSub:j\d+>> Arm64MultiplyAccumulate [<<Acc>>,<<Left>>,<<Right>>] kind:Sub + /// CHECK: Return [<<MulSub>>] + + /// CHECK-START-ARM64: long Main.$opt$noinline$mulSub(long, long, long) instruction_simplifier_arm64 (after) + /// CHECK-NOT: Mul + /// CHECK-NOT: Sub + + /// CHECK-START-ARM64: long Main.$opt$noinline$mulSub(long, long, long) disassembly (after) + /// CHECK: msub x{{\d+}}, x{{\d+}}, x{{\d+}}, x{{\d+}} + + public static long $opt$noinline$mulSub(long acc, long left, long right) { + if (doThrow) throw new Error(); + return acc - left * right; + } + + /** + * Test that we do not create a multiply-accumulate instruction when there + * are other uses of the multiplication that cannot merge it. 
+ */ + + /// CHECK-START-ARM64: int Main.$opt$noinline$multipleUses1(int, int, int) instruction_simplifier_arm64 (before) + /// CHECK: <<Acc:i\d+>> ParameterValue + /// CHECK: <<Left:i\d+>> ParameterValue + /// CHECK: <<Right:i\d+>> ParameterValue + /// CHECK: <<Mul:i\d+>> Mul [<<Left>>,<<Right>>] + /// CHECK: <<Add:i\d+>> Add [<<Acc>>,<<Mul>>] + /// CHECK: <<Or:i\d+>> Or [<<Mul>>,<<Add>>] + /// CHECK: Return [<<Or>>] + + /// CHECK-START-ARM64: int Main.$opt$noinline$multipleUses1(int, int, int) instruction_simplifier_arm64 (after) + /// CHECK: <<Acc:i\d+>> ParameterValue + /// CHECK: <<Left:i\d+>> ParameterValue + /// CHECK: <<Right:i\d+>> ParameterValue + /// CHECK: <<Mul:i\d+>> Mul [<<Left>>,<<Right>>] + /// CHECK: <<Add:i\d+>> Add [<<Acc>>,<<Mul>>] + /// CHECK: <<Or:i\d+>> Or [<<Mul>>,<<Add>>] + /// CHECK: Return [<<Or>>] + + /// CHECK-START-ARM64: int Main.$opt$noinline$multipleUses1(int, int, int) instruction_simplifier_arm64 (after) + /// CHECK-NOT: Arm64MultiplyAccumulate + + public static int $opt$noinline$multipleUses1(int acc, int left, int right) { + if (doThrow) throw new Error(); + int temp = left * right; + return temp | (acc + temp); + } + + /** + * Test that we do not create a multiply-accumulate instruction even when all + * uses of the multiplication can merge it. + */ + + /// CHECK-START-ARM64: long Main.$opt$noinline$multipleUses2(long, long, long) instruction_simplifier_arm64 (before) + /// CHECK: <<Acc:j\d+>> ParameterValue + /// CHECK: <<Left:j\d+>> ParameterValue + /// CHECK: <<Right:j\d+>> ParameterValue + /// CHECK: <<Mul:j\d+>> Mul [<<Left>>,<<Right>>] + /// CHECK: <<Add:j\d+>> Add [<<Acc>>,<<Mul>>] + /// CHECK: <<Sub:j\d+>> Sub [<<Acc>>,<<Mul>>] + /// CHECK: <<Res:j\d+>> Add [<<Add>>,<<Sub>>] + /// CHECK: Return [<<Res>>] + + /// CHECK-START-ARM64: long Main.$opt$noinline$multipleUses2(long, long, long) instruction_simplifier_arm64 (after) + /// CHECK: <<Acc:j\d+>> ParameterValue + /// CHECK: <<Left:j\d+>> ParameterValue + /// CHECK: <<Right:j\d+>> ParameterValue + /// CHECK: <<Mul:j\d+>> Mul [<<Left>>,<<Right>>] + /// CHECK: <<Add:j\d+>> Add [<<Acc>>,<<Mul>>] + /// CHECK: <<Sub:j\d+>> Sub [<<Acc>>,<<Mul>>] + /// CHECK: <<Res:j\d+>> Add [<<Add>>,<<Sub>>] + /// CHECK: Return [<<Res>>] + + /// CHECK-START-ARM64: long Main.$opt$noinline$multipleUses2(long, long, long) instruction_simplifier_arm64 (after) + /// CHECK-NOT: Arm64MultiplyAccumulate + + + public static long $opt$noinline$multipleUses2(long acc, long left, long right) { + if (doThrow) throw new Error(); + long temp = left * right; + return (acc + temp) + (acc - temp); + } + + + /** + * Test the interpretation of `a * (b + 1)` as `a + (a * b)`. 
+ */ + + /// CHECK-START-ARM64: int Main.$opt$noinline$mulPlusOne(int, int) instruction_simplifier_arm64 (before) + /// CHECK: <<Acc:i\d+>> ParameterValue + /// CHECK: <<Var:i\d+>> ParameterValue + /// CHECK: <<Const1:i\d+>> IntConstant 1 + /// CHECK: <<Add:i\d+>> Add [<<Var>>,<<Const1>>] + /// CHECK: <<Mul:i\d+>> Mul [<<Acc>>,<<Add>>] + /// CHECK: Return [<<Mul>>] + + /// CHECK-START-ARM64: int Main.$opt$noinline$mulPlusOne(int, int) instruction_simplifier_arm64 (after) + /// CHECK: <<Acc:i\d+>> ParameterValue + /// CHECK: <<Var:i\d+>> ParameterValue + /// CHECK: <<MulAdd:i\d+>> Arm64MultiplyAccumulate [<<Acc>>,<<Acc>>,<<Var>>] kind:Add + /// CHECK: Return [<<MulAdd>>] + + /// CHECK-START-ARM64: int Main.$opt$noinline$mulPlusOne(int, int) instruction_simplifier_arm64 (after) + /// CHECK-NOT: Mul + /// CHECK-NOT: Add + + /// CHECK-START-ARM64: int Main.$opt$noinline$mulPlusOne(int, int) disassembly (after) + /// CHECK: madd w{{\d+}}, w{{\d+}}, w{{\d+}}, w{{\d+}} + + public static int $opt$noinline$mulPlusOne(int acc, int var) { + if (doThrow) throw new Error(); + return acc * (var + 1); + } + + + /** + * Test the interpretation of `a * (1 - b)` as `a - (a * b)`. + */ + + /// CHECK-START-ARM64: long Main.$opt$noinline$mulMinusOne(long, long) instruction_simplifier_arm64 (before) + /// CHECK: <<Acc:j\d+>> ParameterValue + /// CHECK: <<Var:j\d+>> ParameterValue + /// CHECK: <<Const1:j\d+>> LongConstant 1 + /// CHECK: <<Sub:j\d+>> Sub [<<Const1>>,<<Var>>] + /// CHECK: <<Mul:j\d+>> Mul [<<Acc>>,<<Sub>>] + /// CHECK: Return [<<Mul>>] + + /// CHECK-START-ARM64: long Main.$opt$noinline$mulMinusOne(long, long) instruction_simplifier_arm64 (after) + /// CHECK: <<Acc:j\d+>> ParameterValue + /// CHECK: <<Var:j\d+>> ParameterValue + /// CHECK: <<MulSub:j\d+>> Arm64MultiplyAccumulate [<<Acc>>,<<Acc>>,<<Var>>] kind:Sub + /// CHECK: Return [<<MulSub>>] + + /// CHECK-START-ARM64: long Main.$opt$noinline$mulMinusOne(long, long) instruction_simplifier_arm64 (after) + /// CHECK-NOT: Mul + /// CHECK-NOT: Sub + + /// CHECK-START-ARM64: long Main.$opt$noinline$mulMinusOne(long, long) disassembly (after) + /// CHECK: msub x{{\d+}}, x{{\d+}}, x{{\d+}}, x{{\d+}} + + public static long $opt$noinline$mulMinusOne(long acc, long var) { + if (doThrow) throw new Error(); + return acc * (1 - var); + } + + + public static void main(String[] args) { + assertIntEquals(7, $opt$noinline$mulAdd(1, 2, 3)); + assertLongEquals(-26, $opt$noinline$mulSub(4, 5, 6)); + assertIntEquals(79, $opt$noinline$multipleUses1(7, 8, 9)); + assertLongEquals(20, $opt$noinline$multipleUses2(10, 11, 12)); + assertIntEquals(195, $opt$noinline$mulPlusOne(13, 14)); + assertLongEquals(-225, $opt$noinline$mulMinusOne(15, 16)); + } +} diff --git a/test/550-checker-regression-wide-store/expected.txt b/test/550-checker-regression-wide-store/expected.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/550-checker-regression-wide-store/expected.txt diff --git a/test/550-checker-regression-wide-store/info.txt b/test/550-checker-regression-wide-store/info.txt new file mode 100644 index 0000000000..6cf04bc35a --- /dev/null +++ b/test/550-checker-regression-wide-store/info.txt @@ -0,0 +1,3 @@ +Test an SsaBuilder regression where storing into the high vreg of a pair +would not invalidate the low vreg. The resulting environment would generate +an incorrect stack map, causing deopt and try/catch to use a wrong location.
\ No newline at end of file diff --git a/test/550-checker-regression-wide-store/smali/TestCase.smali b/test/550-checker-regression-wide-store/smali/TestCase.smali new file mode 100644 index 0000000000..7974d56a8f --- /dev/null +++ b/test/550-checker-regression-wide-store/smali/TestCase.smali @@ -0,0 +1,82 @@ +# Copyright (C) 2015 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +.class public LTestCase; +.super Ljava/lang/Object; + +.method public static $noinline$throw()V + .registers 1 + new-instance v0, Ljava/lang/Exception; + invoke-direct {v0}, Ljava/lang/Exception;-><init>()V + throw v0 +.end method + +# Test storing into the high vreg of a wide pair. This scenario has runtime +# behaviour implications so we run it from Main.main. + +## CHECK-START: int TestCase.invalidateLow(long) ssa_builder (after) +## CHECK-DAG: <<Cst0:i\d+>> IntConstant 0 +## CHECK-DAG: <<Arg:j\d+>> ParameterValue +## CHECK-DAG: <<Cast:i\d+>> TypeConversion [<<Arg>>] +## CHECK-DAG: InvokeStaticOrDirect method_name:java.lang.System.nanoTime env:[[_,<<Cst0>>,<<Arg>>,_]] +## CHECK-DAG: InvokeStaticOrDirect method_name:TestCase.$noinline$throw env:[[_,<<Cast>>,<<Arg>>,_]] + +.method public static invalidateLow(J)I + .registers 4 + + const/4 v1, 0x0 + + :try_start + invoke-static {}, Ljava/lang/System;->nanoTime()J + move-wide v0, p0 + long-to-int v1, v0 + invoke-static {}, LTestCase;->$noinline$throw()V + :try_end + .catchall {:try_start .. :try_end} :catchall + + :catchall + return v1 + +.end method + +# Test that storing a wide invalidates the value in the high vreg. This +# cannot be detected from runtime so we only test the environment with Checker. + +## CHECK-START: void TestCase.invalidateHigh1(long) ssa_builder (after) +## CHECK-DAG: <<Arg:j\d+>> ParameterValue +## CHECK-DAG: InvokeStaticOrDirect method_name:java.lang.System.nanoTime env:[[<<Arg>>,_,<<Arg>>,_]] + +.method public static invalidateHigh1(J)V + .registers 4 + + const/4 v1, 0x0 + move-wide v0, p0 + invoke-static {}, Ljava/lang/System;->nanoTime()J + return-void + +.end method + +## CHECK-START: void TestCase.invalidateHigh2(long) ssa_builder (after) +## CHECK-DAG: <<Arg:j\d+>> ParameterValue +## CHECK-DAG: InvokeStaticOrDirect method_name:java.lang.System.nanoTime env:[[<<Arg>>,_,_,<<Arg>>,_]] + +.method public static invalidateHigh2(J)V + .registers 5 + + move-wide v1, p0 + move-wide v0, p0 + invoke-static {}, Ljava/lang/System;->nanoTime()J + return-void + +.end method diff --git a/test/550-checker-regression-wide-store/src/Main.java b/test/550-checker-regression-wide-store/src/Main.java new file mode 100644 index 0000000000..9b502df632 --- /dev/null +++ b/test/550-checker-regression-wide-store/src/Main.java @@ -0,0 +1,40 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.lang.reflect.Method; + +public class Main { + + // Workaround for b/18051191. + class InnerClass {} + + private static int runTestCase(String name, long arg) throws Exception { + Class<?> c = Class.forName("TestCase"); + Method m = c.getMethod(name, long.class); + int result = (Integer) m.invoke(null, arg); + return result; + } + + private static void assertEquals(int expected, int actual) { + if (expected != actual) { + throw new Error("Wrong result: " + expected + " != " + actual); + } + } + + public static void main(String[] args) throws Exception { + assertEquals(42, runTestCase("invalidateLow", 42L)); + } +} diff --git a/test/550-new-instance-clinit/expected.txt b/test/550-new-instance-clinit/expected.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/550-new-instance-clinit/expected.txt diff --git a/test/550-new-instance-clinit/info.txt b/test/550-new-instance-clinit/info.txt new file mode 100644 index 0000000000..c5fa3c7cc9 --- /dev/null +++ b/test/550-new-instance-clinit/info.txt @@ -0,0 +1,3 @@ +Regression test for optimizing which used to treat +HNewInstance as not having side effects even though it +could invoke a clinit method. diff --git a/test/550-new-instance-clinit/src/Main.java b/test/550-new-instance-clinit/src/Main.java new file mode 100644 index 0000000000..45e259ef2c --- /dev/null +++ b/test/550-new-instance-clinit/src/Main.java @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class Main { + public static void main(String[] args) { + int foo = Main.a; + new Bar(); + foo = Main.a; + if (foo != 43) { + throw new Error("Expected 43, got " + foo); + } + } + static int a = 42; +} + +class Bar { + static { + Main.a++; + } +} diff --git a/test/551-checker-clinit/expected.txt b/test/551-checker-clinit/expected.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/551-checker-clinit/expected.txt diff --git a/test/551-checker-clinit/info.txt b/test/551-checker-clinit/info.txt new file mode 100644 index 0000000000..4d54bb5193 --- /dev/null +++ b/test/551-checker-clinit/info.txt @@ -0,0 +1 @@ +Checker test to ensure we optimize away HClinitChecks as expected.
diff --git a/test/551-checker-clinit/src/Main.java b/test/551-checker-clinit/src/Main.java new file mode 100644 index 0000000000..5ec304808b --- /dev/null +++ b/test/551-checker-clinit/src/Main.java @@ -0,0 +1,61 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class Main { + + public static void main(String[] args) {} + public static int foo = 42; + + /// CHECK-START: void Main.inlinedMethod() builder (after) + /// CHECK: ClinitCheck + + /// CHECK-START: void Main.inlinedMethod() inliner (after) + /// CHECK: ClinitCheck + /// CHECK-NOT: ClinitCheck + /// CHECK-NOT: InvokeStaticOrDirect + public void inlinedMethod() { + SubSub.bar(); + } +} + +class Sub extends Main { + /// CHECK-START: void Sub.invokeSuperClass() builder (after) + /// CHECK-NOT: ClinitCheck + public void invokeSuperClass() { + int a = Main.foo; + } + + /// CHECK-START: void Sub.invokeItself() builder (after) + /// CHECK-NOT: ClinitCheck + public void invokeItself() { + int a = foo; + } + + /// CHECK-START: void Sub.invokeSubClass() builder (after) + /// CHECK: ClinitCheck + public void invokeSubClass() { + int a = SubSub.foo; + } + + public static int foo = 42; +} + +class SubSub { + public static void bar() { + int a = Main.foo; + } + public static int foo = 42; +} diff --git a/test/551-checker-shifter-operand/build b/test/551-checker-shifter-operand/build new file mode 100644 index 0000000000..18e8c59e91 --- /dev/null +++ b/test/551-checker-shifter-operand/build @@ -0,0 +1,212 @@ +#!/bin/bash +# +# Copyright (C) 2008 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# This is an almost exact copy of `art/test/etc/default-build`. Only the parsing +# of the `dx` option has been overridden. + +# Stop if something fails. +set -e + +# Set default values for directories. +if [ -d smali ]; then + HAS_SMALI=true +else + HAS_SMALI=false +fi + +if [ -d src ]; then + HAS_SRC=true +else + HAS_SRC=false +fi + +if [ -d src2 ]; then + HAS_SRC2=true +else + HAS_SRC2=false +fi + +if [ -d src-multidex ]; then + HAS_SRC_MULTIDEX=true +else + HAS_SRC_MULTIDEX=false +fi + +if [ -d src-ex ]; then + HAS_SRC_EX=true +else + HAS_SRC_EX=false +fi + +DX_FLAGS="" +SKIP_DX_MERGER="false" +EXPERIMENTAL="" + +# Set up experimental flag mappings in a bash associative array.
+declare -A JACK_EXPERIMENTAL_ARGS +JACK_EXPERIMENTAL_ARGS["default-methods"]="-D jack.java.source.version=1.8" +JACK_EXPERIMENTAL_ARGS["lambdas"]="-D jack.java.source.version=1.8" + +while true; do + if [ "x$1" = "x--dx-option" ]; then + shift + option="$1" + # Make sure we run this test *with* `dx` optimizations. + if [ "x$option" != "x--no-optimize" ]; then + DX_FLAGS="${DX_FLAGS} $option" + fi + shift + elif [ "x$1" = "x--jvm" ]; then + shift + elif [ "x$1" = "x--no-src" ]; then + HAS_SRC=false + shift + elif [ "x$1" = "x--no-src2" ]; then + HAS_SRC2=false + shift + elif [ "x$1" = "x--no-src-multidex" ]; then + HAS_SRC_MULTIDEX=false + shift + elif [ "x$1" = "x--no-src-ex" ]; then + HAS_SRC_EX=false + shift + elif [ "x$1" = "x--no-smali" ]; then + HAS_SMALI=false + shift + elif [ "x$1" = "x--experimental" ]; then + shift + EXPERIMENTAL="${EXPERIMENTAL} $1" + shift + elif expr "x$1" : "x--" >/dev/null 2>&1; then + echo "unknown $0 option: $1" 1>&2 + exit 1 + else + break + fi +done + +# Add args from the experimental mappings. +for experiment in ${EXPERIMENTAL}; do + JACK_ARGS="${JACK_ARGS} ${JACK_EXPERIMENTAL_ARGS[${experiment}]}" +done + +if [ -e classes.dex ]; then + zip $TEST_NAME.jar classes.dex + exit 0 +fi + +if ! [ "${HAS_SRC}" = "true" ] && ! [ "${HAS_SRC2}" = "true" ]; then + # No src directory? Then forget about trying to run dx. + SKIP_DX_MERGER="true" +fi + +if [ "${HAS_SRC_MULTIDEX}" = "true" ]; then + # Jack does not support this configuration unless we specify how to partition the DEX file + # with a .jpp file. + USE_JACK="false" +fi + +if [ ${USE_JACK} = "true" ]; then + # Jack toolchain + if [ "${HAS_SRC}" = "true" ]; then + ${JACK} ${JACK_ARGS} --output-jack src.jack src + imported_jack_files="--import src.jack" + fi + + if [ "${HAS_SRC2}" = "true" ]; then + ${JACK} ${JACK_ARGS} --output-jack src2.jack src2 + imported_jack_files="--import src2.jack ${imported_jack_files}" + fi + + # Compile jack files into a DEX file. We set jack.import.type.policy=keep-first to consider + # class definitions from src2 first. + if [ "${HAS_SRC}" = "true" ] || [ "${HAS_SRC2}" = "true" ]; then + ${JACK} ${JACK_ARGS} ${imported_jack_files} -D jack.import.type.policy=keep-first --output-dex . + fi +else + # Legacy toolchain with javac+dx + if [ "${HAS_SRC}" = "true" ]; then + mkdir classes + ${JAVAC} ${JAVAC_ARGS} -implicit:none -classpath src-multidex -d classes `find src -name '*.java'` + fi + + if [ "${HAS_SRC_MULTIDEX}" = "true" ]; then + mkdir classes2 + ${JAVAC} -implicit:none -classpath src -d classes2 `find src-multidex -name '*.java'` + if [ ${NEED_DEX} = "true" ]; then + ${DX} -JXmx256m --debug --dex --dump-to=classes2.lst --output=classes2.dex \ + --dump-width=1000 ${DX_FLAGS} classes2 + fi + fi + + if [ "${HAS_SRC2}" = "true" ]; then + mkdir -p classes + ${JAVAC} ${JAVAC_ARGS} -d classes `find src2 -name '*.java'` + fi + + if [ "${HAS_SRC}" = "true" ] || [ "${HAS_SRC2}" = "true" ]; then + if [ ${NEED_DEX} = "true" -a ${SKIP_DX_MERGER} = "false" ]; then + ${DX} -JXmx256m --debug --dex --dump-to=classes.lst --output=classes.dex \ + --dump-width=1000 ${DX_FLAGS} classes + fi + fi +fi + +if [ "${HAS_SMALI}" = "true" ]; then + # Compile Smali classes + ${SMALI} -JXmx512m ${SMALI_ARGS} --output smali_classes.dex `find smali -name '*.smali'` + + # Don't bother with dexmerger if we provide our own main function in a smali file. 
+ if [ ${SKIP_DX_MERGER} = "false" ]; then + ${DXMERGER} classes.dex classes.dex smali_classes.dex + else + mv smali_classes.dex classes.dex + fi +fi + +if [ ${HAS_SRC_EX} = "true" ]; then + if [ ${USE_JACK} = "true" ]; then + # Rename previous "classes.dex" so it is not overwritten. + mv classes.dex classes-1.dex + #TODO find another way to append src.jack to the jack classpath + ${JACK}:src.jack ${JACK_ARGS} --output-dex . src-ex + zip $TEST_NAME-ex.jar classes.dex + # Restore previous "classes.dex" so it can be zipped. + mv classes-1.dex classes.dex + else + mkdir classes-ex + ${JAVAC} ${JAVAC_ARGS} -d classes-ex -cp classes `find src-ex -name '*.java'` + if [ ${NEED_DEX} = "true" ]; then + ${DX} -JXmx256m --debug --dex --dump-to=classes-ex.lst --output=classes-ex.dex \ + --dump-width=1000 ${DX_FLAGS} classes-ex + + # quick shuffle so that the stored name is "classes.dex" + mv classes.dex classes-1.dex + mv classes-ex.dex classes.dex + zip $TEST_NAME-ex.jar classes.dex + mv classes.dex classes-ex.dex + mv classes-1.dex classes.dex + fi + fi +fi + +# Create a single jar with two dex files for multidex. +if [ ${HAS_SRC_MULTIDEX} = "true" ]; then + zip $TEST_NAME.jar classes.dex classes2.dex +elif [ ${NEED_DEX} = "true" ]; then + zip $TEST_NAME.jar classes.dex +fi diff --git a/test/551-checker-shifter-operand/expected.txt b/test/551-checker-shifter-operand/expected.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/551-checker-shifter-operand/expected.txt diff --git a/test/551-checker-shifter-operand/info.txt b/test/551-checker-shifter-operand/info.txt new file mode 100644 index 0000000000..10e998cb18 --- /dev/null +++ b/test/551-checker-shifter-operand/info.txt @@ -0,0 +1 @@ +Test the merging of instructions into the shifter operand on arm64. diff --git a/test/551-checker-shifter-operand/src/Main.java b/test/551-checker-shifter-operand/src/Main.java new file mode 100644 index 0000000000..decdd1f324 --- /dev/null +++ b/test/551-checker-shifter-operand/src/Main.java @@ -0,0 +1,678 @@ +/* +* Copyright (C) 2015 The Android Open Source Project +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +public class Main { + + // A dummy value to defeat inlining of these routines. 
+ static boolean doThrow = false; + + public static void assertByteEquals(byte expected, byte result) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } + + public static void assertCharEquals(char expected, char result) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } + + public static void assertShortEquals(short expected, short result) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } + + public static void assertIntEquals(int expected, int result) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } + + public static void assertLongEquals(long expected, long result) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } + + // Non-inlinable type-casting helpers. + static char $noinline$byteToChar (byte v) { if (doThrow) throw new Error(); return (char)v; } + static short $noinline$byteToShort (byte v) { if (doThrow) throw new Error(); return (short)v; } + static int $noinline$byteToInt (byte v) { if (doThrow) throw new Error(); return (int)v; } + static long $noinline$byteToLong (byte v) { if (doThrow) throw new Error(); return (long)v; } + static byte $noinline$charToByte (char v) { if (doThrow) throw new Error(); return (byte)v; } + static short $noinline$charToShort (char v) { if (doThrow) throw new Error(); return (short)v; } + static int $noinline$charToInt (char v) { if (doThrow) throw new Error(); return (int)v; } + static long $noinline$charToLong (char v) { if (doThrow) throw new Error(); return (long)v; } + static byte $noinline$shortToByte (short v) { if (doThrow) throw new Error(); return (byte)v; } + static char $noinline$shortToChar (short v) { if (doThrow) throw new Error(); return (char)v; } + static int $noinline$shortToInt (short v) { if (doThrow) throw new Error(); return (int)v; } + static long $noinline$shortToLong (short v) { if (doThrow) throw new Error(); return (long)v; } + static byte $noinline$intToByte (int v) { if (doThrow) throw new Error(); return (byte)v; } + static char $noinline$intToChar (int v) { if (doThrow) throw new Error(); return (char)v; } + static short $noinline$intToShort (int v) { if (doThrow) throw new Error(); return (short)v; } + static long $noinline$intToLong (int v) { if (doThrow) throw new Error(); return (long)v; } + static byte $noinline$longToByte (long v) { if (doThrow) throw new Error(); return (byte)v; } + static char $noinline$longToChar (long v) { if (doThrow) throw new Error(); return (char)v; } + static short $noinline$longToShort (long v) { if (doThrow) throw new Error(); return (short)v; } + static int $noinline$longToInt (long v) { if (doThrow) throw new Error(); return (int)v; } + + /** + * Basic test merging a bitfield move operation (here a type conversion) into + * the shifter operand. 
+ */ + + /// CHECK-START-ARM64: long Main.$opt$noinline$translate(long, byte) instruction_simplifier_arm64 (before) + /// CHECK-DAG: <<l:j\d+>> ParameterValue + /// CHECK-DAG: <<b:b\d+>> ParameterValue + /// CHECK: <<tmp:j\d+>> TypeConversion [<<b>>] + /// CHECK: Sub [<<l>>,<<tmp>>] + + /// CHECK-START-ARM64: long Main.$opt$noinline$translate(long, byte) instruction_simplifier_arm64 (after) + /// CHECK-DAG: <<l:j\d+>> ParameterValue + /// CHECK-DAG: <<b:b\d+>> ParameterValue + /// CHECK: Arm64DataProcWithShifterOp [<<l>>,<<b>>] kind:Sub+SXTB + + /// CHECK-START-ARM64: long Main.$opt$noinline$translate(long, byte) instruction_simplifier_arm64 (after) + /// CHECK-NOT: TypeConversion + /// CHECK-NOT: Sub + + /// CHECK-START-ARM64: long Main.$opt$noinline$translate(long, byte) disassembly (after) + /// CHECK: sub x{{\d+}}, x{{\d+}}, w{{\d+}}, sxtb + + public static long $opt$noinline$translate(long l, byte b) { + if (doThrow) throw new Error(); + long tmp = (long)b; + return l - tmp; + } + + + /** + * Test that we do not merge into the shifter operand when the left and right + * inputs are the same IR node. + */ + + /// CHECK-START-ARM64: int Main.$opt$noinline$sameInput(int) instruction_simplifier_arm64 (before) + /// CHECK: <<a:i\d+>> ParameterValue + /// CHECK: <<Const2:i\d+>> IntConstant 2 + /// CHECK: <<tmp:i\d+>> Shl [<<a>>,<<Const2>>] + /// CHECK: Add [<<tmp>>,<<tmp>>] + + /// CHECK-START-ARM64: int Main.$opt$noinline$sameInput(int) instruction_simplifier_arm64 (after) + /// CHECK-DAG: <<a:i\d+>> ParameterValue + /// CHECK-DAG: <<Const2:i\d+>> IntConstant 2 + /// CHECK: <<Shl:i\d+>> Shl [<<a>>,<<Const2>>] + /// CHECK: Add [<<Shl>>,<<Shl>>] + + /// CHECK-START-ARM64: int Main.$opt$noinline$sameInput(int) instruction_simplifier_arm64 (after) + /// CHECK-NOT: Arm64DataProcWithShifterOp + + public static int $opt$noinline$sameInput(int a) { + if (doThrow) throw new Error(); + int tmp = a << 2; + return tmp + tmp; + } + + /** + * Check that we perform the merge for multiple uses.
+ */ + + /// CHECK-START-ARM64: int Main.$opt$noinline$multipleUses(int) instruction_simplifier_arm64 (before) + /// CHECK: <<arg:i\d+>> ParameterValue + /// CHECK: <<Const23:i\d+>> IntConstant 23 + /// CHECK: <<tmp:i\d+>> Shl [<<arg>>,<<Const23>>] + /// CHECK: Add [<<tmp>>,{{i\d+}}] + /// CHECK: Add [<<tmp>>,{{i\d+}}] + /// CHECK: Add [<<tmp>>,{{i\d+}}] + /// CHECK: Add [<<tmp>>,{{i\d+}}] + /// CHECK: Add [<<tmp>>,{{i\d+}}] + + /// CHECK-START-ARM64: int Main.$opt$noinline$multipleUses(int) instruction_simplifier_arm64 (after) + /// CHECK: <<arg:i\d+>> ParameterValue + /// CHECK: Arm64DataProcWithShifterOp [{{i\d+}},<<arg>>] kind:Add+LSL shift:23 + /// CHECK: Arm64DataProcWithShifterOp [{{i\d+}},<<arg>>] kind:Add+LSL shift:23 + /// CHECK: Arm64DataProcWithShifterOp [{{i\d+}},<<arg>>] kind:Add+LSL shift:23 + /// CHECK: Arm64DataProcWithShifterOp [{{i\d+}},<<arg>>] kind:Add+LSL shift:23 + /// CHECK: Arm64DataProcWithShifterOp [{{i\d+}},<<arg>>] kind:Add+LSL shift:23 + + /// CHECK-START-ARM64: int Main.$opt$noinline$multipleUses(int) instruction_simplifier_arm64 (after) + /// CHECK-NOT: Shl + /// CHECK-NOT: Add + + public static int $opt$noinline$multipleUses(int arg) { + if (doThrow) throw new Error(); + int tmp = arg << 23; + switch (arg) { + case 1: return (arg | 1) + tmp; + case 2: return (arg | 2) + tmp; + case 3: return (arg | 3) + tmp; + case 4: return (arg | 4) + tmp; + case (1 << 20): return (arg | 5) + tmp; + default: return 0; + } + } + + /** + * Logical instructions cannot take 'extend' operations into the shift + * operand, so test that only the shifts are merged. + */ + + /// CHECK-START-ARM64: void Main.$opt$noinline$testAnd(long, long) instruction_simplifier_arm64 (after) + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK-NOT: Arm64DataProcWithShifterOp + + /// CHECK-START-ARM64: void Main.$opt$noinline$testAnd(long, long) disassembly (after) + /// CHECK: and lsl + /// CHECK: sxtb + /// CHECK: and + + static void $opt$noinline$testAnd(long a, long b) { + if (doThrow) throw new Error(); + assertLongEquals((a & $noinline$LongShl(b, 5)) | (a & $noinline$longToByte(b)), + (a & (b << 5)) | (a & (byte)b)); + } + + /// CHECK-START-ARM64: void Main.$opt$noinline$testOr(int, int) instruction_simplifier_arm64 (after) + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK-NOT: Arm64DataProcWithShifterOp + + /// CHECK-START-ARM64: void Main.$opt$noinline$testOr(int, int) disassembly (after) + /// CHECK: orr asr + /// CHECK: uxth + /// CHECK: orr + + static void $opt$noinline$testOr(int a, int b) { + if (doThrow) throw new Error(); + assertIntEquals((a | $noinline$IntShr(b, 6)) | (a | $noinline$intToChar(b)), + (a | (b >> 6)) | (a | (char)b)); + } + + /// CHECK-START-ARM64: void Main.$opt$noinline$testXor(long, long) instruction_simplifier_arm64 (after) + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK-NOT: Arm64DataProcWithShifterOp + + /// CHECK-START-ARM64: void Main.$opt$noinline$testXor(long, long) disassembly (after) + /// CHECK: eor lsr + /// CHECK: sxtw + /// CHECK: eor + + static void $opt$noinline$testXor(long a, long b) { + if (doThrow) throw new Error(); + assertLongEquals((a ^ $noinline$LongUshr(b, 7)) | (a ^ $noinline$longToInt(b)), + (a ^ (b >>> 7)) | (a ^ (int)b)); + } + + /// CHECK-START-ARM64: void Main.$opt$noinline$testNeg(int) instruction_simplifier_arm64 (after) + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK-NOT: Arm64DataProcWithShifterOp + + /// CHECK-START-ARM64: void Main.$opt$noinline$testNeg(int) disassembly (after) + /// CHECK: neg lsl + /// CHECK: sxth + 
/// CHECK: neg + + static void $opt$noinline$testNeg(int a) { + if (doThrow) throw new Error(); + assertIntEquals(-$noinline$IntShl(a, 8) | -$noinline$intToShort(a), + (-(a << 8)) | (-(short)a)); + } + + /** + * The functions below are used to compare the result of optimized operations + * to non-optimized operations. + * On the left-hand side we use a non-inlined function call to ensure the + * optimization does not occur. The checker tests ensure that the optimization + * does occur on the right-hand. + */ + + /// CHECK-START-ARM64: void Main.$opt$validateExtendByteInt1(int, byte) instruction_simplifier_arm64 (after) + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + + /// CHECK-START-ARM64: void Main.$opt$validateExtendByteInt1(int, byte) instruction_simplifier_arm64 (after) + /// CHECK-NOT: TypeConversion + + public static void $opt$validateExtendByteInt1(int a, byte b) { + assertIntEquals(a + $noinline$byteToChar (b), a + (char)b); + assertIntEquals(a + $noinline$byteToShort(b), a + (short)b); + } + + /// CHECK-START-ARM64: void Main.$opt$validateExtendByteInt2(int, byte) instruction_simplifier_arm64 (after) + /// CHECK-NOT: Arm64DataProcWithShifterOp + /// CHECK-NOT: Arm64DataProcWithShifterOp + + public static void $opt$validateExtendByteInt2(int a, byte b) { + // The conversion to `int` has been optimized away, so there is nothing to merge. + assertIntEquals (a + $noinline$byteToInt (b), a + (int)b); + // There is an environment use for `(long)b`, preventing the merge. + assertLongEquals(a + $noinline$byteToLong(b), a + (long)b); + } + + /// CHECK-START-ARM64: void Main.$opt$validateExtendByteLong(long, byte) instruction_simplifier_arm64 (after) + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + + /// CHECK-START-ARM64: void Main.$opt$validateExtendByteLong(long, byte) instruction_simplifier_arm64 (after) + /// CHECK: TypeConversion + /// CHECK: TypeConversion + /// CHECK-NOT: TypeConversion + + public static void $opt$validateExtendByteLong(long a, byte b) { + // The first two tests have a type conversion. + assertLongEquals(a + $noinline$byteToChar (b), a + (char)b); + assertLongEquals(a + $noinline$byteToShort(b), a + (short)b); + // This test does not because the conversion to `int` is optimized away. + assertLongEquals(a + $noinline$byteToInt (b), a + (int)b); + } + + public static void $opt$validateExtendByte(long a, byte b) { + $opt$validateExtendByteInt1((int)a, b); + $opt$validateExtendByteInt2((int)a, b); + $opt$validateExtendByteLong(a, b); + } + + /// CHECK-START-ARM64: void Main.$opt$validateExtendCharInt1(int, char) instruction_simplifier_arm64 (after) + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + + /// CHECK-START-ARM64: void Main.$opt$validateExtendCharInt1(int, char) instruction_simplifier_arm64 (after) + /// CHECK-NOT: TypeConversion + + public static void $opt$validateExtendCharInt1(int a, char b) { + assertIntEquals(a + $noinline$charToByte (b), a + (byte)b); + assertIntEquals(a + $noinline$charToShort(b), a + (short)b); + } + + /// CHECK-START-ARM64: void Main.$opt$validateExtendCharInt2(int, char) instruction_simplifier_arm64 (after) + /// CHECK-NOT: Arm64DataProcWithShifterOp + /// CHECK-NOT: Arm64DataProcWithShifterOp + + public static void $opt$validateExtendCharInt2(int a, char b) { + // The conversion to `int` has been optimized away, so there is nothing to merge. 
+ assertIntEquals (a + $noinline$charToInt (b), a + (int)b); + // There is an environment use for `(long)b`, preventing the merge. + assertLongEquals(a + $noinline$charToLong(b), a + (long)b); + } + + /// CHECK-START-ARM64: void Main.$opt$validateExtendCharLong(long, char) instruction_simplifier_arm64 (after) + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + + /// CHECK-START-ARM64: void Main.$opt$validateExtendCharLong(long, char) instruction_simplifier_arm64 (after) + /// CHECK: TypeConversion + /// CHECK: TypeConversion + /// CHECK-NOT: TypeConversion + + public static void $opt$validateExtendCharLong(long a, char b) { + // The first two tests have a type conversion. + assertLongEquals(a + $noinline$charToByte (b), a + (byte)b); + assertLongEquals(a + $noinline$charToShort(b), a + (short)b); + // This test does not because the conversion to `int` is optimized away. + assertLongEquals(a + $noinline$charToInt (b), a + (int)b); + } + + public static void $opt$validateExtendChar(long a, char b) { + $opt$validateExtendCharInt1((int)a, b); + $opt$validateExtendCharInt2((int)a, b); + $opt$validateExtendCharLong(a, b); + } + + /// CHECK-START-ARM64: void Main.$opt$validateExtendShortInt1(int, short) instruction_simplifier_arm64 (after) + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + + /// CHECK-START-ARM64: void Main.$opt$validateExtendShortInt1(int, short) instruction_simplifier_arm64 (after) + /// CHECK-NOT: TypeConversion + + public static void $opt$validateExtendShortInt1(int a, short b) { + assertIntEquals(a + $noinline$shortToByte (b), a + (byte)b); + assertIntEquals(a + $noinline$shortToChar (b), a + (char)b); + } + + /// CHECK-START-ARM64: void Main.$opt$validateExtendShortInt2(int, short) instruction_simplifier_arm64 (after) + /// CHECK-NOT: Arm64DataProcWithShifterOp + /// CHECK-NOT: Arm64DataProcWithShifterOp + + public static void $opt$validateExtendShortInt2(int a, short b) { + // The conversion to `int` has been optimized away, so there is nothing to merge. + assertIntEquals (a + $noinline$shortToInt (b), a + (int)b); + // There is an environment use for `(long)b`, preventing the merge. + assertLongEquals(a + $noinline$shortToLong (b), a + (long)b); + } + + /// CHECK-START-ARM64: void Main.$opt$validateExtendShortLong(long, short) instruction_simplifier_arm64 (after) + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + + /// CHECK-START-ARM64: void Main.$opt$validateExtendShortLong(long, short) instruction_simplifier_arm64 (after) + /// CHECK: TypeConversion + /// CHECK: TypeConversion + /// CHECK-NOT: TypeConversion + + public static void $opt$validateExtendShortLong(long a, short b) { + // The first two tests have a type conversion. + assertLongEquals(a + $noinline$shortToByte(b), a + (byte)b); + assertLongEquals(a + $noinline$shortToChar(b), a + (char)b); + // This test does not because the conversion to `int` is optimized away. 
+ assertLongEquals(a + $noinline$shortToInt (b), a + (int)b); + } + + public static void $opt$validateExtendShort(long a, short b) { + $opt$validateExtendShortInt1((int)a, b); + $opt$validateExtendShortInt2((int)a, b); + $opt$validateExtendShortLong(a, b); + } + + /// CHECK-START-ARM64: void Main.$opt$validateExtendInt(long, int) instruction_simplifier_arm64 (after) + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + + /// CHECK-START-ARM64: void Main.$opt$validateExtendInt(long, int) instruction_simplifier_arm64 (after) + /// CHECK: TypeConversion + /// CHECK: TypeConversion + /// CHECK: TypeConversion + /// CHECK-NOT: TypeConversion + + public static void $opt$validateExtendInt(long a, int b) { + // All tests have a conversion to `long`. The first three tests also have a + // conversion from `int` to the specified type. For each test the conversion + // to `long` is merged into the shifter operand. + assertLongEquals(a + $noinline$intToByte (b), a + (byte)b); + assertLongEquals(a + $noinline$intToChar (b), a + (char)b); + assertLongEquals(a + $noinline$intToShort(b), a + (short)b); + assertLongEquals(a + $noinline$intToLong (b), a + (long)b); + } + + /// CHECK-START-ARM64: void Main.$opt$validateExtendLong(long, long) instruction_simplifier_arm64 (after) + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + + /// CHECK-START-ARM64: void Main.$opt$validateExtendLong(long, long) instruction_simplifier_arm64 (after) + /// CHECK: TypeConversion + /// CHECK: TypeConversion + /// CHECK: TypeConversion + /// CHECK: TypeConversion + /// CHECK-NOT: TypeConversion + + public static void $opt$validateExtendLong(long a, long b) { + // Each test has two conversions, from `long` and then back to `long`. The + // conversions to `long` are merged. + assertLongEquals(a + $noinline$longToByte (b), a + (byte)b); + assertLongEquals(a + $noinline$longToChar (b), a + (char)b); + assertLongEquals(a + $noinline$longToShort(b), a + (short)b); + assertLongEquals(a + $noinline$longToInt (b), a + (int)b); + } + + + static int $noinline$IntShl(int b, int c) { + if (doThrow) throw new Error(); + return b << c; + } + static int $noinline$IntShr(int b, int c) { + if (doThrow) throw new Error(); + return b >> c; + } + static int $noinline$IntUshr(int b, int c) { + if (doThrow) throw new Error(); + return b >>> c; + } + + + // Each test line below should see one merge. 
+ /// CHECK-START-ARM64: void Main.$opt$validateShiftInt(int, int) instruction_simplifier_arm64 (after) + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + + /// CHECK-START-ARM64: void Main.$opt$validateShiftInt(int, int) instruction_simplifier_arm64 (after) + /// CHECK-NOT: Shl + /// CHECK-NOT: Shr + /// CHECK-NOT: UShr + + public static void $opt$validateShiftInt(int a, int b) { + assertIntEquals(a + $noinline$IntShl(b, 1), a + (b << 1)); + assertIntEquals(a + $noinline$IntShl(b, 6), a + (b << 6)); + assertIntEquals(a + $noinline$IntShl(b, 7), a + (b << 7)); + assertIntEquals(a + $noinline$IntShl(b, 8), a + (b << 8)); + assertIntEquals(a + $noinline$IntShl(b, 14), a + (b << 14)); + assertIntEquals(a + $noinline$IntShl(b, 15), a + (b << 15)); + assertIntEquals(a + $noinline$IntShl(b, 16), a + (b << 16)); + assertIntEquals(a + $noinline$IntShl(b, 30), a + (b << 30)); + assertIntEquals(a + $noinline$IntShl(b, 31), a + (b << 31)); + assertIntEquals(a + $noinline$IntShl(b, 32), a + (b << 32)); + assertIntEquals(a + $noinline$IntShl(b, 62), a + (b << 62)); + assertIntEquals(a + $noinline$IntShl(b, 63), a + (b << 63)); + + assertIntEquals(a - $noinline$IntShr(b, 1), a - (b >> 1)); + assertIntEquals(a - $noinline$IntShr(b, 6), a - (b >> 6)); + assertIntEquals(a - $noinline$IntShr(b, 7), a - (b >> 7)); + assertIntEquals(a - $noinline$IntShr(b, 8), a - (b >> 8)); + assertIntEquals(a - $noinline$IntShr(b, 14), a - (b >> 14)); + assertIntEquals(a - $noinline$IntShr(b, 15), a - (b >> 15)); + assertIntEquals(a - $noinline$IntShr(b, 16), a - (b >> 16)); + assertIntEquals(a - $noinline$IntShr(b, 30), a - (b >> 30)); + assertIntEquals(a - $noinline$IntShr(b, 31), a - (b >> 31)); + assertIntEquals(a - $noinline$IntShr(b, 32), a - (b >> 32)); + assertIntEquals(a - $noinline$IntShr(b, 62), a - (b >> 62)); + assertIntEquals(a - $noinline$IntShr(b, 63), a - (b >> 63)); + + assertIntEquals(a ^ $noinline$IntUshr(b, 1), a ^ (b >>> 1)); + assertIntEquals(a ^ $noinline$IntUshr(b, 6), a ^ (b >>> 6)); + assertIntEquals(a ^ $noinline$IntUshr(b, 7), a ^ (b >>> 7)); + assertIntEquals(a ^ $noinline$IntUshr(b, 8), a ^ (b >>> 8)); + assertIntEquals(a ^ 
$noinline$IntUshr(b, 14), a ^ (b >>> 14)); + assertIntEquals(a ^ $noinline$IntUshr(b, 15), a ^ (b >>> 15)); + assertIntEquals(a ^ $noinline$IntUshr(b, 16), a ^ (b >>> 16)); + assertIntEquals(a ^ $noinline$IntUshr(b, 30), a ^ (b >>> 30)); + assertIntEquals(a ^ $noinline$IntUshr(b, 31), a ^ (b >>> 31)); + assertIntEquals(a ^ $noinline$IntUshr(b, 32), a ^ (b >>> 32)); + assertIntEquals(a ^ $noinline$IntUshr(b, 62), a ^ (b >>> 62)); + assertIntEquals(a ^ $noinline$IntUshr(b, 63), a ^ (b >>> 63)); + } + + + static long $noinline$LongShl(long b, long c) { + if (doThrow) throw new Error(); + return b << c; + } + static long $noinline$LongShr(long b, long c) { + if (doThrow) throw new Error(); + return b >> c; + } + static long $noinline$LongUshr(long b, long c) { + if (doThrow) throw new Error(); + return b >>> c; + } + + // Each test line below should see one merge. + /// CHECK-START-ARM64: void Main.$opt$validateShiftLong(long, long) instruction_simplifier_arm64 (after) + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + /// CHECK: Arm64DataProcWithShifterOp + + /// CHECK-START-ARM64: void Main.$opt$validateShiftLong(long, long) instruction_simplifier_arm64 (after) + /// CHECK-NOT: Shl + /// CHECK-NOT: Shr + /// CHECK-NOT: UShr + + public static void $opt$validateShiftLong(long a, long b) { + assertLongEquals(a + $noinline$LongShl(b, 1), a + (b << 1)); + assertLongEquals(a + $noinline$LongShl(b, 6), a + (b << 6)); + assertLongEquals(a + $noinline$LongShl(b, 7), a + (b << 7)); + assertLongEquals(a + $noinline$LongShl(b, 8), a + (b << 8)); + assertLongEquals(a + $noinline$LongShl(b, 14), a + (b << 14)); + assertLongEquals(a + $noinline$LongShl(b, 15), a + (b << 15)); + assertLongEquals(a + $noinline$LongShl(b, 16), a + (b << 16)); + assertLongEquals(a + $noinline$LongShl(b, 30), a + (b << 30)); + assertLongEquals(a + $noinline$LongShl(b, 31), a + (b << 31)); + assertLongEquals(a + $noinline$LongShl(b, 32), a + (b << 32)); + assertLongEquals(a + $noinline$LongShl(b, 62), a + (b << 62)); + assertLongEquals(a + $noinline$LongShl(b, 63), a + (b << 63)); + + assertLongEquals(a - $noinline$LongShr(b, 1), a - (b >> 1)); + assertLongEquals(a - $noinline$LongShr(b, 6), a - (b >> 
6)); + assertLongEquals(a - $noinline$LongShr(b, 7), a - (b >> 7)); + assertLongEquals(a - $noinline$LongShr(b, 8), a - (b >> 8)); + assertLongEquals(a - $noinline$LongShr(b, 14), a - (b >> 14)); + assertLongEquals(a - $noinline$LongShr(b, 15), a - (b >> 15)); + assertLongEquals(a - $noinline$LongShr(b, 16), a - (b >> 16)); + assertLongEquals(a - $noinline$LongShr(b, 30), a - (b >> 30)); + assertLongEquals(a - $noinline$LongShr(b, 31), a - (b >> 31)); + assertLongEquals(a - $noinline$LongShr(b, 32), a - (b >> 32)); + assertLongEquals(a - $noinline$LongShr(b, 62), a - (b >> 62)); + assertLongEquals(a - $noinline$LongShr(b, 63), a - (b >> 63)); + + assertLongEquals(a ^ $noinline$LongUshr(b, 1), a ^ (b >>> 1)); + assertLongEquals(a ^ $noinline$LongUshr(b, 6), a ^ (b >>> 6)); + assertLongEquals(a ^ $noinline$LongUshr(b, 7), a ^ (b >>> 7)); + assertLongEquals(a ^ $noinline$LongUshr(b, 8), a ^ (b >>> 8)); + assertLongEquals(a ^ $noinline$LongUshr(b, 14), a ^ (b >>> 14)); + assertLongEquals(a ^ $noinline$LongUshr(b, 15), a ^ (b >>> 15)); + assertLongEquals(a ^ $noinline$LongUshr(b, 16), a ^ (b >>> 16)); + assertLongEquals(a ^ $noinline$LongUshr(b, 30), a ^ (b >>> 30)); + assertLongEquals(a ^ $noinline$LongUshr(b, 31), a ^ (b >>> 31)); + assertLongEquals(a ^ $noinline$LongUshr(b, 32), a ^ (b >>> 32)); + assertLongEquals(a ^ $noinline$LongUshr(b, 62), a ^ (b >>> 62)); + assertLongEquals(a ^ $noinline$LongUshr(b, 63), a ^ (b >>> 63)); + } + + + public static void main(String[] args) { + assertLongEquals(10000L - 3L, $opt$noinline$translate(10000L, (byte)3)); + assertLongEquals(-10000L - -3L, $opt$noinline$translate(-10000L, (byte)-3)); + + assertIntEquals(4096, $opt$noinline$sameInput(512)); + assertIntEquals(-8192, $opt$noinline$sameInput(-1024)); + + assertIntEquals(((1 << 23) | 1), $opt$noinline$multipleUses(1)); + assertIntEquals(((1 << 20) | 5), $opt$noinline$multipleUses(1 << 20)); + + long inputs[] = { + -((1L << 7) - 1L), -((1L << 7)), -((1L << 7) + 1L), + -((1L << 15) - 1L), -((1L << 15)), -((1L << 15) + 1L), + -((1L << 16) - 1L), -((1L << 16)), -((1L << 16) + 1L), + -((1L << 31) - 1L), -((1L << 31)), -((1L << 31) + 1L), + -((1L << 32) - 1L), -((1L << 32)), -((1L << 32) + 1L), + -((1L << 63) - 1L), -((1L << 63)), -((1L << 63) + 1L), + -42L, -314L, -2718281828L, -0x123456789L, -0x987654321L, + -1L, -20L, -300L, -4000L, -50000L, -600000L, -7000000L, -80000000L, + 0L, + 1L, 20L, 300L, 4000L, 50000L, 600000L, 7000000L, 80000000L, + 42L, 314L, 2718281828L, 0x123456789L, 0x987654321L, + (1L << 7) - 1L, (1L << 7), (1L << 7) + 1L, + (1L << 8) - 1L, (1L << 8), (1L << 8) + 1L, + (1L << 15) - 1L, (1L << 15), (1L << 15) + 1L, + (1L << 16) - 1L, (1L << 16), (1L << 16) + 1L, + (1L << 31) - 1L, (1L << 31), (1L << 31) + 1L, + (1L << 32) - 1L, (1L << 32), (1L << 32) + 1L, + (1L << 63) - 1L, (1L << 63), (1L << 63) + 1L, + Long.MIN_VALUE, Long.MAX_VALUE + }; + for (int i = 0; i < inputs.length; i++) { + $opt$noinline$testNeg((int)inputs[i]); + for (int j = 0; j < inputs.length; j++) { + $opt$noinline$testAnd(inputs[i], inputs[j]); + $opt$noinline$testOr((int)inputs[i], (int)inputs[j]); + $opt$noinline$testXor(inputs[i], inputs[j]); + + $opt$validateExtendByte(inputs[i], (byte)inputs[j]); + $opt$validateExtendChar(inputs[i], (char)inputs[j]); + $opt$validateExtendShort(inputs[i], (short)inputs[j]); + $opt$validateExtendInt(inputs[i], (int)inputs[j]); + $opt$validateExtendLong(inputs[i], inputs[j]); + + $opt$validateShiftInt((int)inputs[i], (int)inputs[j]); + $opt$validateShiftLong(inputs[i], inputs[j]); + } + 
} + + } +} diff --git a/test/551-implicit-null-checks/expected.txt b/test/551-implicit-null-checks/expected.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/551-implicit-null-checks/expected.txt diff --git a/test/551-implicit-null-checks/info.txt b/test/551-implicit-null-checks/info.txt new file mode 100644 index 0000000000..bdd066bec3 --- /dev/null +++ b/test/551-implicit-null-checks/info.txt @@ -0,0 +1 @@ +Test that implicit null checks are recorded correctly for longs.
\ No newline at end of file diff --git a/test/551-implicit-null-checks/src/Main.java b/test/551-implicit-null-checks/src/Main.java new file mode 100644 index 0000000000..677e8d34ca --- /dev/null +++ b/test/551-implicit-null-checks/src/Main.java @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class Main { + + private class Inner { + private long i1; + } + private Inner inst; + + public static void main(String args[]) throws Exception { + Main m = new Main(); + try { + m.$opt$noinline$testGetLong(); + } catch (NullPointerException ex) { + // good + } + try { + m.$opt$noinline$testPutLong(778899112233L); + } catch (NullPointerException ex) { + // good + } + } + + public void $opt$noinline$testGetLong() throws Exception { + long result = inst.i1; + throw new Exception(); // prevent inline + } + + public void $opt$noinline$testPutLong(long a) throws Exception { + inst.i1 = a; + throw new Exception(); // prevent inline + } +} diff --git a/test/960-default-smali/build b/test/960-default-smali/build index 4dc848cfa6..b72afcdf18 100755 --- a/test/960-default-smali/build +++ b/test/960-default-smali/build @@ -22,7 +22,7 @@ ${ANDROID_BUILD_TOP}/art/test/utils/python/generate_smali_main.py ./smali # Should we compile with Java source code. By default we will use Smali. USES_JAVA_SOURCE="false" -if [[ $ARGS == *"--jvm"* ]]; then +if [[ $@ == *"--jvm"* ]]; then USES_JAVA_SOURCE="true" elif [[ "$USE_JACK" == "true" ]]; then if $JACK -D jack.java.source.version=1.8 >& /dev/null; then diff --git a/test/961-default-iface-resolution-generated/build b/test/961-default-iface-resolution-generated/build index b4ced3e82e..005f76c2dc 100755 --- a/test/961-default-iface-resolution-generated/build +++ b/test/961-default-iface-resolution-generated/build @@ -33,7 +33,7 @@ mkdir -p ./smali # Should we compile with Java source code. By default we will use Smali. 
USES_JAVA_SOURCE="false" -if [[ $ARGS == *"--jvm"* ]]; then +if [[ $@ == *"--jvm"* ]]; then USES_JAVA_SOURCE="true" elif [[ $USE_JACK == "true" ]]; then if "$JACK" -D jack.java.source.version=1.8 >& /dev/null; then diff --git a/test/Android.libarttest.mk b/test/Android.libarttest.mk index 7a22e1b74a..f74a516486 100644 --- a/test/Android.libarttest.mk +++ b/test/Android.libarttest.mk @@ -37,7 +37,8 @@ LIBARTTEST_COMMON_SRC_FILES := \ 457-regs/regs_jni.cc \ 461-get-reference-vreg/get_reference_vreg_jni.cc \ 466-get-live-vreg/get_live_vreg_jni.cc \ - 497-inlining-and-class-loader/clear_dex_cache.cc + 497-inlining-and-class-loader/clear_dex_cache.cc \ + 543-env-long-ref/env_long_ref.cc ART_TARGET_LIBARTTEST_$(ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libarttest.so ART_TARGET_LIBARTTEST_$(ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libarttestd.so diff --git a/test/run-test b/test/run-test index 10ec3103b9..6e13b8a976 100755 --- a/test/run-test +++ b/test/run-test @@ -669,9 +669,9 @@ export TEST_NAME=`basename ${test_dir}` # ------------------------------- # Return whether the Optimizing compiler has read barrier support for ARCH. function arch_supports_read_barrier() { - # Optimizing has read barrier support for x86 and x86-64 at the + # Optimizing has read barrier support for ARM, ARM64, x86 and x86-64 at the # moment. - [ "x$1" = xx86 ] || [ "x$1" = xx86_64 ] + [ "x$1" = xarm ] || [ "x$1" = xarm64 ] || [ "x$1" = xx86 ] || [ "x$1" = xx86_64 ] } # Tests named '<number>-checker-*' will also have their CFGs verified with @@ -739,8 +739,8 @@ fi if [ "$run_checker" = "yes" -a "$target_mode" = "yes" ]; then # We will need to `adb pull` the .cfg output from the target onto the host to # run checker on it. This file can be big. - build_file_size_limit=16384 - run_file_size_limit=16384 + build_file_size_limit=24576 + run_file_size_limit=24576 fi if [ ${USE_JACK} = "false" ]; then # Set ulimit if we build with dx only, Jack can generate big temp files. diff --git a/tools/buildbot-build.sh b/tools/buildbot-build.sh index 047c24f8aa..02787fba43 100755 --- a/tools/buildbot-build.sh +++ b/tools/buildbot-build.sh @@ -21,7 +21,7 @@ fi out_dir=${OUT_DIR-out} java_libraries_dir=${out_dir}/target/common/obj/JAVA_LIBRARIES -common_targets="vogar ${java_libraries_dir}/core-tests_intermediates/javalib.jar apache-harmony-jdwp-tests-hostdex ${java_libraries_dir}/jsr166-tests_intermediates/javalib.jar" +common_targets="vogar ${java_libraries_dir}/core-tests_intermediates/javalib.jar apache-harmony-jdwp-tests-hostdex ${java_libraries_dir}/jsr166-tests_intermediates/javalib.jar ${out_dir}/host/linux-x86/bin/jack" mode="target" j_arg="-j$(nproc)" showcommands= diff --git a/tools/run-jdwp-tests.sh b/tools/run-jdwp-tests.sh index de27a6faaa..47fc50fbd2 100755 --- a/tools/run-jdwp-tests.sh +++ b/tools/run-jdwp-tests.sh @@ -28,6 +28,18 @@ if [ ! -f $test_jar ]; then exit 1 fi +if [ "x$ART_USE_READ_BARRIER" = xtrue ]; then + # For the moment, skip JDWP tests when read barriers are enabled, as + # they sometimes exhibit a deadlock issue with the concurrent + # copying collector in the read barrier configuration, between the + # HeapTaskDeamon and the JDWP thread (b/25800335). + # + # TODO: Re-enable the JDWP tests when this deadlock issue is fixed. + echo "JDWP tests are temporarily disabled in the read barrier configuration because of" + echo "a deadlock issue (b/25800335)." 
+ exit 0 +fi + art="/data/local/tmp/system/bin/art" art_debugee="sh /data/local/tmp/system/bin/art" args=$@ @@ -43,9 +55,11 @@ image="-Ximage:/data/art-test/core-jit.art" vm_args="" # By default, we run the whole JDWP test suite. test="org.apache.harmony.jpda.tests.share.AllTests" +host="no" while true; do if [[ "$1" == "--mode=host" ]]; then + host="yes" # Specify bash explicitly since the art script cannot, since it has to run on the device # with mksh. art="bash ${OUT_DIR-out}/host/linux-x86/bin/art" @@ -118,3 +132,15 @@ vogar $vm_command \ --classpath $test_jar \ --vm-arg -Xcompiler-option --vm-arg --debuggable \ $test + +vogar_exit_status=$? + +echo "Killing stalled dalvikvm processes..." +if [[ $host == "yes" ]]; then + pkill -9 -f /bin/dalvikvm +else + adb shell pkill -9 -f /bin/dalvikvm +fi +echo "Done." + +exit $vogar_exit_status |