Diffstat (limited to 'compiler')
179 files changed, 12157 insertions, 3505 deletions
diff --git a/compiler/Android.mk b/compiler/Android.mk index c663fcbf89..ac95abdd8d 100644 --- a/compiler/Android.mk +++ b/compiler/Android.mk @@ -41,6 +41,7 @@ LIBART_COMPILER_SRC_FILES := \ dex/quick/gen_common.cc \ dex/quick/gen_invoke.cc \ dex/quick/gen_loadstore.cc \ + dex/quick/lazy_debug_frame_opcode_writer.cc \ dex/quick/local_optimizations.cc \ dex/quick/mips/assemble_mips.cc \ dex/quick/mips/call_mips.cc \ @@ -79,6 +80,13 @@ LIBART_COMPILER_SRC_FILES := \ driver/compiler_driver.cc \ driver/compiler_options.cc \ driver/dex_compilation_unit.cc \ + linker/relative_patcher.cc \ + linker/arm/relative_patcher_arm_base.cc \ + linker/arm/relative_patcher_thumb2.cc \ + linker/arm64/relative_patcher_arm64.cc \ + linker/x86/relative_patcher_x86_base.cc \ + linker/x86/relative_patcher_x86.cc \ + linker/x86_64/relative_patcher_x86_64.cc \ jit/jit_compiler.cc \ jni/quick/arm/calling_convention_arm.cc \ jni/quick/arm64/calling_convention_arm64.cc \ @@ -132,7 +140,6 @@ LIBART_COMPILER_SRC_FILES := \ utils/arm64/assembler_arm64.cc \ utils/arm64/managed_register_arm64.cc \ utils/assembler.cc \ - utils/dwarf_cfi.cc \ utils/mips/assembler_mips.cc \ utils/mips/managed_register_mips.cc \ utils/mips64/assembler_mips64.cc \ @@ -145,6 +152,7 @@ LIBART_COMPILER_SRC_FILES := \ buffered_output_stream.cc \ compiler.cc \ elf_writer.cc \ + elf_writer_debug.cc \ elf_writer_quick.cc \ file_output_stream.cc \ image_writer.cc \ diff --git a/compiler/cfi_test.h b/compiler/cfi_test.h new file mode 100644 index 0000000000..918179290b --- /dev/null +++ b/compiler/cfi_test.h @@ -0,0 +1,139 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_CFI_TEST_H_ +#define ART_COMPILER_CFI_TEST_H_ + +#include <vector> +#include <memory> +#include <sstream> + +#include "arch/instruction_set.h" +#include "dwarf/dwarf_test.h" +#include "dwarf/headers.h" +#include "disassembler/disassembler.h" +#include "gtest/gtest.h" + +namespace art { + +class CFITest : public dwarf::DwarfTest { + public: + void GenerateExpected(FILE* f, InstructionSet isa, const char* isa_str, + const std::vector<uint8_t>& actual_asm, + const std::vector<uint8_t>& actual_cfi) { + std::vector<std::string> lines; + // Print the raw bytes. + fprintf(f, "static constexpr uint8_t expected_asm_%s[] = {", isa_str); + HexDump(f, actual_asm); + fprintf(f, "\n};\n"); + fprintf(f, "static constexpr uint8_t expected_cfi_%s[] = {", isa_str); + HexDump(f, actual_cfi); + fprintf(f, "\n};\n"); + // Pretty-print CFI opcodes. + constexpr bool is64bit = false; + dwarf::DebugFrameOpCodeWriter<> initial_opcodes; + dwarf::WriteEhFrameCIE(is64bit, dwarf::Reg(8), initial_opcodes, &eh_frame_data_); + dwarf::WriteEhFrameFDE(is64bit, 0, 0, actual_asm.size(), &actual_cfi, &eh_frame_data_); + ReformatCfi(Objdump(false, "-W"), &lines); + // Pretty-print assembly. 
+ auto* opts = new DisassemblerOptions(false, actual_asm.data(), true); + std::unique_ptr<Disassembler> disasm(Disassembler::Create(isa, opts)); + std::stringstream stream; + const uint8_t* base = actual_asm.data() + (isa == kThumb2 ? 1 : 0); + disasm->Dump(stream, base, base + actual_asm.size()); + ReformatAsm(&stream, &lines); + // Print CFI and assembly interleaved. + std::stable_sort(lines.begin(), lines.end(), CompareByAddress); + for (const std::string& line : lines) { + fprintf(f, "// %s\n", line.c_str()); + } + fprintf(f, "\n"); + } + + private: + // Helper - get offset just past the end of given string. + static size_t FindEndOf(const std::string& str, const char* substr) { + size_t pos = str.find(substr); + CHECK_NE(std::string::npos, pos); + return pos + strlen(substr); + } + + // Spit to lines and remove raw instruction bytes. + static void ReformatAsm(std::stringstream* stream, + std::vector<std::string>* output) { + std::string line; + while (std::getline(*stream, line)) { + line = line.substr(0, FindEndOf(line, ": ")) + + line.substr(FindEndOf(line, "\t")); + size_t pos; + while ((pos = line.find(" ")) != std::string::npos) { + line = line.replace(pos, 2, " "); + } + while (!line.empty() && line.back() == ' ') { + line.pop_back(); + } + output->push_back(line); + } + } + + // Find interesting parts of objdump output and prefix the lines with address. + static void ReformatCfi(const std::vector<std::string>& lines, + std::vector<std::string>* output) { + std::string address; + for (const std::string& line : lines) { + if (line.find("DW_CFA_nop") != std::string::npos) { + // Ignore. + } else if (line.find("DW_CFA_advance_loc") != std::string::npos) { + // The last 8 characters are the address. + address = "0x" + line.substr(line.size() - 8); + } else if (line.find("DW_CFA_") != std::string::npos) { + std::string new_line(line); + // "bad register" warning is caused by always using host (x86) objdump. + const char* bad_reg = "bad register: "; + size_t pos; + if ((pos = new_line.find(bad_reg)) != std::string::npos) { + new_line = new_line.replace(pos, strlen(bad_reg), ""); + } + // Remove register names in parentheses since they have x86 names. + if ((pos = new_line.find(" (")) != std::string::npos) { + new_line = new_line.replace(pos, FindEndOf(new_line, ")") - pos, ""); + } + // Use the .cfi_ prefix. + new_line = ".cfi_" + new_line.substr(FindEndOf(new_line, "DW_CFA_")); + output->push_back(address + ": " + new_line); + } + } + } + + // Compare strings by the address prefix. + static bool CompareByAddress(const std::string& lhs, const std::string& rhs) { + EXPECT_EQ(lhs[10], ':'); + EXPECT_EQ(rhs[10], ':'); + return strncmp(lhs.c_str(), rhs.c_str(), 10) < 0; + } + + // Pretty-print byte array. 12 bytes per line. + static void HexDump(FILE* f, const std::vector<uint8_t>& data) { + for (size_t i = 0; i < data.size(); i++) { + fprintf(f, i % 12 == 0 ? "\n " : " "); // Whitespace. 
+ fprintf(f, "0x%02X,", data[i]); + } + } +}; + +} // namespace art + +#endif // ART_COMPILER_CFI_TEST_H_ diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc index 1d0aad5425..96d90bb443 100644 --- a/compiler/common_compiler_test.cc +++ b/compiler/common_compiler_test.cc @@ -24,6 +24,7 @@ #include "dex/quick/dex_file_to_method_inliner_map.h" #include "dex/verification_results.h" #include "driver/compiler_driver.h" +#include "driver/compiler_options.h" #include "interpreter/interpreter.h" #include "mirror/art_method.h" #include "mirror/dex_cache.h" diff --git a/compiler/compiled_method.cc b/compiler/compiled_method.cc index 1849e7ef64..4f7a970fdd 100644 --- a/compiler/compiled_method.cc +++ b/compiler/compiled_method.cc @@ -132,7 +132,7 @@ CompiledMethod::CompiledMethod(CompilerDriver* driver, const ArrayRef<const uint8_t>& vmap_table, const ArrayRef<const uint8_t>& native_gc_map, const ArrayRef<const uint8_t>& cfi_info, - const ArrayRef<LinkerPatch>& patches) + const ArrayRef<const LinkerPatch>& patches) : CompiledCode(driver, instruction_set, quick_code, !driver->DedupeEnabled()), owns_arrays_(!driver->DedupeEnabled()), frame_size_in_bytes_(frame_size_in_bytes), core_spill_mask_(core_spill_mask), @@ -142,7 +142,6 @@ CompiledMethod::CompiledMethod(CompilerDriver* driver, if (src_mapping_table == nullptr) { src_mapping_table_ = new SwapSrcMap(driver->GetSwapSpaceAllocator()); } else { - src_mapping_table->Arrange(); src_mapping_table_ = new SwapSrcMap(src_mapping_table->begin(), src_mapping_table->end(), driver->GetSwapSpaceAllocator()); } @@ -159,7 +158,7 @@ CompiledMethod::CompiledMethod(CompilerDriver* driver, } else { src_mapping_table_ = src_mapping_table == nullptr ? driver->DeduplicateSrcMappingTable(ArrayRef<SrcMapElem>()) : - driver->DeduplicateSrcMappingTable(ArrayRef<SrcMapElem>(src_mapping_table->Arrange())); + driver->DeduplicateSrcMappingTable(ArrayRef<SrcMapElem>(*src_mapping_table)); mapping_table_ = mapping_table.empty() ? 
nullptr : driver->DeduplicateMappingTable(mapping_table); vmap_table_ = driver->DeduplicateVMapTable(vmap_table); @@ -180,7 +179,7 @@ CompiledMethod* CompiledMethod::SwapAllocCompiledMethod( const ArrayRef<const uint8_t>& vmap_table, const ArrayRef<const uint8_t>& native_gc_map, const ArrayRef<const uint8_t>& cfi_info, - const ArrayRef<LinkerPatch>& patches) { + const ArrayRef<const LinkerPatch>& patches) { SwapAllocator<CompiledMethod> alloc(driver->GetSwapSpaceAllocator()); CompiledMethod* ret = alloc.allocate(1); alloc.construct(ret, driver, instruction_set, quick_code, frame_size_in_bytes, core_spill_mask, @@ -189,38 +188,6 @@ CompiledMethod* CompiledMethod::SwapAllocCompiledMethod( return ret; } -CompiledMethod* CompiledMethod::SwapAllocCompiledMethodStackMap( - CompilerDriver* driver, - InstructionSet instruction_set, - const ArrayRef<const uint8_t>& quick_code, - const size_t frame_size_in_bytes, - const uint32_t core_spill_mask, - const uint32_t fp_spill_mask, - const ArrayRef<const uint8_t>& stack_map) { - SwapAllocator<CompiledMethod> alloc(driver->GetSwapSpaceAllocator()); - CompiledMethod* ret = alloc.allocate(1); - alloc.construct(ret, driver, instruction_set, quick_code, frame_size_in_bytes, core_spill_mask, - fp_spill_mask, nullptr, ArrayRef<const uint8_t>(), stack_map, - ArrayRef<const uint8_t>(), ArrayRef<const uint8_t>(), ArrayRef<LinkerPatch>()); - return ret; -} - -CompiledMethod* CompiledMethod::SwapAllocCompiledMethodCFI( - CompilerDriver* driver, - InstructionSet instruction_set, - const ArrayRef<const uint8_t>& quick_code, - const size_t frame_size_in_bytes, - const uint32_t core_spill_mask, - const uint32_t fp_spill_mask, - const ArrayRef<const uint8_t>& cfi_info) { - SwapAllocator<CompiledMethod> alloc(driver->GetSwapSpaceAllocator()); - CompiledMethod* ret = alloc.allocate(1); - alloc.construct(ret, driver, instruction_set, quick_code, frame_size_in_bytes, core_spill_mask, - fp_spill_mask, nullptr, ArrayRef<const uint8_t>(), - ArrayRef<const uint8_t>(), ArrayRef<const uint8_t>(), - cfi_info, ArrayRef<LinkerPatch>()); - return ret; -} void CompiledMethod::ReleaseSwapAllocatedCompiledMethod(CompilerDriver* driver, CompiledMethod* m) { diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h index d6a07f6226..480d021db0 100644 --- a/compiler/compiled_method.h +++ b/compiler/compiled_method.h @@ -94,20 +94,12 @@ class SrcMapElem { uint32_t from_; int32_t to_; - explicit operator int64_t() const { - return (static_cast<int64_t>(to_) << 32) | from_; - } - - bool operator<(const SrcMapElem& sme) const { - return int64_t(*this) < int64_t(sme); - } - - bool operator==(const SrcMapElem& sme) const { - return int64_t(*this) == int64_t(sme); - } - - explicit operator uint8_t() const { - return static_cast<uint8_t>(from_ + to_); + // Lexicographical compare. 
+ bool operator<(const SrcMapElem& other) const { + if (from_ != other.from_) { + return from_ < other.from_; + } + return to_ < other.to_; } }; @@ -129,49 +121,33 @@ class SrcMap FINAL : public std::vector<SrcMapElem, Allocator> { SrcMap(InputIt first, InputIt last, const Allocator& alloc) : std::vector<SrcMapElem, Allocator>(first, last, alloc) {} - void SortByFrom() { - std::sort(begin(), end(), [] (const SrcMapElem& lhs, const SrcMapElem& rhs) -> bool { - return lhs.from_ < rhs.from_; - }); - } - - const_iterator FindByTo(int32_t to) const { - return std::lower_bound(begin(), end(), SrcMapElem({0, to})); - } - - SrcMap& Arrange() { + void push_back(const SrcMapElem& elem) { if (!empty()) { - std::sort(begin(), end()); - resize(std::unique(begin(), end()) - begin()); - shrink_to_fit(); + // Check that the addresses are inserted in sorted order. + DCHECK_GE(elem.from_, this->back().from_); + // If two consequitive entries map to the same value, ignore the later. + // E.g. for map {{0, 1}, {4, 1}, {8, 2}}, all values in [0,8) map to 1. + if (elem.to_ == this->back().to_) { + return; + } } - return *this; + std::vector<SrcMapElem, Allocator>::push_back(elem); } - void DeltaFormat(const SrcMapElem& start, uint32_t highest_pc) { - // Convert from abs values to deltas. - if (!empty()) { - SortByFrom(); - - // TODO: one PC can be mapped to several Java src lines. - // do we want such a one-to-many correspondence? - - // get rid of the highest values - size_t i = size() - 1; - for (; i > 0 ; i--) { - if ((*this)[i].from_ < highest_pc) { - break; - } - } - this->resize(i + 1); - - for (i = size(); --i >= 1; ) { - (*this)[i].from_ -= (*this)[i-1].from_; - (*this)[i].to_ -= (*this)[i-1].to_; - } - DCHECK((*this)[0].from_ >= start.from_); - (*this)[0].from_ -= start.from_; - (*this)[0].to_ -= start.to_; + // Returns true and the corresponding "to" value if the mapping is found. + // Oterwise returns false and 0. + std::pair<bool, int32_t> Find(uint32_t from) const { + // Finds first mapping such that lb.from_ >= from. + auto lb = std::lower_bound(begin(), end(), SrcMapElem {from, INT32_MIN}); + if (lb != end() && lb->from_ == from) { + // Found exact match. + return std::make_pair(true, lb->to_); + } else if (lb != begin()) { + // The previous mapping is still in effect. + return std::make_pair(true, (--lb)->to_); + } else { + // Not found because 'from' is smaller than first entry in the map. + return std::make_pair(false, 0); } } }; @@ -185,6 +161,7 @@ enum LinkerPatchType { kLinkerPatchCall, kLinkerPatchCallRelative, // NOTE: Actual patching is instruction_set-dependent. kLinkerPatchType, + kLinkerPatchDexCacheArray, // NOTE: Actual patching is instruction_set-dependent. 
}; class LinkerPatch { @@ -192,28 +169,44 @@ class LinkerPatch { static LinkerPatch MethodPatch(size_t literal_offset, const DexFile* target_dex_file, uint32_t target_method_idx) { - return LinkerPatch(literal_offset, kLinkerPatchMethod, - target_method_idx, target_dex_file); + LinkerPatch patch(literal_offset, kLinkerPatchMethod, target_dex_file); + patch.method_idx_ = target_method_idx; + return patch; } static LinkerPatch CodePatch(size_t literal_offset, const DexFile* target_dex_file, uint32_t target_method_idx) { - return LinkerPatch(literal_offset, kLinkerPatchCall, - target_method_idx, target_dex_file); + LinkerPatch patch(literal_offset, kLinkerPatchCall, target_dex_file); + patch.method_idx_ = target_method_idx; + return patch; } static LinkerPatch RelativeCodePatch(size_t literal_offset, const DexFile* target_dex_file, uint32_t target_method_idx) { - return LinkerPatch(literal_offset, kLinkerPatchCallRelative, - target_method_idx, target_dex_file); + LinkerPatch patch(literal_offset, kLinkerPatchCallRelative, target_dex_file); + patch.method_idx_ = target_method_idx; + return patch; } static LinkerPatch TypePatch(size_t literal_offset, const DexFile* target_dex_file, uint32_t target_type_idx) { - return LinkerPatch(literal_offset, kLinkerPatchType, target_type_idx, target_dex_file); + LinkerPatch patch(literal_offset, kLinkerPatchType, target_dex_file); + patch.type_idx_ = target_type_idx; + return patch; + } + + static LinkerPatch DexCacheArrayPatch(size_t literal_offset, + const DexFile* target_dex_file, + uint32_t pc_insn_offset, + size_t element_offset) { + DCHECK(IsUint<32>(element_offset)); + LinkerPatch patch(literal_offset, kLinkerPatchDexCacheArray, target_dex_file); + patch.pc_insn_offset_ = pc_insn_offset; + patch.element_offset_ = element_offset; + return patch; } LinkerPatch(const LinkerPatch& other) = default; @@ -227,10 +220,14 @@ class LinkerPatch { return patch_type_; } + bool IsPcRelative() const { + return Type() == kLinkerPatchCallRelative || Type() == kLinkerPatchDexCacheArray; + } + MethodReference TargetMethod() const { DCHECK(patch_type_ == kLinkerPatchMethod || patch_type_ == kLinkerPatchCall || patch_type_ == kLinkerPatchCallRelative); - return MethodReference(target_dex_file_, target_idx_); + return MethodReference(target_dex_file_, method_idx_); } const DexFile* TargetTypeDexFile() const { @@ -240,22 +237,52 @@ class LinkerPatch { uint32_t TargetTypeIndex() const { DCHECK(patch_type_ == kLinkerPatchType); - return target_idx_; + return type_idx_; + } + + const DexFile* TargetDexCacheDexFile() const { + DCHECK(patch_type_ == kLinkerPatchDexCacheArray); + return target_dex_file_; + } + + size_t TargetDexCacheElementOffset() const { + DCHECK(patch_type_ == kLinkerPatchDexCacheArray); + return element_offset_; + } + + uint32_t PcInsnOffset() const { + DCHECK(patch_type_ == kLinkerPatchDexCacheArray); + return pc_insn_offset_; } private: - LinkerPatch(size_t literal_offset, LinkerPatchType patch_type, - uint32_t target_idx, const DexFile* target_dex_file) - : literal_offset_(literal_offset), - patch_type_(patch_type), - target_idx_(target_idx), - target_dex_file_(target_dex_file) { + LinkerPatch(size_t literal_offset, LinkerPatchType patch_type, const DexFile* target_dex_file) + : target_dex_file_(target_dex_file), + literal_offset_(literal_offset), + patch_type_(patch_type) { + cmp1_ = 0u; + cmp2_ = 0u; + // The compiler rejects methods that are too big, so the compiled code + // of a single method really shouln't be anywhere close to 16MiB. 
+ DCHECK(IsUint<24>(literal_offset)); } - size_t literal_offset_; - LinkerPatchType patch_type_; - uint32_t target_idx_; // Method index (Call/Method patches) or type index (Type patches). const DexFile* target_dex_file_; + uint32_t literal_offset_ : 24; // Method code size up to 16MiB. + LinkerPatchType patch_type_ : 8; + union { + uint32_t cmp1_; // Used for relational operators. + uint32_t method_idx_; // Method index for Call/Method patches. + uint32_t type_idx_; // Type index for Type patches. + uint32_t element_offset_; // Element offset in the dex cache arrays. + }; + union { + uint32_t cmp2_; // Used for relational operators. + // Literal offset of the insn loading PC (same as literal_offset if it's the same insn, + // may be different if the PC-relative addressing needs multiple insns). + uint32_t pc_insn_offset_; + static_assert(sizeof(pc_insn_offset_) == sizeof(cmp2_), "needed by relational operators"); + }; friend bool operator==(const LinkerPatch& lhs, const LinkerPatch& rhs); friend bool operator<(const LinkerPatch& lhs, const LinkerPatch& rhs); @@ -264,15 +291,17 @@ class LinkerPatch { inline bool operator==(const LinkerPatch& lhs, const LinkerPatch& rhs) { return lhs.literal_offset_ == rhs.literal_offset_ && lhs.patch_type_ == rhs.patch_type_ && - lhs.target_idx_ == rhs.target_idx_ && - lhs.target_dex_file_ == rhs.target_dex_file_; + lhs.target_dex_file_ == rhs.target_dex_file_ && + lhs.cmp1_ == rhs.cmp1_ && + lhs.cmp2_ == rhs.cmp2_; } inline bool operator<(const LinkerPatch& lhs, const LinkerPatch& rhs) { return (lhs.literal_offset_ != rhs.literal_offset_) ? lhs.literal_offset_ < rhs.literal_offset_ : (lhs.patch_type_ != rhs.patch_type_) ? lhs.patch_type_ < rhs.patch_type_ - : (lhs.target_idx_ != rhs.target_idx_) ? lhs.target_idx_ < rhs.target_idx_ - : lhs.target_dex_file_ < rhs.target_dex_file_; + : (lhs.target_dex_file_ != rhs.target_dex_file_) ? lhs.target_dex_file_ < rhs.target_dex_file_ + : (lhs.cmp1_ != rhs.cmp1_) ? 
lhs.cmp1_ < rhs.cmp1_ + : lhs.cmp2_ < rhs.cmp2_; } class CompiledMethod FINAL : public CompiledCode { @@ -291,7 +320,7 @@ class CompiledMethod FINAL : public CompiledCode { const ArrayRef<const uint8_t>& vmap_table, const ArrayRef<const uint8_t>& native_gc_map, const ArrayRef<const uint8_t>& cfi_info, - const ArrayRef<LinkerPatch>& patches = ArrayRef<LinkerPatch>()); + const ArrayRef<const LinkerPatch>& patches); virtual ~CompiledMethod(); @@ -307,24 +336,7 @@ class CompiledMethod FINAL : public CompiledCode { const ArrayRef<const uint8_t>& vmap_table, const ArrayRef<const uint8_t>& native_gc_map, const ArrayRef<const uint8_t>& cfi_info, - const ArrayRef<LinkerPatch>& patches = ArrayRef<LinkerPatch>()); - - static CompiledMethod* SwapAllocCompiledMethodStackMap( - CompilerDriver* driver, - InstructionSet instruction_set, - const ArrayRef<const uint8_t>& quick_code, - const size_t frame_size_in_bytes, - const uint32_t core_spill_mask, - const uint32_t fp_spill_mask, - const ArrayRef<const uint8_t>& stack_map); - - static CompiledMethod* SwapAllocCompiledMethodCFI(CompilerDriver* driver, - InstructionSet instruction_set, - const ArrayRef<const uint8_t>& quick_code, - const size_t frame_size_in_bytes, - const uint32_t core_spill_mask, - const uint32_t fp_spill_mask, - const ArrayRef<const uint8_t>& cfi_info); + const ArrayRef<const LinkerPatch>& patches); static void ReleaseSwapAllocatedCompiledMethod(CompilerDriver* driver, CompiledMethod* m); @@ -362,8 +374,8 @@ class CompiledMethod FINAL : public CompiledCode { return cfi_info_; } - const SwapVector<LinkerPatch>& GetPatches() const { - return patches_; + ArrayRef<const LinkerPatch> GetPatches() const { + return ArrayRef<const LinkerPatch>(patches_); } private: @@ -375,7 +387,7 @@ class CompiledMethod FINAL : public CompiledCode { const uint32_t core_spill_mask_; // For quick code, a bit mask describing spilled FPR callee-save registers. const uint32_t fp_spill_mask_; - // For quick code, a set of pairs (PC, Line) mapping from native PC offset to Java line + // For quick code, a set of pairs (PC, DEX) mapping from native PC offset to DEX offset. SwapSrcMap* src_mapping_table_; // For quick code, a uleb128 encoded map from native PC offset to dex PC aswell as dex PC to // native PC offset. Size prefixed. @@ -388,7 +400,7 @@ class CompiledMethod FINAL : public CompiledCode { // For quick code, a FDE entry for the debug_frame section. SwapVector<uint8_t>* cfi_info_; // For quick code, linker patches needed by the method. - SwapVector<LinkerPatch> patches_; + const SwapVector<LinkerPatch> patches_; }; } // namespace art diff --git a/compiler/compiler.h b/compiler/compiler.h index 6ec39f9605..a04641e3fa 100644 --- a/compiler/compiler.h +++ b/compiler/compiler.h @@ -107,6 +107,9 @@ class Compiler { return driver_; } + // Whether to produce 64-bit ELF files for 64-bit targets. Leave this off for now. 
+ static constexpr bool kProduce64BitELFFiles = false; + private: CompilerDriver* const driver_; const uint64_t maximum_compilation_time_before_warning_; diff --git a/compiler/dex/bb_optimizations.h b/compiler/dex/bb_optimizations.h index 93d83c6fd4..0850f42a9a 100644 --- a/compiler/dex/bb_optimizations.h +++ b/compiler/dex/bb_optimizations.h @@ -403,13 +403,6 @@ class SuspendCheckElimination : public PassME { DCHECK(bb != nullptr); return c_unit->mir_graph->EliminateSuspendChecks(bb); } - - void End(PassDataHolder* data) const { - DCHECK(data != nullptr); - CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; - DCHECK(c_unit != nullptr); - c_unit->mir_graph->EliminateSuspendChecksEnd(); - } }; } // namespace art diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h index 39725dee38..0acdd422df 100644 --- a/compiler/dex/compiler_enums.h +++ b/compiler/dex/compiler_enums.h @@ -99,14 +99,16 @@ std::ostream& operator<<(std::ostream& os, const BBType& code); // Shared pseudo opcodes - must be < 0. enum LIRPseudoOpcode { - kPseudoExportedPC = -16, - kPseudoSafepointPC = -15, - kPseudoIntrinsicRetry = -14, - kPseudoSuspendTarget = -13, - kPseudoThrowTarget = -12, - kPseudoCaseLabel = -11, - kPseudoMethodEntry = -10, - kPseudoMethodExit = -9, + kPseudoPrologueBegin = -18, + kPseudoPrologueEnd = -17, + kPseudoEpilogueBegin = -16, + kPseudoEpilogueEnd = -15, + kPseudoExportedPC = -14, + kPseudoSafepointPC = -13, + kPseudoIntrinsicRetry = -12, + kPseudoSuspendTarget = -11, + kPseudoThrowTarget = -10, + kPseudoCaseLabel = -9, kPseudoBarrier = -8, kPseudoEntryBlock = -7, kPseudoExitBlock = -6, diff --git a/compiler/dex/gvn_dead_code_elimination.cc b/compiler/dex/gvn_dead_code_elimination.cc index 2d4c18ff49..ec12221f3c 100644 --- a/compiler/dex/gvn_dead_code_elimination.cc +++ b/compiler/dex/gvn_dead_code_elimination.cc @@ -1357,7 +1357,6 @@ bool GvnDeadCodeElimination::RecordMIR(MIR* mir) { default: LOG(FATAL) << "Unexpected opcode: " << opcode; UNREACHABLE(); - break; } if (mir->ssa_rep->num_defs != 0) { diff --git a/compiler/dex/local_value_numbering.cc b/compiler/dex/local_value_numbering.cc index dc222b5211..cdf5e38a9c 100644 --- a/compiler/dex/local_value_numbering.cc +++ b/compiler/dex/local_value_numbering.cc @@ -166,9 +166,9 @@ class LocalValueNumbering::AliasingArrayVersions { return gvn->LookupValue(kAliasingArrayOp, type, location, memory_version); } - static uint16_t LookupMergeValue(GlobalValueNumbering* gvn ATTRIBUTE_UNUSED, + static uint16_t LookupMergeValue(GlobalValueNumbering* gvn, const LocalValueNumbering* lvn, - uint16_t type ATTRIBUTE_UNUSED, uint16_t location) { + uint16_t type, uint16_t location) { // If the location is non-aliasing in lvn, use the non-aliasing value. 
uint16_t array = gvn->GetArrayLocationBase(location); if (lvn->IsNonAliasingArray(array, type)) { @@ -182,8 +182,6 @@ class LocalValueNumbering::AliasingArrayVersions { static bool HasNewBaseVersion(GlobalValueNumbering* gvn ATTRIBUTE_UNUSED, const LocalValueNumbering* lvn, uint16_t type ATTRIBUTE_UNUSED) { - UNUSED(gvn); - UNUSED(type); return lvn->global_memory_version_ == lvn->merge_new_memory_version_; } diff --git a/compiler/dex/mir_dataflow.cc b/compiler/dex/mir_dataflow.cc index f638b0bf4d..2a920a4e29 100644 --- a/compiler/dex/mir_dataflow.cc +++ b/compiler/dex/mir_dataflow.cc @@ -1396,6 +1396,13 @@ void MIRGraph::CompilerInitializeSSAConversion() { InitializeBasicBlockDataFlow(); } +uint32_t MIRGraph::GetUseCountWeight(BasicBlock* bb) const { + // Each level of nesting adds *100 to count, up to 3 levels deep. + uint32_t depth = std::min(3U, static_cast<uint32_t>(bb->nesting_depth)); + uint32_t weight = std::max(1U, depth * 100); + return weight; +} + /* * Count uses, weighting by loop nesting depth. This code only * counts explicitly used s_regs. A later phase will add implicit @@ -1405,9 +1412,7 @@ void MIRGraph::CountUses(BasicBlock* bb) { if (bb->block_type != kDalvikByteCode) { return; } - // Each level of nesting adds *100 to count, up to 3 levels deep. - uint32_t depth = std::min(3U, static_cast<uint32_t>(bb->nesting_depth)); - uint32_t weight = std::max(1U, depth * 100); + uint32_t weight = GetUseCountWeight(bb); for (MIR* mir = bb->first_mir_insn; (mir != NULL); mir = mir->next) { if (mir->ssa_rep == NULL) { continue; @@ -1417,23 +1422,6 @@ void MIRGraph::CountUses(BasicBlock* bb) { raw_use_counts_[s_reg] += 1u; use_counts_[s_reg] += weight; } - if (!(cu_->disable_opt & (1 << kPromoteCompilerTemps))) { - uint64_t df_attributes = GetDataFlowAttributes(mir); - // Implicit use of Method* ? */ - if (df_attributes & DF_UMS) { - /* - * Some invokes will not use Method* - need to perform test similar - * to that found in GenInvoke() to decide whether to count refs - * for Method* on invoke-class opcodes. This is a relatively expensive - * operation, so should only be done once. - * TODO: refactor InvokeUsesMethodStar() to perform check at parse time, - * and save results for both here and GenInvoke. For now, go ahead - * and assume all invokes use method*. - */ - raw_use_counts_[method_sreg_] += 1u; - use_counts_[method_sreg_] += weight; - } - } } } diff --git a/compiler/dex/mir_field_info.cc b/compiler/dex/mir_field_info.cc index d2079a254d..a9ab3bb0d4 100644 --- a/compiler/dex/mir_field_info.cc +++ b/compiler/dex/mir_field_info.cc @@ -19,6 +19,7 @@ #include <string.h> #include "base/logging.h" +#include "dex/verified_method.h" #include "driver/compiler_driver.h" #include "driver/compiler_driver-inl.h" #include "mirror/class_loader.h" // Only to allow casts in Handle<ClassLoader>. diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc index 3103f96e4e..4d340387f2 100644 --- a/compiler/dex/mir_graph.cc +++ b/compiler/dex/mir_graph.cc @@ -688,7 +688,7 @@ BasicBlock* MIRGraph::ProcessCanThrow(BasicBlock* cur_block, MIR* insn, DexOffse /* Parse a Dex method and insert it into the MIRGraph at the current insert point. 
*/ void MIRGraph::InlineMethod(const DexFile::CodeItem* code_item, uint32_t access_flags, - InvokeType invoke_type, uint16_t class_def_idx, + InvokeType invoke_type ATTRIBUTE_UNUSED, uint16_t class_def_idx, uint32_t method_idx, jobject class_loader, const DexFile& dex_file) { current_code_item_ = code_item; method_stack_.push_back(std::make_pair(current_method_, current_offset_)); @@ -726,13 +726,6 @@ void MIRGraph::InlineMethod(const DexFile::CodeItem* code_item, uint32_t access_ null_block->hidden = true; entry_block_ = CreateNewBB(kEntryBlock); exit_block_ = CreateNewBB(kExitBlock); - // TODO: deprecate all "cu->" fields; move what's left to wherever CompilationUnit is allocated. - cu_->dex_file = &dex_file; - cu_->class_def_idx = class_def_idx; - cu_->method_idx = method_idx; - cu_->access_flags = access_flags; - cu_->invoke_type = invoke_type; - cu_->shorty = dex_file.GetMethodShorty(dex_file.GetMethodId(method_idx)); } else { UNIMPLEMENTED(FATAL) << "Nested inlining not implemented."; /* @@ -1616,8 +1609,8 @@ void MIRGraph::ReplaceSpecialChars(std::string& str) { } std::string MIRGraph::GetSSAName(int ssa_reg) { - // TODO: This value is needed for LLVM and debugging. Currently, we compute this and then copy to - // the arena. We should be smarter and just place straight into the arena, or compute the + // TODO: This value is needed for debugging. Currently, we compute this and then copy to the + // arena. We should be smarter and just place straight into the arena, or compute the // value more lazily. int vreg = SRegToVReg(ssa_reg); if (vreg >= static_cast<int>(GetFirstTempVR())) { diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h index 3298af1162..85b13448da 100644 --- a/compiler/dex/mir_graph.h +++ b/compiler/dex/mir_graph.h @@ -960,6 +960,12 @@ class MIRGraph { */ CompilerTemp* GetNewCompilerTemp(CompilerTempType ct_type, bool wide); + /** + * @brief Used to remove last created compiler temporary when it's not needed. + * @param temp the temporary to remove. + */ + void RemoveLastCompilerTemp(CompilerTempType ct_type, bool wide, CompilerTemp* temp); + bool MethodIsLeaf() { return attributes_ & METHOD_IS_LEAF; } @@ -1079,7 +1085,6 @@ class MIRGraph { void EliminateDeadCodeEnd(); bool EliminateSuspendChecksGate(); bool EliminateSuspendChecks(BasicBlock* bb); - void EliminateSuspendChecksEnd(); uint16_t GetGvnIFieldId(MIR* mir) const { DCHECK(IsInstructionIGetOrIPut(mir->dalvikInsn.opcode)); @@ -1185,6 +1190,12 @@ class MIRGraph { void DoConstantPropagation(BasicBlock* bb); /** + * @brief Get use count weight for a given block. + * @param bb the BasicBlock. + */ + uint32_t GetUseCountWeight(BasicBlock* bb) const; + + /** * @brief Count the uses in the BasicBlock * @param bb the BasicBlock */ @@ -1396,10 +1407,6 @@ class MIRGraph { uint16_t* sfield_ids; // Ditto. GvnDeadCodeElimination* dce; } gvn; - // Suspend check elimination. 
- struct { - DexFileMethodInliner* inliner; - } sce; } temp_; static const int kInvalidEntry = -1; ArenaVector<BasicBlock*> block_list_; @@ -1451,6 +1458,7 @@ class MIRGraph { friend class GvnDeadCodeEliminationTest; friend class LocalValueNumberingTest; friend class TopologicalSortOrderTest; + friend class QuickCFITest; }; } // namespace art diff --git a/compiler/dex/mir_method_info.cc b/compiler/dex/mir_method_info.cc index 34fb1bf0e0..0c84b82edd 100644 --- a/compiler/dex/mir_method_info.cc +++ b/compiler/dex/mir_method_info.cc @@ -16,9 +16,13 @@ # include "mir_method_info.h" +#include "dex/quick/dex_file_method_inliner.h" +#include "dex/quick/dex_file_to_method_inliner_map.h" +#include "dex/verified_method.h" #include "driver/compiler_driver.h" #include "driver/dex_compilation_unit.h" #include "driver/compiler_driver-inl.h" +#include "driver/compiler_options.h" #include "mirror/class_loader.h" // Only to allow casts in Handle<ClassLoader>. #include "mirror/dex_cache.h" // Only to allow casts in Handle<DexCache>. #include "scoped_thread_state_change.h" @@ -62,6 +66,9 @@ void MirMethodLoweringInfo::Resolve(CompilerDriver* compiler_driver, const DexFile* const dex_file = mUnit->GetDexFile(); const bool use_jit = runtime->UseJit(); const VerifiedMethod* const verified_method = mUnit->GetVerifiedMethod(); + DexFileToMethodInlinerMap* inliner_map = compiler_driver->GetMethodInlinerMap(); + DexFileMethodInliner* default_inliner = + (inliner_map != nullptr) ? inliner_map->GetMethodInliner(dex_file) : nullptr; for (auto it = method_infos, end = method_infos + count; it != end; ++it) { // For quickened invokes, the dex method idx is actually the mir offset. @@ -120,6 +127,7 @@ void MirMethodLoweringInfo::Resolve(CompilerDriver* compiler_driver, if (UNLIKELY(resolved_method == nullptr)) { continue; } + compiler_driver->GetResolvedMethodDexFileLocation(resolved_method, &it->declaring_dex_file_, &it->declaring_class_idx_, &it->declaring_method_idx_); if (!it->IsQuickened()) { @@ -131,6 +139,7 @@ void MirMethodLoweringInfo::Resolve(CompilerDriver* compiler_driver, it->vtable_idx_ = compiler_driver->GetResolvedMethodVTableIndex(resolved_method, invoke_type); } + MethodReference target_method(it->target_dex_file_, it->target_method_idx_); int fast_path_flags = compiler_driver->IsFastInvoke( soa, current_dex_cache, class_loader, mUnit, referrer_class.Get(), resolved_method, @@ -138,10 +147,23 @@ void MirMethodLoweringInfo::Resolve(CompilerDriver* compiler_driver, const bool is_referrers_class = referrer_class.Get() == resolved_method->GetDeclaringClass(); const bool is_class_initialized = compiler_driver->IsMethodsClassInitialized(referrer_class.Get(), resolved_method); + + // Check if the target method is intrinsic or special. + InlineMethodFlags is_intrinsic_or_special = kNoInlineMethodFlags; + if (inliner_map != nullptr) { + auto* inliner = (target_method.dex_file == dex_file) + ? default_inliner + : inliner_map->GetMethodInliner(target_method.dex_file); + is_intrinsic_or_special = inliner->IsIntrinsicOrSpecial(target_method.dex_method_index); + } + uint16_t other_flags = it->flags_ & - ~(kFlagFastPath | kFlagClassIsInitialized | (kInvokeTypeMask << kBitSharpTypeBegin)); + ~(kFlagFastPath | kFlagIsIntrinsic | kFlagIsSpecial | kFlagClassIsInitialized | + (kInvokeTypeMask << kBitSharpTypeBegin)); it->flags_ = other_flags | (fast_path_flags != 0 ? kFlagFastPath : 0u) | + ((is_intrinsic_or_special & kInlineIntrinsic) != 0 ? kFlagIsIntrinsic : 0u) | + ((is_intrinsic_or_special & kInlineSpecial) != 0 ? 
kFlagIsSpecial : 0u) | (static_cast<uint16_t>(invoke_type) << kBitSharpTypeBegin) | (is_referrers_class ? kFlagIsReferrersClass : 0u) | (is_class_initialized ? kFlagClassIsInitialized : 0u); diff --git a/compiler/dex/mir_method_info.h b/compiler/dex/mir_method_info.h index e131c96a81..7230c462cd 100644 --- a/compiler/dex/mir_method_info.h +++ b/compiler/dex/mir_method_info.h @@ -127,6 +127,14 @@ class MirMethodLoweringInfo : public MirMethodInfo { return (flags_ & kFlagFastPath) != 0u; } + bool IsIntrinsic() const { + return (flags_ & kFlagIsIntrinsic) != 0u; + } + + bool IsSpecial() const { + return (flags_ & kFlagIsSpecial) != 0u; + } + bool IsReferrersClass() const { return (flags_ & kFlagIsReferrersClass) != 0; } @@ -188,9 +196,11 @@ class MirMethodLoweringInfo : public MirMethodInfo { private: enum { kBitFastPath = kMethodInfoBitEnd, + kBitIsIntrinsic, + kBitIsSpecial, kBitInvokeTypeBegin, kBitInvokeTypeEnd = kBitInvokeTypeBegin + 3, // 3 bits for invoke type. - kBitSharpTypeBegin, + kBitSharpTypeBegin = kBitInvokeTypeEnd, kBitSharpTypeEnd = kBitSharpTypeBegin + 3, // 3 bits for sharp type. kBitIsReferrersClass = kBitSharpTypeEnd, kBitClassIsInitialized, @@ -199,6 +209,8 @@ class MirMethodLoweringInfo : public MirMethodInfo { }; static_assert(kMethodLoweringInfoBitEnd <= 16, "Too many flags"); static constexpr uint16_t kFlagFastPath = 1u << kBitFastPath; + static constexpr uint16_t kFlagIsIntrinsic = 1u << kBitIsIntrinsic; + static constexpr uint16_t kFlagIsSpecial = 1u << kBitIsSpecial; static constexpr uint16_t kFlagIsReferrersClass = 1u << kBitIsReferrersClass; static constexpr uint16_t kFlagClassIsInitialized = 1u << kBitClassIsInitialized; static constexpr uint16_t kFlagQuickened = 1u << kBitQuickened; diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc index c85c3b6f21..9d7b4b4dfd 100644 --- a/compiler/dex/mir_optimization.cc +++ b/compiler/dex/mir_optimization.cc @@ -318,9 +318,11 @@ CompilerTemp* MIRGraph::GetNewCompilerTemp(CompilerTempType ct_type, bool wide) // Since VR temps cannot be requested once the BE temps are requested, we // allow reservation of VR temps as well for BE. We size_t available_temps = reserved_temps_for_backend_ + GetNumAvailableVRTemps(); - if (available_temps <= 0 || (available_temps <= 1 && wide)) { + size_t needed_temps = wide ? 2u : 1u; + if (available_temps < needed_temps) { if (verbose) { - LOG(INFO) << "CompilerTemps: Not enough temp(s) of type " << ct_type_str << " are available."; + LOG(INFO) << "CompilerTemps: Not enough temp(s) of type " << ct_type_str + << " are available."; } return nullptr; } @@ -328,12 +330,8 @@ CompilerTemp* MIRGraph::GetNewCompilerTemp(CompilerTempType ct_type, bool wide) // Update the remaining reserved temps since we have now used them. // Note that the code below is actually subtracting to remove them from reserve // once they have been claimed. It is careful to not go below zero. - if (reserved_temps_for_backend_ >= 1) { - reserved_temps_for_backend_--; - } - if (wide && reserved_temps_for_backend_ >= 1) { - reserved_temps_for_backend_--; - } + reserved_temps_for_backend_ = + std::max(reserved_temps_for_backend_, needed_temps) - needed_temps; // The new non-special compiler temp must receive a unique v_reg. 
compiler_temp->v_reg = GetFirstNonSpecialTempVR() + num_non_special_compiler_temps_; @@ -407,6 +405,36 @@ CompilerTemp* MIRGraph::GetNewCompilerTemp(CompilerTempType ct_type, bool wide) return compiler_temp; } +void MIRGraph::RemoveLastCompilerTemp(CompilerTempType ct_type, bool wide, CompilerTemp* temp) { + // Once the compiler temps have been committed, it's too late for any modifications. + DCHECK_EQ(compiler_temps_committed_, false); + + size_t used_temps = wide ? 2u : 1u; + + if (ct_type == kCompilerTempBackend) { + DCHECK(requested_backend_temp_); + + // Make the temps available to backend again. + reserved_temps_for_backend_ += used_temps; + } else if (ct_type == kCompilerTempVR) { + DCHECK(!requested_backend_temp_); + } else { + UNIMPLEMENTED(FATAL) << "No handling for compiler temp type " << static_cast<int>(ct_type); + } + + // Reduce the number of non-special compiler temps. + DCHECK_LE(used_temps, num_non_special_compiler_temps_); + num_non_special_compiler_temps_ -= used_temps; + + // Check that this was really the last temp. + DCHECK_EQ(static_cast<size_t>(temp->v_reg), + GetFirstNonSpecialTempVR() + num_non_special_compiler_temps_); + + if (cu_->verbose) { + LOG(INFO) << "Last temporary has been removed."; + } +} + static bool EvaluateBranch(Instruction::Code opcode, int32_t src1, int32_t src2) { bool is_taken; switch (opcode) { @@ -1489,7 +1517,7 @@ void MIRGraph::InlineSpecialMethods(BasicBlock* bb) { continue; } const MirMethodLoweringInfo& method_info = GetMethodLoweringInfo(mir); - if (!method_info.FastPath()) { + if (!method_info.FastPath() || !method_info.IsSpecial()) { continue; } @@ -1631,10 +1659,6 @@ bool MIRGraph::EliminateSuspendChecksGate() { !HasInvokes()) { // No invokes to actually eliminate any suspend checks. return false; } - if (cu_->compiler_driver != nullptr && cu_->compiler_driver->GetMethodInlinerMap() != nullptr) { - temp_.sce.inliner = - cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(cu_->dex_file); - } suspend_checks_in_loops_ = arena_->AllocArray<uint32_t>(GetNumBlocks(), kArenaAllocMisc); return true; } @@ -1652,9 +1676,9 @@ bool MIRGraph::EliminateSuspendChecks(BasicBlock* bb) { uint32_t suspend_checks_in_loops = (1u << bb->nesting_depth) - 1u; // Start with all loop heads. bool found_invoke = false; for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) { - if (IsInstructionInvoke(mir->dalvikInsn.opcode) && - (temp_.sce.inliner == nullptr || - !temp_.sce.inliner->IsIntrinsic(mir->dalvikInsn.vB, nullptr))) { + if ((IsInstructionInvoke(mir->dalvikInsn.opcode) || + IsInstructionQuickInvoke(mir->dalvikInsn.opcode)) && + !GetMethodLoweringInfo(mir).IsIntrinsic()) { // Non-intrinsic invoke, rely on a suspend point in the invoked method. 
found_invoke = true; break; @@ -1717,10 +1741,6 @@ bool MIRGraph::EliminateSuspendChecks(BasicBlock* bb) { return true; } -void MIRGraph::EliminateSuspendChecksEnd() { - temp_.sce.inliner = nullptr; -} - bool MIRGraph::CanThrow(MIR* mir) const { if ((mir->dalvikInsn.FlagsOf() & Instruction::kThrow) == 0) { return false; diff --git a/compiler/dex/mir_optimization_test.cc b/compiler/dex/mir_optimization_test.cc index 9ce5ebbc1b..10a4337cf5 100644 --- a/compiler/dex/mir_optimization_test.cc +++ b/compiler/dex/mir_optimization_test.cc @@ -474,7 +474,6 @@ class SuspendCheckEliminationTest : public MirOptimizationTest { for (BasicBlock* bb = iterator.Next(change); bb != nullptr; bb = iterator.Next(change)) { change = cu_.mir_graph->EliminateSuspendChecks(bb); } - cu_.mir_graph->EliminateSuspendChecksEnd(); } SuspendCheckEliminationTest() diff --git a/compiler/dex/quick/arm/assemble_arm.cc b/compiler/dex/quick/arm/assemble_arm.cc index 3e69878846..c5ac4c1508 100644 --- a/compiler/dex/quick/arm/assemble_arm.cc +++ b/compiler/dex/quick/arm/assemble_arm.cc @@ -1083,7 +1083,9 @@ void ArmMir2Lir::InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir) { #define PADDING_MOV_R5_R5 0x1C2D uint8_t* ArmMir2Lir::EncodeLIRs(uint8_t* write_pos, LIR* lir) { + uint8_t* const write_buffer = write_pos; for (; lir != NULL; lir = NEXT_LIR(lir)) { + lir->offset = (write_pos - write_buffer); if (!lir->flags.is_nop) { int opcode = lir->opcode; if (IsPseudoLirOp(opcode)) { diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc index d46c25a8da..3d18af6169 100644 --- a/compiler/dex/quick/arm/call_arm.cc +++ b/compiler/dex/quick/arm/call_arm.cc @@ -23,11 +23,13 @@ #include "dex/mir_graph.h" #include "dex/quick/mir_to_lir-inl.h" #include "driver/compiler_driver.h" +#include "driver/compiler_options.h" #include "gc/accounting/card_table.h" #include "mirror/art_method.h" #include "mirror/object_array-inl.h" #include "entrypoints/quick/quick_entrypoints.h" #include "utils.h" +#include "utils/dex_cache_arrays_layout-inl.h" namespace art { @@ -353,7 +355,16 @@ void ArmMir2Lir::UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) { FreeTemp(reg_card_no); } +static dwarf::Reg DwarfCoreReg(int num) { + return dwarf::Reg::ArmCore(num); +} + +static dwarf::Reg DwarfFpReg(int num) { + return dwarf::Reg::ArmFp(num); +} + void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { + DCHECK_EQ(cfi_.GetCurrentCFAOffset(), 0); // empty stack. int spill_count = num_core_spills_ + num_fp_spills_; /* * On entry, r0, r1, r2 & r3 are live. Let the register allocation @@ -371,7 +382,6 @@ void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { * a leaf *and* our frame size < fudge factor. */ bool skip_overflow_check = mir_graph_->MethodIsLeaf() && !FrameNeedsStackCheck(frame_size_, kArm); - NewLIR0(kPseudoMethodEntry); const size_t kStackOverflowReservedUsableBytes = GetStackOverflowReservedBytes(kArm); bool large_frame = (static_cast<size_t>(frame_size_) > kStackOverflowReservedUsableBytes); bool generate_explicit_stack_overflow_check = large_frame || @@ -402,28 +412,32 @@ void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { } } /* Spill core callee saves */ - if (core_spill_mask_ == 0u) { - // Nothing to spill. - } else if ((core_spill_mask_ & ~(0xffu | (1u << rs_rARM_LR.GetRegNum()))) == 0u) { - // Spilling only low regs and/or LR, use 16-bit PUSH. 
- constexpr int lr_bit_shift = rs_rARM_LR.GetRegNum() - 8; - NewLIR1(kThumbPush, - (core_spill_mask_ & ~(1u << rs_rARM_LR.GetRegNum())) | - ((core_spill_mask_ & (1u << rs_rARM_LR.GetRegNum())) >> lr_bit_shift)); - } else if (IsPowerOfTwo(core_spill_mask_)) { - // kThumb2Push cannot be used to spill a single register. - NewLIR1(kThumb2Push1, CTZ(core_spill_mask_)); - } else { - NewLIR1(kThumb2Push, core_spill_mask_); + if (core_spill_mask_ != 0u) { + if ((core_spill_mask_ & ~(0xffu | (1u << rs_rARM_LR.GetRegNum()))) == 0u) { + // Spilling only low regs and/or LR, use 16-bit PUSH. + constexpr int lr_bit_shift = rs_rARM_LR.GetRegNum() - 8; + NewLIR1(kThumbPush, + (core_spill_mask_ & ~(1u << rs_rARM_LR.GetRegNum())) | + ((core_spill_mask_ & (1u << rs_rARM_LR.GetRegNum())) >> lr_bit_shift)); + } else if (IsPowerOfTwo(core_spill_mask_)) { + // kThumb2Push cannot be used to spill a single register. + NewLIR1(kThumb2Push1, CTZ(core_spill_mask_)); + } else { + NewLIR1(kThumb2Push, core_spill_mask_); + } + cfi_.AdjustCFAOffset(num_core_spills_ * kArmPointerSize); + cfi_.RelOffsetForMany(DwarfCoreReg(0), 0, core_spill_mask_, kArmPointerSize); } /* Need to spill any FP regs? */ - if (num_fp_spills_) { + if (num_fp_spills_ != 0u) { /* * NOTE: fp spills are a little different from core spills in that * they are pushed as a contiguous block. When promoting from * the fp set, we must allocate all singles from s16..highest-promoted */ NewLIR1(kThumb2VPushCS, num_fp_spills_); + cfi_.AdjustCFAOffset(num_fp_spills_ * kArmPointerSize); + cfi_.RelOffsetForMany(DwarfFpReg(0), 0, fp_spill_mask_, kArmPointerSize); } const int spill_size = spill_count * 4; @@ -444,12 +458,14 @@ void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { m2l_->LoadWordDisp(rs_rARM_SP, sp_displace_ - 4, rs_rARM_LR); } m2l_->OpRegImm(kOpAdd, rs_rARM_SP, sp_displace_); + m2l_->cfi().AdjustCFAOffset(-sp_displace_); m2l_->ClobberCallerSave(); ThreadOffset<4> func_offset = QUICK_ENTRYPOINT_OFFSET(4, pThrowStackOverflow); // Load the entrypoint directly into the pc instead of doing a load + branch. Assumes // codegen and target are in thumb2 mode. // NOTE: native pointer. m2l_->LoadWordDisp(rs_rARM_SELF, func_offset.Int32Value(), rs_rARM_PC); + m2l_->cfi().AdjustCFAOffset(sp_displace_); } private: @@ -464,6 +480,7 @@ void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { // Need to restore LR since we used it as a temp. AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, true, spill_size)); OpRegCopy(rs_rARM_SP, rs_rARM_LR); // Establish stack + cfi_.AdjustCFAOffset(frame_size_without_spills); } else { /* * If the frame is small enough we are guaranteed to have enough space that remains to @@ -474,6 +491,7 @@ void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { MarkTemp(rs_rARM_LR); FreeTemp(rs_rARM_LR); OpRegRegImm(kOpSub, rs_rARM_SP, rs_rARM_SP, frame_size_without_spills); + cfi_.AdjustCFAOffset(frame_size_without_spills); Clobber(rs_rARM_LR); UnmarkTemp(rs_rARM_LR); LIR* branch = OpCmpBranch(kCondUlt, rs_rARM_SP, rs_r12, nullptr); @@ -483,13 +501,23 @@ void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { // Implicit stack overflow check has already been done. Just make room on the // stack for the frame now. 
OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills); + cfi_.AdjustCFAOffset(frame_size_without_spills); } } else { OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills); + cfi_.AdjustCFAOffset(frame_size_without_spills); } FlushIns(ArgLocs, rl_method); + // We can promote a PC-relative reference to dex cache arrays to a register + // if it's used at least twice. Without investigating where we should lazily + // load the reference, we conveniently load it after flushing inputs. + if (dex_cache_arrays_base_reg_.Valid()) { + OpPcRelDexCacheArrayAddr(cu_->dex_file, dex_cache_arrays_min_offset_, + dex_cache_arrays_base_reg_); + } + FreeTemp(rs_r0); FreeTemp(rs_r1); FreeTemp(rs_r2); @@ -498,7 +526,9 @@ void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { } void ArmMir2Lir::GenExitSequence() { + cfi_.RememberState(); int spill_count = num_core_spills_ + num_fp_spills_; + /* * In the exit path, r0/r1 are live - make sure they aren't * allocated by the register utilities as temps. @@ -506,35 +536,47 @@ void ArmMir2Lir::GenExitSequence() { LockTemp(rs_r0); LockTemp(rs_r1); - NewLIR0(kPseudoMethodExit); - OpRegImm(kOpAdd, rs_rARM_SP, frame_size_ - (spill_count * 4)); + int adjust = frame_size_ - (spill_count * kArmPointerSize); + OpRegImm(kOpAdd, rs_rARM_SP, adjust); + cfi_.AdjustCFAOffset(-adjust); /* Need to restore any FP callee saves? */ if (num_fp_spills_) { NewLIR1(kThumb2VPopCS, num_fp_spills_); + cfi_.AdjustCFAOffset(-num_fp_spills_ * kArmPointerSize); + cfi_.RestoreMany(DwarfFpReg(0), fp_spill_mask_); } - if ((core_spill_mask_ & (1 << rs_rARM_LR.GetRegNum())) != 0) { - /* Unspill rARM_LR to rARM_PC */ + bool unspill_LR_to_PC = (core_spill_mask_ & (1 << rs_rARM_LR.GetRegNum())) != 0; + if (unspill_LR_to_PC) { core_spill_mask_ &= ~(1 << rs_rARM_LR.GetRegNum()); core_spill_mask_ |= (1 << rs_rARM_PC.GetRegNum()); } - if (core_spill_mask_ == 0u) { - // Nothing to unspill. - } else if ((core_spill_mask_ & ~(0xffu | (1u << rs_rARM_PC.GetRegNum()))) == 0u) { - // Unspilling only low regs and/or PC, use 16-bit POP. - constexpr int pc_bit_shift = rs_rARM_PC.GetRegNum() - 8; - NewLIR1(kThumbPop, - (core_spill_mask_ & ~(1u << rs_rARM_PC.GetRegNum())) | - ((core_spill_mask_ & (1u << rs_rARM_PC.GetRegNum())) >> pc_bit_shift)); - } else if (IsPowerOfTwo(core_spill_mask_)) { - // kThumb2Pop cannot be used to unspill a single register. - NewLIR1(kThumb2Pop1, CTZ(core_spill_mask_)); - } else { - NewLIR1(kThumb2Pop, core_spill_mask_); + if (core_spill_mask_ != 0u) { + if ((core_spill_mask_ & ~(0xffu | (1u << rs_rARM_PC.GetRegNum()))) == 0u) { + // Unspilling only low regs and/or PC, use 16-bit POP. + constexpr int pc_bit_shift = rs_rARM_PC.GetRegNum() - 8; + NewLIR1(kThumbPop, + (core_spill_mask_ & ~(1u << rs_rARM_PC.GetRegNum())) | + ((core_spill_mask_ & (1u << rs_rARM_PC.GetRegNum())) >> pc_bit_shift)); + } else if (IsPowerOfTwo(core_spill_mask_)) { + // kThumb2Pop cannot be used to unspill a single register. + NewLIR1(kThumb2Pop1, CTZ(core_spill_mask_)); + } else { + NewLIR1(kThumb2Pop, core_spill_mask_); + } + // If we pop to PC, there is no further epilogue code. + if (!unspill_LR_to_PC) { + cfi_.AdjustCFAOffset(-num_core_spills_ * kArmPointerSize); + cfi_.RestoreMany(DwarfCoreReg(0), core_spill_mask_); + DCHECK_EQ(cfi_.GetCurrentCFAOffset(), 0); // empty stack. 
+ } } - if ((core_spill_mask_ & (1 << rs_rARM_PC.GetRegNum())) == 0) { + if (!unspill_LR_to_PC) { /* We didn't pop to rARM_PC, so must do a bv rARM_LR */ NewLIR1(kThumbBx, rs_rARM_LR.GetReg()); } + // The CFI should be restored for any code that follows the exit block. + cfi_.RestoreState(); + cfi_.DefCFAOffset(frame_size_); } void ArmMir2Lir::GenSpecialExitSequence() { @@ -556,11 +598,16 @@ void ArmMir2Lir::GenSpecialEntryForSuspend() { NewLIR1(kThumbPush, (1u << rs_r0.GetRegNum()) | // ArtMethod* (core_spill_mask_ & ~(1u << rs_rARM_LR.GetRegNum())) | // Spills other than LR. (1u << 8)); // LR encoded for 16-bit push. + cfi_.AdjustCFAOffset(frame_size_); + // Do not generate CFI for scratch register r0. + cfi_.RelOffsetForMany(DwarfCoreReg(0), 4, core_spill_mask_, kArmPointerSize); } void ArmMir2Lir::GenSpecialExitForSuspend() { // Pop the frame. (ArtMethod* no longer needed but restore it anyway.) NewLIR1(kThumb2Pop, (1u << rs_r0.GetRegNum()) | core_spill_mask_); // 32-bit because of LR. + cfi_.AdjustCFAOffset(-frame_size_); + cfi_.RestoreMany(DwarfCoreReg(0), core_spill_mask_); } static bool ArmUseRelativeCall(CompilationUnit* cu, const MethodReference& target_method) { @@ -572,12 +619,12 @@ static bool ArmUseRelativeCall(CompilationUnit* cu, const MethodReference& targe * Bit of a hack here - in the absence of a real scheduling pass, * emit the next instruction in static & direct invoke sequences. */ -static int ArmNextSDCallInsn(CompilationUnit* cu, CallInfo* info ATTRIBUTE_UNUSED, - int state, const MethodReference& target_method, - uint32_t unused_idx ATTRIBUTE_UNUSED, - uintptr_t direct_code, uintptr_t direct_method, - InvokeType type) { - Mir2Lir* cg = static_cast<Mir2Lir*>(cu->cg.get()); +int ArmMir2Lir::ArmNextSDCallInsn(CompilationUnit* cu, CallInfo* info ATTRIBUTE_UNUSED, + int state, const MethodReference& target_method, + uint32_t unused_idx ATTRIBUTE_UNUSED, + uintptr_t direct_code, uintptr_t direct_method, + InvokeType type) { + ArmMir2Lir* cg = static_cast<ArmMir2Lir*>(cu->cg.get()); if (direct_code != 0 && direct_method != 0) { switch (state) { case 0: // Get the current Method* [sets kArg0] @@ -598,17 +645,24 @@ static int ArmNextSDCallInsn(CompilationUnit* cu, CallInfo* info ATTRIBUTE_UNUSE return -1; } } else { + bool use_pc_rel = cg->CanUseOpPcRelDexCacheArrayLoad(); RegStorage arg0_ref = cg->TargetReg(kArg0, kRef); switch (state) { case 0: // Get the current Method* [sets kArg0] // TUNING: we can save a reg copy if Method* has been promoted. - cg->LoadCurrMethodDirect(arg0_ref); - break; + if (!use_pc_rel) { + cg->LoadCurrMethodDirect(arg0_ref); + break; + } + ++state; + FALLTHROUGH_INTENDED; case 1: // Get method->dex_cache_resolved_methods_ - cg->LoadRefDisp(arg0_ref, - mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value(), - arg0_ref, - kNotVolatile); + if (!use_pc_rel) { + cg->LoadRefDisp(arg0_ref, + mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value(), + arg0_ref, + kNotVolatile); + } // Set up direct code if known. 
if (direct_code != 0) { if (direct_code != static_cast<uintptr_t>(-1)) { @@ -620,14 +674,23 @@ static int ArmNextSDCallInsn(CompilationUnit* cu, CallInfo* info ATTRIBUTE_UNUSE cg->LoadCodeAddress(target_method, type, kInvokeTgt); } } - break; + if (!use_pc_rel || direct_code != 0) { + break; + } + ++state; + FALLTHROUGH_INTENDED; case 2: // Grab target method* CHECK_EQ(cu->dex_file, target_method.dex_file); - cg->LoadRefDisp(arg0_ref, - mirror::ObjectArray<mirror::Object>::OffsetOfElement( - target_method.dex_method_index).Int32Value(), - arg0_ref, - kNotVolatile); + if (!use_pc_rel) { + cg->LoadRefDisp(arg0_ref, + mirror::ObjectArray<mirror::Object>::OffsetOfElement( + target_method.dex_method_index).Int32Value(), + arg0_ref, + kNotVolatile); + } else { + size_t offset = cg->dex_cache_arrays_layout_.MethodOffset(target_method.dex_method_index); + cg->OpPcRelDexCacheArrayLoad(cu->dex_file, offset, arg0_ref); + } break; case 3: // Grab the code from the method* if (direct_code == 0) { diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h index 4141bcfe98..83b27df939 100644 --- a/compiler/dex/quick/arm/codegen_arm.h +++ b/compiler/dex/quick/arm/codegen_arm.h @@ -82,6 +82,9 @@ class ArmMir2Lir FINAL : public Mir2Lir { /// @copydoc Mir2Lir::UnconditionallyMarkGCCard(RegStorage) void UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) OVERRIDE; + bool CanUseOpPcRelDexCacheArrayLoad() const OVERRIDE; + void OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset, RegStorage r_dest) OVERRIDE; + // Required for target - register utilities. RegStorage TargetReg(SpecialTargetRegister reg) OVERRIDE; RegStorage TargetReg(SpecialTargetRegister reg, WideKind wide_kind) OVERRIDE { @@ -257,6 +260,9 @@ class ArmMir2Lir FINAL : public Mir2Lir { */ LIR* GenCallInsn(const MirMethodLoweringInfo& method_info) OVERRIDE; + void CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs) OVERRIDE; + void DoPromotion() OVERRIDE; + /* * @brief Handle ARM specific literals. */ @@ -300,6 +306,13 @@ class ArmMir2Lir FINAL : public Mir2Lir { ArenaVector<LIR*> call_method_insns_; + // Instructions needing patching with PC relative code addresses. + ArenaVector<LIR*> dex_cache_access_insns_; + + // Register with a reference to the dex cache arrays at dex_cache_arrays_min_offset_, + // if promoted. + RegStorage dex_cache_arrays_base_reg_; + /** * @brief Given float register pair, returns Solo64 float register. * @param reg #RegStorage containing a float register pair (e.g. @c s2 and @c s3). 
@@ -329,6 +342,14 @@ class ArmMir2Lir FINAL : public Mir2Lir { } int GenDalvikArgsBulkCopy(CallInfo* info, int first, int count) OVERRIDE; + + static int ArmNextSDCallInsn(CompilationUnit* cu, CallInfo* info ATTRIBUTE_UNUSED, + int state, const MethodReference& target_method, + uint32_t unused_idx ATTRIBUTE_UNUSED, + uintptr_t direct_code, uintptr_t direct_method, + InvokeType type); + + void OpPcRelDexCacheArrayAddr(const DexFile* dex_file, int offset, RegStorage r_dest); }; } // namespace art diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc index 9193e1b23c..47669db979 100644 --- a/compiler/dex/quick/arm/int_arm.cc +++ b/compiler/dex/quick/arm/int_arm.cc @@ -1087,6 +1087,36 @@ void ArmMir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) { lir->target = target; } +bool ArmMir2Lir::CanUseOpPcRelDexCacheArrayLoad() const { + return dex_cache_arrays_layout_.Valid(); +} + +void ArmMir2Lir::OpPcRelDexCacheArrayAddr(const DexFile* dex_file, int offset, RegStorage r_dest) { + LIR* movw = NewLIR2(kThumb2MovImm16, r_dest.GetReg(), 0); + LIR* movt = NewLIR2(kThumb2MovImm16H, r_dest.GetReg(), 0); + ArmOpcode add_pc_opcode = (r_dest.GetRegNum() < 8) ? kThumbAddRRLH : kThumbAddRRHH; + LIR* add_pc = NewLIR2(add_pc_opcode, r_dest.GetReg(), rs_rARM_PC.GetReg()); + add_pc->flags.fixup = kFixupLabel; + movw->operands[2] = WrapPointer(dex_file); + movw->operands[3] = offset; + movw->operands[4] = WrapPointer(add_pc); + movt->operands[2] = movw->operands[2]; + movt->operands[3] = movw->operands[3]; + movt->operands[4] = movw->operands[4]; + dex_cache_access_insns_.push_back(movw); + dex_cache_access_insns_.push_back(movt); +} + +void ArmMir2Lir::OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset, RegStorage r_dest) { + if (dex_cache_arrays_base_reg_.Valid()) { + LoadRefDisp(dex_cache_arrays_base_reg_, offset - dex_cache_arrays_min_offset_, + r_dest, kNotVolatile); + } else { + OpPcRelDexCacheArrayAddr(dex_file, offset, r_dest); + LoadRefDisp(r_dest, 0, r_dest, kNotVolatile); + } +} + LIR* ArmMir2Lir::OpVldm(RegStorage r_base, int count) { return NewLIR3(kThumb2Vldms, r_base.GetReg(), rs_fr0.GetReg(), count); } diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc index 9812d9ff99..5f27338e6b 100644 --- a/compiler/dex/quick/arm/target_arm.cc +++ b/compiler/dex/quick/arm/target_arm.cc @@ -575,7 +575,9 @@ RegisterClass ArmMir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volatil ArmMir2Lir::ArmMir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena) : Mir2Lir(cu, mir_graph, arena), - call_method_insns_(arena->Adapter()) { + call_method_insns_(arena->Adapter()), + dex_cache_access_insns_(arena->Adapter()), + dex_cache_arrays_base_reg_(RegStorage::InvalidReg()) { call_method_insns_.reserve(100); // Sanity check - make sure encoding map lines up. for (int i = 0; i < kArmLast; i++) { @@ -901,14 +903,28 @@ RegStorage ArmMir2Lir::AllocPreservedSingle(int s_reg) { } void ArmMir2Lir::InstallLiteralPools() { + patches_.reserve(call_method_insns_.size() + dex_cache_access_insns_.size()); + // PC-relative calls to methods. 
- patches_.reserve(call_method_insns_.size()); for (LIR* p : call_method_insns_) { - DCHECK_EQ(p->opcode, kThumb2Bl); - uint32_t target_method_idx = p->operands[1]; - const DexFile* target_dex_file = UnwrapPointer<DexFile>(p->operands[2]); - patches_.push_back(LinkerPatch::RelativeCodePatch(p->offset, - target_dex_file, target_method_idx)); + DCHECK_EQ(p->opcode, kThumb2Bl); + uint32_t target_method_idx = p->operands[1]; + const DexFile* target_dex_file = UnwrapPointer<DexFile>(p->operands[2]); + patches_.push_back(LinkerPatch::RelativeCodePatch(p->offset, + target_dex_file, target_method_idx)); + } + + // PC-relative dex cache array accesses. + for (LIR* p : dex_cache_access_insns_) { + DCHECK(p->opcode == kThumb2MovImm16 || p->opcode == kThumb2MovImm16H); + const LIR* add_pc = UnwrapPointer<LIR>(p->operands[4]); + DCHECK(add_pc->opcode == kThumbAddRRLH || add_pc->opcode == kThumbAddRRHH); + const DexFile* dex_file = UnwrapPointer<DexFile>(p->operands[2]); + uint32_t offset = p->operands[3]; + DCHECK(!p->flags.is_nop); + DCHECK(!add_pc->flags.is_nop); + patches_.push_back(LinkerPatch::DexCacheArrayPatch(p->offset, + dex_file, add_pc->offset, offset)); } // And do the normal processing. diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc index e4bd2a33ae..25ea6941c0 100644 --- a/compiler/dex/quick/arm/utility_arm.cc +++ b/compiler/dex/quick/arm/utility_arm.cc @@ -19,6 +19,7 @@ #include "arch/arm/instruction_set_features_arm.h" #include "arm_lir.h" #include "base/logging.h" +#include "dex/mir_graph.h" #include "dex/quick/mir_to_lir-inl.h" #include "dex/reg_storage_eq.h" #include "driver/compiler_driver.h" @@ -1266,4 +1267,39 @@ size_t ArmMir2Lir::GetInstructionOffset(LIR* lir) { return offset; } +void ArmMir2Lir::CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs) { + // Start with the default counts. + Mir2Lir::CountRefs(core_counts, fp_counts, num_regs); + + if (pc_rel_temp_ != nullptr) { + // Now, if the dex cache array base temp is used only once outside any loops (weight = 1), + // avoid the promotion, otherwise boost the weight by factor 3 because the full PC-relative + // load sequence is 4 instructions long and by promoting the PC base we save up to 3 + // instructions per use. + int p_map_idx = SRegToPMap(pc_rel_temp_->s_reg_low); + if (core_counts[p_map_idx].count == 1) { + core_counts[p_map_idx].count = 0; + } else { + core_counts[p_map_idx].count *= 3; + } + } +} + +void ArmMir2Lir::DoPromotion() { + if (CanUseOpPcRelDexCacheArrayLoad()) { + pc_rel_temp_ = mir_graph_->GetNewCompilerTemp(kCompilerTempBackend, false); + } + + Mir2Lir::DoPromotion(); + + if (pc_rel_temp_ != nullptr) { + // Now, if the dex cache array base temp is promoted, remember the register but + // always remove the temp's stack location to avoid unnecessarily bloating the stack. + dex_cache_arrays_base_reg_ = mir_graph_->reg_location_[pc_rel_temp_->s_reg_low].reg; + DCHECK(!dex_cache_arrays_base_reg_.Valid() || !dex_cache_arrays_base_reg_.IsFloat()); + mir_graph_->RemoveLastCompilerTemp(kCompilerTempBackend, false, pc_rel_temp_); + pc_rel_temp_ = nullptr; + } +} + } // namespace art diff --git a/compiler/dex/quick/arm64/arm64_lir.h b/compiler/dex/quick/arm64/arm64_lir.h index d15412a1bd..f6fa9389d0 100644 --- a/compiler/dex/quick/arm64/arm64_lir.h +++ b/compiler/dex/quick/arm64/arm64_lir.h @@ -236,6 +236,7 @@ enum A64Opcode { kA64Add4rrro, // add [00001011000] rm[20-16] imm_6[15-10] rn[9-5] rd[4-0].
kA64Add4RRre, // add [00001011001] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] rd[4-0]. kA64Adr2xd, // adr [0] immlo[30-29] [10000] immhi[23-5] rd[4-0]. + kA64Adrp2xd, // adrp [1] immlo[30-29] [10000] immhi[23-5] rd[4-0]. kA64And3Rrl, // and [00010010] N[22] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0]. kA64And4rrro, // and [00001010] shift[23-22] [N=0] rm[20-16] imm_6[15-10] rn[9-5] rd[4-0]. kA64Asr3rrd, // asr [0001001100] immr[21-16] imms[15-10] rn[9-5] rd[4-0]. diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc index 329bb1e770..2f1ae66bfc 100644 --- a/compiler/dex/quick/arm64/assemble_arm64.cc +++ b/compiler/dex/quick/arm64/assemble_arm64.cc @@ -131,6 +131,10 @@ const A64EncodingMap Arm64Mir2Lir::EncodingMap[kA64Last] = { kFmtRegX, 4, 0, kFmtImm21, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | NEEDS_FIXUP, "adr", "!0x, #!1d", kFixupAdr), + ENCODING_MAP(kA64Adrp2xd, NO_VARIANTS(0x90000000), + kFmtRegX, 4, 0, kFmtImm21, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | NEEDS_FIXUP, + "adrp", "!0x, #!1d", kFixupLabel), ENCODING_MAP(WIDE(kA64And3Rrl), SF_VARIANTS(0x12000000), kFmtRegROrSp, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 22, 10, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, @@ -682,7 +686,9 @@ void Arm64Mir2Lir::InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir) #define PADDING_NOP (UINT32_C(0xd503201f)) uint8_t* Arm64Mir2Lir::EncodeLIRs(uint8_t* write_pos, LIR* lir) { + uint8_t* const write_buffer = write_pos; for (; lir != nullptr; lir = NEXT_LIR(lir)) { + lir->offset = (write_pos - write_buffer); bool opcode_is_wide = IS_WIDE(lir->opcode); A64Opcode opcode = UNWIDE(lir->opcode); diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc index 823cb60d97..4abbd77d88 100644 --- a/compiler/dex/quick/arm64/call_arm64.cc +++ b/compiler/dex/quick/arm64/call_arm64.cc @@ -23,10 +23,12 @@ #include "dex/mir_graph.h" #include "dex/quick/mir_to_lir-inl.h" #include "driver/compiler_driver.h" +#include "driver/compiler_options.h" #include "gc/accounting/card_table.h" #include "entrypoints/quick/quick_entrypoints.h" #include "mirror/art_method.h" #include "mirror/object_array-inl.h" +#include "utils/dex_cache_arrays_layout-inl.h" namespace art { @@ -280,7 +282,13 @@ void Arm64Mir2Lir::UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) { FreeTemp(reg_card_no); } +static dwarf::Reg DwarfCoreReg(int num) { + return dwarf::Reg::Arm64Core(num); +} + void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { + DCHECK_EQ(cfi_.GetCurrentCFAOffset(), 0); // empty stack. + /* * On entry, x0 to x7 are live. 
Let the register allocation * mechanism know so it doesn't try to use any of them when @@ -310,8 +318,6 @@ void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) bool skip_overflow_check = mir_graph_->MethodIsLeaf() && !FrameNeedsStackCheck(frame_size_, kArm64); - NewLIR0(kPseudoMethodEntry); - const size_t kStackOverflowReservedUsableBytes = GetStackOverflowReservedBytes(kArm64); const bool large_frame = static_cast<size_t>(frame_size_) > kStackOverflowReservedUsableBytes; bool generate_explicit_stack_overflow_check = large_frame || @@ -345,6 +351,7 @@ void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) if (spilled_already != frame_size_) { OpRegImm(kOpSub, rs_sp, frame_size_without_spills); + cfi_.AdjustCFAOffset(frame_size_without_spills); } if (!skip_overflow_check) { @@ -361,12 +368,14 @@ void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) GenerateTargetLabel(kPseudoThrowTarget); // Unwinds stack. m2l_->OpRegImm(kOpAdd, rs_sp, sp_displace_); + m2l_->cfi().AdjustCFAOffset(-sp_displace_); m2l_->ClobberCallerSave(); ThreadOffset<8> func_offset = QUICK_ENTRYPOINT_OFFSET(8, pThrowStackOverflow); m2l_->LockTemp(rs_xIP0); m2l_->LoadWordDisp(rs_xSELF, func_offset.Int32Value(), rs_xIP0); m2l_->NewLIR1(kA64Br1x, rs_xIP0.GetReg()); m2l_->FreeTemp(rs_xIP0); + m2l_->cfi().AdjustCFAOffset(sp_displace_); } private: @@ -393,19 +402,20 @@ void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) } void Arm64Mir2Lir::GenExitSequence() { + cfi_.RememberState(); /* * In the exit path, r0/r1 are live - make sure they aren't * allocated by the register utilities as temps. */ LockTemp(rs_x0); LockTemp(rs_x1); - - NewLIR0(kPseudoMethodExit); - UnspillRegs(rs_sp, core_spill_mask_, fp_spill_mask_, frame_size_); // Finally return. NewLIR0(kA64Ret); + // The CFI should be restored for any code that follows the exit block. + cfi_.RestoreState(); + cfi_.DefCFAOffset(frame_size_); } void Arm64Mir2Lir::GenSpecialExitSequence() { @@ -422,11 +432,16 @@ void Arm64Mir2Lir::GenSpecialEntryForSuspend() { core_vmap_table_.clear(); fp_vmap_table_.clear(); NewLIR4(WIDE(kA64StpPre4rrXD), rs_x0.GetReg(), rs_xLR.GetReg(), rs_sp.GetReg(), -frame_size_ / 8); + cfi_.AdjustCFAOffset(frame_size_); + // Do not generate CFI for scratch register x0. + cfi_.RelOffset(DwarfCoreReg(rxLR), 8); } void Arm64Mir2Lir::GenSpecialExitForSuspend() { // Pop the frame. (ArtMethod* no longer needed but restore it anyway.) NewLIR4(WIDE(kA64LdpPost4rrXD), rs_x0.GetReg(), rs_xLR.GetReg(), rs_sp.GetReg(), frame_size_ / 8); + cfi_.AdjustCFAOffset(-frame_size_); + cfi_.Restore(DwarfCoreReg(rxLR)); } static bool Arm64UseRelativeCall(CompilationUnit* cu, const MethodReference& target_method) { @@ -438,13 +453,13 @@ static bool Arm64UseRelativeCall(CompilationUnit* cu, const MethodReference& tar * Bit of a hack here - in the absence of a real scheduling pass, * emit the next instruction in static & direct invoke sequences. 
*/ -static int Arm64NextSDCallInsn(CompilationUnit* cu, CallInfo* info, - int state, const MethodReference& target_method, - uint32_t unused_idx, - uintptr_t direct_code, uintptr_t direct_method, - InvokeType type) { +int Arm64Mir2Lir::Arm64NextSDCallInsn(CompilationUnit* cu, CallInfo* info, + int state, const MethodReference& target_method, + uint32_t unused_idx, + uintptr_t direct_code, uintptr_t direct_method, + InvokeType type) { UNUSED(info, unused_idx); - Mir2Lir* cg = static_cast<Mir2Lir*>(cu->cg.get()); + Arm64Mir2Lir* cg = static_cast<Arm64Mir2Lir*>(cu->cg.get()); if (direct_code != 0 && direct_method != 0) { switch (state) { case 0: // Get the current Method* [sets kArg0] @@ -465,17 +480,24 @@ static int Arm64NextSDCallInsn(CompilationUnit* cu, CallInfo* info, return -1; } } else { + bool use_pc_rel = cg->CanUseOpPcRelDexCacheArrayLoad(); RegStorage arg0_ref = cg->TargetReg(kArg0, kRef); switch (state) { case 0: // Get the current Method* [sets kArg0] // TUNING: we can save a reg copy if Method* has been promoted. - cg->LoadCurrMethodDirect(arg0_ref); - break; + if (!use_pc_rel) { + cg->LoadCurrMethodDirect(arg0_ref); + break; + } + ++state; + FALLTHROUGH_INTENDED; case 1: // Get method->dex_cache_resolved_methods_ - cg->LoadRefDisp(arg0_ref, - mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value(), - arg0_ref, - kNotVolatile); + if (!use_pc_rel) { + cg->LoadRefDisp(arg0_ref, + mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value(), + arg0_ref, + kNotVolatile); + } // Set up direct code if known. if (direct_code != 0) { if (direct_code != static_cast<uintptr_t>(-1)) { @@ -487,14 +509,23 @@ static int Arm64NextSDCallInsn(CompilationUnit* cu, CallInfo* info, cg->LoadCodeAddress(target_method, type, kInvokeTgt); } } - break; + if (!use_pc_rel || direct_code != 0) { + break; + } + ++state; + FALLTHROUGH_INTENDED; case 2: // Grab target method* CHECK_EQ(cu->dex_file, target_method.dex_file); - cg->LoadRefDisp(arg0_ref, - mirror::ObjectArray<mirror::Object>::OffsetOfElement( - target_method.dex_method_index).Int32Value(), - arg0_ref, - kNotVolatile); + if (!use_pc_rel) { + cg->LoadRefDisp(arg0_ref, + mirror::ObjectArray<mirror::Object>::OffsetOfElement( + target_method.dex_method_index).Int32Value(), + arg0_ref, + kNotVolatile); + } else { + size_t offset = cg->dex_cache_arrays_layout_.MethodOffset(target_method.dex_method_index); + cg->OpPcRelDexCacheArrayLoad(cu->dex_file, offset, arg0_ref); + } break; case 3: // Grab the code from the method* if (direct_code == 0) { diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h index 54fd46de0e..8184f02287 100644 --- a/compiler/dex/quick/arm64/codegen_arm64.h +++ b/compiler/dex/quick/arm64/codegen_arm64.h @@ -78,6 +78,9 @@ class Arm64Mir2Lir FINAL : public Mir2Lir { /// @copydoc Mir2Lir::UnconditionallyMarkGCCard(RegStorage) void UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) OVERRIDE; + bool CanUseOpPcRelDexCacheArrayLoad() const OVERRIDE; + void OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset, RegStorage r_dest) OVERRIDE; + LIR* OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg, RegStorage base_reg, int offset, int check_value, LIR* target, LIR** compare) OVERRIDE; @@ -393,9 +396,16 @@ class Arm64Mir2Lir FINAL : public Mir2Lir { void GenDivRemLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, bool is_div, int flags); + static int Arm64NextSDCallInsn(CompilationUnit* cu, CallInfo* info, + int state, const 
MethodReference& target_method, + uint32_t unused_idx, + uintptr_t direct_code, uintptr_t direct_method, + InvokeType type); + static const A64EncodingMap EncodingMap[kA64Last]; ArenaVector<LIR*> call_method_insns_; + ArenaVector<LIR*> dex_cache_access_insns_; int GenDalvikArgsBulkCopy(CallInfo* info, int first, int count) OVERRIDE; }; diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc index 2372ccc527..20f61f2261 100644 --- a/compiler/dex/quick/arm64/int_arm64.cc +++ b/compiler/dex/quick/arm64/int_arm64.cc @@ -943,6 +943,23 @@ void Arm64Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) { lir->target = target; } +bool Arm64Mir2Lir::CanUseOpPcRelDexCacheArrayLoad() const { + return dex_cache_arrays_layout_.Valid(); +} + +void Arm64Mir2Lir::OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset, + RegStorage r_dest) { + LIR* adrp = NewLIR2(kA64Adrp2xd, r_dest.GetReg(), 0); + adrp->operands[2] = WrapPointer(dex_file); + adrp->operands[3] = offset; + adrp->operands[4] = WrapPointer(adrp); + dex_cache_access_insns_.push_back(adrp); + LIR* ldr = LoadBaseDisp(r_dest, 0, r_dest, kReference, kNotVolatile); + ldr->operands[4] = adrp->operands[4]; + ldr->flags.fixup = kFixupLabel; + dex_cache_access_insns_.push_back(ldr); +} + LIR* Arm64Mir2Lir::OpVldm(RegStorage r_base, int count) { UNUSED(r_base, count); LOG(FATAL) << "Unexpected use of OpVldm for Arm64"; @@ -1441,6 +1458,14 @@ static uint32_t GenPairWise(uint32_t reg_mask, int* reg1, int* reg2) { return reg_mask; } +static dwarf::Reg DwarfCoreReg(int num) { + return dwarf::Reg::Arm64Core(num); +} + +static dwarf::Reg DwarfFpReg(int num) { + return dwarf::Reg::Arm64Fp(num); +} + static void SpillCoreRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32_t reg_mask) { int reg1 = -1, reg2 = -1; const int reg_log2_size = 3; @@ -1449,9 +1474,12 @@ static void SpillCoreRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32 reg_mask = GenPairWise(reg_mask, & reg1, & reg2); if (UNLIKELY(reg2 < 0)) { m2l->NewLIR3(WIDE(kA64Str3rXD), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset); + m2l->cfi().RelOffset(DwarfCoreReg(reg1), offset << reg_log2_size); } else { m2l->NewLIR4(WIDE(kA64Stp4rrXD), RegStorage::Solo64(reg2).GetReg(), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset); + m2l->cfi().RelOffset(DwarfCoreReg(reg2), offset << reg_log2_size); + m2l->cfi().RelOffset(DwarfCoreReg(reg1), (offset + 1) << reg_log2_size); } } } @@ -1466,9 +1494,12 @@ static void SpillFPRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32_t if (UNLIKELY(reg2 < 0)) { m2l->NewLIR3(WIDE(kA64Str3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset); + m2l->cfi().RelOffset(DwarfFpReg(reg1), offset << reg_log2_size); } else { m2l->NewLIR4(WIDE(kA64Stp4ffXD), RegStorage::FloatSolo64(reg2).GetReg(), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset); + m2l->cfi().RelOffset(DwarfFpReg(reg2), offset << reg_log2_size); + m2l->cfi().RelOffset(DwarfFpReg(reg1), (offset + 1) << reg_log2_size); } } } @@ -1476,6 +1507,7 @@ static void SpillFPRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32_t static int SpillRegsPreSub(Arm64Mir2Lir* m2l, uint32_t core_reg_mask, uint32_t fp_reg_mask, int frame_size) { m2l->OpRegRegImm(kOpSub, rs_sp, rs_sp, frame_size); + m2l->cfi().AdjustCFAOffset(frame_size); int core_count = POPCOUNT(core_reg_mask); @@ -1535,11 +1567,15 @@ static int SpillRegsPreIndexed(Arm64Mir2Lir* m2l, RegStorage base, uint32_t core RegStorage::FloatSolo64(reg1).GetReg(), 
RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), -all_offset); + m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize); + m2l->cfi().RelOffset(DwarfFpReg(reg1), kArm64PointerSize); } else { m2l->NewLIR4(WIDE(kA64StpPre4ffXD), RegStorage::FloatSolo64(reg1).GetReg(), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), -all_offset); + m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize); + m2l->cfi().RelOffset(DwarfFpReg(reg1), 0); cur_offset = 0; // That core reg needs to go into the upper half. } } else { @@ -1547,10 +1583,15 @@ static int SpillRegsPreIndexed(Arm64Mir2Lir* m2l, RegStorage base, uint32_t core fp_reg_mask = GenPairWise(fp_reg_mask, &reg1, &reg2); m2l->NewLIR4(WIDE(kA64StpPre4ffXD), RegStorage::FloatSolo64(reg2).GetReg(), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), -all_offset); + m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize); + m2l->cfi().RelOffset(DwarfFpReg(reg2), 0); + m2l->cfi().RelOffset(DwarfFpReg(reg1), kArm64PointerSize); } else { fp_reg_mask = ExtractReg(fp_reg_mask, &reg1); m2l->NewLIR4(WIDE(kA64StpPre4ffXD), rs_d0.GetReg(), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), -all_offset); + m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize); + m2l->cfi().RelOffset(DwarfFpReg(reg1), kArm64PointerSize); } } } else { @@ -1563,12 +1604,19 @@ static int SpillRegsPreIndexed(Arm64Mir2Lir* m2l, RegStorage base, uint32_t core core_reg_mask = ExtractReg(core_reg_mask, &reg1); m2l->NewLIR4(WIDE(kA64StpPre4rrXD), rs_xzr.GetReg(), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), -all_offset); + m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize); + m2l->cfi().RelOffset(DwarfCoreReg(reg1), kArm64PointerSize); } else { core_reg_mask = GenPairWise(core_reg_mask, &reg1, &reg2); m2l->NewLIR4(WIDE(kA64StpPre4rrXD), RegStorage::Solo64(reg2).GetReg(), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), -all_offset); + m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize); + m2l->cfi().RelOffset(DwarfCoreReg(reg2), 0); + m2l->cfi().RelOffset(DwarfCoreReg(reg1), kArm64PointerSize); } } + DCHECK_EQ(m2l->cfi().GetCurrentCFAOffset(), + static_cast<int>(all_offset * kArm64PointerSize)); if (fp_count != 0) { for (; fp_reg_mask != 0;) { @@ -1577,10 +1625,13 @@ static int SpillRegsPreIndexed(Arm64Mir2Lir* m2l, RegStorage base, uint32_t core if (UNLIKELY(reg2 < 0)) { m2l->NewLIR3(WIDE(kA64Str3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), cur_offset); + m2l->cfi().RelOffset(DwarfFpReg(reg1), cur_offset * kArm64PointerSize); // Do not increment offset here, as the second half will be filled by a core reg. } else { m2l->NewLIR4(WIDE(kA64Stp4ffXD), RegStorage::FloatSolo64(reg2).GetReg(), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), cur_offset); + m2l->cfi().RelOffset(DwarfFpReg(reg2), cur_offset * kArm64PointerSize); + m2l->cfi().RelOffset(DwarfFpReg(reg1), (cur_offset + 1) * kArm64PointerSize); cur_offset += 2; } } @@ -1593,6 +1644,7 @@ static int SpillRegsPreIndexed(Arm64Mir2Lir* m2l, RegStorage base, uint32_t core core_reg_mask = ExtractReg(core_reg_mask, &reg1); m2l->NewLIR3(WIDE(kA64Str3rXD), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), cur_offset + 1); + m2l->cfi().RelOffset(DwarfCoreReg(reg1), (cur_offset + 1) * kArm64PointerSize); cur_offset += 2; // Half-slot filled now.
} } @@ -1603,6 +1655,8 @@ static int SpillRegsPreIndexed(Arm64Mir2Lir* m2l, RegStorage base, uint32_t core core_reg_mask = GenPairWise(core_reg_mask, &reg1, &reg2); m2l->NewLIR4(WIDE(kA64Stp4rrXD), RegStorage::Solo64(reg2).GetReg(), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), cur_offset); + m2l->cfi().RelOffset(DwarfCoreReg(reg2), cur_offset * kArm64PointerSize); + m2l->cfi().RelOffset(DwarfCoreReg(reg1), (cur_offset + 1) * kArm64PointerSize); } DCHECK_EQ(cur_offset, all_offset); @@ -1633,10 +1687,13 @@ static void UnSpillCoreRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint reg_mask = GenPairWise(reg_mask, & reg1, & reg2); if (UNLIKELY(reg2 < 0)) { m2l->NewLIR3(WIDE(kA64Ldr3rXD), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset); + m2l->cfi().Restore(DwarfCoreReg(reg1)); } else { DCHECK_LE(offset, 63); m2l->NewLIR4(WIDE(kA64Ldp4rrXD), RegStorage::Solo64(reg2).GetReg(), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset); + m2l->cfi().Restore(DwarfCoreReg(reg2)); + m2l->cfi().Restore(DwarfCoreReg(reg1)); } } } @@ -1650,9 +1707,12 @@ static void UnSpillFPRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32 if (UNLIKELY(reg2 < 0)) { m2l->NewLIR3(WIDE(kA64Ldr3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset); + m2l->cfi().Restore(DwarfFpReg(reg1)); } else { m2l->NewLIR4(WIDE(kA64Ldp4ffXD), RegStorage::FloatSolo64(reg2).GetReg(), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset); + m2l->cfi().Restore(DwarfFpReg(reg2)); + m2l->cfi().Restore(DwarfFpReg(reg1)); } } } @@ -1694,6 +1754,7 @@ void Arm64Mir2Lir::UnspillRegs(RegStorage base, uint32_t core_reg_mask, uint32_t early_drop = RoundDown(early_drop, 16); OpRegImm64(kOpAdd, rs_sp, early_drop); + cfi_.AdjustCFAOffset(-early_drop); } // Unspill. @@ -1707,7 +1768,9 @@ void Arm64Mir2Lir::UnspillRegs(RegStorage base, uint32_t core_reg_mask, uint32_t } // Drop the (rest of) the frame. - OpRegImm64(kOpAdd, rs_sp, frame_size - early_drop); + int adjust = frame_size - early_drop; + OpRegImm64(kOpAdd, rs_sp, adjust); + cfi_.AdjustCFAOffset(-adjust); } bool Arm64Mir2Lir::GenInlinedReverseBits(CallInfo* info, OpSize size) { diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc index 09a34bf022..c5c0dc5447 100644 --- a/compiler/dex/quick/arm64/target_arm64.cc +++ b/compiler/dex/quick/arm64/target_arm64.cc @@ -606,7 +606,8 @@ RegisterClass Arm64Mir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volat Arm64Mir2Lir::Arm64Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena) : Mir2Lir(cu, mir_graph, arena), - call_method_insns_(arena->Adapter()) { + call_method_insns_(arena->Adapter()), + dex_cache_access_insns_(arena->Adapter()) { // Sanity check - make sure encoding map lines up. for (int i = 0; i < kA64Last; i++) { DCHECK_EQ(UNWIDE(Arm64Mir2Lir::EncodingMap[i].opcode), i) @@ -846,8 +847,9 @@ RegStorage Arm64Mir2Lir::InToRegStorageArm64Mapper::GetNextReg(ShortyArg arg) { } void Arm64Mir2Lir::InstallLiteralPools() { + patches_.reserve(call_method_insns_.size() + dex_cache_access_insns_.size()); + // PC-relative calls to methods. - patches_.reserve(call_method_insns_.size()); for (LIR* p : call_method_insns_) { DCHECK_EQ(p->opcode, kA64Bl1t); uint32_t target_method_idx = p->operands[1]; @@ -856,6 +858,18 @@ void Arm64Mir2Lir::InstallLiteralPools() { target_dex_file, target_method_idx)); } + // PC-relative references to dex cache arrays.
+ for (LIR* p : dex_cache_access_insns_) { + DCHECK(p->opcode == kA64Adrp2xd || p->opcode == kA64Ldr3rXD); + const LIR* adrp = UnwrapPointer<LIR>(p->operands[4]); + DCHECK_EQ(adrp->opcode, kA64Adrp2xd); + const DexFile* dex_file = UnwrapPointer<DexFile>(adrp->operands[2]); + uint32_t offset = adrp->operands[3]; + DCHECK(!p->flags.is_nop); + DCHECK(!adrp->flags.is_nop); + patches_.push_back(LinkerPatch::DexCacheArrayPatch(p->offset, dex_file, adrp->offset, offset)); + } + // And do the normal processing. Mir2Lir::InstallLiteralPools(); } diff --git a/compiler/dex/quick/arm64/utility_arm64.cc b/compiler/dex/quick/arm64/utility_arm64.cc index f48290d6f7..e9ad8ba175 100644 --- a/compiler/dex/quick/arm64/utility_arm64.cc +++ b/compiler/dex/quick/arm64/utility_arm64.cc @@ -589,13 +589,11 @@ LIR* Arm64Mir2Lir::OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r DCHECK_EQ(shift, 0); // Binary, but rm is encoded twice. return NewLIR2(kA64Rev2rr | wide, r_dest_src1.GetReg(), r_src2.GetReg()); - break; case kOpRevsh: // Binary, but rm is encoded twice. NewLIR2(kA64Rev162rr | wide, r_dest_src1.GetReg(), r_src2.GetReg()); // "sxth r1, r2" is "sbfm r1, r2, #0, #15" return NewLIR4(kA64Sbfm4rrdd | wide, r_dest_src1.GetReg(), r_dest_src1.GetReg(), 0, 15); - break; case kOp2Byte: DCHECK_EQ(shift, ENCODE_NO_SHIFT); // "sbfx r1, r2, #imm1, #imm2" is "sbfm r1, r2, #imm1, #(imm1 + imm2 - 1)". @@ -645,10 +643,9 @@ LIR* Arm64Mir2Lir::OpRegRegExtend(OpKind op, RegStorage r_dest_src1, RegStorage // Note: intentional fallthrough case kOpSub: return OpRegRegRegExtend(op, r_dest_src1, r_dest_src1, r_src2, ext, amount); - break; default: LOG(FATAL) << "Bad Opcode: " << opcode; - break; + UNREACHABLE(); } DCHECK(!IsPseudoLirOp(opcode)); diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc index df72830801..5ea36c2769 100644 --- a/compiler/dex/quick/codegen_util.cc +++ b/compiler/dex/quick/codegen_util.cc @@ -29,6 +29,7 @@ #include "dex/quick/dex_file_to_method_inliner_map.h" #include "dex/verification_results.h" #include "dex/verified_method.h" +#include "utils/dex_cache_arrays_layout-inl.h" #include "verifier/dex_gc_map.h" #include "verifier/method_verifier.h" #include "vmap_table.h" @@ -202,12 +203,17 @@ void Mir2Lir::DumpLIRInsn(LIR* lir, unsigned char* base_addr) { /* Handle pseudo-ops individually, and all regular insns as a group */ switch (lir->opcode) { - case kPseudoMethodEntry: - LOG(INFO) << "-------- method entry " - << PrettyMethod(cu_->method_idx, *cu_->dex_file); + case kPseudoPrologueBegin: + LOG(INFO) << "-------- PrologueBegin"; break; - case kPseudoMethodExit: - LOG(INFO) << "-------- Method_Exit"; + case kPseudoPrologueEnd: + LOG(INFO) << "-------- PrologueEnd"; + break; + case kPseudoEpilogueBegin: + LOG(INFO) << "-------- EpilogueBegin"; + break; + case kPseudoEpilogueEnd: + LOG(INFO) << "-------- EpilogueEnd"; break; case kPseudoBarrier: LOG(INFO) << "-------- BARRIER"; @@ -266,8 +272,9 @@ void Mir2Lir::DumpLIRInsn(LIR* lir, unsigned char* base_addr) { lir, base_addr)); std::string op_operands(BuildInsnString(GetTargetInstFmt(lir->opcode), lir, base_addr)); - LOG(INFO) << StringPrintf("%5p: %-9s%s%s", + LOG(INFO) << StringPrintf("%5p|0x%02x: %-9s%s%s", base_addr + offset, + lir->dalvik_offset, op_name.c_str(), op_operands.c_str(), lir->flags.is_nop ? 
"(nop)" : ""); } @@ -534,13 +541,11 @@ void Mir2Lir::InstallSwitchTables() { DCHECK(tab_rec->anchor->flags.fixup != kFixupNone); bx_offset = tab_rec->anchor->offset + 4; break; - case kX86: - bx_offset = 0; - break; case kX86_64: // RIP relative to switch table. bx_offset = tab_rec->offset; break; + case kX86: case kArm64: case kMips: case kMips64: @@ -712,14 +717,17 @@ void Mir2Lir::CreateMappingTables() { DCHECK_EQ(static_cast<size_t>(write_pos - &encoded_mapping_table_[0]), hdr_data_size); uint8_t* write_pos2 = write_pos + pc2dex_data_size; + bool is_in_prologue_or_epilogue = false; pc2dex_offset = 0u; pc2dex_dalvik_offset = 0u; dex2pc_offset = 0u; dex2pc_dalvik_offset = 0u; for (LIR* tgt_lir = first_lir_insn_; tgt_lir != nullptr; tgt_lir = NEXT_LIR(tgt_lir)) { - if (generate_src_map && !tgt_lir->flags.is_nop) { - src_mapping_table_.push_back(SrcMapElem({tgt_lir->offset, - static_cast<int32_t>(tgt_lir->dalvik_offset)})); + if (generate_src_map && !tgt_lir->flags.is_nop && tgt_lir->opcode >= 0) { + if (!is_in_prologue_or_epilogue) { + src_mapping_table_.push_back(SrcMapElem({tgt_lir->offset, + static_cast<int32_t>(tgt_lir->dalvik_offset)})); + } } if (!tgt_lir->flags.is_nop && (tgt_lir->opcode == kPseudoSafepointPC)) { DCHECK(pc2dex_offset <= tgt_lir->offset); @@ -737,6 +745,12 @@ void Mir2Lir::CreateMappingTables() { dex2pc_offset = tgt_lir->offset; dex2pc_dalvik_offset = tgt_lir->dalvik_offset; } + if (tgt_lir->opcode == kPseudoPrologueBegin || tgt_lir->opcode == kPseudoEpilogueBegin) { + is_in_prologue_or_epilogue = true; + } + if (tgt_lir->opcode == kPseudoPrologueEnd || tgt_lir->opcode == kPseudoEpilogueEnd) { + is_in_prologue_or_epilogue = false; + } } DCHECK_EQ(static_cast<size_t>(write_pos - &encoded_mapping_table_[0]), hdr_data_size + pc2dex_data_size); @@ -1053,6 +1067,12 @@ Mir2Lir::Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena mem_ref_type_(ResourceMask::kHeapRef), mask_cache_(arena), safepoints_(arena->Adapter()), + dex_cache_arrays_layout_(cu->compiler_driver->GetDexCacheArraysLayout(cu->dex_file)), + pc_rel_temp_(nullptr), + dex_cache_arrays_min_offset_(std::numeric_limits<uint32_t>::max()), + cfi_(&last_lir_insn_, + cu->compiler_driver->GetCompilerOptions().GetIncludeDebugSymbols(), + arena), in_to_reg_storage_mapping_(arena) { switch_tables_.reserve(4); fill_array_data_.reserve(4); @@ -1137,14 +1157,6 @@ CompiledMethod* Mir2Lir::GetCompiledMethod() { return lhs.LiteralOffset() < rhs.LiteralOffset(); }); - std::unique_ptr<std::vector<uint8_t>> cfi_info( - cu_->compiler_driver->GetCompilerOptions().GetGenerateGDBInformation() ? 
- ReturnFrameDescriptionEntry() : - nullptr); - ArrayRef<const uint8_t> cfi_ref; - if (cfi_info.get() != nullptr) { - cfi_ref = ArrayRef<const uint8_t>(*cfi_info); - } return CompiledMethod::SwapAllocCompiledMethod( cu_->compiler_driver, cu_->instruction_set, ArrayRef<const uint8_t>(code_buffer_), @@ -1153,8 +1165,8 @@ CompiledMethod* Mir2Lir::GetCompiledMethod() { ArrayRef<const uint8_t>(encoded_mapping_table_), ArrayRef<const uint8_t>(vmap_encoder.GetData()), ArrayRef<const uint8_t>(native_gc_map_), - cfi_ref, - ArrayRef<LinkerPatch>(patches_)); + ArrayRef<const uint8_t>(*cfi_.Patch(code_buffer_.size())), + ArrayRef<const LinkerPatch>(patches_)); } size_t Mir2Lir::GetMaxPossibleCompilerTemps() const { @@ -1304,9 +1316,15 @@ void Mir2Lir::LoadClassType(const DexFile& dex_file, uint32_t type_idx, OpPcRelLoad(TargetReg(symbolic_reg, kRef), data_target); } -std::vector<uint8_t>* Mir2Lir::ReturnFrameDescriptionEntry() { - // Default case is to do nothing. - return nullptr; +bool Mir2Lir::CanUseOpPcRelDexCacheArrayLoad() const { + return false; +} + +void Mir2Lir::OpPcRelDexCacheArrayLoad(const DexFile* dex_file ATTRIBUTE_UNUSED, + int offset ATTRIBUTE_UNUSED, + RegStorage r_dest ATTRIBUTE_UNUSED) { + LOG(FATAL) << "No generic implementation."; + UNREACHABLE(); } RegLocation Mir2Lir::NarrowRegLoc(RegLocation loc) { diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc index 8e3f4ef726..4ac6c0c5b5 100644 --- a/compiler/dex/quick/dex_file_method_inliner.cc +++ b/compiler/dex/quick/dex_file_method_inliner.cc @@ -413,6 +413,17 @@ bool DexFileMethodInliner::AnalyseMethodCode(verifier::MethodVerifier* verifier) return success && AddInlineMethod(verifier->GetMethodReference().dex_method_index, method); } +InlineMethodFlags DexFileMethodInliner::IsIntrinsicOrSpecial(uint32_t method_index) { + ReaderMutexLock mu(Thread::Current(), lock_); + auto it = inline_methods_.find(method_index); + if (it != inline_methods_.end()) { + DCHECK_NE(it->second.flags & (kInlineIntrinsic | kInlineSpecial), 0); + return it->second.flags; + } else { + return kNoInlineMethodFlags; + } +} + bool DexFileMethodInliner::IsIntrinsic(uint32_t method_index, InlineMethod* intrinsic) { ReaderMutexLock mu(Thread::Current(), lock_); auto it = inline_methods_.find(method_index); diff --git a/compiler/dex/quick/dex_file_method_inliner.h b/compiler/dex/quick/dex_file_method_inliner.h index cb521da9df..d1e562119c 100644 --- a/compiler/dex/quick/dex_file_method_inliner.h +++ b/compiler/dex/quick/dex_file_method_inliner.h @@ -65,6 +65,11 @@ class DexFileMethodInliner { SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) LOCKS_EXCLUDED(lock_); /** + * Check whether a particular method index corresponds to an intrinsic or special function. + */ + InlineMethodFlags IsIntrinsicOrSpecial(uint32_t method_index) LOCKS_EXCLUDED(lock_); + + /** * Check whether a particular method index corresponds to an intrinsic function. 
*/ bool IsIntrinsic(uint32_t method_index, InlineMethod* intrinsic) LOCKS_EXCLUDED(lock_); diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc index 2bcaaca822..b132c4cc54 100644 --- a/compiler/dex/quick/gen_common.cc +++ b/compiler/dex/quick/gen_common.cc @@ -24,12 +24,14 @@ #include "dex/mir_graph.h" #include "dex/quick/arm/arm_lir.h" #include "driver/compiler_driver.h" +#include "driver/compiler_options.h" #include "entrypoints/quick/quick_entrypoints.h" #include "mirror/array.h" #include "mirror/object_array-inl.h" #include "mirror/object-inl.h" #include "mirror/object_reference.h" #include "utils.h" +#include "utils/dex_cache_arrays_layout-inl.h" #include "verifier/method_verifier.h" namespace art { @@ -56,6 +58,133 @@ ALWAYS_INLINE static inline bool ForceSlowTypePath(CompilationUnit* cu) { return (cu->enable_debug & (1 << kDebugSlowTypePath)) != 0; } +void Mir2Lir::GenIfNullUseHelperImmMethod( + RegStorage r_result, QuickEntrypointEnum trampoline, int imm, RegStorage r_method) { + class CallHelperImmMethodSlowPath : public LIRSlowPath { + public: + CallHelperImmMethodSlowPath(Mir2Lir* m2l, LIR* fromfast, LIR* cont, + QuickEntrypointEnum trampoline_in, int imm_in, + RegStorage r_method_in, RegStorage r_result_in) + : LIRSlowPath(m2l, fromfast, cont), trampoline_(trampoline_in), + imm_(imm_in), r_method_(r_method_in), r_result_(r_result_in) { + } + + void Compile() { + GenerateTargetLabel(); + if (r_method_.Valid()) { + m2l_->CallRuntimeHelperImmReg(trampoline_, imm_, r_method_, true); + } else { + m2l_->CallRuntimeHelperImmMethod(trampoline_, imm_, true); + } + m2l_->OpRegCopy(r_result_, m2l_->TargetReg(kRet0, kRef)); + m2l_->OpUnconditionalBranch(cont_); + } + + private: + QuickEntrypointEnum trampoline_; + const int imm_; + const RegStorage r_method_; + const RegStorage r_result_; + }; + + LIR* branch = OpCmpImmBranch(kCondEq, r_result, 0, NULL); + LIR* cont = NewLIR0(kPseudoTargetLabel); + + AddSlowPath(new (arena_) CallHelperImmMethodSlowPath(this, branch, cont, trampoline, imm, + r_method, r_result)); +} + +RegStorage Mir2Lir::GenGetOtherTypeForSgetSput(const MirSFieldLoweringInfo& field_info, + int opt_flags) { + DCHECK_NE(field_info.StorageIndex(), DexFile::kDexNoIndex); + // May do runtime call so everything to home locations. + FlushAllRegs(); + RegStorage r_base = TargetReg(kArg0, kRef); + LockTemp(r_base); + RegStorage r_method = RegStorage::InvalidReg(); // Loaded lazily, maybe in the slow-path. + if (CanUseOpPcRelDexCacheArrayLoad()) { + uint32_t offset = dex_cache_arrays_layout_.TypeOffset(field_info.StorageIndex()); + OpPcRelDexCacheArrayLoad(cu_->dex_file, offset, r_base); + } else { + // Using fixed register to sync with possible call to runtime support. + r_method = LoadCurrMethodWithHint(TargetReg(kArg1, kRef)); + LoadRefDisp(r_method, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), r_base, + kNotVolatile); + int32_t offset_of_field = ObjArray::OffsetOfElement(field_info.StorageIndex()).Int32Value(); + LoadRefDisp(r_base, offset_of_field, r_base, kNotVolatile); + } + // r_base now points at static storage (Class*) or nullptr if the type is not yet resolved. + LIR* unresolved_branch = nullptr; + if (!field_info.IsClassInDexCache() && (opt_flags & MIR_CLASS_IS_IN_DEX_CACHE) == 0) { + // Check if r_base is nullptr. 
+ unresolved_branch = OpCmpImmBranch(kCondEq, r_base, 0, nullptr); + } + LIR* uninit_branch = nullptr; + if (!field_info.IsClassInitialized() && (opt_flags & MIR_CLASS_IS_INITIALIZED) == 0) { + // Check if r_base is not yet initialized class. + RegStorage r_tmp = TargetReg(kArg2, kNotWide); + LockTemp(r_tmp); + uninit_branch = OpCmpMemImmBranch(kCondLt, r_tmp, r_base, + mirror::Class::StatusOffset().Int32Value(), + mirror::Class::kStatusInitialized, nullptr, nullptr); + FreeTemp(r_tmp); + } + if (unresolved_branch != nullptr || uninit_branch != nullptr) { + // + // Slow path to ensure a class is initialized for sget/sput. + // + class StaticFieldSlowPath : public Mir2Lir::LIRSlowPath { + public: + // There are up to two branches to the static field slow path, the "unresolved" when the type + // entry in the dex cache is nullptr, and the "uninit" when the class is not yet initialized. + // At least one will be non-nullptr here, otherwise we wouldn't generate the slow path. + StaticFieldSlowPath(Mir2Lir* m2l, LIR* unresolved, LIR* uninit, LIR* cont, int storage_index, + RegStorage r_base_in, RegStorage r_method_in) + : LIRSlowPath(m2l, unresolved != nullptr ? unresolved : uninit, cont), + second_branch_(unresolved != nullptr ? uninit : nullptr), + storage_index_(storage_index), r_base_(r_base_in), r_method_(r_method_in) { + } + + void Compile() { + LIR* target = GenerateTargetLabel(); + if (second_branch_ != nullptr) { + second_branch_->target = target; + } + if (r_method_.Valid()) { + // ArtMethod* was loaded in normal path - use it. + m2l_->CallRuntimeHelperImmReg(kQuickInitializeStaticStorage, storage_index_, r_method_, + true); + } else { + // ArtMethod* wasn't loaded in normal path - use a helper that loads it. + m2l_->CallRuntimeHelperImmMethod(kQuickInitializeStaticStorage, storage_index_, true); + } + // Copy helper's result into r_base, a no-op on all but MIPS. + m2l_->OpRegCopy(r_base_, m2l_->TargetReg(kRet0, kRef)); + + m2l_->OpUnconditionalBranch(cont_); + } + + private: + // Second branch to the slow path, or nullptr if there's only one branch. + LIR* const second_branch_; + + const int storage_index_; + const RegStorage r_base_; + RegStorage r_method_; + }; + + // The slow path is invoked if the r_base is nullptr or the class pointed + // to by it is not initialized. + LIR* cont = NewLIR0(kPseudoTargetLabel); + AddSlowPath(new (arena_) StaticFieldSlowPath(this, unresolved_branch, uninit_branch, cont, + field_info.StorageIndex(), r_base, r_method)); + } + if (IsTemp(r_method)) { + FreeTemp(r_method); + } + return r_base; +} + /* * Generate a kPseudoBarrier marker to indicate the boundary of special * blocks. @@ -571,41 +700,6 @@ void Mir2Lir::GenFillArrayData(MIR* mir, DexOffset table_offset, RegLocation rl_ CallRuntimeHelperImmRegLocation(kQuickHandleFillArrayData, table_offset_from_start, rl_src, true); } -// -// Slow path to ensure a class is initialized for sget/sput. -// -class StaticFieldSlowPath : public Mir2Lir::LIRSlowPath { - public: - // There are up to two branches to the static field slow path, the "unresolved" when the type - // entry in the dex cache is null, and the "uninit" when the class is not yet initialized. - // At least one will be non-null here, otherwise we wouldn't generate the slow path. - StaticFieldSlowPath(Mir2Lir* m2l, LIR* unresolved, LIR* uninit, LIR* cont, int storage_index, - RegStorage r_base) - : LIRSlowPath(m2l, unresolved != nullptr ? unresolved : uninit, cont), - second_branch_(unresolved != nullptr ? 
uninit : nullptr), - storage_index_(storage_index), r_base_(r_base) { - } - - void Compile() { - LIR* target = GenerateTargetLabel(); - if (second_branch_ != nullptr) { - second_branch_->target = target; - } - m2l_->CallRuntimeHelperImm(kQuickInitializeStaticStorage, storage_index_, true); - // Copy helper's result into r_base, a no-op on all but MIPS. - m2l_->OpRegCopy(r_base_, m2l_->TargetReg(kRet0, kRef)); - - m2l_->OpUnconditionalBranch(cont_); - } - - private: - // Second branch to the slow path, or null if there's only one branch. - LIR* const second_branch_; - - const int storage_index_; - const RegStorage r_base_; -}; - void Mir2Lir::GenSput(MIR* mir, RegLocation rl_src, OpSize size) { const MirSFieldLoweringInfo& field_info = mir_graph_->GetSFieldLoweringInfo(mir); DCHECK_EQ(SPutMemAccessType(mir->dalvikInsn.opcode), field_info.MemAccessType()); @@ -615,65 +709,23 @@ void Mir2Lir::GenSput(MIR* mir, RegLocation rl_src, OpSize size) { RegStorage r_base; if (field_info.IsReferrersClass()) { // Fast path, static storage base is this method's class - RegLocation rl_method = LoadCurrMethod(); r_base = AllocTempRef(); - LoadRefDisp(rl_method.reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), r_base, + RegStorage r_method = LoadCurrMethodWithHint(r_base); + LoadRefDisp(r_method, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), r_base, kNotVolatile); - if (IsTemp(rl_method.reg)) { - FreeTemp(rl_method.reg); - } } else { // Medium path, static storage base in a different class which requires checks that the other // class is initialized. - // TODO: remove initialized check now that we are initializing classes in the compiler driver. - DCHECK_NE(field_info.StorageIndex(), DexFile::kDexNoIndex); - // May do runtime call so everything to home locations. - FlushAllRegs(); - // Using fixed register to sync with possible call to runtime support. - RegStorage r_method = TargetReg(kArg1, kRef); - LockTemp(r_method); - LoadCurrMethodDirect(r_method); - r_base = TargetReg(kArg0, kRef); - LockTemp(r_base); - LoadRefDisp(r_method, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), r_base, - kNotVolatile); - int32_t offset_of_field = ObjArray::OffsetOfElement(field_info.StorageIndex()).Int32Value(); - LoadRefDisp(r_base, offset_of_field, r_base, kNotVolatile); - // r_base now points at static storage (Class*) or NULL if the type is not yet resolved. - LIR* unresolved_branch = nullptr; - if (!field_info.IsClassInDexCache() && - (mir->optimization_flags & MIR_CLASS_IS_IN_DEX_CACHE) == 0) { - // Check if r_base is NULL. - unresolved_branch = OpCmpImmBranch(kCondEq, r_base, 0, NULL); - } - LIR* uninit_branch = nullptr; + r_base = GenGetOtherTypeForSgetSput(field_info, mir->optimization_flags); if (!field_info.IsClassInitialized() && (mir->optimization_flags & MIR_CLASS_IS_INITIALIZED) == 0) { - // Check if r_base is not yet initialized class. - RegStorage r_tmp = TargetReg(kArg2, kNotWide); - LockTemp(r_tmp); - uninit_branch = OpCmpMemImmBranch(kCondLt, r_tmp, r_base, - mirror::Class::StatusOffset().Int32Value(), - mirror::Class::kStatusInitialized, nullptr, nullptr); - FreeTemp(r_tmp); - } - if (unresolved_branch != nullptr || uninit_branch != nullptr) { - // The slow path is invoked if the r_base is NULL or the class pointed - // to by it is not initialized. 
- LIR* cont = NewLIR0(kPseudoTargetLabel); - AddSlowPath(new (arena_) StaticFieldSlowPath(this, unresolved_branch, uninit_branch, cont, - field_info.StorageIndex(), r_base)); - - if (uninit_branch != nullptr) { - // Ensure load of status and store of value don't re-order. - // TODO: Presumably the actual value store is control-dependent on the status load, - // and will thus not be reordered in any case, since stores are never speculated. - // Does later code "know" that the class is now initialized? If so, we still - // need the barrier to guard later static loads. - GenMemBarrier(kLoadAny); - } + // Ensure load of status and store of value don't re-order. + // TODO: Presumably the actual value store is control-dependent on the status load, + // and will thus not be reordered in any case, since stores are never speculated. + // Does later code "know" that the class is now initialized? If so, we still + // need the barrier to guard later static loads. + GenMemBarrier(kLoadAny); } - FreeTemp(r_method); } // rBase now holds static storage base RegisterClass reg_class = RegClassForFieldLoadStore(size, field_info.IsVolatile()); @@ -735,57 +787,19 @@ void Mir2Lir::GenSget(MIR* mir, RegLocation rl_dest, OpSize size, Primitive::Typ RegStorage r_base; if (field_info.IsReferrersClass()) { // Fast path, static storage base is this method's class - RegLocation rl_method = LoadCurrMethod(); r_base = AllocTempRef(); - LoadRefDisp(rl_method.reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), r_base, + RegStorage r_method = LoadCurrMethodWithHint(r_base); + LoadRefDisp(r_method, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), r_base, kNotVolatile); } else { // Medium path, static storage base in a different class which requires checks that the other // class is initialized - DCHECK_NE(field_info.StorageIndex(), DexFile::kDexNoIndex); - // May do runtime call so everything to home locations. - FlushAllRegs(); - // Using fixed register to sync with possible call to runtime support. - RegStorage r_method = TargetReg(kArg1, kRef); - LockTemp(r_method); - LoadCurrMethodDirect(r_method); - r_base = TargetReg(kArg0, kRef); - LockTemp(r_base); - LoadRefDisp(r_method, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), r_base, - kNotVolatile); - int32_t offset_of_field = ObjArray::OffsetOfElement(field_info.StorageIndex()).Int32Value(); - LoadRefDisp(r_base, offset_of_field, r_base, kNotVolatile); - // r_base now points at static storage (Class*) or NULL if the type is not yet resolved. - LIR* unresolved_branch = nullptr; - if (!field_info.IsClassInDexCache() && - (mir->optimization_flags & MIR_CLASS_IS_IN_DEX_CACHE) == 0) { - // Check if r_base is NULL. - unresolved_branch = OpCmpImmBranch(kCondEq, r_base, 0, NULL); - } - LIR* uninit_branch = nullptr; + r_base = GenGetOtherTypeForSgetSput(field_info, mir->optimization_flags); if (!field_info.IsClassInitialized() && (mir->optimization_flags & MIR_CLASS_IS_INITIALIZED) == 0) { - // Check if r_base is not yet initialized class. - RegStorage r_tmp = TargetReg(kArg2, kNotWide); - LockTemp(r_tmp); - uninit_branch = OpCmpMemImmBranch(kCondLt, r_tmp, r_base, - mirror::Class::StatusOffset().Int32Value(), - mirror::Class::kStatusInitialized, nullptr, nullptr); - FreeTemp(r_tmp); + // Ensure load of status and load of value don't re-order. + GenMemBarrier(kLoadAny); } - if (unresolved_branch != nullptr || uninit_branch != nullptr) { - // The slow path is invoked if the r_base is NULL or the class pointed - // to by it is not initialized. 
- LIR* cont = NewLIR0(kPseudoTargetLabel); - AddSlowPath(new (arena_) StaticFieldSlowPath(this, unresolved_branch, uninit_branch, cont, - field_info.StorageIndex(), r_base)); - - if (uninit_branch != nullptr) { - // Ensure load of status and load of value don't re-order. - GenMemBarrier(kLoadAny); - } - } - FreeTemp(r_method); } // r_base now holds static storage base RegisterClass reg_class = RegClassForFieldLoadStore(size, field_info.IsVolatile()); @@ -1022,64 +1036,41 @@ void Mir2Lir::GenArrayObjPut(int opt_flags, RegLocation rl_array, RegLocation rl } void Mir2Lir::GenConstClass(uint32_t type_idx, RegLocation rl_dest) { - RegLocation rl_method = LoadCurrMethod(); - CheckRegLocation(rl_method); - RegStorage res_reg = AllocTempRef(); + RegLocation rl_result; if (!cu_->compiler_driver->CanAccessTypeWithoutChecks(cu_->method_idx, *cu_->dex_file, type_idx)) { // Call out to helper which resolves type and verifies access. // Resolved type returned in kRet0. - CallRuntimeHelperImmReg(kQuickInitializeTypeAndVerifyAccess, type_idx, rl_method.reg, true); - RegLocation rl_result = GetReturn(kRefReg); - StoreValue(rl_dest, rl_result); + CallRuntimeHelperImmMethod(kQuickInitializeTypeAndVerifyAccess, type_idx, true); + rl_result = GetReturn(kRefReg); } else { - RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true); - // We're don't need access checks, load type from dex cache - int32_t dex_cache_offset = - mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(); - LoadRefDisp(rl_method.reg, dex_cache_offset, res_reg, kNotVolatile); - int32_t offset_of_type = ClassArray::OffsetOfElement(type_idx).Int32Value(); - LoadRefDisp(res_reg, offset_of_type, rl_result.reg, kNotVolatile); + rl_result = EvalLoc(rl_dest, kRefReg, true); + // We don't need access checks, load type from dex cache + RegStorage r_method = RegStorage::InvalidReg(); + if (CanUseOpPcRelDexCacheArrayLoad()) { + size_t offset = dex_cache_arrays_layout_.TypeOffset(type_idx); + OpPcRelDexCacheArrayLoad(cu_->dex_file, offset, rl_result.reg); + } else { + RegLocation rl_method = LoadCurrMethod(); + CheckRegLocation(rl_method); + r_method = rl_method.reg; + int32_t dex_cache_offset = + mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(); + RegStorage res_reg = AllocTempRef(); + LoadRefDisp(r_method, dex_cache_offset, res_reg, kNotVolatile); + int32_t offset_of_type = ClassArray::OffsetOfElement(type_idx).Int32Value(); + LoadRefDisp(res_reg, offset_of_type, rl_result.reg, kNotVolatile); + FreeTemp(res_reg); + } if (!cu_->compiler_driver->CanAssumeTypeIsPresentInDexCache(*cu_->dex_file, type_idx) || ForceSlowTypePath(cu_)) { // Slow path, at runtime test if type is null and if so initialize FlushAllRegs(); - LIR* branch = OpCmpImmBranch(kCondEq, rl_result.reg, 0, NULL); - LIR* cont = NewLIR0(kPseudoTargetLabel); - - // Object to generate the slow path for class resolution. 
- class SlowPath : public LIRSlowPath { - public: - SlowPath(Mir2Lir* m2l, LIR* fromfast, LIR* cont_in, const int type_idx_in, - const RegLocation& rl_method_in, const RegLocation& rl_result_in) - : LIRSlowPath(m2l, fromfast, cont_in), - type_idx_(type_idx_in), rl_method_(rl_method_in), rl_result_(rl_result_in) { - } - - void Compile() { - GenerateTargetLabel(); - - m2l_->CallRuntimeHelperImmReg(kQuickInitializeType, type_idx_, rl_method_.reg, true); - m2l_->OpRegCopy(rl_result_.reg, m2l_->TargetReg(kRet0, kRef)); - m2l_->OpUnconditionalBranch(cont_); - } - - private: - const int type_idx_; - const RegLocation rl_method_; - const RegLocation rl_result_; - }; - - // Add to list for future. - AddSlowPath(new (arena_) SlowPath(this, branch, cont, type_idx, rl_method, rl_result)); - - StoreValue(rl_dest, rl_result); - } else { - // Fast path, we're done - just store result - StoreValue(rl_dest, rl_result); + GenIfNullUseHelperImmMethod(rl_result.reg, kQuickInitializeType, type_idx, r_method); } } + StoreValue(rl_dest, rl_result); } void Mir2Lir::GenConstString(uint32_t string_idx, RegLocation rl_dest) { @@ -1092,64 +1083,42 @@ void Mir2Lir::GenConstString(uint32_t string_idx, RegLocation rl_dest) { FlushAllRegs(); LockCallTemps(); // Using explicit registers - // If the Method* is already in a register, we can save a copy. - RegLocation rl_method = mir_graph_->GetMethodLoc(); - RegStorage r_method; - if (rl_method.location == kLocPhysReg) { - // A temp would conflict with register use below. - DCHECK(!IsTemp(rl_method.reg)); - r_method = rl_method.reg; - } else { - r_method = TargetReg(kArg2, kRef); - LoadCurrMethodDirect(r_method); - } - // Method to declaring class. - LoadRefDisp(r_method, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), - TargetReg(kArg0, kRef), kNotVolatile); - // Declaring class to dex cache strings. - LoadRefDisp(TargetReg(kArg0, kRef), mirror::Class::DexCacheStringsOffset().Int32Value(), - TargetReg(kArg0, kRef), kNotVolatile); - // Might call out to helper, which will return resolved string in kRet0 - LoadRefDisp(TargetReg(kArg0, kRef), offset_of_string, TargetReg(kRet0, kRef), kNotVolatile); - LIR* fromfast = OpCmpImmBranch(kCondEq, TargetReg(kRet0, kRef), 0, NULL); - LIR* cont = NewLIR0(kPseudoTargetLabel); - - { - // Object to generate the slow path for string resolution. - class SlowPath : public LIRSlowPath { - public: - SlowPath(Mir2Lir* m2l, LIR* fromfast_in, LIR* cont_in, RegStorage r_method_in, - int32_t string_idx_in) - : LIRSlowPath(m2l, fromfast_in, cont_in), - r_method_(r_method_in), string_idx_(string_idx_in) { - } - - void Compile() { - GenerateTargetLabel(); - m2l_->CallRuntimeHelperImmReg(kQuickResolveString, string_idx_, r_method_, true); - m2l_->OpUnconditionalBranch(cont_); - } - - private: - const RegStorage r_method_; - const int32_t string_idx_; - }; - - AddSlowPath(new (arena_) SlowPath(this, fromfast, cont, r_method, string_idx)); + RegStorage ret0 = TargetReg(kRet0, kRef); + RegStorage r_method = RegStorage::InvalidReg(); + if (CanUseOpPcRelDexCacheArrayLoad()) { + size_t offset = dex_cache_arrays_layout_.StringOffset(string_idx); + OpPcRelDexCacheArrayLoad(cu_->dex_file, offset, ret0); + } else { + r_method = LoadCurrMethodWithHint(TargetReg(kArg1, kRef)); + // Method to declaring class. + RegStorage arg0 = TargetReg(kArg0, kRef); + LoadRefDisp(r_method, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), + arg0, kNotVolatile); + // Declaring class to dex cache strings. 
+ LoadRefDisp(arg0, mirror::Class::DexCacheStringsOffset().Int32Value(), arg0, kNotVolatile); + + LoadRefDisp(arg0, offset_of_string, ret0, kNotVolatile); } + GenIfNullUseHelperImmMethod(ret0, kQuickResolveString, string_idx, r_method); GenBarrier(); StoreValue(rl_dest, GetReturn(kRefReg)); } else { - RegLocation rl_method = LoadCurrMethod(); - RegStorage res_reg = AllocTempRef(); RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true); - LoadRefDisp(rl_method.reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), res_reg, - kNotVolatile); - LoadRefDisp(res_reg, mirror::Class::DexCacheStringsOffset().Int32Value(), res_reg, - kNotVolatile); - LoadRefDisp(res_reg, offset_of_string, rl_result.reg, kNotVolatile); + if (CanUseOpPcRelDexCacheArrayLoad()) { + size_t offset = dex_cache_arrays_layout_.StringOffset(string_idx); + OpPcRelDexCacheArrayLoad(cu_->dex_file, offset, rl_result.reg); + } else { + RegLocation rl_method = LoadCurrMethod(); + RegStorage res_reg = AllocTempRef(); + LoadRefDisp(rl_method.reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), res_reg, + kNotVolatile); + LoadRefDisp(res_reg, mirror::Class::DexCacheStringsOffset().Int32Value(), res_reg, + kNotVolatile); + LoadRefDisp(res_reg, offset_of_string, rl_result.reg, kNotVolatile); + FreeTemp(res_reg); + } StoreValue(rl_dest, rl_result); } } @@ -1224,14 +1193,20 @@ void Mir2Lir::GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx, Re RegStorage check_class = AllocTypedTemp(false, kRefReg); RegStorage object_class = AllocTypedTemp(false, kRefReg); - LoadCurrMethodDirect(check_class); if (use_declaring_class) { - LoadRefDisp(check_class, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), check_class, + RegStorage r_method = LoadCurrMethodWithHint(check_class); + LoadRefDisp(r_method, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), check_class, + kNotVolatile); + LoadRefDisp(object.reg, mirror::Object::ClassOffset().Int32Value(), object_class, kNotVolatile); + } else if (CanUseOpPcRelDexCacheArrayLoad()) { + size_t offset = dex_cache_arrays_layout_.TypeOffset(type_idx); + OpPcRelDexCacheArrayLoad(cu_->dex_file, offset, check_class); LoadRefDisp(object.reg, mirror::Object::ClassOffset().Int32Value(), object_class, kNotVolatile); } else { - LoadRefDisp(check_class, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), + RegStorage r_method = LoadCurrMethodWithHint(check_class); + LoadRefDisp(r_method, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), check_class, kNotVolatile); LoadRefDisp(object.reg, mirror::Object::ClassOffset().Int32Value(), object_class, kNotVolatile); @@ -1267,20 +1242,19 @@ void Mir2Lir::GenInstanceofCallingHelper(bool needs_access_check, bool type_know FlushAllRegs(); // May generate a call - use explicit registers LockCallTemps(); - RegStorage method_reg = TargetReg(kArg1, kRef); - LoadCurrMethodDirect(method_reg); // kArg1 <= current Method* RegStorage class_reg = TargetReg(kArg2, kRef); // kArg2 will hold the Class* RegStorage ref_reg = TargetReg(kArg0, kRef); // kArg0 will hold the ref. 
RegStorage ret_reg = GetReturn(kRefReg).reg; if (needs_access_check) { // Check we have access to type_idx and if not throw IllegalAccessError, // returns Class* in kArg0 - CallRuntimeHelperImm(kQuickInitializeTypeAndVerifyAccess, type_idx, true); + CallRuntimeHelperImmMethod(kQuickInitializeTypeAndVerifyAccess, type_idx, true); OpRegCopy(class_reg, ret_reg); // Align usage with fast path LoadValueDirectFixed(rl_src, ref_reg); // kArg0 <= ref } else if (use_declaring_class) { + RegStorage r_method = LoadCurrMethodWithHint(TargetReg(kArg1, kRef)); LoadValueDirectFixed(rl_src, ref_reg); // kArg0 <= ref - LoadRefDisp(method_reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), + LoadRefDisp(r_method, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), class_reg, kNotVolatile); } else { if (can_assume_type_is_in_dex_cache) { @@ -1288,42 +1262,23 @@ void Mir2Lir::GenInstanceofCallingHelper(bool needs_access_check, bool type_know LoadValueDirectFixed(rl_src, ref_reg); // kArg0 <= ref } - // Load dex cache entry into class_reg (kArg2) - LoadRefDisp(method_reg, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), - class_reg, kNotVolatile); - int32_t offset_of_type = ClassArray::OffsetOfElement(type_idx).Int32Value(); - LoadRefDisp(class_reg, offset_of_type, class_reg, kNotVolatile); + RegStorage r_method = RegStorage::InvalidReg(); + if (CanUseOpPcRelDexCacheArrayLoad()) { + size_t offset = dex_cache_arrays_layout_.TypeOffset(type_idx); + OpPcRelDexCacheArrayLoad(cu_->dex_file, offset, class_reg); + } else { + r_method = LoadCurrMethodWithHint(TargetReg(kArg1, kRef)); + // Load dex cache entry into class_reg (kArg2) + LoadRefDisp(r_method, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), + class_reg, kNotVolatile); + int32_t offset_of_type = ClassArray::OffsetOfElement(type_idx).Int32Value(); + LoadRefDisp(class_reg, offset_of_type, class_reg, kNotVolatile); + } if (!can_assume_type_is_in_dex_cache) { - LIR* slow_path_branch = OpCmpImmBranch(kCondEq, class_reg, 0, NULL); - LIR* slow_path_target = NewLIR0(kPseudoTargetLabel); + GenIfNullUseHelperImmMethod(class_reg, kQuickInitializeType, type_idx, r_method); // Should load value here. LoadValueDirectFixed(rl_src, ref_reg); // kArg0 <= ref - - class InitTypeSlowPath : public Mir2Lir::LIRSlowPath { - public: - InitTypeSlowPath(Mir2Lir* m2l, LIR* branch, LIR* cont, uint32_t type_idx_in, - RegLocation rl_src_in) - : LIRSlowPath(m2l, branch, cont), type_idx_(type_idx_in), - rl_src_(rl_src_in) { - } - - void Compile() OVERRIDE { - GenerateTargetLabel(); - - m2l_->CallRuntimeHelperImm(kQuickInitializeType, type_idx_, true); - m2l_->OpRegCopy(m2l_->TargetReg(kArg2, kRef), - m2l_->TargetReg(kRet0, kRef)); // Align usage with fast path - m2l_->OpUnconditionalBranch(cont_); - } - - private: - uint32_t type_idx_; - RegLocation rl_src_; - }; - - AddSlowPath(new (arena_) InitTypeSlowPath(this, slow_path_branch, slow_path_target, - type_idx, rl_src)); } } /* kArg0 is ref, kArg2 is class. 
If ref==null, use directly as bool result */ @@ -1426,55 +1381,34 @@ void Mir2Lir::GenCheckCast(int opt_flags, uint32_t insn_idx, uint32_t type_idx, FlushAllRegs(); // May generate a call - use explicit registers LockCallTemps(); - RegStorage method_reg = TargetReg(kArg1, kRef); - LoadCurrMethodDirect(method_reg); // kArg1 <= current Method* RegStorage class_reg = TargetReg(kArg2, kRef); // kArg2 will hold the Class* if (needs_access_check) { // Check we have access to type_idx and if not throw IllegalAccessError, // returns Class* in kRet0 // InitializeTypeAndVerifyAccess(idx, method) - CallRuntimeHelperImm(kQuickInitializeTypeAndVerifyAccess, type_idx, true); + CallRuntimeHelperImmMethod(kQuickInitializeTypeAndVerifyAccess, type_idx, true); OpRegCopy(class_reg, TargetReg(kRet0, kRef)); // Align usage with fast path } else if (use_declaring_class) { + RegStorage method_reg = LoadCurrMethodWithHint(TargetReg(kArg1, kRef)); LoadRefDisp(method_reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), class_reg, kNotVolatile); } else { // Load dex cache entry into class_reg (kArg2) - LoadRefDisp(method_reg, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), - class_reg, kNotVolatile); - int32_t offset_of_type = ClassArray::OffsetOfElement(type_idx).Int32Value(); - LoadRefDisp(class_reg, offset_of_type, class_reg, kNotVolatile); + RegStorage r_method = RegStorage::InvalidReg(); + if (CanUseOpPcRelDexCacheArrayLoad()) { + size_t offset = dex_cache_arrays_layout_.TypeOffset(type_idx); + OpPcRelDexCacheArrayLoad(cu_->dex_file, offset, class_reg); + } else { + r_method = LoadCurrMethodWithHint(TargetReg(kArg1, kRef)); + + LoadRefDisp(r_method, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), + class_reg, kNotVolatile); + int32_t offset_of_type = ClassArray::OffsetOfElement(type_idx).Int32Value(); + LoadRefDisp(class_reg, offset_of_type, class_reg, kNotVolatile); + } if (!cu_->compiler_driver->CanAssumeTypeIsPresentInDexCache(*cu_->dex_file, type_idx)) { // Need to test presence of type in dex cache at runtime - LIR* hop_branch = OpCmpImmBranch(kCondEq, class_reg, 0, NULL); - LIR* cont = NewLIR0(kPseudoTargetLabel); - - // Slow path to initialize the type. Executed if the type is NULL. 
- class SlowPath : public LIRSlowPath { - public: - SlowPath(Mir2Lir* m2l, LIR* fromfast, LIR* cont_in, const int type_idx_in, - const RegStorage class_reg_in) - : LIRSlowPath(m2l, fromfast, cont_in), - type_idx_(type_idx_in), class_reg_(class_reg_in) { - } - - void Compile() { - GenerateTargetLabel(); - - // Call out to helper, which will return resolved type in kArg0 - // InitializeTypeFromCode(idx, method) - m2l_->CallRuntimeHelperImmReg(kQuickInitializeType, type_idx_, - m2l_->TargetReg(kArg1, kRef), true); - m2l_->OpRegCopy(class_reg_, m2l_->TargetReg(kRet0, kRef)); // Align usage with fast path - m2l_->OpUnconditionalBranch(cont_); - } - - public: - const int type_idx_; - const RegStorage class_reg_; - }; - - AddSlowPath(new (arena_) SlowPath(this, hop_branch, cont, type_idx, class_reg)); + GenIfNullUseHelperImmMethod(class_reg, kQuickInitializeType, type_idx, r_method); } } // At this point, class_reg (kArg2) has class diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc index 2d41ba1795..db7095dafb 100755 --- a/compiler/dex/quick/gen_invoke.cc +++ b/compiler/dex/quick/gen_invoke.cc @@ -24,6 +24,7 @@ #include "dex/quick/dex_file_to_method_inliner_map.h" #include "dex_file-inl.h" #include "driver/compiler_driver.h" +#include "driver/compiler_options.h" #include "entrypoints/quick/quick_entrypoints.h" #include "invoke_type.h" #include "mirror/array.h" @@ -1434,10 +1435,12 @@ bool Mir2Lir::GenInlinedUnsafePut(CallInfo* info, bool is_long, void Mir2Lir::GenInvoke(CallInfo* info) { DCHECK(cu_->compiler_driver->GetMethodInlinerMap() != nullptr); - const DexFile* dex_file = info->method_ref.dex_file; - if (cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(dex_file) - ->GenIntrinsic(this, info)) { - return; + if (mir_graph_->GetMethodLoweringInfo(info->mir).IsIntrinsic()) { + const DexFile* dex_file = info->method_ref.dex_file; + auto* inliner = cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(dex_file); + if (inliner->GenIntrinsic(this, info)) { + return; + } } GenInvokeNoInline(info); } diff --git a/compiler/dex/quick/gen_loadstore.cc b/compiler/dex/quick/gen_loadstore.cc index b71691f20a..54e5742837 100644 --- a/compiler/dex/quick/gen_loadstore.cc +++ b/compiler/dex/quick/gen_loadstore.cc @@ -340,6 +340,20 @@ void Mir2Lir::LoadCurrMethodDirect(RegStorage r_tgt) { LoadValueDirectFixed(mir_graph_->GetMethodLoc(), r_tgt); } +RegStorage Mir2Lir::LoadCurrMethodWithHint(RegStorage r_hint) { + // If the method is promoted to a register, return that register, otherwise load it to r_hint. + // (Replacement for LoadCurrMethod() usually used when LockCallTemps() is in effect.) + DCHECK(r_hint.Valid()); + RegLocation rl_method = mir_graph_->GetMethodLoc(); + if (rl_method.location == kLocPhysReg) { + DCHECK(!IsTemp(rl_method.reg)); + return rl_method.reg; + } else { + LoadCurrMethodDirect(r_hint); + return r_hint; + } +} + RegLocation Mir2Lir::LoadCurrMethod() { return LoadValue(mir_graph_->GetMethodLoc(), kRefReg); } diff --git a/compiler/dex/quick/lazy_debug_frame_opcode_writer.cc b/compiler/dex/quick/lazy_debug_frame_opcode_writer.cc new file mode 100644 index 0000000000..5cfb0ff557 --- /dev/null +++ b/compiler/dex/quick/lazy_debug_frame_opcode_writer.cc @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "lazy_debug_frame_opcode_writer.h" +#include "mir_to_lir.h" + +namespace art { +namespace dwarf { + +const ArenaVector<uint8_t>* LazyDebugFrameOpCodeWriter::Patch(size_t code_size) { + if (!this->enabled_) { + DCHECK(this->data()->empty()); + return this->data(); + } + if (!patched_) { + patched_ = true; + // Move our data buffer to temporary variable. + ArenaVector<uint8_t> old_opcodes(this->opcodes_.get_allocator()); + old_opcodes.swap(this->opcodes_); + // Refill our data buffer with patched opcodes. + this->opcodes_.reserve(old_opcodes.size() + advances_.size() + 4); + size_t pos = 0; + for (auto advance : advances_) { + DCHECK_GE(advance.pos, pos); + // Copy old data up to the point when advance was issued. + this->opcodes_.insert(this->opcodes_.end(), + old_opcodes.begin() + pos, + old_opcodes.begin() + advance.pos); + pos = advance.pos; + // This may be null if there is no slow-path code after return. + LIR* next_lir = NEXT_LIR(advance.last_lir_insn); + // Insert the advance command with its final offset. + Base::AdvancePC(next_lir != nullptr ? next_lir->offset : code_size); + } + // Copy the final segment. + this->opcodes_.insert(this->opcodes_.end(), + old_opcodes.begin() + pos, + old_opcodes.end()); + Base::AdvancePC(code_size); + } + return this->data(); +} + +} // namespace dwarf +} // namespace art diff --git a/compiler/dex/quick/lazy_debug_frame_opcode_writer.h b/compiler/dex/quick/lazy_debug_frame_opcode_writer.h new file mode 100644 index 0000000000..94ffd7f957 --- /dev/null +++ b/compiler/dex/quick/lazy_debug_frame_opcode_writer.h @@ -0,0 +1,69 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_DEX_QUICK_LAZY_DEBUG_FRAME_OPCODE_WRITER_H_ +#define ART_COMPILER_DEX_QUICK_LAZY_DEBUG_FRAME_OPCODE_WRITER_H_ + +#include "base/arena_allocator.h" +#include "base/arena_containers.h" +#include "dwarf/debug_frame_opcode_writer.h" + +namespace art { +struct LIR; +namespace dwarf { + +// When we are generating the CFI code, we do not know the instruction offsets, +// so this class stores the LIR references and patches the instruction stream later. +class LazyDebugFrameOpCodeWriter FINAL + : public DebugFrameOpCodeWriter<ArenaAllocatorAdapter<uint8_t>> { + typedef DebugFrameOpCodeWriter<ArenaAllocatorAdapter<uint8_t>> Base; + public: + // This method is implicitly called by the opcode writers.
+ virtual void ImplicitlyAdvancePC() OVERRIDE { + DCHECK_EQ(patched_, false); + DCHECK_EQ(this->current_pc_, 0); + advances_.push_back({this->data()->size(), *last_lir_insn_}); + } + + const ArenaVector<uint8_t>* Patch(size_t code_size); + + explicit LazyDebugFrameOpCodeWriter(LIR** last_lir_insn, bool enable_writes, + ArenaAllocator* allocator) + : Base(enable_writes, allocator->Adapter()), + last_lir_insn_(last_lir_insn), + advances_(allocator->Adapter()), + patched_(false) { + } + + private: + typedef struct { + size_t pos; + LIR* last_lir_insn; + } Advance; + + using Base::data; // Hidden. Use Patch method instead. + + LIR** last_lir_insn_; + ArenaVector<Advance> advances_; + bool patched_; + + DISALLOW_COPY_AND_ASSIGN(LazyDebugFrameOpCodeWriter); +}; + +} // namespace dwarf +} // namespace art + +#endif // ART_COMPILER_DEX_QUICK_LAZY_DEBUG_FRAME_OPCODE_WRITER_H_ diff --git a/compiler/dex/quick/local_optimizations.cc b/compiler/dex/quick/local_optimizations.cc index e5738998a0..6cdf56773e 100644 --- a/compiler/dex/quick/local_optimizations.cc +++ b/compiler/dex/quick/local_optimizations.cc @@ -493,15 +493,14 @@ void Mir2Lir::ApplyLoadHoisting(LIR* head_lir, LIR* tail_lir) { /* Found a slot to hoist to */ if (slot >= 0) { LIR* cur_lir = prev_inst_list[slot]; - LIR* new_load_lir = - static_cast<LIR*>(arena_->Alloc(sizeof(LIR), kArenaAllocLIR)); - *new_load_lir = *this_lir; + LIR* prev_lir = PREV_LIR(this_lir); + UnlinkLIR(this_lir); /* * Insertion is guaranteed to succeed since check_lir * is never the first LIR on the list */ - InsertLIRBefore(cur_lir, new_load_lir); - NopLIR(this_lir); + InsertLIRBefore(cur_lir, this_lir); + this_lir = prev_lir; // Continue the loop with the next LIR. } } } diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc index de66b35418..05570e4bde 100644 --- a/compiler/dex/quick/mips/call_mips.cc +++ b/compiler/dex/quick/mips/call_mips.cc @@ -238,7 +238,12 @@ void MipsMir2Lir::UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) { FreeTemp(reg_card_no); } +static dwarf::Reg DwarfCoreReg(int num) { + return dwarf::Reg::MipsCore(num); +} + void MipsMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { + DCHECK_EQ(cfi_.GetCurrentCFAOffset(), 0); int spill_count = num_core_spills_ + num_fp_spills_; /* * On entry, A0, A1, A2 & A3 are live. On Mips64, A4, A5, A6 & A7 are also live. @@ -275,7 +280,6 @@ void MipsMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) */ skip_overflow_check = mir_graph_->MethodIsLeaf() && !FrameNeedsStackCheck(frame_size_, target); - NewLIR0(kPseudoMethodEntry); RegStorage check_reg = AllocPtrSizeTemp(); RegStorage new_sp = AllocPtrSizeTemp(); const RegStorage rs_sp = TargetPtrReg(kSp); @@ -305,10 +309,12 @@ void MipsMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) // RA is offset 0 since we push in reverse order. m2l_->LoadWordDisp(m2l_->TargetPtrReg(kSp), 0, m2l_->TargetPtrReg(kLr)); m2l_->OpRegImm(kOpAdd, m2l_->TargetPtrReg(kSp), sp_displace_); + m2l_->cfi().AdjustCFAOffset(-sp_displace_); m2l_->ClobberCallerSave(); RegStorage r_tgt = m2l_->CallHelperSetup(kQuickThrowStackOverflow); // Doesn't clobber LR. 
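// The CFA bookkeeping convention adopted throughout this change (an illustrative
// summary, using the cfi() interface added by this patch): every emitted SP
// adjustment is mirrored by an AdjustCFAOffset() call so the unwind opcodes track
// the stack pointer, and mir_to_lir.cc DCHECKs that cfi_.GetCurrentCFAOffset()
// equals frame_size_ at the prologue/epilogue boundaries. The basic pairing is:
//
//   OpRegImm(kOpSub, rs_sp, frame_size_);   // prologue: SP -= frame_size_
//   cfi_.AdjustCFAOffset(frame_size_);      // unwinder: CFA = SP + frame_size_
//   ...
//   OpRegImm(kOpAdd, rs_sp, frame_size_);   // epilogue: SP += frame_size_
//   cfi_.AdjustCFAOffset(-frame_size_);     // unwinder: CFA = SP again
//
// Out-of-line slow paths like this one pop part of the frame before calling the
// throw entrypoint, so the tracked offset is adjusted down above and restored right
// after the helper call below, leaving the bookkeeping for the surrounding in-line
// code unchanged.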
m2l_->CallHelper(r_tgt, kQuickThrowStackOverflow, false /* MarkSafepointPC */, false /* UseLink */); + m2l_->cfi().AdjustCFAOffset(sp_displace_); } private: @@ -319,8 +325,10 @@ void MipsMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, spill_count * ptr_size)); // TODO: avoid copy for small frame sizes. OpRegCopy(rs_sp, new_sp); // Establish stack. + cfi_.AdjustCFAOffset(frame_sub); } else { OpRegImm(kOpSub, rs_sp, frame_sub); + cfi_.AdjustCFAOffset(frame_sub); } FlushIns(ArgLocs, rl_method); @@ -338,6 +346,7 @@ void MipsMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) } void MipsMir2Lir::GenExitSequence() { + cfi_.RememberState(); /* * In the exit path, rMIPS_RET0/rMIPS_RET1 are live - make sure they aren't * allocated by the register utilities as temps. @@ -345,9 +354,11 @@ void MipsMir2Lir::GenExitSequence() { LockTemp(TargetPtrReg(kRet0)); LockTemp(TargetPtrReg(kRet1)); - NewLIR0(kPseudoMethodExit); UnSpillCoreRegs(); OpReg(kOpBx, TargetPtrReg(kLr)); + // The CFI should be restored for any code that follows the exit block. + cfi_.RestoreState(); + cfi_.DefCFAOffset(frame_size_); } void MipsMir2Lir::GenSpecialExitSequence() { @@ -366,15 +377,20 @@ void MipsMir2Lir::GenSpecialEntryForSuspend() { fp_vmap_table_.clear(); const RegStorage rs_sp = TargetPtrReg(kSp); OpRegImm(kOpSub, rs_sp, frame_size_); + cfi_.AdjustCFAOffset(frame_size_); StoreWordDisp(rs_sp, frame_size_ - (cu_->target64 ? 8 : 4), TargetPtrReg(kLr)); + cfi_.RelOffset(DwarfCoreReg(rRA), frame_size_ - (cu_->target64 ? 8 : 4)); StoreWordDisp(rs_sp, 0, TargetPtrReg(kArg0)); + // Do not generate CFI for scratch register A0. } void MipsMir2Lir::GenSpecialExitForSuspend() { // Pop the frame. Don't pop ArtMethod*, it's no longer needed. const RegStorage rs_sp = TargetPtrReg(kSp); LoadWordDisp(rs_sp, frame_size_ - (cu_->target64 ? 
8 : 4), TargetPtrReg(kLr)); + cfi_.Restore(DwarfCoreReg(rRA)); OpRegImm(kOpAdd, rs_sp, frame_size_); + cfi_.AdjustCFAOffset(-frame_size_); } /* @@ -387,73 +403,73 @@ static int NextSDCallInsn(CompilationUnit* cu, CallInfo* info ATTRIBUTE_UNUSED, Mir2Lir* cg = static_cast<Mir2Lir*>(cu->cg.get()); if (direct_code != 0 && direct_method != 0) { switch (state) { - case 0: // Get the current Method* [sets kArg0] - if (direct_code != static_cast<uintptr_t>(-1)) { - if (cu->target64) { - cg->LoadConstantWide(cg->TargetPtrReg(kInvokeTgt), direct_code); + case 0: // Get the current Method* [sets kArg0] + if (direct_code != static_cast<uintptr_t>(-1)) { + if (cu->target64) { + cg->LoadConstantWide(cg->TargetPtrReg(kInvokeTgt), direct_code); + } else { + cg->LoadConstant(cg->TargetPtrReg(kInvokeTgt), direct_code); + } } else { - cg->LoadConstant(cg->TargetPtrReg(kInvokeTgt), direct_code); + cg->LoadCodeAddress(target_method, type, kInvokeTgt); } - } else { - cg->LoadCodeAddress(target_method, type, kInvokeTgt); - } - if (direct_method != static_cast<uintptr_t>(-1)) { - if (cu->target64) { - cg->LoadConstantWide(cg->TargetReg(kArg0, kRef), direct_method); + if (direct_method != static_cast<uintptr_t>(-1)) { + if (cu->target64) { + cg->LoadConstantWide(cg->TargetReg(kArg0, kRef), direct_method); + } else { + cg->LoadConstant(cg->TargetReg(kArg0, kRef), direct_method); + } } else { - cg->LoadConstant(cg->TargetReg(kArg0, kRef), direct_method); + cg->LoadMethodAddress(target_method, type, kArg0); } - } else { - cg->LoadMethodAddress(target_method, type, kArg0); - } - break; - default: - return -1; + break; + default: + return -1; } } else { RegStorage arg0_ref = cg->TargetReg(kArg0, kRef); switch (state) { - case 0: // Get the current Method* [sets kArg0] - // TUNING: we can save a reg copy if Method* has been promoted. - cg->LoadCurrMethodDirect(arg0_ref); - break; - case 1: // Get method->dex_cache_resolved_methods_ - cg->LoadRefDisp(arg0_ref, - mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value(), - arg0_ref, - kNotVolatile); - // Set up direct code if known. - if (direct_code != 0) { - if (direct_code != static_cast<uintptr_t>(-1)) { - if (cu->target64) { - cg->LoadConstantWide(cg->TargetPtrReg(kInvokeTgt), direct_code); + case 0: // Get the current Method* [sets kArg0] + // TUNING: we can save a reg copy if Method* has been promoted. + cg->LoadCurrMethodDirect(arg0_ref); + break; + case 1: // Get method->dex_cache_resolved_methods_ + cg->LoadRefDisp(arg0_ref, + mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value(), + arg0_ref, + kNotVolatile); + // Set up direct code if known. 
+ if (direct_code != 0) { + if (direct_code != static_cast<uintptr_t>(-1)) { + if (cu->target64) { + cg->LoadConstantWide(cg->TargetPtrReg(kInvokeTgt), direct_code); + } else { + cg->LoadConstant(cg->TargetPtrReg(kInvokeTgt), direct_code); + } } else { - cg->LoadConstant(cg->TargetPtrReg(kInvokeTgt), direct_code); + CHECK_LT(target_method.dex_method_index, target_method.dex_file->NumMethodIds()); + cg->LoadCodeAddress(target_method, type, kInvokeTgt); } - } else { - CHECK_LT(target_method.dex_method_index, target_method.dex_file->NumMethodIds()); - cg->LoadCodeAddress(target_method, type, kInvokeTgt); } - } - break; - case 2: // Grab target method* - CHECK_EQ(cu->dex_file, target_method.dex_file); - cg->LoadRefDisp(arg0_ref, - mirror::ObjectArray<mirror::Object>:: - OffsetOfElement(target_method.dex_method_index).Int32Value(), - arg0_ref, - kNotVolatile); - break; - case 3: // Grab the code from the method* - if (direct_code == 0) { - int32_t offset = mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset( - InstructionSetPointerSize(cu->instruction_set)).Int32Value(); - // Get the compiled code address [use *alt_from or kArg0, set kInvokeTgt] - cg->LoadWordDisp(arg0_ref, offset, cg->TargetPtrReg(kInvokeTgt)); - } - break; - default: - return -1; + break; + case 2: // Grab target method* + CHECK_EQ(cu->dex_file, target_method.dex_file); + cg->LoadRefDisp(arg0_ref, + mirror::ObjectArray<mirror::Object>:: + OffsetOfElement(target_method.dex_method_index).Int32Value(), + arg0_ref, + kNotVolatile); + break; + case 3: // Grab the code from the method* + if (direct_code == 0) { + int32_t offset = mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset( + InstructionSetPointerSize(cu->instruction_set)).Int32Value(); + // Get the compiled code address [use *alt_from or kArg0, set kInvokeTgt] + cg->LoadWordDisp(arg0_ref, offset, cg->TargetPtrReg(kInvokeTgt)); + } + break; + default: + return -1; } } return state + 1; diff --git a/compiler/dex/quick/mips/int_mips.cc b/compiler/dex/quick/mips/int_mips.cc index 626b36ea28..1ca8bb618b 100644 --- a/compiler/dex/quick/mips/int_mips.cc +++ b/compiler/dex/quick/mips/int_mips.cc @@ -237,12 +237,12 @@ void MipsMir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) { // note the operands are swapped for the mtc1 and mthc1 instr. // Here if dest is fp reg and src is core reg. if (fpuIs32Bit_) { - NewLIR2(kMipsMtc1, r_src.GetLowReg(), r_dest.GetLowReg()); - NewLIR2(kMipsMtc1, r_src.GetHighReg(), r_dest.GetHighReg()); + NewLIR2(kMipsMtc1, r_src.GetLowReg(), r_dest.GetLowReg()); + NewLIR2(kMipsMtc1, r_src.GetHighReg(), r_dest.GetHighReg()); } else { - r_dest = Fp64ToSolo32(r_dest); - NewLIR2(kMipsMtc1, r_src.GetLowReg(), r_dest.GetReg()); - NewLIR2(kMipsMthc1, r_src.GetHighReg(), r_dest.GetReg()); + r_dest = Fp64ToSolo32(r_dest); + NewLIR2(kMipsMtc1, r_src.GetLowReg(), r_dest.GetReg()); + NewLIR2(kMipsMthc1, r_src.GetHighReg(), r_dest.GetReg()); } } } else { @@ -309,7 +309,13 @@ RegLocation MipsMir2Lir::GenDivRem(RegLocation rl_dest, RegStorage reg1, RegStor RegLocation MipsMir2Lir::GenDivRemLit(RegLocation rl_dest, RegStorage reg1, int lit, bool is_div) { RegStorage t_reg = AllocTemp(); - NewLIR3(kMipsAddiu, t_reg.GetReg(), rZERO, lit); + // lit is guarantee to be a 16-bit constant + if (IsUint<16>(lit)) { + NewLIR3(kMipsOri, t_reg.GetReg(), rZERO, lit); + } else { + // Addiu will sign extend the entire width (32 or 64) of the register. 
+ NewLIR3(kMipsAddiu, t_reg.GetReg(), rZERO, lit); + } RegLocation rl_result = GenDivRem(rl_dest, reg1, t_reg, is_div); FreeTemp(t_reg); return rl_result; @@ -815,20 +821,20 @@ void MipsMir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest, } OpKind op = kOpBkpt; switch (opcode) { - case Instruction::SHL_LONG: - case Instruction::SHL_LONG_2ADDR: - op = kOpLsl; - break; - case Instruction::SHR_LONG: - case Instruction::SHR_LONG_2ADDR: - op = kOpAsr; - break; - case Instruction::USHR_LONG: - case Instruction::USHR_LONG_2ADDR: - op = kOpLsr; - break; - default: - LOG(FATAL) << "Unexpected case: " << opcode; + case Instruction::SHL_LONG: + case Instruction::SHL_LONG_2ADDR: + op = kOpLsl; + break; + case Instruction::SHR_LONG: + case Instruction::SHR_LONG_2ADDR: + op = kOpAsr; + break; + case Instruction::USHR_LONG: + case Instruction::USHR_LONG_2ADDR: + op = kOpLsr; + break; + default: + LOG(FATAL) << "Unexpected case: " << opcode; } rl_shift = LoadValue(rl_shift, kCoreReg); rl_src1 = LoadValueWide(rl_src1, kCoreReg); diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc index a94fad7534..4c0bd8378b 100644 --- a/compiler/dex/quick/mips/target_mips.cc +++ b/compiler/dex/quick/mips/target_mips.cc @@ -830,6 +830,10 @@ LIR* MipsMir2Lir::GenAtomic64Store(RegStorage r_base, int displacement, RegStora return OpReg(kOpBlx, r_tgt); } +static dwarf::Reg DwarfCoreReg(int num) { + return dwarf::Reg::MipsCore(num); +} + void MipsMir2Lir::SpillCoreRegs() { if (num_core_spills_ == 0) { return; @@ -839,11 +843,13 @@ void MipsMir2Lir::SpillCoreRegs() { int offset = num_core_spills_ * ptr_size; const RegStorage rs_sp = TargetPtrReg(kSp); OpRegImm(kOpSub, rs_sp, offset); + cfi_.AdjustCFAOffset(offset); for (int reg = 0; mask; mask >>= 1, reg++) { if (mask & 0x1) { offset -= ptr_size; StoreWordDisp(rs_sp, offset, cu_->target64 ? RegStorage::Solo64(reg) : RegStorage::Solo32(reg)); + cfi_.RelOffset(DwarfCoreReg(reg), offset); } } } @@ -861,9 +867,11 @@ void MipsMir2Lir::UnSpillCoreRegs() { offset -= ptr_size; LoadWordDisp(rs_sp, offset, cu_->target64 ? RegStorage::Solo64(reg) : RegStorage::Solo32(reg)); + cfi_.Restore(DwarfCoreReg(reg)); } } OpRegImm(kOpAdd, rs_sp, frame_size_); + cfi_.AdjustCFAOffset(-frame_size_); } bool MipsMir2Lir::IsUnconditionalBranch(LIR* lir) { diff --git a/compiler/dex/quick/mips/utility_mips.cc b/compiler/dex/quick/mips/utility_mips.cc index bf0e0fc78b..8ab542270d 100644 --- a/compiler/dex/quick/mips/utility_mips.cc +++ b/compiler/dex/quick/mips/utility_mips.cc @@ -283,9 +283,9 @@ LIR* MipsMir2Lir::OpReg(OpKind op, RegStorage r_dest_src) { break; case kOpBx: return NewLIR2(kMipsJalr, rZERO, r_dest_src.GetReg()); - break; default: LOG(FATAL) << "Bad case in OpReg"; + UNREACHABLE(); } return NewLIR2(opcode, cu_->target64 ? 
rRAd : rRA, r_dest_src.GetReg()); } @@ -295,8 +295,8 @@ LIR* MipsMir2Lir::OpRegImm(OpKind op, RegStorage r_dest_src1, int value) { return OpRegRegImm(op, r_dest_src1, r_dest_src1, value); } else { LOG(FATAL) << "Bad case in OpRegImm"; + UNREACHABLE(); } - UNREACHABLE(); } LIR* MipsMir2Lir::OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2) { diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc index 0b480a09c6..961cd4f06b 100644 --- a/compiler/dex/quick/mir_to_lir.cc +++ b/compiler/dex/quick/mir_to_lir.cc @@ -1250,10 +1250,17 @@ bool Mir2Lir::MethodBlockCodeGen(BasicBlock* bb) { if (bb->block_type == kEntryBlock) { ResetRegPool(); int start_vreg = mir_graph_->GetFirstInVR(); + AppendLIR(NewLIR0(kPseudoPrologueBegin)); GenEntrySequence(&mir_graph_->reg_location_[start_vreg], mir_graph_->GetMethodLoc()); + AppendLIR(NewLIR0(kPseudoPrologueEnd)); + DCHECK_EQ(cfi_.GetCurrentCFAOffset(), frame_size_); } else if (bb->block_type == kExitBlock) { ResetRegPool(); + DCHECK_EQ(cfi_.GetCurrentCFAOffset(), frame_size_); + AppendLIR(NewLIR0(kPseudoEpilogueBegin)); GenExitSequence(); + AppendLIR(NewLIR0(kPseudoEpilogueEnd)); + DCHECK_EQ(cfi_.GetCurrentCFAOffset(), frame_size_); } for (mir = bb->first_mir_insn; mir != NULL; mir = mir->next) { diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h index cca4e5a30a..db59714742 100644 --- a/compiler/dex/quick/mir_to_lir.h +++ b/compiler/dex/quick/mir_to_lir.h @@ -29,9 +29,11 @@ #include "dex/quick/resource_mask.h" #include "entrypoints/quick/quick_entrypoints_enum.h" #include "invoke_type.h" +#include "lazy_debug_frame_opcode_writer.h" #include "leb128.h" #include "safe_map.h" #include "utils/array_ref.h" +#include "utils/dex_cache_arrays_layout.h" #include "utils/stack_checks.h" namespace art { @@ -134,6 +136,7 @@ class BasicBlock; class BitVector; struct CallInfo; struct CompilationUnit; +struct CompilerTemp; struct InlineMethod; class MIR; struct LIR; @@ -141,6 +144,7 @@ struct RegisterInfo; class DexFileMethodInliner; class MIRGraph; class MirMethodLoweringInfo; +class MirSFieldLoweringInfo; typedef int (*NextCallInsn)(CompilationUnit*, CallInfo*, int, const MethodReference& target_method, @@ -632,7 +636,7 @@ class Mir2Lir { RegisterClass ShortyToRegClass(char shorty_type); RegisterClass LocToRegClass(RegLocation loc); int ComputeFrameSize(); - virtual void Materialize(); + void Materialize(); virtual CompiledMethod* GetCompiledMethod(); void MarkSafepointPC(LIR* inst); void MarkSafepointPCAfter(LIR* after); @@ -773,9 +777,10 @@ class Mir2Lir { */ virtual RegLocation EvalLoc(RegLocation loc, int reg_class, bool update); - void CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs); + virtual void AnalyzeMIR(RefCounts* core_counts, MIR* mir, uint32_t weight); + virtual void CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs); void DumpCounts(const RefCounts* arr, int size, const char* msg); - void DoPromotion(); + virtual void DoPromotion(); int VRegOffset(int v_reg); int SRegOffset(int s_reg); RegLocation GetReturnWide(RegisterClass reg_class); @@ -956,6 +961,7 @@ class Mir2Lir { // Shared by all targets - implemented in gen_loadstore.cc. RegLocation LoadCurrMethod(); void LoadCurrMethodDirect(RegStorage r_tgt); + RegStorage LoadCurrMethodWithHint(RegStorage r_hint); virtual LIR* LoadConstant(RegStorage r_dest, int value); // Natural word size. 
LIR* LoadWordDisp(RegStorage r_base, int displacement, RegStorage r_dest) { @@ -1093,6 +1099,18 @@ class Mir2Lir { virtual void LoadClassType(const DexFile& dex_file, uint32_t type_idx, SpecialTargetRegister symbolic_reg); + // TODO: Support PC-relative dex cache array loads on all platforms and + // replace CanUseOpPcRelDexCacheArrayLoad() with dex_cache_arrays_layout_.Valid(). + virtual bool CanUseOpPcRelDexCacheArrayLoad() const; + + /* + * @brief Load an element of one of the dex cache arrays. + * @param dex_file the dex file associated with the target dex cache. + * @param offset the offset of the element in the fixed dex cache arrays' layout. + * @param r_dest the register where to load the element. + */ + virtual void OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset, RegStorage r_dest); + // Routines that work for the generic case, but may be overriden by target. /* * @brief Compare memory to immediate, and branch if condition true. @@ -1491,6 +1509,12 @@ class Mir2Lir { return 0; } + /** + * @brief Buffer of DWARF's Call Frame Information opcodes. + * @details It is used by debuggers and other tools to unwind the call stack. + */ + dwarf::LazyDebugFrameOpCodeWriter& cfi() { return cfi_; } + protected: Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena); @@ -1556,11 +1580,6 @@ class Mir2Lir { bool can_assume_type_is_in_dex_cache, uint32_t type_idx, RegLocation rl_dest, RegLocation rl_src); - /* - * @brief Generate the eh_frame FDE information if possible. - * @returns pointer to vector containg FDE information, or NULL. - */ - virtual std::vector<uint8_t>* ReturnFrameDescriptionEntry(); /** * @brief Used to insert marker that can be used to associate MIR with LIR. @@ -1596,7 +1615,6 @@ class Mir2Lir { */ virtual bool GenSpecialCase(BasicBlock* bb, MIR* mir, const InlineMethod& special); - protected: void ClobberBody(RegisterInfo* p); void SetCurrentDexPc(DexOffset dexpc) { current_dalvik_offset_ = dexpc; @@ -1669,6 +1687,23 @@ class Mir2Lir { */ bool GenSpecialIdentity(MIR* mir, const InlineMethod& special); + /** + * @brief Generate code to check if result is null and, if it is, call helper to load it. + * @param r_result the result register. + * @param trampoline the helper to call in slow path. + * @param imm the immediate passed to the helper. + * @param r_method the register with ArtMethod* if available, otherwise RegStorage::Invalid(). + */ + void GenIfNullUseHelperImmMethod( + RegStorage r_result, QuickEntrypointEnum trampoline, int imm, RegStorage r_method); + + /** + * @brief Generate code to retrieve Class* for another type to be used by SGET/SPUT. + * @param field_info information about the field to be accessed. + * @param opt_flags the optimization flags of the MIR. + */ + RegStorage GenGetOtherTypeForSgetSput(const MirSFieldLoweringInfo& field_info, int opt_flags); + void AddDivZeroCheckSlowPath(LIR* branch); // Copy arg0 and arg1 to kArg0 and kArg1 safely, possibly using @@ -1742,6 +1777,13 @@ class Mir2Lir { // Update references from prev_mir to mir. void UpdateReferenceVRegs(MIR* mir, MIR* prev_mir, BitVector* references); + /** + * Returns true if the frame spills the given core register. + */ + bool CoreSpillMaskContains(int reg) { + return (core_spill_mask_ & (1u << reg)) != 0; + } + public: // TODO: add accessors for these. LIR* literal_list_; // Constants. @@ -1815,7 +1857,23 @@ class Mir2Lir { // Record the MIR that generated a given safepoint (nullptr for prologue safepoints). 
ArenaVector<std::pair<LIR*, MIR*>> safepoints_; - protected: + // The layout of the cu_->dex_file's dex cache arrays for PC-relative addressing. + const DexCacheArraysLayout dex_cache_arrays_layout_; + + // For architectures that don't have true PC-relative addressing, we can promote + // a PC of an instruction (or another PC-relative address such as a pointer to + // the dex cache arrays if supported) to a register. This is indicated to the + // register promotion by allocating a backend temp. + CompilerTemp* pc_rel_temp_; + + // For architectures that don't have true PC-relative addressing (see pc_rel_temp_ + // above) and also have a limited range of offsets for loads, it's be useful to + // know the minimum offset into the dex cache arrays, so we calculate that as well + // if pc_rel_temp_ isn't nullptr. + uint32_t dex_cache_arrays_min_offset_; + + dwarf::LazyDebugFrameOpCodeWriter cfi_; + // ABI support class ShortyArg { public: @@ -1875,6 +1933,8 @@ class Mir2Lir { private: static bool SizeMatchesTypeForEntrypoint(OpSize size, Primitive::Type type); + + friend class QuickCFITest; }; // Class Mir2Lir } // namespace art diff --git a/compiler/dex/quick/quick_cfi_test.cc b/compiler/dex/quick/quick_cfi_test.cc new file mode 100644 index 0000000000..2e62166b7b --- /dev/null +++ b/compiler/dex/quick/quick_cfi_test.cc @@ -0,0 +1,139 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vector> +#include <memory> + +#include "arch/instruction_set.h" +#include "arch/instruction_set_features.h" +#include "cfi_test.h" +#include "dex/compiler_ir.h" +#include "dex/mir_graph.h" +#include "dex/pass_manager.h" +#include "dex/quick/dex_file_to_method_inliner_map.h" +#include "dex/quick/quick_compiler.h" +#include "dex/quick/mir_to_lir.h" +#include "dex/verification_results.h" +#include "driver/compiler_driver.h" +#include "driver/compiler_options.h" +#include "gtest/gtest.h" + +#include "dex/quick/quick_cfi_test_expected.inc" + +namespace art { + +// Run the tests only on host. +#ifndef HAVE_ANDROID_OS + +class QuickCFITest : public CFITest { + public: + // Enable this flag to generate the expected outputs. + static constexpr bool kGenerateExpected = false; + + void TestImpl(InstructionSet isa, const char* isa_str, + const std::vector<uint8_t>& expected_asm, + const std::vector<uint8_t>& expected_cfi) { + // Setup simple compiler context. + ArenaPool pool; + ArenaAllocator arena(&pool); + CompilerOptions compiler_options( + CompilerOptions::kDefaultCompilerFilter, + CompilerOptions::kDefaultHugeMethodThreshold, + CompilerOptions::kDefaultLargeMethodThreshold, + CompilerOptions::kDefaultSmallMethodThreshold, + CompilerOptions::kDefaultTinyMethodThreshold, + CompilerOptions::kDefaultNumDexMethodsThreshold, + true, // generate_gdb_information. + false, + CompilerOptions::kDefaultTopKProfileThreshold, + false, + true, // include_debug_symbols. 
+ false, + false, + false, + false, + nullptr, + new PassManagerOptions(), + nullptr, + false); + VerificationResults verification_results(&compiler_options); + DexFileToMethodInlinerMap method_inliner_map; + std::unique_ptr<const InstructionSetFeatures> isa_features; + std::string error; + isa_features.reset(InstructionSetFeatures::FromVariant(isa, "default", &error)); + CompilerDriver driver(&compiler_options, &verification_results, &method_inliner_map, + Compiler::kQuick, isa, isa_features.get(), + false, 0, 0, 0, false, false, "", 0, -1, ""); + ClassLinker* linker = nullptr; + CompilationUnit cu(&pool, isa, &driver, linker); + DexFile::CodeItem code_item { 0, 0, 0, 0, 0, 0, { 0 } }; // NOLINT + cu.mir_graph.reset(new MIRGraph(&cu, &arena)); + cu.mir_graph->current_code_item_ = &code_item; + + // Generate empty method with some spills. + std::unique_ptr<Mir2Lir> m2l(QuickCompiler::GetCodeGenerator(&cu, nullptr)); + m2l->frame_size_ = 64u; + m2l->CompilerInitializeRegAlloc(); + for (const auto& info : m2l->reg_pool_->core_regs_) { + if (m2l->num_core_spills_ < 2 && !info->IsTemp() && !info->InUse()) { + m2l->core_spill_mask_ |= 1 << info->GetReg().GetReg(); + m2l->num_core_spills_++; + } + } + for (const auto& info : m2l->reg_pool_->sp_regs_) { + if (m2l->num_fp_spills_ < 2 && !info->IsTemp() && !info->InUse()) { + m2l->fp_spill_mask_ |= 1 << info->GetReg().GetReg(); + m2l->num_fp_spills_++; + } + } + m2l->AdjustSpillMask(); + m2l->GenEntrySequence(NULL, m2l->LocCReturnRef()); + m2l->GenExitSequence(); + m2l->HandleSlowPaths(); + m2l->AssembleLIR(); + std::vector<uint8_t> actual_asm(m2l->code_buffer_.begin(), m2l->code_buffer_.end()); + auto const& cfi_data = m2l->cfi().Patch(actual_asm.size()); + std::vector<uint8_t> actual_cfi(cfi_data->begin(), cfi_data->end()); + EXPECT_EQ(m2l->cfi().GetCurrentPC(), static_cast<int>(actual_asm.size())); + + if (kGenerateExpected) { + GenerateExpected(stdout, isa, isa_str, actual_asm, actual_cfi); + } else { + EXPECT_EQ(expected_asm, actual_asm); + EXPECT_EQ(expected_cfi, actual_cfi); + } + } +}; + +#define TEST_ISA(isa) \ + TEST_F(QuickCFITest, isa) { \ + std::vector<uint8_t> expected_asm(expected_asm_##isa, \ + expected_asm_##isa + arraysize(expected_asm_##isa)); \ + std::vector<uint8_t> expected_cfi(expected_cfi_##isa, \ + expected_cfi_##isa + arraysize(expected_cfi_##isa)); \ + TestImpl(isa, #isa, expected_asm, expected_cfi); \ + } + +TEST_ISA(kThumb2) +TEST_ISA(kArm64) +TEST_ISA(kX86) +TEST_ISA(kX86_64) +TEST_ISA(kMips) +TEST_ISA(kMips64) + +#endif // HAVE_ANDROID_OS + +} // namespace art diff --git a/compiler/dex/quick/quick_cfi_test_expected.inc b/compiler/dex/quick/quick_cfi_test_expected.inc new file mode 100644 index 0000000000..634fdeead0 --- /dev/null +++ b/compiler/dex/quick/quick_cfi_test_expected.inc @@ -0,0 +1,217 @@ +static constexpr uint8_t expected_asm_kThumb2[] = { + 0x60, 0xB5, 0x2D, 0xED, 0x02, 0x8A, 0x8B, 0xB0, 0x00, 0x90, 0x0B, 0xB0, + 0xBD, 0xEC, 0x02, 0x8A, 0x60, 0xBD, 0x00, 0x00, +}; +static constexpr uint8_t expected_cfi_kThumb2[] = { + 0x42, 0x0E, 0x0C, 0x85, 0x03, 0x86, 0x02, 0x8E, 0x01, 0x44, 0x0E, 0x14, + 0x05, 0x50, 0x05, 0x05, 0x51, 0x04, 0x42, 0x0E, 0x40, 0x42, 0x0A, 0x42, + 0x0E, 0x14, 0x44, 0x0E, 0x0C, 0x06, 0x50, 0x06, 0x51, 0x44, 0x0B, 0x0E, + 0x40, +}; +// 0x00000000: push {r5, r6, lr} +// 0x00000002: .cfi_def_cfa_offset: 12 +// 0x00000002: .cfi_offset: r5 at cfa-12 +// 0x00000002: .cfi_offset: r6 at cfa-8 +// 0x00000002: .cfi_offset: r14 at cfa-4 +// 0x00000002: vpush.f32 {s16-s17} +// 0x00000006: 
.cfi_def_cfa_offset: 20 +// 0x00000006: .cfi_offset_extended: r80 at cfa-20 +// 0x00000006: .cfi_offset_extended: r81 at cfa-16 +// 0x00000006: sub sp, sp, #44 +// 0x00000008: .cfi_def_cfa_offset: 64 +// 0x00000008: str r0, [sp, #0] +// 0x0000000a: .cfi_remember_state +// 0x0000000a: add sp, sp, #44 +// 0x0000000c: .cfi_def_cfa_offset: 20 +// 0x0000000c: vpop.f32 {s16-s17} +// 0x00000010: .cfi_def_cfa_offset: 12 +// 0x00000010: .cfi_restore_extended: r80 +// 0x00000010: .cfi_restore_extended: r81 +// 0x00000010: pop {r5, r6, pc} +// 0x00000012: lsls r0, r0, #0 +// 0x00000014: .cfi_restore_state +// 0x00000014: .cfi_def_cfa_offset: 64 + +static constexpr uint8_t expected_asm_kArm64[] = { + 0xFF, 0x03, 0x01, 0xD1, 0xE8, 0xA7, 0x01, 0x6D, 0xF4, 0xD7, 0x02, 0xA9, + 0xFE, 0x1F, 0x00, 0xF9, 0xE0, 0x03, 0x00, 0xB9, 0xE8, 0xA7, 0x41, 0x6D, + 0xF4, 0xD7, 0x42, 0xA9, 0xFE, 0x1F, 0x40, 0xF9, 0xFF, 0x03, 0x01, 0x91, + 0xC0, 0x03, 0x5F, 0xD6, +}; +static constexpr uint8_t expected_cfi_kArm64[] = { + 0x44, 0x0E, 0x40, 0x44, 0x05, 0x48, 0x0A, 0x05, 0x49, 0x08, 0x44, 0x94, + 0x06, 0x95, 0x04, 0x44, 0x9E, 0x02, 0x44, 0x0A, 0x44, 0x06, 0x48, 0x06, + 0x49, 0x44, 0xD4, 0xD5, 0x44, 0xDE, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, + 0x40, +}; +// 0x00000000: sub sp, sp, #0x40 (64) +// 0x00000004: .cfi_def_cfa_offset: 64 +// 0x00000004: stp d8, d9, [sp, #24] +// 0x00000008: .cfi_offset_extended: r72 at cfa-40 +// 0x00000008: .cfi_offset_extended: r73 at cfa-32 +// 0x00000008: stp x20, x21, [sp, #40] +// 0x0000000c: .cfi_offset: r20 at cfa-24 +// 0x0000000c: .cfi_offset: r21 at cfa-16 +// 0x0000000c: str lr, [sp, #56] +// 0x00000010: .cfi_offset: r30 at cfa-8 +// 0x00000010: str w0, [sp] +// 0x00000014: .cfi_remember_state +// 0x00000014: ldp d8, d9, [sp, #24] +// 0x00000018: .cfi_restore_extended: r72 +// 0x00000018: .cfi_restore_extended: r73 +// 0x00000018: ldp x20, x21, [sp, #40] +// 0x0000001c: .cfi_restore: r20 +// 0x0000001c: .cfi_restore: r21 +// 0x0000001c: ldr lr, [sp, #56] +// 0x00000020: .cfi_restore: r30 +// 0x00000020: add sp, sp, #0x40 (64) +// 0x00000024: .cfi_def_cfa_offset: 0 +// 0x00000024: ret +// 0x00000028: .cfi_restore_state +// 0x00000028: .cfi_def_cfa_offset: 64 + +static constexpr uint8_t expected_asm_kX86[] = { + 0x83, 0xEC, 0x3C, 0x89, 0x6C, 0x24, 0x34, 0x89, 0x74, 0x24, 0x38, 0x89, + 0x04, 0x24, 0x8B, 0x6C, 0x24, 0x34, 0x8B, 0x74, 0x24, 0x38, 0x83, 0xC4, + 0x3C, 0xC3, 0x00, 0x00, +}; +static constexpr uint8_t expected_cfi_kX86[] = { + 0x43, 0x0E, 0x40, 0x44, 0x85, 0x03, 0x44, 0x86, 0x02, 0x43, 0x0A, 0x44, + 0xC5, 0x44, 0xC6, 0x43, 0x0E, 0x04, 0x43, 0x0B, 0x0E, 0x40, +}; +// 0x00000000: sub esp, 60 +// 0x00000003: .cfi_def_cfa_offset: 64 +// 0x00000003: mov [esp + 52], ebp +// 0x00000007: .cfi_offset: r5 at cfa-12 +// 0x00000007: mov [esp + 56], esi +// 0x0000000b: .cfi_offset: r6 at cfa-8 +// 0x0000000b: mov [esp], eax +// 0x0000000e: .cfi_remember_state +// 0x0000000e: mov ebp, [esp + 52] +// 0x00000012: .cfi_restore: r5 +// 0x00000012: mov esi, [esp + 56] +// 0x00000016: .cfi_restore: r6 +// 0x00000016: add esp, 60 +// 0x00000019: .cfi_def_cfa_offset: 4 +// 0x00000019: ret +// 0x0000001a: addb [eax], al +// 0x0000001c: .cfi_restore_state +// 0x0000001c: .cfi_def_cfa_offset: 64 + +static constexpr uint8_t expected_asm_kX86_64[] = { + 0x48, 0x83, 0xEC, 0x38, 0x48, 0x89, 0x5C, 0x24, 0x28, 0x48, 0x89, 0x6C, + 0x24, 0x30, 0xF2, 0x44, 0x0F, 0x11, 0x64, 0x24, 0x18, 0xF2, 0x44, 0x0F, + 0x11, 0x6C, 0x24, 0x20, 0x48, 0x8B, 0xC7, 0x89, 0x3C, 0x24, 0x48, 0x8B, + 0x5C, 0x24, 0x28, 0x48, 0x8B, 0x6C, 
0x24, 0x30, 0xF2, 0x44, 0x0F, 0x10, + 0x64, 0x24, 0x18, 0xF2, 0x44, 0x0F, 0x10, 0x6C, 0x24, 0x20, 0x48, 0x83, + 0xC4, 0x38, 0xC3, 0x00, +}; +static constexpr uint8_t expected_cfi_kX86_64[] = { + 0x44, 0x0E, 0x40, 0x45, 0x83, 0x06, 0x45, 0x86, 0x04, 0x47, 0x9D, 0x0A, + 0x47, 0x9E, 0x08, 0x46, 0x0A, 0x45, 0xC3, 0x45, 0xC6, 0x47, 0xDD, 0x47, + 0xDE, 0x44, 0x0E, 0x08, 0x42, 0x0B, 0x0E, 0x40, +}; +// 0x00000000: subq rsp, 56 +// 0x00000004: .cfi_def_cfa_offset: 64 +// 0x00000004: movq [rsp + 40], rbx +// 0x00000009: .cfi_offset: r3 at cfa-24 +// 0x00000009: movq [rsp + 48], rbp +// 0x0000000e: .cfi_offset: r6 at cfa-16 +// 0x0000000e: movsd [rsp + 24], xmm12 +// 0x00000015: .cfi_offset: r29 at cfa-40 +// 0x00000015: movsd [rsp + 32], xmm13 +// 0x0000001c: .cfi_offset: r30 at cfa-32 +// 0x0000001c: movq rax, rdi +// 0x0000001f: mov [rsp], edi +// 0x00000022: .cfi_remember_state +// 0x00000022: movq rbx, [rsp + 40] +// 0x00000027: .cfi_restore: r3 +// 0x00000027: movq rbp, [rsp + 48] +// 0x0000002c: .cfi_restore: r6 +// 0x0000002c: movsd xmm12, [rsp + 24] +// 0x00000033: .cfi_restore: r29 +// 0x00000033: movsd xmm13, [rsp + 32] +// 0x0000003a: .cfi_restore: r30 +// 0x0000003a: addq rsp, 56 +// 0x0000003e: .cfi_def_cfa_offset: 8 +// 0x0000003e: ret +// 0x0000003f: addb al, al +// 0x00000040: .cfi_restore_state +// 0x00000040: .cfi_def_cfa_offset: 64 + +static constexpr uint8_t expected_asm_kMips[] = { + 0xF4, 0xFF, 0xBD, 0x27, 0x08, 0x00, 0xB2, 0xAF, 0x04, 0x00, 0xB3, 0xAF, + 0x00, 0x00, 0xBF, 0xAF, 0xCC, 0xFF, 0xBD, 0x27, 0x25, 0x10, 0x80, 0x00, + 0x00, 0x00, 0xA4, 0xAF, 0x3C, 0x00, 0xB2, 0x8F, 0x38, 0x00, 0xB3, 0x8F, + 0x34, 0x00, 0xBF, 0x8F, 0x40, 0x00, 0xBD, 0x27, 0x09, 0x00, 0xE0, 0x03, + 0x00, 0x00, 0x00, 0x00, +}; +static constexpr uint8_t expected_cfi_kMips[] = { + 0x44, 0x0E, 0x0C, 0x44, 0x92, 0x01, 0x44, 0x93, 0x02, 0x44, 0x9F, 0x03, + 0x44, 0x0E, 0x40, 0x48, 0x0A, 0x44, 0xD2, 0x44, 0xD3, 0x44, 0xDF, 0x44, + 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40, +}; +// 0x00000000: addiu r29, r29, -12 +// 0x00000004: .cfi_def_cfa_offset: 12 +// 0x00000004: sw r18, +8(r29) +// 0x00000008: .cfi_offset: r18 at cfa-4 +// 0x00000008: sw r19, +4(r29) +// 0x0000000c: .cfi_offset: r19 at cfa-8 +// 0x0000000c: sw r31, +0(r29) +// 0x00000010: .cfi_offset: r31 at cfa-12 +// 0x00000010: addiu r29, r29, -52 +// 0x00000014: .cfi_def_cfa_offset: 64 +// 0x00000014: or r2, r4, r0 +// 0x00000018: sw r4, +0(r29) +// 0x0000001c: .cfi_remember_state +// 0x0000001c: lw r18, +60(r29) +// 0x00000020: .cfi_restore: r18 +// 0x00000020: lw r19, +56(r29) +// 0x00000024: .cfi_restore: r19 +// 0x00000024: lw r31, +52(r29) +// 0x00000028: .cfi_restore: r31 +// 0x00000028: addiu r29, r29, 64 +// 0x0000002c: .cfi_def_cfa_offset: 0 +// 0x0000002c: jalr r0, r31 +// 0x00000030: nop +// 0x00000034: .cfi_restore_state +// 0x00000034: .cfi_def_cfa_offset: 64 + +static constexpr uint8_t expected_asm_kMips64[] = { + 0xE8, 0xFF, 0xBD, 0x67, 0x10, 0x00, 0xB2, 0xFF, 0x08, 0x00, 0xB3, 0xFF, + 0x00, 0x00, 0xBF, 0xFF, 0xD8, 0xFF, 0xBD, 0x67, 0x25, 0x10, 0x80, 0x00, + 0x00, 0x00, 0xA4, 0xAF, 0x38, 0x00, 0xB2, 0xDF, 0x30, 0x00, 0xB3, 0xDF, + 0x28, 0x00, 0xBF, 0xDF, 0x40, 0x00, 0xBD, 0x67, 0x09, 0x00, 0xE0, 0x03, + 0x00, 0x00, 0x00, 0x00, +}; +static constexpr uint8_t expected_cfi_kMips64[] = { + 0x44, 0x0E, 0x18, 0x44, 0x92, 0x02, 0x44, 0x93, 0x04, 0x44, 0x9F, 0x06, + 0x44, 0x0E, 0x40, 0x48, 0x0A, 0x44, 0xD2, 0x44, 0xD3, 0x44, 0xDF, 0x44, + 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40, +}; +// 0x00000000: daddiu r29, r29, -24 +// 0x00000004: .cfi_def_cfa_offset: 
24 +// 0x00000004: sd r18, +16(r29) +// 0x00000008: .cfi_offset: r18 at cfa-8 +// 0x00000008: sd r19, +8(r29) +// 0x0000000c: .cfi_offset: r19 at cfa-16 +// 0x0000000c: sd r31, +0(r29) +// 0x00000010: .cfi_offset: r31 at cfa-24 +// 0x00000010: daddiu r29, r29, -40 +// 0x00000014: .cfi_def_cfa_offset: 64 +// 0x00000014: or r2, r4, r0 +// 0x00000018: sw r4, +0(r29) +// 0x0000001c: .cfi_remember_state +// 0x0000001c: ld r18, +56(r29) +// 0x00000020: .cfi_restore: r18 +// 0x00000020: ld r19, +48(r29) +// 0x00000024: .cfi_restore: r19 +// 0x00000024: ld r31, +40(r29) +// 0x00000028: .cfi_restore: r31 +// 0x00000028: daddiu r29, r29, 64 +// 0x0000002c: .cfi_def_cfa_offset: 0 +// 0x0000002c: jr r31 +// 0x00000030: nop +// 0x00000034: .cfi_restore_state +// 0x00000034: .cfi_def_cfa_offset: 64 + diff --git a/compiler/dex/quick/quick_compiler.cc b/compiler/dex/quick/quick_compiler.cc index 6d289843e8..2c0bd47405 100644 --- a/compiler/dex/quick/quick_compiler.cc +++ b/compiler/dex/quick/quick_compiler.cc @@ -634,6 +634,12 @@ CompiledMethod* QuickCompiler::Compile(const DexFile::CodeItem* code_item, instruction_set = kThumb2; } CompilationUnit cu(runtime->GetArenaPool(), instruction_set, driver, class_linker); + cu.dex_file = &dex_file; + cu.class_def_idx = class_def_idx; + cu.method_idx = method_idx; + cu.access_flags = access_flags; + cu.invoke_type = invoke_type; + cu.shorty = dex_file.GetMethodShorty(dex_file.GetMethodId(method_idx)); CHECK((cu.instruction_set == kThumb2) || (cu.instruction_set == kArm64) || @@ -792,11 +798,16 @@ bool QuickCompiler::WriteElf(art::File* file, const std::vector<const art::DexFile*>& dex_files, const std::string& android_root, bool is_host) const { - return art::ElfWriterQuick32::Create(file, oat_writer, dex_files, android_root, is_host, - *GetCompilerDriver()); + if (kProduce64BitELFFiles && Is64BitInstructionSet(GetCompilerDriver()->GetInstructionSet())) { + return art::ElfWriterQuick64::Create(file, oat_writer, dex_files, android_root, is_host, + *GetCompilerDriver()); + } else { + return art::ElfWriterQuick32::Create(file, oat_writer, dex_files, android_root, is_host, + *GetCompilerDriver()); + } } -Mir2Lir* QuickCompiler::GetCodeGenerator(CompilationUnit* cu, void* compilation_unit) const { +Mir2Lir* QuickCompiler::GetCodeGenerator(CompilationUnit* cu, void* compilation_unit) { UNUSED(compilation_unit); Mir2Lir* mir_to_lir = nullptr; switch (cu->instruction_set) { diff --git a/compiler/dex/quick/quick_compiler.h b/compiler/dex/quick/quick_compiler.h index 5153a9e82e..09b08ace77 100644 --- a/compiler/dex/quick/quick_compiler.h +++ b/compiler/dex/quick/quick_compiler.h @@ -60,7 +60,7 @@ class QuickCompiler : public Compiler { OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - Mir2Lir* GetCodeGenerator(CompilationUnit* cu, void* compilation_unit) const; + static Mir2Lir* GetCodeGenerator(CompilationUnit* cu, void* compilation_unit); void InitCompilationUnit(CompilationUnit& cu) const OVERRIDE; diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc index 741657bc69..e779479780 100644 --- a/compiler/dex/quick/ralloc_util.cc +++ b/compiler/dex/quick/ralloc_util.cc @@ -19,9 +19,11 @@ #include "mir_to_lir-inl.h" #include "dex/compiler_ir.h" +#include "dex/dataflow_iterator-inl.h" #include "dex/mir_graph.h" #include "driver/compiler_driver.h" #include "driver/dex_compilation_unit.h" +#include "utils/dex_cache_arrays_layout-inl.h" namespace art { @@ -1128,6 +1130,152 @@ RegLocation Mir2Lir::EvalLoc(RegLocation loc, int reg_class, bool 
update) { return loc; } +void Mir2Lir::AnalyzeMIR(RefCounts* core_counts, MIR* mir, uint32_t weight) { + // NOTE: This should be in sync with functions that actually generate code for + // the opcodes below. However, if we get this wrong, the generated code will + // still be correct even if it may be sub-optimal. + int opcode = mir->dalvikInsn.opcode; + bool uses_method = false; + bool uses_pc_rel_load = false; + uint32_t dex_cache_array_offset = std::numeric_limits<uint32_t>::max(); + switch (opcode) { + case Instruction::CHECK_CAST: + case Instruction::INSTANCE_OF: { + if ((opcode == Instruction::CHECK_CAST) && + (mir->optimization_flags & MIR_IGNORE_CHECK_CAST) != 0) { + break; // No code generated. + } + uint32_t type_idx = + (opcode == Instruction::CHECK_CAST) ? mir->dalvikInsn.vB : mir->dalvikInsn.vC; + bool type_known_final, type_known_abstract, use_declaring_class; + bool needs_access_check = !cu_->compiler_driver->CanAccessTypeWithoutChecks( + cu_->method_idx, *cu_->dex_file, type_idx, + &type_known_final, &type_known_abstract, &use_declaring_class); + if (opcode == Instruction::CHECK_CAST && !needs_access_check && + cu_->compiler_driver->IsSafeCast( + mir_graph_->GetCurrentDexCompilationUnit(), mir->offset)) { + break; // No code generated. + } + if (!needs_access_check && !use_declaring_class && CanUseOpPcRelDexCacheArrayLoad()) { + uses_pc_rel_load = true; // And ignore method use in slow path. + dex_cache_array_offset = dex_cache_arrays_layout_.TypeOffset(type_idx); + } else { + uses_method = true; + } + break; + } + + case Instruction::CONST_CLASS: + if (CanUseOpPcRelDexCacheArrayLoad() && + cu_->compiler_driver->CanAccessTypeWithoutChecks(cu_->method_idx, *cu_->dex_file, + mir->dalvikInsn.vB)) { + uses_pc_rel_load = true; // And ignore method use in slow path. + dex_cache_array_offset = dex_cache_arrays_layout_.TypeOffset(mir->dalvikInsn.vB); + } else { + uses_method = true; + } + break; + + case Instruction::CONST_STRING: + case Instruction::CONST_STRING_JUMBO: + if (CanUseOpPcRelDexCacheArrayLoad()) { + uses_pc_rel_load = true; // And ignore method use in slow path. + dex_cache_array_offset = dex_cache_arrays_layout_.StringOffset(mir->dalvikInsn.vB); + } else { + uses_method = true; + } + break; + + case Instruction::INVOKE_VIRTUAL: + case Instruction::INVOKE_SUPER: + case Instruction::INVOKE_DIRECT: + case Instruction::INVOKE_STATIC: + case Instruction::INVOKE_INTERFACE: + case Instruction::INVOKE_VIRTUAL_RANGE: + case Instruction::INVOKE_SUPER_RANGE: + case Instruction::INVOKE_DIRECT_RANGE: + case Instruction::INVOKE_STATIC_RANGE: + case Instruction::INVOKE_INTERFACE_RANGE: + case Instruction::INVOKE_VIRTUAL_QUICK: + case Instruction::INVOKE_VIRTUAL_RANGE_QUICK: { + const MirMethodLoweringInfo& info = mir_graph_->GetMethodLoweringInfo(mir); + InvokeType sharp_type = info.GetSharpType(); + if (info.IsIntrinsic()) { + // Nothing to do, if an intrinsic uses ArtMethod* it's in the slow-path - don't count it. + } else if (!info.FastPath() || (sharp_type != kStatic && sharp_type != kDirect)) { + // Nothing to do, the generated code or entrypoint uses method from the stack. + } else if (info.DirectCode() != 0 && info.DirectMethod() != 0) { + // Nothing to do, the generated code uses method from the stack. 
+ } else if (CanUseOpPcRelDexCacheArrayLoad()) { + uses_pc_rel_load = true; + dex_cache_array_offset = dex_cache_arrays_layout_.MethodOffset(mir->dalvikInsn.vB); + } else { + uses_method = true; + } + break; + } + + case Instruction::NEW_INSTANCE: + case Instruction::NEW_ARRAY: + case Instruction::FILLED_NEW_ARRAY: + case Instruction::FILLED_NEW_ARRAY_RANGE: + uses_method = true; + break; + case Instruction::FILL_ARRAY_DATA: + // Nothing to do, the entrypoint uses method from the stack. + break; + case Instruction::THROW: + // Nothing to do, the entrypoint uses method from the stack. + break; + + case Instruction::SGET: + case Instruction::SGET_WIDE: + case Instruction::SGET_OBJECT: + case Instruction::SGET_BOOLEAN: + case Instruction::SGET_BYTE: + case Instruction::SGET_CHAR: + case Instruction::SGET_SHORT: + case Instruction::SPUT: + case Instruction::SPUT_WIDE: + case Instruction::SPUT_OBJECT: + case Instruction::SPUT_BOOLEAN: + case Instruction::SPUT_BYTE: + case Instruction::SPUT_CHAR: + case Instruction::SPUT_SHORT: { + const MirSFieldLoweringInfo& field_info = mir_graph_->GetSFieldLoweringInfo(mir); + bool fast = IsInstructionSGet(static_cast<Instruction::Code>(opcode)) + ? field_info.FastGet() + : field_info.FastPut(); + if (fast && (cu_->enable_debug & (1 << kDebugSlowFieldPath)) == 0) { + if (!field_info.IsReferrersClass() && CanUseOpPcRelDexCacheArrayLoad()) { + uses_pc_rel_load = true; // And ignore method use in slow path. + dex_cache_array_offset = dex_cache_arrays_layout_.TypeOffset(field_info.StorageIndex()); + } else { + uses_method = true; + } + } else { + // Nothing to do, the entrypoint uses method from the stack. + } + break; + } + + default: + break; + } + if (uses_method) { + core_counts[SRegToPMap(mir_graph_->GetMethodLoc().s_reg_low)].count += weight; + } + if (uses_pc_rel_load) { + if (pc_rel_temp_ != nullptr) { + core_counts[SRegToPMap(pc_rel_temp_->s_reg_low)].count += weight; + DCHECK_NE(dex_cache_array_offset, std::numeric_limits<uint32_t>::max()); + dex_cache_arrays_min_offset_ = std::min(dex_cache_arrays_min_offset_, dex_cache_array_offset); + } else { + // Nothing to do, using PC-relative addressing without promoting base PC to register. + } + } +} + /* USE SSA names to count references of base Dalvik v_regs. */ void Mir2Lir::CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs) { for (int i = 0; i < mir_graph_->GetNumSSARegs(); i++) { @@ -1157,6 +1305,22 @@ void Mir2Lir::CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num } } } + + // Now analyze the ArtMethod* and pc_rel_temp_ uses. 
+ DCHECK_EQ(core_counts[SRegToPMap(mir_graph_->GetMethodLoc().s_reg_low)].count, 0); + if (pc_rel_temp_ != nullptr) { + DCHECK_EQ(core_counts[SRegToPMap(pc_rel_temp_->s_reg_low)].count, 0); + } + PreOrderDfsIterator iter(mir_graph_); + for (BasicBlock* bb = iter.Next(); bb != nullptr; bb = iter.Next()) { + if (bb->block_type == kDead) { + continue; + } + uint32_t weight = mir_graph_->GetUseCountWeight(bb); + for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) { + AnalyzeMIR(core_counts, mir, weight); + } + } } /* qsort callback function, sort descending */ diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc index 118ab1d843..af19f5eaed 100644 --- a/compiler/dex/quick/x86/assemble_x86.cc +++ b/compiler/dex/quick/x86/assemble_x86.cc @@ -544,7 +544,6 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, { kX86CallI, kCall, IS_UNARY_OP | IS_BRANCH, { 0, 0, 0xE8, 0, 0, 0, 0, 4, false }, "CallI", "!0d" }, { kX86Ret, kNullary, NO_OPERAND | IS_BRANCH, { 0, 0, 0xC3, 0, 0, 0, 0, 0, false }, "Ret", "" }, - { kX86StartOfMethod, kMacro, IS_UNARY_OP | REG_DEF0 | SETS_CCODES, { 0, 0, 0, 0, 0, 0, 0, 0, false }, "StartOfMethod", "!0r" }, { kX86PcRelLoadRA, kPcRel, IS_LOAD | IS_QUIN_OP | REG_DEF0_USE12, { 0, 0, 0x8B, 0, 0, 0, 0, 0, false }, "PcRelLoadRA", "!0r,[!1r+!2r<<!3d+!4p]" }, { kX86PcRelAdr, kPcRel, IS_LOAD | IS_BINARY_OP | REG_DEF0, { 0, 0, 0xB8, 0, 0, 0, 0, 4, false }, "PcRelAdr", "!0r,!1p" }, { kX86RepneScasw, kNullary, NO_OPERAND | REG_USEA | REG_USEC | SETS_CCODES, { 0x66, 0xF2, 0xAF, 0, 0, 0, 0, 0, false }, "RepNE ScasW", "" }, @@ -865,13 +864,6 @@ size_t X86Mir2Lir::GetInsnSize(LIR* lir) { DCHECK_EQ(entry->opcode, kX86PcRelAdr); return 5; // opcode with reg + 4 byte immediate } - case kMacro: // lir operands - 0: reg - DCHECK_EQ(lir->opcode, static_cast<int>(kX86StartOfMethod)); - return 5 /* call opcode + 4 byte displacement */ + 1 /* pop reg */ + - ComputeSize(&X86Mir2Lir::EncodingMap[cu_->target64 ? kX86Sub64RI : kX86Sub32RI], - lir->operands[0], NO_REG, NO_REG, 0) - - // Shorter ax encoding. - (RegStorage::RegNum(lir->operands[0]) == rs_rAX.GetRegNum() ? 1 : 0); case kUnimplemented: break; } @@ -1586,8 +1578,8 @@ void X86Mir2Lir::EmitPcRel(const X86EncodingMap* entry, int32_t raw_reg, int32_t int32_t raw_index, int scale, int32_t table_or_disp) { int disp; if (entry->opcode == kX86PcRelLoadRA) { - const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(table_or_disp); - disp = tab_rec->offset; + const SwitchTable* tab_rec = UnwrapPointer<SwitchTable>(table_or_disp); + disp = tab_rec->offset - tab_rec->anchor->offset; } else { DCHECK(entry->opcode == kX86PcRelAdr); const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(raw_base_or_table); @@ -1621,23 +1613,6 @@ void X86Mir2Lir::EmitPcRel(const X86EncodingMap* entry, int32_t raw_reg, int32_t DCHECK_EQ(0, entry->skeleton.ax_opcode); } -void X86Mir2Lir::EmitMacro(const X86EncodingMap* entry, int32_t raw_reg, int32_t offset) { - DCHECK_EQ(entry->opcode, kX86StartOfMethod) << entry->name; - DCHECK_EQ(false, entry->skeleton.r8_form); - EmitPrefix(entry, raw_reg, NO_REG, NO_REG); - code_buffer_.push_back(0xE8); // call +0 - code_buffer_.push_back(0); - code_buffer_.push_back(0); - code_buffer_.push_back(0); - code_buffer_.push_back(0); - - uint8_t low_reg = LowRegisterBits(raw_reg); - code_buffer_.push_back(0x58 + low_reg); // pop reg - - EmitRegImm(&X86Mir2Lir::EncodingMap[cu_->target64 ? 
kX86Sub64RI : kX86Sub32RI], - raw_reg, offset + 5 /* size of call +0 */); -} - void X86Mir2Lir::EmitUnimplemented(const X86EncodingMap* entry, LIR* lir) { UNIMPLEMENTED(WARNING) << "encoding kind for " << entry->name << " " << BuildInsnString(entry->fmt, lir, 0); @@ -1780,7 +1755,8 @@ AssemblerStatus X86Mir2Lir::AssembleInstructions(CodeOffset start_addr) { // Offset is relative to next instruction. lir->operands[2] = target - (lir->offset + lir->flags.size); } else { - lir->operands[2] = target; + const LIR* anchor = UnwrapPointer<LIR>(lir->operands[4]); + lir->operands[2] = target - anchor->offset; int newSize = GetInsnSize(lir); if (newSize != lir->flags.size) { lir->flags.size = newSize; @@ -1951,9 +1927,6 @@ AssemblerStatus X86Mir2Lir::AssembleInstructions(CodeOffset start_addr) { EmitPcRel(entry, lir->operands[0], lir->operands[1], lir->operands[2], lir->operands[3], lir->operands[4]); break; - case kMacro: // lir operands - 0: reg - EmitMacro(entry, lir->operands[0], lir->offset); - break; case kNop: // TODO: these instruction kinds are missing implementations. case kThreadReg: case kRegArrayImm: @@ -2044,9 +2017,13 @@ void X86Mir2Lir::AssembleLIR() { cu_->NewTimingSplit("Assemble"); // We will remove the method address if we never ended up using it - if (store_method_addr_ && !store_method_addr_used_) { - setup_method_address_[0]->flags.is_nop = true; - setup_method_address_[1]->flags.is_nop = true; + if (pc_rel_base_reg_.Valid() && !pc_rel_base_reg_used_) { + if (kIsDebugBuild) { + LOG(WARNING) << "PC-relative addressing base promoted but unused in " + << PrettyMethod(cu_->method_idx, *cu_->dex_file); + } + setup_pc_rel_base_reg_->flags.is_nop = true; + NEXT_LIR(setup_pc_rel_base_reg_)->flags.is_nop = true; } AssignOffsets(); diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc index abee87254b..d7a5eb04db 100644 --- a/compiler/dex/quick/x86/call_x86.cc +++ b/compiler/dex/quick/x86/call_x86.cc @@ -21,9 +21,11 @@ #include "base/logging.h" #include "dex/quick/mir_to_lir-inl.h" #include "driver/compiler_driver.h" +#include "driver/compiler_options.h" #include "gc/accounting/card_table.h" #include "mirror/art_method.h" #include "mirror/object_array-inl.h" +#include "utils/dex_cache_arrays_layout-inl.h" #include "x86_lir.h" namespace art { @@ -95,29 +97,23 @@ void X86Mir2Lir::GenLargePackedSwitch(MIR* mir, DexOffset table_offset, RegLocat // Add the offset from the table to the table base. OpRegReg(kOpAdd, addr_for_jump, table_base); + tab_rec->anchor = nullptr; // Unused for x86-64. } else { - // Materialize a pointer to the switch table. - RegStorage start_of_method_reg; - if (base_of_code_ != nullptr) { - // We can use the saved value. - RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low); - rl_method = LoadValue(rl_method, kCoreReg); - start_of_method_reg = rl_method.reg; - store_method_addr_used_ = true; - } else { - start_of_method_reg = AllocTempRef(); - NewLIR1(kX86StartOfMethod, start_of_method_reg.GetReg()); - } + // Get the PC to a register and get the anchor. + LIR* anchor; + RegStorage r_pc = GetPcAndAnchor(&anchor); + // Load the displacement from the switch table. addr_for_jump = AllocTemp(); - NewLIR5(kX86PcRelLoadRA, addr_for_jump.GetReg(), start_of_method_reg.GetReg(), keyReg.GetReg(), + NewLIR5(kX86PcRelLoadRA, addr_for_jump.GetReg(), r_pc.GetReg(), keyReg.GetReg(), 2, WrapPointer(tab_rec)); - // Add displacement to start of method. 
- OpRegReg(kOpAdd, addr_for_jump, start_of_method_reg); + // Add displacement and r_pc to get the address. + OpRegReg(kOpAdd, addr_for_jump, r_pc); + tab_rec->anchor = anchor; } // ..and go! - tab_rec->anchor = NewLIR1(kX86JmpR, addr_for_jump.GetReg()); + NewLIR1(kX86JmpR, addr_for_jump.GetReg()); /* branch_over target here */ LIR* target = NewLIR0(kPseudoTargetLabel); @@ -148,6 +144,10 @@ void X86Mir2Lir::UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) { FreeTemp(reg_card_no); } +static dwarf::Reg DwarfCoreReg(bool is_x86_64, int num) { + return is_x86_64 ? dwarf::Reg::X86_64Core(num) : dwarf::Reg::X86Core(num); +} + void X86Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { /* * On entry, rX86_ARG0, rX86_ARG1, rX86_ARG2 are live. Let the register @@ -182,10 +182,10 @@ void X86Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { } /* Build frame, return address already on stack */ - stack_decrement_ = OpRegImm(kOpSub, rs_rSP, frame_size_ - - GetInstructionSetPointerSize(cu_->instruction_set)); + cfi_.SetCurrentCFAOffset(GetInstructionSetPointerSize(cu_->instruction_set)); + OpRegImm(kOpSub, rs_rSP, frame_size_ - GetInstructionSetPointerSize(cu_->instruction_set)); + cfi_.DefCFAOffset(frame_size_); - NewLIR0(kPseudoMethodEntry); /* Spill core callee saves */ SpillCoreRegs(); SpillFPRegs(); @@ -201,10 +201,12 @@ void X86Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { GenerateTargetLabel(kPseudoThrowTarget); const RegStorage local_rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32; m2l_->OpRegImm(kOpAdd, local_rs_rSP, sp_displace_); + m2l_->cfi().AdjustCFAOffset(-sp_displace_); m2l_->ClobberCallerSave(); // Assumes codegen and target are in thumb2 mode. m2l_->CallHelper(RegStorage::InvalidReg(), kQuickThrowStackOverflow, false /* MarkSafepointPC */, false /* UseLink */); + m2l_->cfi().AdjustCFAOffset(sp_displace_); } private: @@ -235,14 +237,12 @@ void X86Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { FlushIns(ArgLocs, rl_method); - if (base_of_code_ != nullptr) { - RegStorage method_start = TargetPtrReg(kArg0); - // We have been asked to save the address of the method start for later use. - setup_method_address_[0] = NewLIR1(kX86StartOfMethod, method_start.GetReg()); - int displacement = SRegOffset(base_of_code_->s_reg_low); - // Native pointer - must be natural word size. - setup_method_address_[1] = StoreBaseDisp(rs_rSP, displacement, method_start, - cu_->target64 ? k64 : k32, kNotVolatile); + // We can promote the PC of an anchor for PC-relative addressing to a register + // if it's used at least twice. Without investigating where we should lazily + // load the reference, we conveniently load it after flushing inputs. + if (pc_rel_base_reg_.Valid()) { + DCHECK(!cu_->target64); + setup_pc_rel_base_reg_ = OpLoadPc(pc_rel_base_reg_); } FreeTemp(arg0); @@ -251,6 +251,7 @@ void X86Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { } void X86Mir2Lir::GenExitSequence() { + cfi_.RememberState(); /* * In the exit path, rX86_RET0/rX86_RET1 are live - make sure they aren't * allocated by the register utilities as temps. @@ -258,14 +259,18 @@ void X86Mir2Lir::GenExitSequence() { LockTemp(rs_rX86_RET0); LockTemp(rs_rX86_RET1); - NewLIR0(kPseudoMethodExit); UnSpillCoreRegs(); UnSpillFPRegs(); /* Remove frame except for return address */ const RegStorage rs_rSP = cu_->target64 ? 
rs_rX86_SP_64 : rs_rX86_SP_32; - stack_increment_ = OpRegImm(kOpAdd, rs_rSP, - frame_size_ - GetInstructionSetPointerSize(cu_->instruction_set)); + int adjust = frame_size_ - GetInstructionSetPointerSize(cu_->instruction_set); + OpRegImm(kOpAdd, rs_rSP, adjust); + cfi_.AdjustCFAOffset(-adjust); + // There is only the return PC on the stack now. NewLIR0(kX86Ret); + // The CFI should be restored for any code that follows the exit block. + cfi_.RestoreState(); + cfi_.DefCFAOffset(frame_size_); } void X86Mir2Lir::GenSpecialExitSequence() { @@ -276,6 +281,8 @@ void X86Mir2Lir::GenSpecialEntryForSuspend() { // Keep 16-byte stack alignment, there's already the return address, so // - for 32-bit push EAX, i.e. ArtMethod*, ESI, EDI, // - for 64-bit push RAX, i.e. ArtMethod*. + const int kRegSize = cu_->target64 ? 8 : 4; + cfi_.SetCurrentCFAOffset(kRegSize); // Return address. if (!cu_->target64) { DCHECK(!IsTemp(rs_rSI)); DCHECK(!IsTemp(rs_rDI)); @@ -293,17 +300,29 @@ void X86Mir2Lir::GenSpecialEntryForSuspend() { fp_vmap_table_.clear(); if (!cu_->target64) { NewLIR1(kX86Push32R, rs_rDI.GetReg()); + cfi_.AdjustCFAOffset(kRegSize); + cfi_.RelOffset(DwarfCoreReg(cu_->target64, rs_rDI.GetRegNum()), 0); NewLIR1(kX86Push32R, rs_rSI.GetReg()); + cfi_.AdjustCFAOffset(kRegSize); + cfi_.RelOffset(DwarfCoreReg(cu_->target64, rs_rSI.GetRegNum()), 0); } NewLIR1(kX86Push32R, TargetReg(kArg0, kRef).GetReg()); // ArtMethod* + cfi_.AdjustCFAOffset(kRegSize); + // Do not generate CFI for scratch register. } void X86Mir2Lir::GenSpecialExitForSuspend() { + const int kRegSize = cu_->target64 ? 8 : 4; // Pop the frame. (ArtMethod* no longer needed but restore it anyway.) NewLIR1(kX86Pop32R, TargetReg(kArg0, kRef).GetReg()); // ArtMethod* + cfi_.AdjustCFAOffset(-kRegSize); if (!cu_->target64) { NewLIR1(kX86Pop32R, rs_rSI.GetReg()); + cfi_.AdjustCFAOffset(-kRegSize); + cfi_.Restore(DwarfCoreReg(cu_->target64, rs_rSI.GetRegNum())); NewLIR1(kX86Pop32R, rs_rDI.GetReg()); + cfi_.AdjustCFAOffset(-kRegSize); + cfi_.Restore(DwarfCoreReg(cu_->target64, rs_rDI.GetRegNum())); } } @@ -321,13 +340,13 @@ void X86Mir2Lir::GenImplicitNullCheck(RegStorage reg, int opt_flags) { * Bit of a hack here - in the absence of a real scheduling pass, * emit the next instruction in static & direct invoke sequences. 
*/ -static int X86NextSDCallInsn(CompilationUnit* cu, CallInfo* info, - int state, const MethodReference& target_method, - uint32_t, - uintptr_t direct_code, uintptr_t direct_method, - InvokeType type) { +int X86Mir2Lir::X86NextSDCallInsn(CompilationUnit* cu, CallInfo* info, + int state, const MethodReference& target_method, + uint32_t, + uintptr_t direct_code, uintptr_t direct_method, + InvokeType type) { UNUSED(info, direct_code); - Mir2Lir* cg = static_cast<Mir2Lir*>(cu->cg.get()); + X86Mir2Lir* cg = static_cast<X86Mir2Lir*>(cu->cg.get()); if (direct_method != 0) { switch (state) { case 0: // Get the current Method* [sets kArg0] @@ -345,6 +364,17 @@ static int X86NextSDCallInsn(CompilationUnit* cu, CallInfo* info, default: return -1; } + } else if (cg->CanUseOpPcRelDexCacheArrayLoad()) { + switch (state) { + case 0: { + CHECK_EQ(cu->dex_file, target_method.dex_file); + size_t offset = cg->dex_cache_arrays_layout_.MethodOffset(target_method.dex_method_index); + cg->OpPcRelDexCacheArrayLoad(cu->dex_file, offset, cg->TargetReg(kArg0, kRef)); + break; + } + default: + return -1; + } } else { RegStorage arg0_ref = cg->TargetReg(kArg0, kRef); switch (state) { diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h index 040a8c4bef..72580a3e39 100644 --- a/compiler/dex/quick/x86/codegen_x86.h +++ b/compiler/dex/quick/x86/codegen_x86.h @@ -28,7 +28,7 @@ namespace art { -class X86Mir2Lir : public Mir2Lir { +class X86Mir2Lir FINAL : public Mir2Lir { protected: class InToRegStorageX86_64Mapper : public InToRegStorageMapper { public: @@ -104,6 +104,9 @@ class X86Mir2Lir : public Mir2Lir { /// @copydoc Mir2Lir::UnconditionallyMarkGCCard(RegStorage) void UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) OVERRIDE; + bool CanUseOpPcRelDexCacheArrayLoad() const OVERRIDE; + void OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset, RegStorage r_dest) OVERRIDE; + void GenImplicitNullCheck(RegStorage reg, int opt_flags) OVERRIDE; // Required for target - register utilities. @@ -372,17 +375,15 @@ class X86Mir2Lir : public Mir2Lir { */ LIR* GenCallInsn(const MirMethodLoweringInfo& method_info) OVERRIDE; + void AnalyzeMIR(RefCounts* core_counts, MIR* mir, uint32_t weight) OVERRIDE; + void CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs) OVERRIDE; + void DoPromotion() OVERRIDE; + /* * @brief Handle x86 specific literals */ void InstallLiteralPools() OVERRIDE; - /* - * @brief Generate the debug_frame FDE information. - * @returns pointer to vector containing CFE information - */ - std::vector<uint8_t>* ReturnFrameDescriptionEntry() OVERRIDE; - LIR* InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) OVERRIDE; protected: @@ -491,7 +492,6 @@ class X86Mir2Lir : public Mir2Lir { void EmitCallThread(const X86EncodingMap* entry, int32_t disp); void EmitPcRel(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_base_or_table, int32_t raw_index, int scale, int32_t table_or_disp); - void EmitMacro(const X86EncodingMap* entry, int32_t raw_reg, int32_t offset); void EmitUnimplemented(const X86EncodingMap* entry, LIR* lir); void GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1, int64_t val, ConditionCode ccode); @@ -862,12 +862,6 @@ class X86Mir2Lir : public Mir2Lir { void SpillFPRegs(); /* - * @brief Perform MIR analysis before compiling method. - * @note Invokes Mir2LiR::Materialize after analysis. 
- */ - void Materialize(); - - /* * Mir2Lir's UpdateLoc() looks to see if the Dalvik value is currently live in any temp register * without regard to data type. In practice, this can result in UpdateLoc returning a * location record for a Dalvik float value in a core register, and vis-versa. For targets @@ -881,67 +875,39 @@ class X86Mir2Lir : public Mir2Lir { RegLocation UpdateLocWideTyped(RegLocation loc); /* - * @brief Analyze MIR before generating code, to prepare for the code generation. - */ - void AnalyzeMIR(); - - /* - * @brief Analyze one basic block. - * @param bb Basic block to analyze. - */ - void AnalyzeBB(BasicBlock* bb); - - /* - * @brief Analyze one extended MIR instruction - * @param opcode MIR instruction opcode. - * @param bb Basic block containing instruction. - * @param mir Extended instruction to analyze. - */ - void AnalyzeExtendedMIR(int opcode, BasicBlock* bb, MIR* mir); - - /* - * @brief Analyze one MIR instruction - * @param opcode MIR instruction opcode. - * @param bb Basic block containing instruction. - * @param mir Instruction to analyze. - */ - virtual void AnalyzeMIR(int opcode, BasicBlock* bb, MIR* mir); - - /* * @brief Analyze one MIR float/double instruction * @param opcode MIR instruction opcode. - * @param bb Basic block containing instruction. * @param mir Instruction to analyze. + * @return true iff the instruction needs to load a literal using PC-relative addressing. */ - virtual void AnalyzeFPInstruction(int opcode, BasicBlock* bb, MIR* mir); + bool AnalyzeFPInstruction(int opcode, MIR* mir); /* * @brief Analyze one use of a double operand. * @param rl_use Double RegLocation for the operand. + * @return true iff the instruction needs to load a literal using PC-relative addressing. */ - void AnalyzeDoubleUse(RegLocation rl_use); + bool AnalyzeDoubleUse(RegLocation rl_use); /* * @brief Analyze one invoke-static MIR instruction - * @param opcode MIR instruction opcode. - * @param bb Basic block containing instruction. * @param mir Instruction to analyze. + * @return true iff the instruction needs to load a literal using PC-relative addressing. */ - void AnalyzeInvokeStatic(int opcode, BasicBlock* bb, MIR* mir); + bool AnalyzeInvokeStaticIntrinsic(MIR* mir); // Information derived from analysis of MIR - // The compiler temporary for the code address of the method. - CompilerTemp *base_of_code_; - - // Have we decided to compute a ptr to code and store in temporary VR? - bool store_method_addr_; + // The base register for PC-relative addressing if promoted (32-bit only). + RegStorage pc_rel_base_reg_; - // Have we used the stored method address? - bool store_method_addr_used_; + // Have we actually used the pc_rel_base_reg_? + bool pc_rel_base_reg_used_; - // Instructions to remove if we didn't use the stored method address. - LIR* setup_method_address_[2]; + // Pointer to the "call +0" insn that sets up the promoted register for PC-relative addressing. + // The anchor "pop" insn is NEXT_LIR(setup_pc_rel_base_reg_). The whole "call +0; pop <reg>" + // sequence will be removed in AssembleLIR() if we do not actually use PC-relative addressing. + LIR* setup_pc_rel_base_reg_; // There are 2 chained insns (no reordering allowed). // Instructions needing patching with Method* values. ArenaVector<LIR*> method_address_insns_; @@ -952,11 +918,8 @@ class X86Mir2Lir : public Mir2Lir { // Instructions needing patching with PC relative code addresses. ArenaVector<LIR*> call_method_insns_; - // Prologue decrement of stack pointer. 
- LIR* stack_decrement_; - - // Epilogue increment of stack pointer. - LIR* stack_increment_; + // Instructions needing patching with PC relative code addresses. + ArenaVector<LIR*> dex_cache_access_insns_; // The list of const vector literals. LIR* const_vectors_; @@ -992,6 +955,20 @@ class X86Mir2Lir : public Mir2Lir { void SwapBits(RegStorage result_reg, int shift, int32_t value); void SwapBits64(RegStorage result_reg, int shift, int64_t value); + static int X86NextSDCallInsn(CompilationUnit* cu, CallInfo* info, + int state, const MethodReference& target_method, + uint32_t, + uintptr_t direct_code, uintptr_t direct_method, + InvokeType type); + + LIR* OpLoadPc(RegStorage r_dest); + RegStorage GetPcAndAnchor(LIR** anchor, RegStorage r_tmp = RegStorage::InvalidReg()); + + // When we don't know the proper offset for the value, pick one that will force + // 4 byte offset. We will fix this up in the assembler or linker later to have + // the right value. + static constexpr int kDummy32BitOffset = 256; + static const X86EncodingMap EncodingMap[kX86Last]; friend std::ostream& operator<<(std::ostream& os, const X86OpCode& rhs); diff --git a/compiler/dex/quick/x86/fp_x86.cc b/compiler/dex/quick/x86/fp_x86.cc index d8616a7bf3..cfe0480c54 100755 --- a/compiler/dex/quick/x86/fp_x86.cc +++ b/compiler/dex/quick/x86/fp_x86.cc @@ -756,24 +756,6 @@ bool X86Mir2Lir::GenInlinedMinMaxFP(CallInfo* info, bool is_min, bool is_double) branch_nan->target = NewLIR0(kPseudoTargetLabel); LoadConstantWide(rl_result.reg, INT64_C(0x7ff8000000000000)); - // The base_of_code_ compiler temp is non-null when it is reserved - // for being able to do data accesses relative to method start. - if (base_of_code_ != nullptr) { - // Loading from the constant pool may have used base of code register. - // However, the code here generates logic in diamond shape and not all - // paths load base of code register. Therefore, we ensure it is clobbered so - // that the temp caching system does not believe it is live at merge point. - RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low); - if (rl_method.wide) { - rl_method = UpdateLocWide(rl_method); - } else { - rl_method = UpdateLoc(rl_method); - } - if (rl_method.location == kLocPhysReg) { - Clobber(rl_method.reg); - } - } - LIR* branch_exit_nan = NewLIR1(kX86Jmp8, 0); // Handle Min/Max. Copy greater/lesser value from src2. branch_cond1->target = NewLIR0(kPseudoTargetLabel); diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc index 4eb626c14f..1043815e10 100755 --- a/compiler/dex/quick/x86/int_x86.cc +++ b/compiler/dex/quick/x86/int_x86.cc @@ -830,6 +830,10 @@ RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1, return rl_result; } +static dwarf::Reg DwarfCoreReg(bool is_x86_64, int num) { + return is_x86_64 ? dwarf::Reg::X86_64Core(num) : dwarf::Reg::X86Core(num); +} + bool X86Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) { DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64); @@ -928,6 +932,7 @@ bool X86Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) { // Do we have a free register for intermediate calculations? RegStorage tmp = AllocTemp(false); + const int kRegSize = cu_->target64 ? 8 : 4; if (tmp == RegStorage::InvalidReg()) { /* * No, will use 'edi'. 
@@ -946,6 +951,11 @@ bool X86Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) { IsTemp(rl_result.reg.GetHigh())); tmp = rs_rDI; NewLIR1(kX86Push32R, tmp.GetReg()); + cfi_.AdjustCFAOffset(kRegSize); + // Record cfi only if it is not already spilled. + if (!CoreSpillMaskContains(tmp.GetReg())) { + cfi_.RelOffset(DwarfCoreReg(cu_->target64, tmp.GetReg()), 0); + } } // Now we are ready to do calculations. @@ -957,6 +967,10 @@ bool X86Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) { // Let's put pop 'edi' here to break a bit the dependency chain. if (tmp == rs_rDI) { NewLIR1(kX86Pop32R, tmp.GetReg()); + cfi_.AdjustCFAOffset(-kRegSize); + if (!CoreSpillMaskContains(tmp.GetReg())) { + cfi_.Restore(DwarfCoreReg(cu_->target64, tmp.GetReg())); + } } else { FreeTemp(tmp); } @@ -1104,6 +1118,7 @@ bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { // If is_long, high half is in info->args[5] RegLocation rl_src_new_value = info->args[is_long ? 6 : 5]; // int, long or Object // If is_long, high half is in info->args[7] + const int kRegSize = cu_->target64 ? 8 : 4; if (is_long && cu_->target64) { // RAX must hold expected for CMPXCHG. Neither rl_new_value, nor r_ptr may be in RAX. @@ -1125,7 +1140,6 @@ bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { FreeTemp(rs_r0q); } else if (is_long) { // TODO: avoid unnecessary loads of SI and DI when the values are in registers. - // TODO: CFI support. FlushAllRegs(); LockCallTemps(); RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_rAX, rs_rDX); @@ -1148,11 +1162,21 @@ bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { NewLIR1(kX86Push32R, rs_rDI.GetReg()); MarkTemp(rs_rDI); LockTemp(rs_rDI); + cfi_.AdjustCFAOffset(kRegSize); + // Record cfi only if it is not already spilled. + if (!CoreSpillMaskContains(rs_rDI.GetReg())) { + cfi_.RelOffset(DwarfCoreReg(cu_->target64, rs_rDI.GetReg()), 0); + } } if (push_si) { NewLIR1(kX86Push32R, rs_rSI.GetReg()); MarkTemp(rs_rSI); LockTemp(rs_rSI); + cfi_.AdjustCFAOffset(kRegSize); + // Record cfi only if it is not already spilled. + if (!CoreSpillMaskContains(rs_rSI.GetReg())) { + cfi_.RelOffset(DwarfCoreReg(cu_->target64, rs_rSI.GetReg()), 0); + } } ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); const size_t push_offset = (push_si ? 4u : 0u) + (push_di ? 4u : 0u); @@ -1183,11 +1207,19 @@ bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { FreeTemp(rs_rSI); UnmarkTemp(rs_rSI); NewLIR1(kX86Pop32R, rs_rSI.GetReg()); + cfi_.AdjustCFAOffset(-kRegSize); + if (!CoreSpillMaskContains(rs_rSI.GetReg())) { + cfi_.Restore(DwarfCoreReg(cu_->target64, rs_rSI.GetRegNum())); + } } if (push_di) { FreeTemp(rs_rDI); UnmarkTemp(rs_rDI); NewLIR1(kX86Pop32R, rs_rDI.GetReg()); + cfi_.AdjustCFAOffset(-kRegSize); + if (!CoreSpillMaskContains(rs_rDI.GetReg())) { + cfi_.Restore(DwarfCoreReg(cu_->target64, rs_rDI.GetRegNum())); + } } FreeCallTemps(); } else { @@ -1327,37 +1359,79 @@ bool X86Mir2Lir::GenInlinedReverseBits(CallInfo* info, OpSize size) { void X86Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) { if (cu_->target64) { // We can do this directly using RIP addressing. - // We don't know the proper offset for the value, so pick one that will force - // 4 byte offset. We will fix this up in the assembler later to have the right - // value. 
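  // On x86-64 the load below uses RIP-relative addressing directly; the kDummy32BitOffset
  // placeholder is fixed up at assembly time once the literal's offset is known.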
ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral); - LIR* res = NewLIR3(kX86Mov32RM, reg.GetReg(), kRIPReg, 256); + LIR* res = NewLIR3(kX86Mov32RM, reg.GetReg(), kRIPReg, kDummy32BitOffset); res->target = target; res->flags.fixup = kFixupLoad; return; } - CHECK(base_of_code_ != nullptr); - - // Address the start of the method - RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low); - if (rl_method.wide) { - LoadValueDirectWideFixed(rl_method, reg); - } else { - LoadValueDirectFixed(rl_method, reg); - } - store_method_addr_used_ = true; + // Get the PC to a register and get the anchor. + LIR* anchor; + RegStorage r_pc = GetPcAndAnchor(&anchor); // Load the proper value from the literal area. - // We don't know the proper offset for the value, so pick one that will force - // 4 byte offset. We will fix this up in the assembler later to have the right - // value. ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral); - LIR* res = NewLIR3(kX86Mov32RM, reg.GetReg(), reg.GetReg(), 256); + LIR* res = NewLIR3(kX86Mov32RM, reg.GetReg(), r_pc.GetReg(), kDummy32BitOffset); + res->operands[4] = WrapPointer(anchor); res->target = target; res->flags.fixup = kFixupLoad; } +bool X86Mir2Lir::CanUseOpPcRelDexCacheArrayLoad() const { + return dex_cache_arrays_layout_.Valid(); +} + +LIR* X86Mir2Lir::OpLoadPc(RegStorage r_dest) { + DCHECK(!cu_->target64); + LIR* call = NewLIR1(kX86CallI, 0); + call->flags.fixup = kFixupLabel; + LIR* pop = NewLIR1(kX86Pop32R, r_dest.GetReg()); + pop->flags.fixup = kFixupLabel; + DCHECK(NEXT_LIR(call) == pop); + return call; +} + +RegStorage X86Mir2Lir::GetPcAndAnchor(LIR** anchor, RegStorage r_tmp) { + if (pc_rel_base_reg_.Valid()) { + DCHECK(setup_pc_rel_base_reg_ != nullptr); + *anchor = NEXT_LIR(setup_pc_rel_base_reg_); + DCHECK(*anchor != nullptr); + DCHECK_EQ((*anchor)->opcode, kX86Pop32R); + pc_rel_base_reg_used_ = true; + return pc_rel_base_reg_; + } else { + RegStorage r_pc = r_tmp.Valid() ? r_tmp : AllocTempRef(); + LIR* load_pc = OpLoadPc(r_pc); + *anchor = NEXT_LIR(load_pc); + DCHECK(*anchor != nullptr); + DCHECK_EQ((*anchor)->opcode, kX86Pop32R); + return r_pc; + } +} + +void X86Mir2Lir::OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset, + RegStorage r_dest) { + if (cu_->target64) { + LIR* mov = NewLIR3(kX86Mov32RM, r_dest.GetReg(), kRIPReg, kDummy32BitOffset); + mov->flags.fixup = kFixupLabel; + mov->operands[3] = WrapPointer(dex_file); + mov->operands[4] = offset; + mov->target = mov; // Used for pc_insn_offset (not used by x86-64 relative patcher). + dex_cache_access_insns_.push_back(mov); + } else { + // Get the PC to a register and get the anchor. Use r_dest for the temp if needed. + LIR* anchor; + RegStorage r_pc = GetPcAndAnchor(&anchor, r_dest); + LIR* mov = NewLIR3(kX86Mov32RM, r_dest.GetReg(), r_pc.GetReg(), kDummy32BitOffset); + mov->flags.fixup = kFixupLabel; + mov->operands[3] = WrapPointer(dex_file); + mov->operands[4] = offset; + mov->target = anchor; // Used for pc_insn_offset. 
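    // The overall 32-bit pattern is roughly (sketch; reg_pc is whichever register holds the
    // anchor PC, emitted once in the prologue when the base is promoted, otherwise inline):
    //     call +0                               // pushes the address of the next instruction
    //   anchor:
    //     pop  reg_pc                           // reg_pc now holds the anchor PC
    //     mov  r_dest, [reg_pc + kDummy32BitOffset]
    // The dummy displacement is later patched to (dex cache array element - anchor).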
+ dex_cache_access_insns_.push_back(mov); + } +} + LIR* X86Mir2Lir::OpVldm(RegStorage r_base, int count) { UNUSED(r_base, count); LOG(FATAL) << "Unexpected use of OpVldm for x86"; diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc index f128eb78a3..a16e242d08 100755 --- a/compiler/dex/quick/x86/target_x86.cc +++ b/compiler/dex/quick/x86/target_x86.cc @@ -32,7 +32,6 @@ #include "mirror/string.h" #include "oat.h" #include "x86_lir.h" -#include "utils/dwarf_cfi.h" namespace art { @@ -725,6 +724,14 @@ int X86Mir2Lir::NumReservableVectorRegisters(bool long_or_fp) { return long_or_fp ? num_vector_temps - 2 : num_vector_temps - 1; } +static dwarf::Reg DwarfCoreReg(bool is_x86_64, int num) { + return is_x86_64 ? dwarf::Reg::X86_64Core(num) : dwarf::Reg::X86Core(num); +} + +static dwarf::Reg DwarfFpReg(bool is_x86_64, int num) { + return is_x86_64 ? dwarf::Reg::X86_64Fp(num) : dwarf::Reg::X86Fp(num); +} + void X86Mir2Lir::SpillCoreRegs() { if (num_core_spills_ == 0) { return; @@ -735,11 +742,11 @@ void X86Mir2Lir::SpillCoreRegs() { frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_); OpSize size = cu_->target64 ? k64 : k32; const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32; - for (int reg = 0; mask; mask >>= 1, reg++) { - if (mask & 0x1) { - StoreBaseDisp(rs_rSP, offset, - cu_->target64 ? RegStorage::Solo64(reg) : RegStorage::Solo32(reg), - size, kNotVolatile); + for (int reg = 0; mask != 0u; mask >>= 1, reg++) { + if ((mask & 0x1) != 0u) { + RegStorage r_src = cu_->target64 ? RegStorage::Solo64(reg) : RegStorage::Solo32(reg); + StoreBaseDisp(rs_rSP, offset, r_src, size, kNotVolatile); + cfi_.RelOffset(DwarfCoreReg(cu_->target64, reg), offset); offset += GetInstructionSetPointerSize(cu_->instruction_set); } } @@ -754,10 +761,11 @@ void X86Mir2Lir::UnSpillCoreRegs() { int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_); OpSize size = cu_->target64 ? k64 : k32; const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32; - for (int reg = 0; mask; mask >>= 1, reg++) { - if (mask & 0x1) { - LoadBaseDisp(rs_rSP, offset, cu_->target64 ? RegStorage::Solo64(reg) : RegStorage::Solo32(reg), - size, kNotVolatile); + for (int reg = 0; mask != 0u; mask >>= 1, reg++) { + if ((mask & 0x1) != 0u) { + RegStorage r_dest = cu_->target64 ? RegStorage::Solo64(reg) : RegStorage::Solo32(reg); + LoadBaseDisp(rs_rSP, offset, r_dest, size, kNotVolatile); + cfi_.Restore(DwarfCoreReg(cu_->target64, reg)); offset += GetInstructionSetPointerSize(cu_->instruction_set); } } @@ -771,9 +779,10 @@ void X86Mir2Lir::SpillFPRegs() { int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * (num_fp_spills_ + num_core_spills_)); const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32; - for (int reg = 0; mask; mask >>= 1, reg++) { - if (mask & 0x1) { + for (int reg = 0; mask != 0u; mask >>= 1, reg++) { + if ((mask & 0x1) != 0u) { StoreBaseDisp(rs_rSP, offset, RegStorage::FloatSolo64(reg), k64, kNotVolatile); + cfi_.RelOffset(DwarfFpReg(cu_->target64, reg), offset); offset += sizeof(double); } } @@ -786,10 +795,11 @@ void X86Mir2Lir::UnSpillFPRegs() { int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * (num_fp_spills_ + num_core_spills_)); const RegStorage rs_rSP = cu_->target64 ? 
rs_rX86_SP_64 : rs_rX86_SP_32; - for (int reg = 0; mask; mask >>= 1, reg++) { - if (mask & 0x1) { + for (int reg = 0; mask != 0u; mask >>= 1, reg++) { + if ((mask & 0x1) != 0u) { LoadBaseDisp(rs_rSP, offset, RegStorage::FloatSolo64(reg), k64, kNotVolatile); + cfi_.Restore(DwarfFpReg(cu_->target64, reg)); offset += sizeof(double); } } @@ -825,21 +835,22 @@ RegisterClass X86Mir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volatil X86Mir2Lir::X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena) : Mir2Lir(cu, mir_graph, arena), in_to_reg_storage_x86_64_mapper_(this), in_to_reg_storage_x86_mapper_(this), - base_of_code_(nullptr), store_method_addr_(false), store_method_addr_used_(false), + pc_rel_base_reg_(RegStorage::InvalidReg()), + pc_rel_base_reg_used_(false), + setup_pc_rel_base_reg_(nullptr), method_address_insns_(arena->Adapter()), class_type_address_insns_(arena->Adapter()), call_method_insns_(arena->Adapter()), - stack_decrement_(nullptr), stack_increment_(nullptr), + dex_cache_access_insns_(arena->Adapter()), const_vectors_(nullptr) { method_address_insns_.reserve(100); class_type_address_insns_.reserve(100); call_method_insns_.reserve(100); - store_method_addr_used_ = false; - for (int i = 0; i < kX86Last; i++) { - DCHECK_EQ(X86Mir2Lir::EncodingMap[i].opcode, i) - << "Encoding order for " << X86Mir2Lir::EncodingMap[i].name - << " is wrong: expecting " << i << ", seeing " - << static_cast<int>(X86Mir2Lir::EncodingMap[i].opcode); + for (int i = 0; i < kX86Last; i++) { + DCHECK_EQ(X86Mir2Lir::EncodingMap[i].opcode, i) + << "Encoding order for " << X86Mir2Lir::EncodingMap[i].name + << " is wrong: expecting " << i << ", seeing " + << static_cast<int>(X86Mir2Lir::EncodingMap[i].opcode); } } @@ -924,14 +935,6 @@ void X86Mir2Lir::DumpRegLocation(RegLocation loc) { << ", orig: " << loc.orig_sreg; } -void X86Mir2Lir::Materialize() { - // A good place to put the analysis before starting. - AnalyzeMIR(); - - // Now continue with regular code generation. - Mir2Lir::Materialize(); -} - void X86Mir2Lir::LoadMethodAddress(const MethodReference& target_method, InvokeType type, SpecialTargetRegister symbolic_reg) { /* @@ -1058,6 +1061,9 @@ void X86Mir2Lir::InstallLiteralPools() { } } + patches_.reserve(method_address_insns_.size() + class_type_address_insns_.size() + + call_method_insns_.size() + dex_cache_access_insns_.size()); + // Handle the fixups for methods. for (LIR* p : method_address_insns_) { DCHECK_EQ(p->opcode, kX86Mov32RI); @@ -1084,7 +1090,6 @@ void X86Mir2Lir::InstallLiteralPools() { } // And now the PC-relative calls to methods. - patches_.reserve(call_method_insns_.size()); for (LIR* p : call_method_insns_) { DCHECK_EQ(p->opcode, kX86CallI); uint32_t target_method_idx = p->operands[1]; @@ -1096,6 +1101,18 @@ void X86Mir2Lir::InstallLiteralPools() { target_dex_file, target_method_idx)); } + // PC-relative references to dex cache arrays. + for (LIR* p : dex_cache_access_insns_) { + DCHECK(p->opcode == kX86Mov32RM); + const DexFile* dex_file = UnwrapPointer<DexFile>(p->operands[3]); + uint32_t offset = p->operands[4]; + // The offset to patch is the last 4 bytes of the instruction. + int patch_offset = p->offset + p->flags.size - 4; + DCHECK(!p->flags.is_nop); + patches_.push_back(LinkerPatch::DexCacheArrayPatch(patch_offset, dex_file, + p->target->offset, offset)); + } + // And do the normal processing. 
Mir2Lir::InstallLiteralPools(); } @@ -1303,6 +1320,11 @@ bool X86Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) { if (!cu_->target64) { // EDI is promotable in 32-bit mode. NewLIR1(kX86Push32R, rs_rDI.GetReg()); + cfi_.AdjustCFAOffset(4); + // Record cfi only if it is not already spilled. + if (!CoreSpillMaskContains(rs_rDI.GetReg())) { + cfi_.RelOffset(DwarfCoreReg(cu_->target64, rs_rDI.GetReg()), 0); + } } if (zero_based) { @@ -1398,8 +1420,13 @@ bool X86Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) { // And join up at the end. all_done->target = NewLIR0(kPseudoTargetLabel); - if (!cu_->target64) + if (!cu_->target64) { NewLIR1(kX86Pop32R, rs_rDI.GetReg()); + cfi_.AdjustCFAOffset(-4); + if (!CoreSpillMaskContains(rs_rDI.GetReg())) { + cfi_.Restore(DwarfCoreReg(cu_->target64, rs_rDI.GetReg())); + } + } // Out of line code returns here. if (slowpath_branch != nullptr) { @@ -1412,100 +1439,6 @@ bool X86Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) { return true; } -static bool ARTRegIDToDWARFRegID(bool is_x86_64, int art_reg_id, int* dwarf_reg_id) { - if (is_x86_64) { - switch (art_reg_id) { - case 3 : *dwarf_reg_id = 3; return true; // %rbx - // This is the only discrepancy between ART & DWARF register numbering. - case 5 : *dwarf_reg_id = 6; return true; // %rbp - case 12: *dwarf_reg_id = 12; return true; // %r12 - case 13: *dwarf_reg_id = 13; return true; // %r13 - case 14: *dwarf_reg_id = 14; return true; // %r14 - case 15: *dwarf_reg_id = 15; return true; // %r15 - default: return false; // Should not get here - } - } else { - switch (art_reg_id) { - case 5: *dwarf_reg_id = 5; return true; // %ebp - case 6: *dwarf_reg_id = 6; return true; // %esi - case 7: *dwarf_reg_id = 7; return true; // %edi - default: return false; // Should not get here - } - } -} - -std::vector<uint8_t>* X86Mir2Lir::ReturnFrameDescriptionEntry() { - std::vector<uint8_t>* cfi_info = new std::vector<uint8_t>; - - // Generate the FDE for the method. - DCHECK_NE(data_offset_, 0U); - - WriteFDEHeader(cfi_info, cu_->target64); - WriteFDEAddressRange(cfi_info, data_offset_, cu_->target64); - - // The instructions in the FDE. - if (stack_decrement_ != nullptr) { - // Advance LOC to just past the stack decrement. - uint32_t pc = NEXT_LIR(stack_decrement_)->offset; - DW_CFA_advance_loc(cfi_info, pc); - - // Now update the offset to the call frame: DW_CFA_def_cfa_offset frame_size. - DW_CFA_def_cfa_offset(cfi_info, frame_size_); - - // Handle register spills - const uint32_t kSpillInstLen = (cu_->target64) ? 5 : 4; - const int kDataAlignmentFactor = (cu_->target64) ? -8 : -4; - uint32_t mask = core_spill_mask_ & ~(1 << rs_rRET.GetRegNum()); - int offset = -(GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_); - for (int reg = 0; mask; mask >>= 1, reg++) { - if (mask & 0x1) { - pc += kSpillInstLen; - - // Advance LOC to pass this instruction - DW_CFA_advance_loc(cfi_info, kSpillInstLen); - - int dwarf_reg_id; - if (ARTRegIDToDWARFRegID(cu_->target64, reg, &dwarf_reg_id)) { - // DW_CFA_offset_extended_sf reg offset - DW_CFA_offset_extended_sf(cfi_info, dwarf_reg_id, offset / kDataAlignmentFactor); - } - - offset += GetInstructionSetPointerSize(cu_->instruction_set); - } - } - - // We continue with that stack until the epilogue. 
- if (stack_increment_ != nullptr) { - uint32_t new_pc = NEXT_LIR(stack_increment_)->offset; - DW_CFA_advance_loc(cfi_info, new_pc - pc); - - // We probably have code snippets after the epilogue, so save the - // current state: DW_CFA_remember_state. - DW_CFA_remember_state(cfi_info); - - // We have now popped the stack: DW_CFA_def_cfa_offset 4/8. - // There is only the return PC on the stack now. - DW_CFA_def_cfa_offset(cfi_info, GetInstructionSetPointerSize(cu_->instruction_set)); - - // Everything after that is the same as before the epilogue. - // Stack bump was followed by RET instruction. - LIR *post_ret_insn = NEXT_LIR(NEXT_LIR(stack_increment_)); - if (post_ret_insn != nullptr) { - pc = new_pc; - new_pc = post_ret_insn->offset; - DW_CFA_advance_loc(cfi_info, new_pc - pc); - // Restore the state: DW_CFA_restore_state. - DW_CFA_restore_state(cfi_info); - } - } - } - - PadCFI(cfi_info); - WriteCFILength(cfi_info, cu_->target64); - - return cfi_info; -} - void X86Mir2Lir::GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir) { switch (static_cast<ExtendedMIROpcode>(mir->dalvikInsn.opcode)) { case kMirOpReserveVectorRegisters: @@ -1642,20 +1575,17 @@ void X86Mir2Lir::AppendOpcodeWithConst(X86OpCode opcode, int reg, MIR* mir) { LIR* load; ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral); if (cu_->target64) { - load = NewLIR3(opcode, reg, kRIPReg, 256 /* bogus */); + load = NewLIR3(opcode, reg, kRIPReg, kDummy32BitOffset); } else { - // Address the start of the method. - RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low); - if (rl_method.wide) { - rl_method = LoadValueWide(rl_method, kCoreReg); - } else { - rl_method = LoadValue(rl_method, kCoreReg); + // Get the PC to a register and get the anchor. + LIR* anchor; + RegStorage r_pc = GetPcAndAnchor(&anchor); + + load = NewLIR3(opcode, reg, r_pc.GetReg(), kDummy32BitOffset); + load->operands[4] = WrapPointer(anchor); + if (IsTemp(r_pc)) { + FreeTemp(r_pc); } - - load = NewLIR3(opcode, reg, rl_method.reg.GetReg(), 256 /* bogus */); - - // The literal pool needs position independent logic. - store_method_addr_used_ = true; } load->flags.fixup = kFixupLoad; load->target = data_target; diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc index 893b98a49d..efcb9eefb5 100644 --- a/compiler/dex/quick/x86/utility_x86.cc +++ b/compiler/dex/quick/x86/utility_x86.cc @@ -17,6 +17,7 @@ #include "codegen_x86.h" #include "base/logging.h" +#include "dex/mir_graph.h" #include "dex/quick/mir_to_lir-inl.h" #include "dex/dataflow_iterator-inl.h" #include "dex/quick/dex_file_method_inliner.h" @@ -574,7 +575,7 @@ LIR* X86Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) { DCHECK(r_dest.IsDouble()); if (value == 0) { return NewLIR2(kX86XorpdRR, low_reg_val, low_reg_val); - } else if (base_of_code_ != nullptr || cu_->target64) { + } else if (pc_rel_base_reg_.Valid() || cu_->target64) { // We will load the value from the literal area. LIR* data_target = ScanLiteralPoolWide(literal_list_, val_lo, val_hi); if (data_target == NULL) { @@ -589,17 +590,16 @@ LIR* X86Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) { if (cu_->target64) { res = NewLIR3(kX86MovsdRM, low_reg_val, kRIPReg, 256 /* bogus */); } else { - // Address the start of the method. 
- RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low); - if (rl_method.wide) { - rl_method = LoadValueWide(rl_method, kCoreReg); - } else { - rl_method = LoadValue(rl_method, kCoreReg); - } + // Get the PC to a register and get the anchor. + LIR* anchor; + RegStorage r_pc = GetPcAndAnchor(&anchor); - res = LoadBaseDisp(rl_method.reg, 256 /* bogus */, RegStorage::FloatSolo64(low_reg_val), + res = LoadBaseDisp(r_pc, kDummy32BitOffset, RegStorage::FloatSolo64(low_reg_val), kDouble, kNotVolatile); - store_method_addr_used_ = true; + res->operands[4] = WrapPointer(anchor); + if (IsTemp(r_pc)) { + FreeTemp(r_pc); + } } res->target = data_target; res->flags.fixup = kFixupLoad; @@ -954,82 +954,14 @@ LIR* X86Mir2Lir::OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg, RegS return branch; } -void X86Mir2Lir::AnalyzeMIR() { - // Assume we don't need a pointer to the base of the code. - cu_->NewTimingSplit("X86 MIR Analysis"); - store_method_addr_ = false; - - // Walk the MIR looking for interesting items. - PreOrderDfsIterator iter(mir_graph_); - BasicBlock* curr_bb = iter.Next(); - while (curr_bb != NULL) { - AnalyzeBB(curr_bb); - curr_bb = iter.Next(); - } - - // Did we need a pointer to the method code? Not in 64 bit mode. - base_of_code_ = nullptr; - - // store_method_addr_ must be false for x86_64, since RIP addressing is used. - CHECK(!(cu_->target64 && store_method_addr_)); - if (store_method_addr_) { - base_of_code_ = mir_graph_->GetNewCompilerTemp(kCompilerTempBackend, false); - DCHECK(base_of_code_ != nullptr); - } -} - -void X86Mir2Lir::AnalyzeBB(BasicBlock* bb) { - if (bb->block_type == kDead) { - // Ignore dead blocks +void X86Mir2Lir::AnalyzeMIR(RefCounts* core_counts, MIR* mir, uint32_t weight) { + if (cu_->target64) { + Mir2Lir::AnalyzeMIR(core_counts, mir, weight); return; } - for (MIR* mir = bb->first_mir_insn; mir != NULL; mir = mir->next) { - int opcode = mir->dalvikInsn.opcode; - if (MIR::DecodedInstruction::IsPseudoMirOp(opcode)) { - AnalyzeExtendedMIR(opcode, bb, mir); - } else { - AnalyzeMIR(opcode, bb, mir); - } - } -} - - -void X86Mir2Lir::AnalyzeExtendedMIR(int opcode, BasicBlock* bb, MIR* mir) { - switch (opcode) { - // Instructions referencing doubles. - case kMirOpFusedCmplDouble: - case kMirOpFusedCmpgDouble: - AnalyzeFPInstruction(opcode, bb, mir); - break; - case kMirOpConstVector: - if (!cu_->target64) { - store_method_addr_ = true; - } - break; - case kMirOpPackedMultiply: - case kMirOpPackedShiftLeft: - case kMirOpPackedSignedShiftRight: - case kMirOpPackedUnsignedShiftRight: - if (!cu_->target64) { - // Byte emulation requires constants from the literal pool. - OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vC >> 16); - if (opsize == kSignedByte || opsize == kUnsignedByte) { - store_method_addr_ = true; - } - } - break; - default: - // Ignore the rest. - break; - } -} - -void X86Mir2Lir::AnalyzeMIR(int opcode, BasicBlock* bb, MIR* mir) { - // Looking for - // - Do we need a pointer to the code (used for packed switches and double lits)? - // 64 bit uses RIP addressing instead. - + int opcode = mir->dalvikInsn.opcode; + bool uses_pc_rel_load = false; switch (opcode) { // Instructions referencing doubles. 
case Instruction::CMPL_DOUBLE: @@ -1045,34 +977,62 @@ void X86Mir2Lir::AnalyzeMIR(int opcode, BasicBlock* bb, MIR* mir) { case Instruction::MUL_DOUBLE_2ADDR: case Instruction::DIV_DOUBLE_2ADDR: case Instruction::REM_DOUBLE_2ADDR: - AnalyzeFPInstruction(opcode, bb, mir); + case kMirOpFusedCmplDouble: + case kMirOpFusedCmpgDouble: + uses_pc_rel_load = AnalyzeFPInstruction(opcode, mir); break; - // Packed switches and array fills need a pointer to the base of the method. - case Instruction::FILL_ARRAY_DATA: + // Packed switch needs the PC-relative pointer if it's large. case Instruction::PACKED_SWITCH: - if (!cu_->target64) { - store_method_addr_ = true; + if (mir_graph_->GetTable(mir, mir->dalvikInsn.vB)[1] > kSmallSwitchThreshold) { + uses_pc_rel_load = true; } break; + + case kMirOpConstVector: + uses_pc_rel_load = true; + break; + case kMirOpPackedMultiply: + case kMirOpPackedShiftLeft: + case kMirOpPackedSignedShiftRight: + case kMirOpPackedUnsignedShiftRight: + { + // Byte emulation requires constants from the literal pool. + OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vC >> 16); + if (opsize == kSignedByte || opsize == kUnsignedByte) { + uses_pc_rel_load = true; + } + } + break; + case Instruction::INVOKE_STATIC: case Instruction::INVOKE_STATIC_RANGE: - AnalyzeInvokeStatic(opcode, bb, mir); - break; + if (mir_graph_->GetMethodLoweringInfo(mir).IsIntrinsic()) { + uses_pc_rel_load = AnalyzeInvokeStaticIntrinsic(mir); + break; + } + FALLTHROUGH_INTENDED; default: - // Other instructions are not interesting yet. + Mir2Lir::AnalyzeMIR(core_counts, mir, weight); break; } + + if (uses_pc_rel_load) { + DCHECK(pc_rel_temp_ != nullptr); + core_counts[SRegToPMap(pc_rel_temp_->s_reg_low)].count += weight; + } } -void X86Mir2Lir::AnalyzeFPInstruction(int opcode, BasicBlock* bb, MIR* mir) { - UNUSED(bb); +bool X86Mir2Lir::AnalyzeFPInstruction(int opcode, MIR* mir) { + DCHECK(!cu_->target64); // Look at all the uses, and see if they are double constants. uint64_t attrs = MIRGraph::GetDataFlowAttributes(static_cast<Instruction::Code>(opcode)); int next_sreg = 0; if (attrs & DF_UA) { if (attrs & DF_A_WIDE) { - AnalyzeDoubleUse(mir_graph_->GetSrcWide(mir, next_sreg)); + if (AnalyzeDoubleUse(mir_graph_->GetSrcWide(mir, next_sreg))) { + return true; + } next_sreg += 2; } else { next_sreg++; @@ -1080,7 +1040,9 @@ void X86Mir2Lir::AnalyzeFPInstruction(int opcode, BasicBlock* bb, MIR* mir) { } if (attrs & DF_UB) { if (attrs & DF_B_WIDE) { - AnalyzeDoubleUse(mir_graph_->GetSrcWide(mir, next_sreg)); + if (AnalyzeDoubleUse(mir_graph_->GetSrcWide(mir, next_sreg))) { + return true; + } next_sreg += 2; } else { next_sreg++; @@ -1088,15 +1050,39 @@ void X86Mir2Lir::AnalyzeFPInstruction(int opcode, BasicBlock* bb, MIR* mir) { } if (attrs & DF_UC) { if (attrs & DF_C_WIDE) { - AnalyzeDoubleUse(mir_graph_->GetSrcWide(mir, next_sreg)); + if (AnalyzeDoubleUse(mir_graph_->GetSrcWide(mir, next_sreg))) { + return true; + } } } + return false; } -void X86Mir2Lir::AnalyzeDoubleUse(RegLocation use) { +inline bool X86Mir2Lir::AnalyzeDoubleUse(RegLocation use) { // If this is a double literal, we will want it in the literal pool on 32b platforms. - if (use.is_const && !cu_->target64) { - store_method_addr_ = true; + DCHECK(!cu_->target64); + return use.is_const; +} + +bool X86Mir2Lir::AnalyzeInvokeStaticIntrinsic(MIR* mir) { + // 64 bit RIP addressing doesn't need this analysis. + DCHECK(!cu_->target64); + + // Retrieve the type of the intrinsic. 
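  // Only the intrinsics that materialize double constants from the literal pool
  // (abs(double), min/max(double)) need the PC-relative base on 32-bit x86; the switch
  // below returns false for everything else.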
+ MethodReference method_ref = mir_graph_->GetMethodLoweringInfo(mir).GetTargetMethod(); + DCHECK(cu_->compiler_driver->GetMethodInlinerMap() != nullptr); + DexFileMethodInliner* method_inliner = + cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(method_ref.dex_file); + InlineMethod method; + bool is_intrinsic = method_inliner->IsIntrinsic(method_ref.dex_method_index, &method); + DCHECK(is_intrinsic); + + switch (method.opcode) { + case kIntrinsicAbsDouble: + case kIntrinsicMinMaxDouble: + return true; + default: + return false; } } @@ -1128,37 +1114,47 @@ RegLocation X86Mir2Lir::UpdateLocWideTyped(RegLocation loc) { return loc; } -void X86Mir2Lir::AnalyzeInvokeStatic(int opcode, BasicBlock* bb, MIR* mir) { - UNUSED(opcode, bb); - - // 64 bit RIP addressing doesn't need store_method_addr_ set. +LIR* X86Mir2Lir::InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) { + UNUSED(r_tgt); // Call to absolute memory location doesn't need a temporary target register. if (cu_->target64) { - return; + return OpThreadMem(op, GetThreadOffset<8>(trampoline)); + } else { + return OpThreadMem(op, GetThreadOffset<4>(trampoline)); } +} - uint32_t index = mir->dalvikInsn.vB; - DCHECK(cu_->compiler_driver->GetMethodInlinerMap() != nullptr); - DexFileMethodInliner* method_inliner = - cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(cu_->dex_file); - InlineMethod method; - if (method_inliner->IsIntrinsic(index, &method)) { - switch (method.opcode) { - case kIntrinsicAbsDouble: - case kIntrinsicMinMaxDouble: - store_method_addr_ = true; - break; - default: - break; +void X86Mir2Lir::CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs) { + // Start with the default counts. + Mir2Lir::CountRefs(core_counts, fp_counts, num_regs); + + if (pc_rel_temp_ != nullptr) { + // Now, if the dex cache array base temp is used only once outside any loops (weight = 1), + // avoid the promotion, otherwise boost the weight by factor 2 because the full PC-relative + // load sequence is 3 instructions long and by promoting the PC base we save 2 instructions + // per use. + int p_map_idx = SRegToPMap(pc_rel_temp_->s_reg_low); + if (core_counts[p_map_idx].count == 1) { + core_counts[p_map_idx].count = 0; + } else { + core_counts[p_map_idx].count *= 2; } } } -LIR* X86Mir2Lir::InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) { - UNUSED(r_tgt); // Call to absolute memory location doesn't need a temporary target register. - if (cu_->target64) { - return OpThreadMem(op, GetThreadOffset<8>(trampoline)); - } else { - return OpThreadMem(op, GetThreadOffset<4>(trampoline)); +void X86Mir2Lir::DoPromotion() { + if (!cu_->target64) { + pc_rel_temp_ = mir_graph_->GetNewCompilerTemp(kCompilerTempBackend, false); + } + + Mir2Lir::DoPromotion(); + + if (pc_rel_temp_ != nullptr) { + // Now, if the dex cache array base temp is promoted, remember the register but + // always remove the temp's stack location to avoid unnecessarily bloating the stack. 
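  // If the temp did not get a physical register, pc_rel_base_reg_ stays invalid and
  // GetPcAndAnchor() falls back to materializing the PC into a scratch register at each use.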
+ pc_rel_base_reg_ = mir_graph_->reg_location_[pc_rel_temp_->s_reg_low].reg; + DCHECK(!pc_rel_base_reg_.Valid() || !pc_rel_base_reg_.IsFloat()); + mir_graph_->RemoveLastCompilerTemp(kCompilerTempBackend, false, pc_rel_temp_); + pc_rel_temp_ = nullptr; } } diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h index 7dea09a579..57db0158e4 100644 --- a/compiler/dex/quick/x86/x86_lir.h +++ b/compiler/dex/quick/x86/x86_lir.h @@ -635,8 +635,6 @@ enum X86OpCode { kX86CallT, // call fs:[disp]; fs: is equal to Thread::Current(); lir operands - 0: disp kX86CallI, // call <relative> - 0: disp; Used for core.oat linking only kX86Ret, // ret; no lir operands - kX86StartOfMethod, // call 0; pop reg; sub reg, # - generate start of method into reg - // lir operands - 0: reg kX86PcRelLoadRA, // mov reg, [base + index * scale + PC relative displacement] // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: table kX86PcRelAdr, // mov reg, PC relative displacement; lir operands - 0: reg, 1: table @@ -670,7 +668,6 @@ enum X86EncodingKind { kRegMemCond, // RM instruction kind followed by a condition. kJmp, kJcc, kCall, // Branch instruction kinds. kPcRel, // Operation with displacement that is PC relative - kMacro, // An instruction composing multiple others kUnimplemented // Encoding used when an instruction isn't yet implemented. }; diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index 100d49a99e..c2b837512c 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -31,6 +31,7 @@ #include "base/timing_logger.h" #include "class_linker.h" #include "compiled_class.h" +#include "compiled_method.h" #include "compiler.h" #include "compiler_driver-inl.h" #include "dex_compilation_unit.h" @@ -62,6 +63,7 @@ #include "thread_pool.h" #include "trampolines/trampoline_compiler.h" #include "transaction.h" +#include "utils/dex_cache_arrays_layout-inl.h" #include "utils/swap_space.h" #include "verifier/method_verifier.h" #include "verifier/method_verifier-inl.h" @@ -348,6 +350,7 @@ CompilerDriver::CompilerDriver(const CompilerOptions* compiler_options, verification_results_(verification_results), method_inliner_map_(method_inliner_map), compiler_(Compiler::Create(this, compiler_kind)), + compiler_kind_(compiler_kind), instruction_set_(instruction_set), instruction_set_features_(instruction_set_features), freezing_constructor_lock_("freezing constructor lock"), @@ -1173,6 +1176,13 @@ uint32_t CompilerDriver::GetReferenceDisableFlagOffset() const { return klass->GetDisableIntrinsicFlagOffset().Uint32Value(); } +DexCacheArraysLayout CompilerDriver::GetDexCacheArraysLayout(const DexFile* dex_file) { + // Currently only image dex caches have fixed array layout. + return IsImage() && GetSupportBootImageFixup() + ? DexCacheArraysLayout(dex_file) + : DexCacheArraysLayout(); +} + void CompilerDriver::ProcessedInstanceField(bool resolved) { if (!resolved) { stats_->UnresolvedInstanceField(); @@ -2205,10 +2215,8 @@ void CompilerDriver::CompileMethod(Thread* self, const DexFile::CodeItem* code_i InstructionSetHasGenericJniStub(instruction_set_)) { // Leaving this empty will trigger the generic JNI version } else { - if (instruction_set_ != kMips64) { // Use generic JNI for Mips64 (temporarily). 
- compiled_method = compiler_->JniCompile(access_flags, method_idx, dex_file); - CHECK(compiled_method != nullptr); - } + compiled_method = compiler_->JniCompile(access_flags, method_idx, dex_file); + CHECK(compiled_method != nullptr); } } else if ((access_flags & kAccAbstract) != 0) { // Abstract methods don't have code. @@ -2246,7 +2254,7 @@ void CompilerDriver::CompileMethod(Thread* self, const DexFile::CodeItem* code_i // Count non-relative linker patches. size_t non_relative_linker_patch_count = 0u; for (const LinkerPatch& patch : compiled_method->GetPatches()) { - if (patch.Type() != kLinkerPatchCallRelative) { + if (!patch.IsPcRelative()) { ++non_relative_linker_patch_count; } } @@ -2263,8 +2271,11 @@ void CompilerDriver::CompileMethod(Thread* self, const DexFile::CodeItem* code_i DCHECK(GetCompiledMethod(method_ref) != nullptr) << PrettyMethod(method_idx, dex_file); } - // Done compiling, delete the verified method to reduce native memory usage. - verification_results_->RemoveVerifiedMethod(method_ref); + // Done compiling, delete the verified method to reduce native memory usage. Do not delete in + // optimizing compiler, which may need the verified method again for inlining. + if (compiler_kind_ != Compiler::kOptimizing) { + verification_results_->RemoveVerifiedMethod(method_ref); + } if (self->IsExceptionPending()) { ScopedObjectAccess soa(self); @@ -2359,44 +2370,6 @@ bool CompilerDriver::WriteElf(const std::string& android_root, SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { return compiler_->WriteElf(file, oat_writer, dex_files, android_root, is_host); } -void CompilerDriver::InstructionSetToLLVMTarget(InstructionSet instruction_set, - std::string* target_triple, - std::string* target_cpu, - std::string* target_attr) { - switch (instruction_set) { - case kThumb2: - *target_triple = "thumb-none-linux-gnueabi"; - *target_cpu = "cortex-a9"; - *target_attr = "+thumb2,+neon,+neonfp,+vfp3,+db"; - break; - - case kArm: - *target_triple = "armv7-none-linux-gnueabi"; - // TODO: Fix for Nexus S. - *target_cpu = "cortex-a9"; - // TODO: Fix for Xoom. 
- *target_attr = "+v7,+neon,+neonfp,+vfp3,+db"; - break; - - case kX86: - *target_triple = "i386-pc-linux-gnu"; - *target_attr = ""; - break; - - case kX86_64: - *target_triple = "x86_64-pc-linux-gnu"; - *target_attr = ""; - break; - - case kMips: - *target_triple = "mipsel-unknown-linux"; - *target_attr = "mips32r2"; - break; - - default: - LOG(FATAL) << "Unknown instruction set: " << instruction_set; - } - } bool CompilerDriver::SkipCompilation(const std::string& method_name) { if (!profile_present_) { @@ -2438,7 +2411,7 @@ std::string CompilerDriver::GetMemoryUsageString(bool extended) const { gc::Heap* const heap = runtime->GetHeap(); oss << "arena alloc=" << PrettySize(arena_pool->GetBytesAllocated()); oss << " java alloc=" << PrettySize(heap->GetBytesAllocated()); -#ifdef HAVE_MALLOC_H +#if defined(__BIONIC__) || defined(__GLIBC__) struct mallinfo info = mallinfo(); const size_t allocated_space = static_cast<size_t>(info.uordblks); const size_t free_space = static_cast<size_t>(info.fordblks); diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h index b825293c33..a6ed5590dc 100644 --- a/compiler/driver/compiler_driver.h +++ b/compiler/driver/compiler_driver.h @@ -26,11 +26,8 @@ #include "base/mutex.h" #include "base/timing_logger.h" #include "class_reference.h" -#include "compiled_method.h" #include "compiler.h" #include "dex_file.h" -#include "dex/verified_method.h" -#include "driver/compiler_options.h" #include "invoke_type.h" #include "method_reference.h" #include "mirror/class.h" // For mirror::Class::Status. @@ -39,7 +36,9 @@ #include "runtime.h" #include "safe_map.h" #include "thread_pool.h" +#include "utils/array_ref.h" #include "utils/dedupe_set.h" +#include "utils/dex_cache_arrays_layout.h" #include "utils/swap_space.h" #include "utils.h" @@ -54,6 +53,7 @@ class MethodVerifier; } // namespace verifier class CompiledClass; +class CompiledMethod; class CompilerOptions; class DexCompilationUnit; class DexFileToMethodInlinerMap; @@ -62,6 +62,9 @@ class InstructionSetFeatures; class OatWriter; class ParallelCompilationManager; class ScopedObjectAccess; +template <class Allocator> class SrcMap; +class SrcMapElem; +using SwapSrcMap = SrcMap<SwapAllocator<SrcMapElem>>; template<class T> class Handle; class TimingLogger; class VerificationResults; @@ -318,6 +321,10 @@ class CompilerDriver { bool IsMethodsClassInitialized(mirror::Class* referrer_class, mirror::ArtMethod* resolved_method) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + // Get the layout of dex cache arrays for a dex file. Returns invalid layout if the + // dex cache arrays don't have a fixed layout. + DexCacheArraysLayout GetDexCacheArraysLayout(const DexFile* dex_file); + void ProcessedInstanceField(bool resolved); void ProcessedStaticField(bool resolved, bool local); void ProcessedInvoke(InvokeType invoke_type, int flags); @@ -378,12 +385,6 @@ class CompilerDriver { OatWriter* oat_writer, File* file); - // TODO: move to a common home for llvm helpers once quick/portable are merged. 
- static void InstructionSetToLLVMTarget(InstructionSet instruction_set, - std::string* target_triple, - std::string* target_cpu, - std::string* target_attr); - void SetCompilerContext(void* compiler_context) { compiler_context_ = compiler_context; } @@ -550,6 +551,7 @@ class CompilerDriver { DexFileToMethodInlinerMap* const method_inliner_map_; std::unique_ptr<Compiler> compiler_; + Compiler::Kind compiler_kind_; const InstructionSet instruction_set_; const InstructionSetFeatures* const instruction_set_features_; diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc index e436f52db3..fc00c926b2 100644 --- a/compiler/driver/compiler_options.cc +++ b/compiler/driver/compiler_options.cc @@ -42,6 +42,11 @@ CompilerOptions::CompilerOptions() init_failure_output_(nullptr) { } +CompilerOptions::~CompilerOptions() { + // The destructor looks empty but it destroys a PassManagerOptions object. We keep it here + // because we don't want to include the PassManagerOptions definition from the header file. +} + CompilerOptions::CompilerOptions(CompilerFilter compiler_filter, size_t huge_method_threshold, size_t large_method_threshold, diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h index d06ec278ab..f7ea385e19 100644 --- a/compiler/driver/compiler_options.h +++ b/compiler/driver/compiler_options.h @@ -53,6 +53,7 @@ class CompilerOptions FINAL { static const bool kDefaultIncludePatchInformation = false; CompilerOptions(); + ~CompilerOptions(); CompilerOptions(CompilerFilter compiler_filter, size_t huge_method_threshold, diff --git a/compiler/dwarf/debug_frame_opcode_writer.h b/compiler/dwarf/debug_frame_opcode_writer.h new file mode 100644 index 0000000000..d0d182106f --- /dev/null +++ b/compiler/dwarf/debug_frame_opcode_writer.h @@ -0,0 +1,333 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_DWARF_DEBUG_FRAME_OPCODE_WRITER_H_ +#define ART_COMPILER_DWARF_DEBUG_FRAME_OPCODE_WRITER_H_ + +#include "dwarf.h" +#include "register.h" +#include "writer.h" +#include "utils.h" + +namespace art { +namespace dwarf { + +// Writer for .debug_frame opcodes (DWARF-3). +// See the DWARF specification for the precise meaning of the opcodes. +// The writer is very light-weight, however it will do the following for you: +// * Choose the most compact encoding of a given opcode. +// * Keep track of current state and convert absolute values to deltas. +// * Divide by header-defined factors as appropriate. +template<typename Allocator = std::allocator<uint8_t> > +class DebugFrameOpCodeWriter : private Writer<Allocator> { + public: + // To save space, DWARF divides most offsets by header-defined factors. + // They are used in integer divisions, so we make them constants. + // We usually subtract from stack base pointer, so making the factor + // negative makes the encoded values positive and thus easier to encode. 
+ static constexpr int kDataAlignmentFactor = -4; + static constexpr int kCodeAlignmentFactor = 1; + + // Explicitely advance the program counter to given location. + void ALWAYS_INLINE AdvancePC(int absolute_pc) { + DCHECK_GE(absolute_pc, current_pc_); + if (UNLIKELY(enabled_)) { + int delta = FactorCodeOffset(absolute_pc - current_pc_); + if (delta != 0) { + if (delta <= 0x3F) { + this->PushUint8(DW_CFA_advance_loc | delta); + } else if (delta <= UINT8_MAX) { + this->PushUint8(DW_CFA_advance_loc1); + this->PushUint8(delta); + } else if (delta <= UINT16_MAX) { + this->PushUint8(DW_CFA_advance_loc2); + this->PushUint16(delta); + } else { + this->PushUint8(DW_CFA_advance_loc4); + this->PushUint32(delta); + } + } + current_pc_ = absolute_pc; + } + } + + // Override this method to automatically advance the PC before each opcode. + virtual void ImplicitlyAdvancePC() { } + + // Common alias in assemblers - spill relative to current stack pointer. + void ALWAYS_INLINE RelOffset(Reg reg, int offset) { + Offset(reg, offset - current_cfa_offset_); + } + + // Common alias in assemblers - increase stack frame size. + void ALWAYS_INLINE AdjustCFAOffset(int delta) { + DefCFAOffset(current_cfa_offset_ + delta); + } + + // Custom alias - spill many registers based on bitmask. + void ALWAYS_INLINE RelOffsetForMany(Reg reg_base, int offset, + uint32_t reg_mask, int reg_size) { + DCHECK(reg_size == 4 || reg_size == 8); + if (UNLIKELY(enabled_)) { + for (int i = 0; reg_mask != 0u; reg_mask >>= 1, i++) { + // Skip zero bits and go to the set bit. + int num_zeros = CTZ(reg_mask); + i += num_zeros; + reg_mask >>= num_zeros; + RelOffset(Reg(reg_base.num() + i), offset); + offset += reg_size; + } + } + } + + // Custom alias - unspill many registers based on bitmask. + void ALWAYS_INLINE RestoreMany(Reg reg_base, uint32_t reg_mask) { + if (UNLIKELY(enabled_)) { + for (int i = 0; reg_mask != 0u; reg_mask >>= 1, i++) { + // Skip zero bits and go to the set bit. + int num_zeros = CTZ(reg_mask); + i += num_zeros; + reg_mask >>= num_zeros; + Restore(Reg(reg_base.num() + i)); + } + } + } + + void ALWAYS_INLINE Nop() { + if (UNLIKELY(enabled_)) { + this->PushUint8(DW_CFA_nop); + } + } + + void ALWAYS_INLINE Offset(Reg reg, int offset) { + if (UNLIKELY(enabled_)) { + ImplicitlyAdvancePC(); + int factored_offset = FactorDataOffset(offset); // May change sign. + if (factored_offset >= 0) { + if (0 <= reg.num() && reg.num() <= 0x3F) { + this->PushUint8(DW_CFA_offset | reg.num()); + this->PushUleb128(factored_offset); + } else { + this->PushUint8(DW_CFA_offset_extended); + this->PushUleb128(reg.num()); + this->PushUleb128(factored_offset); + } + } else { + uses_dwarf3_features_ = true; + this->PushUint8(DW_CFA_offset_extended_sf); + this->PushUleb128(reg.num()); + this->PushSleb128(factored_offset); + } + } + } + + void ALWAYS_INLINE Restore(Reg reg) { + if (UNLIKELY(enabled_)) { + ImplicitlyAdvancePC(); + if (0 <= reg.num() && reg.num() <= 0x3F) { + this->PushUint8(DW_CFA_restore | reg.num()); + } else { + this->PushUint8(DW_CFA_restore_extended); + this->PushUleb128(reg.num()); + } + } + } + + void ALWAYS_INLINE Undefined(Reg reg) { + if (UNLIKELY(enabled_)) { + ImplicitlyAdvancePC(); + this->PushUint8(DW_CFA_undefined); + this->PushUleb128(reg.num()); + } + } + + void ALWAYS_INLINE SameValue(Reg reg) { + if (UNLIKELY(enabled_)) { + ImplicitlyAdvancePC(); + this->PushUint8(DW_CFA_same_value); + this->PushUleb128(reg.num()); + } + } + + // The previous value of "reg" is stored in register "new_reg". 
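As a usage sketch for the opcode writer above (illustrative only, not part of the patch; register numbers follow the DWARF x86 mapping exercised by the tests later in this change), a simple prologue that pushes two registers could be described as:

  #include <vector>
  #include "dwarf/debug_frame_opcode_writer.h"

  dwarf::DebugFrameOpCodeWriter<> cfi;
  cfi.DefCFA(dwarf::Reg(4), 4);      // On entry: CFA = ESP + 4 (return address on the stack).
  cfi.AdvancePC(1);                  // After the first instruction...
  cfi.AdjustCFAOffset(8);            // ...the frame grew by 8 bytes (two pushes).
  cfi.RelOffset(dwarf::Reg(5), 0);   // EBP spilled at the current stack pointer.
  cfi.RelOffset(dwarf::Reg(6), 4);   // ESI spilled 4 bytes above it.
  const std::vector<uint8_t>* encoded = cfi.data();  // Compact DW_CFA_* byte stream.

The writer picks the DW_CFA_advance_loc, DW_CFA_def_cfa_offset and DW_CFA_offset encodings automatically and converts the absolute PC and SP-relative offsets into the deltas DWARF expects.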
+ void ALWAYS_INLINE Register(Reg reg, Reg new_reg) { + if (UNLIKELY(enabled_)) { + ImplicitlyAdvancePC(); + this->PushUint8(DW_CFA_register); + this->PushUleb128(reg.num()); + this->PushUleb128(new_reg.num()); + } + } + + void ALWAYS_INLINE RememberState() { + if (UNLIKELY(enabled_)) { + ImplicitlyAdvancePC(); + this->PushUint8(DW_CFA_remember_state); + } + } + + void ALWAYS_INLINE RestoreState() { + if (UNLIKELY(enabled_)) { + ImplicitlyAdvancePC(); + this->PushUint8(DW_CFA_restore_state); + } + } + + void ALWAYS_INLINE DefCFA(Reg reg, int offset) { + if (UNLIKELY(enabled_)) { + ImplicitlyAdvancePC(); + if (offset >= 0) { + this->PushUint8(DW_CFA_def_cfa); + this->PushUleb128(reg.num()); + this->PushUleb128(offset); // Non-factored. + } else { + uses_dwarf3_features_ = true; + this->PushUint8(DW_CFA_def_cfa_sf); + this->PushUleb128(reg.num()); + this->PushSleb128(FactorDataOffset(offset)); + } + } + current_cfa_offset_ = offset; + } + + void ALWAYS_INLINE DefCFARegister(Reg reg) { + if (UNLIKELY(enabled_)) { + ImplicitlyAdvancePC(); + this->PushUint8(DW_CFA_def_cfa_register); + this->PushUleb128(reg.num()); + } + } + + void ALWAYS_INLINE DefCFAOffset(int offset) { + if (UNLIKELY(enabled_)) { + if (current_cfa_offset_ != offset) { + ImplicitlyAdvancePC(); + if (offset >= 0) { + this->PushUint8(DW_CFA_def_cfa_offset); + this->PushUleb128(offset); // Non-factored. + } else { + uses_dwarf3_features_ = true; + this->PushUint8(DW_CFA_def_cfa_offset_sf); + this->PushSleb128(FactorDataOffset(offset)); + } + } + } + // Uncoditional so that the user can still get and check the value. + current_cfa_offset_ = offset; + } + + void ALWAYS_INLINE ValOffset(Reg reg, int offset) { + if (UNLIKELY(enabled_)) { + ImplicitlyAdvancePC(); + uses_dwarf3_features_ = true; + int factored_offset = FactorDataOffset(offset); // May change sign. 
+ if (factored_offset >= 0) { + this->PushUint8(DW_CFA_val_offset); + this->PushUleb128(reg.num()); + this->PushUleb128(factored_offset); + } else { + this->PushUint8(DW_CFA_val_offset_sf); + this->PushUleb128(reg.num()); + this->PushSleb128(factored_offset); + } + } + } + + void ALWAYS_INLINE DefCFAExpression(void * expr, int expr_size) { + if (UNLIKELY(enabled_)) { + ImplicitlyAdvancePC(); + uses_dwarf3_features_ = true; + this->PushUint8(DW_CFA_def_cfa_expression); + this->PushUleb128(expr_size); + this->PushData(expr, expr_size); + } + } + + void ALWAYS_INLINE Expression(Reg reg, void * expr, int expr_size) { + if (UNLIKELY(enabled_)) { + ImplicitlyAdvancePC(); + uses_dwarf3_features_ = true; + this->PushUint8(DW_CFA_expression); + this->PushUleb128(reg.num()); + this->PushUleb128(expr_size); + this->PushData(expr, expr_size); + } + } + + void ALWAYS_INLINE ValExpression(Reg reg, void * expr, int expr_size) { + if (UNLIKELY(enabled_)) { + ImplicitlyAdvancePC(); + uses_dwarf3_features_ = true; + this->PushUint8(DW_CFA_val_expression); + this->PushUleb128(reg.num()); + this->PushUleb128(expr_size); + this->PushData(expr, expr_size); + } + } + + bool IsEnabled() const { return enabled_; } + + void SetEnabled(bool value) { enabled_ = value; } + + int GetCurrentPC() const { return current_pc_; } + + int GetCurrentCFAOffset() const { return current_cfa_offset_; } + + void SetCurrentCFAOffset(int offset) { current_cfa_offset_ = offset; } + + using Writer<Allocator>::data; + + DebugFrameOpCodeWriter(bool enabled = true, + const Allocator& alloc = Allocator()) + : Writer<Allocator>(&opcodes_), + enabled_(enabled), + opcodes_(alloc), + current_cfa_offset_(0), + current_pc_(0), + uses_dwarf3_features_(false) { + if (enabled) { + // Best guess based on couple of observed outputs. + opcodes_.reserve(16); + } + } + + virtual ~DebugFrameOpCodeWriter() { } + + protected: + int FactorDataOffset(int offset) const { + DCHECK_EQ(offset % kDataAlignmentFactor, 0); + return offset / kDataAlignmentFactor; + } + + int FactorCodeOffset(int offset) const { + DCHECK_EQ(offset % kCodeAlignmentFactor, 0); + return offset / kCodeAlignmentFactor; + } + + bool enabled_; // If disabled all writes are no-ops. + std::vector<uint8_t, Allocator> opcodes_; + int current_cfa_offset_; + int current_pc_; + bool uses_dwarf3_features_; + + private: + DISALLOW_COPY_AND_ASSIGN(DebugFrameOpCodeWriter); +}; + +} // namespace dwarf +} // namespace art + +#endif // ART_COMPILER_DWARF_DEBUG_FRAME_OPCODE_WRITER_H_ diff --git a/compiler/dwarf/debug_info_entry_writer.h b/compiler/dwarf/debug_info_entry_writer.h new file mode 100644 index 0000000000..c0350b6f8a --- /dev/null +++ b/compiler/dwarf/debug_info_entry_writer.h @@ -0,0 +1,248 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_DWARF_DEBUG_INFO_ENTRY_WRITER_H_ +#define ART_COMPILER_DWARF_DEBUG_INFO_ENTRY_WRITER_H_ + +#include <unordered_map> + +#include "dwarf.h" +#include "leb128.h" +#include "writer.h" + +namespace art { +namespace dwarf { + +// 32-bit FNV-1a hash function which we use to find duplicate abbreviations. +// See http://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function +template< typename Allocator > +struct FNVHash { + size_t operator()(const std::vector<uint8_t, Allocator>& v) const { + uint32_t hash = 2166136261u; + for (size_t i = 0; i < v.size(); i++) { + hash = (hash ^ v[i]) * 16777619u; + } + return hash; + } +}; + +/* + * Writer for debug information entries (DIE). + * It also handles generation of abbreviations. + * + * Usage: + * StartTag(DW_TAG_compile_unit, DW_CHILDREN_yes); + * WriteStrp(DW_AT_producer, "Compiler name", debug_str); + * StartTag(DW_TAG_subprogram, DW_CHILDREN_no); + * WriteStrp(DW_AT_name, "Foo", debug_str); + * EndTag(); + * EndTag(); + */ +template< typename Allocator = std::allocator<uint8_t> > +class DebugInfoEntryWriter FINAL : private Writer<Allocator> { + public: + // Start debugging information entry. + void StartTag(Tag tag, Children children) { + DCHECK(has_children) << "This tag can not have nested tags"; + if (inside_entry_) { + // Write abbrev code for the previous entry. + this->UpdateUleb128(abbrev_code_offset_, EndAbbrev()); + inside_entry_ = false; + } + StartAbbrev(tag, children); + // Abbrev code placeholder of sufficient size. + abbrev_code_offset_ = this->data()->size(); + this->PushUleb128(NextAbbrevCode()); + depth_++; + inside_entry_ = true; + has_children = (children == DW_CHILDREN_yes); + } + + // End debugging information entry. + void EndTag() { + DCHECK_GT(depth_, 0); + if (inside_entry_) { + // Write abbrev code for this tag. + this->UpdateUleb128(abbrev_code_offset_, EndAbbrev()); + inside_entry_ = false; + } + if (has_children) { + this->PushUint8(0); // End of children. + } + depth_--; + has_children = true; // Parent tag obviously has children. + } + + void WriteAddr(Attribute attrib, uint64_t value) { + AddAbbrevAttribute(attrib, DW_FORM_addr); + if (is64bit_) { + this->PushUint64(value); + } else { + this->PushUint32(value); + } + } + + void WriteBlock(Attribute attrib, const void* ptr, int size) { + AddAbbrevAttribute(attrib, DW_FORM_block); + this->PushUleb128(size); + this->PushData(ptr, size); + } + + void WriteData1(Attribute attrib, uint8_t value) { + AddAbbrevAttribute(attrib, DW_FORM_data1); + this->PushUint8(value); + } + + void WriteData2(Attribute attrib, uint16_t value) { + AddAbbrevAttribute(attrib, DW_FORM_data2); + this->PushUint16(value); + } + + void WriteData4(Attribute attrib, uint32_t value) { + AddAbbrevAttribute(attrib, DW_FORM_data4); + this->PushUint32(value); + } + + void WriteData8(Attribute attrib, uint64_t value) { + AddAbbrevAttribute(attrib, DW_FORM_data8); + this->PushUint64(value); + } + + void WriteSdata(Attribute attrib, int value) { + AddAbbrevAttribute(attrib, DW_FORM_sdata); + this->PushSleb128(value); + } + + void WriteUdata(Attribute attrib, int value) { + AddAbbrevAttribute(attrib, DW_FORM_udata); + this->PushUleb128(value); + } + + void WriteUdata(Attribute attrib, uint32_t value) { + AddAbbrevAttribute(attrib, DW_FORM_udata); + this->PushUleb128(value); + } + + void WriteFlag(Attribute attrib, bool value) { + AddAbbrevAttribute(attrib, DW_FORM_flag); + this->PushUint8(value ? 
1 : 0); + } + + void WriteRef4(Attribute attrib, int cu_offset) { + AddAbbrevAttribute(attrib, DW_FORM_ref4); + this->PushUint32(cu_offset); + } + + void WriteRef(Attribute attrib, int cu_offset) { + AddAbbrevAttribute(attrib, DW_FORM_ref_udata); + this->PushUleb128(cu_offset); + } + + void WriteString(Attribute attrib, const char* value) { + AddAbbrevAttribute(attrib, DW_FORM_string); + this->PushString(value); + } + + void WriteStrp(Attribute attrib, int address) { + AddAbbrevAttribute(attrib, DW_FORM_strp); + this->PushUint32(address); + } + + void WriteStrp(Attribute attrib, const char* value, std::vector<uint8_t>* debug_str) { + AddAbbrevAttribute(attrib, DW_FORM_strp); + int address = debug_str->size(); + debug_str->insert(debug_str->end(), value, value + strlen(value) + 1); + this->PushUint32(address); + } + + bool is64bit() const { return is64bit_; } + + using Writer<Allocator>::data; + + DebugInfoEntryWriter(bool is64bitArch, + std::vector<uint8_t, Allocator>* debug_abbrev, + const Allocator& alloc = Allocator()) + : Writer<Allocator>(&entries_), + debug_abbrev_(debug_abbrev), + current_abbrev_(alloc), + abbrev_codes_(alloc), + entries_(alloc), + is64bit_(is64bitArch) { + debug_abbrev_.PushUint8(0); // Add abbrev table terminator. + } + + ~DebugInfoEntryWriter() { + DCHECK_EQ(depth_, 0); + } + + private: + // Start abbreviation declaration. + void StartAbbrev(Tag tag, Children children) { + DCHECK(!inside_entry_); + current_abbrev_.clear(); + EncodeUnsignedLeb128(¤t_abbrev_, tag); + current_abbrev_.push_back(children); + } + + // Add attribute specification. + void AddAbbrevAttribute(Attribute name, Form type) { + DCHECK(inside_entry_) << "Call StartTag before adding attributes."; + EncodeUnsignedLeb128(¤t_abbrev_, name); + EncodeUnsignedLeb128(¤t_abbrev_, type); + } + + int NextAbbrevCode() { + return 1 + abbrev_codes_.size(); + } + + // End abbreviation declaration and return its code. + int EndAbbrev() { + DCHECK(inside_entry_); + auto it = abbrev_codes_.insert(std::make_pair(std::move(current_abbrev_), + NextAbbrevCode())); + int abbrev_code = it.first->second; + if (UNLIKELY(it.second)) { // Inserted new entry. + const std::vector<uint8_t, Allocator>& abbrev = it.first->first; + debug_abbrev_.Pop(); // Remove abbrev table terminator. + debug_abbrev_.PushUleb128(abbrev_code); + debug_abbrev_.PushData(abbrev.data(), abbrev.size()); + debug_abbrev_.PushUint8(0); // Attribute list end. + debug_abbrev_.PushUint8(0); // Attribute list end. + debug_abbrev_.PushUint8(0); // Add abbrev table terminator. + } + return abbrev_code; + } + + private: + // Fields for writing and deduplication of abbrevs. + Writer<Allocator> debug_abbrev_; + std::vector<uint8_t, Allocator> current_abbrev_; + std::unordered_map<std::vector<uint8_t, Allocator>, int, + FNVHash<Allocator> > abbrev_codes_; + + // Fields for writing of debugging information entries. + std::vector<uint8_t, Allocator> entries_; + bool is64bit_; + int depth_ = 0; + size_t abbrev_code_offset_ = 0; // Location to patch once we know the code. + bool inside_entry_ = false; // Entry ends at first child (if any). 
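One property of the abbreviation deduplication above worth spelling out (illustrative sketch, not from the patch): entries with an identical tag/attribute layout get the same abbreviation code, so a repeated DW_TAG_subprogram reuses the code of the first one instead of growing .debug_abbrev. This is what the DebugInfo test later in this change verifies against objdump.

  #include <vector>
  #include "dwarf/debug_info_entry_writer.h"

  std::vector<uint8_t> debug_abbrev;
  dwarf::DebugInfoEntryWriter<> info(/* is64bit */ false, &debug_abbrev);
  info.StartTag(dwarf::DW_TAG_compile_unit, dwarf::DW_CHILDREN_yes);  // Becomes abbrev code 1.
  info.StartTag(dwarf::DW_TAG_subprogram, dwarf::DW_CHILDREN_no);     // Becomes abbrev code 2.
  info.EndTag();
  info.StartTag(dwarf::DW_TAG_subprogram, dwarf::DW_CHILDREN_no);     // Reuses abbrev code 2.
  info.EndTag();
  info.EndTag();  // DW_TAG_compile_unit.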
+ bool has_children = true; +}; + +} // namespace dwarf +} // namespace art + +#endif // ART_COMPILER_DWARF_DEBUG_INFO_ENTRY_WRITER_H_ diff --git a/compiler/dwarf/debug_line_opcode_writer.h b/compiler/dwarf/debug_line_opcode_writer.h new file mode 100644 index 0000000000..f34acee647 --- /dev/null +++ b/compiler/dwarf/debug_line_opcode_writer.h @@ -0,0 +1,243 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_DWARF_DEBUG_LINE_OPCODE_WRITER_H_ +#define ART_COMPILER_DWARF_DEBUG_LINE_OPCODE_WRITER_H_ + +#include "dwarf.h" +#include "writer.h" + +namespace art { +namespace dwarf { + +// Writer for the .debug_line opcodes (DWARF-3). +// The writer is very light-weight, however it will do the following for you: +// * Choose the most compact encoding of a given opcode. +// * Keep track of current state and convert absolute values to deltas. +// * Divide by header-defined factors as appropriate. +template<typename Allocator = std::allocator<uint8_t>> +class DebugLineOpCodeWriter FINAL : private Writer<Allocator> { + public: + static constexpr int kOpcodeBase = 13; + static constexpr bool kDefaultIsStmt = true; + static constexpr int kLineBase = -5; + static constexpr int kLineRange = 14; + + void AddRow() { + this->PushUint8(DW_LNS_copy); + } + + void AdvancePC(uint64_t absolute_address) { + DCHECK_NE(current_address_, 0u); // Use SetAddress for the first advance. 
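  // Illustrative call sequence (assumed values, mirroring the DebugLine test below):
  //   opcodes.SetAddress(0x01000000);  // First location: long, relocatable form.
  //   opcodes.AdvancePC(0x01000100);   // Later locations: compact DW_LNS_advance_pc delta.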
+ DCHECK_GE(absolute_address, current_address_); + if (absolute_address != current_address_) { + uint64_t delta = FactorCodeOffset(absolute_address - current_address_); + if (delta <= INT32_MAX) { + this->PushUint8(DW_LNS_advance_pc); + this->PushUleb128(static_cast<int>(delta)); + current_address_ = absolute_address; + } else { + SetAddress(absolute_address); + } + } + } + + void AdvanceLine(int absolute_line) { + int delta = absolute_line - current_line_; + if (delta != 0) { + this->PushUint8(DW_LNS_advance_line); + this->PushSleb128(delta); + current_line_ = absolute_line; + } + } + + void SetFile(int file) { + if (current_file_ != file) { + this->PushUint8(DW_LNS_set_file); + this->PushUleb128(file); + current_file_ = file; + } + } + + void SetColumn(int column) { + this->PushUint8(DW_LNS_set_column); + this->PushUleb128(column); + } + + void NegateStmt() { + this->PushUint8(DW_LNS_negate_stmt); + } + + void SetBasicBlock() { + this->PushUint8(DW_LNS_set_basic_block); + } + + void SetPrologueEnd() { + uses_dwarf3_features_ = true; + this->PushUint8(DW_LNS_set_prologue_end); + } + + void SetEpilogueBegin() { + uses_dwarf3_features_ = true; + this->PushUint8(DW_LNS_set_epilogue_begin); + } + + void SetISA(int isa) { + uses_dwarf3_features_ = true; + this->PushUint8(DW_LNS_set_isa); + this->PushUleb128(isa); + } + + void EndSequence() { + this->PushUint8(0); + this->PushUleb128(1); + this->PushUint8(DW_LNE_end_sequence); + current_address_ = 0; + current_file_ = 1; + current_line_ = 1; + } + + // Uncoditionally set address using the long encoding. + // This gives the linker opportunity to relocate the address. + void SetAddress(uint64_t absolute_address) { + DCHECK_GE(absolute_address, current_address_); + FactorCodeOffset(absolute_address); // Check if it is factorable. + this->PushUint8(0); + if (use_64bit_address_) { + this->PushUleb128(1 + 8); + this->PushUint8(DW_LNE_set_address); + this->PushUint64(absolute_address); + } else { + this->PushUleb128(1 + 4); + this->PushUint8(DW_LNE_set_address); + this->PushUint32(absolute_address); + } + current_address_ = absolute_address; + } + + void DefineFile(const char* filename, + int directory_index, + int modification_time, + int file_size) { + int size = 1 + + strlen(filename) + 1 + + UnsignedLeb128Size(directory_index) + + UnsignedLeb128Size(modification_time) + + UnsignedLeb128Size(file_size); + this->PushUint8(0); + this->PushUleb128(size); + size_t start = data()->size(); + this->PushUint8(DW_LNE_define_file); + this->PushString(filename); + this->PushUleb128(directory_index); + this->PushUleb128(modification_time); + this->PushUleb128(file_size); + DCHECK_EQ(start + size, data()->size()); + } + + // Compact address and line opcode. + void AddRow(uint64_t absolute_address, int absolute_line) { + DCHECK_GE(absolute_address, current_address_); + + // If the address is definitely too far, use the long encoding. + uint64_t delta_address = FactorCodeOffset(absolute_address - current_address_); + if (delta_address > UINT8_MAX) { + AdvancePC(absolute_address); + delta_address = 0; + } + + // If the line is definitely too far, use the long encoding. + int delta_line = absolute_line - current_line_; + if (!(kLineBase <= delta_line && delta_line < kLineBase + kLineRange)) { + AdvanceLine(absolute_line); + delta_line = 0; + } + + // Both address and line should be reasonable now. Use the short encoding. 
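  // Worked example with the header constants above (kOpcodeBase 13, kLineBase -5,
  // kLineRange 14): a factored address delta of 2 and a line delta of +1 give
  // opcode = 13 + (1 - (-5)) + 2 * 14 = 47, i.e. a single special-opcode byte.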
+ int opcode = kOpcodeBase + (delta_line - kLineBase) + + (static_cast<int>(delta_address) * kLineRange); + if (opcode > UINT8_MAX) { + // If the address is still too far, try to increment it by const amount. + int const_advance = (0xFF - kOpcodeBase) / kLineRange; + opcode -= (kLineRange * const_advance); + if (opcode <= UINT8_MAX) { + this->PushUint8(DW_LNS_const_add_pc); + } else { + // Give up and use long encoding for address. + AdvancePC(absolute_address); + // Still use the opcode to do line advance and copy. + opcode = kOpcodeBase + (delta_line - kLineBase); + } + } + DCHECK(kOpcodeBase <= opcode && opcode <= 0xFF); + this->PushUint8(opcode); // Special opcode. + current_line_ = absolute_line; + current_address_ = absolute_address; + } + + int GetCodeFactorBits() const { + return code_factor_bits_; + } + + uint64_t CurrentAddress() const { + return current_address_; + } + + int CurrentFile() const { + return current_file_; + } + + int CurrentLine() const { + return current_line_; + } + + using Writer<Allocator>::data; + + DebugLineOpCodeWriter(bool use64bitAddress, + int codeFactorBits, + const Allocator& alloc = Allocator()) + : Writer<Allocator>(&opcodes_), + opcodes_(alloc), + uses_dwarf3_features_(false), + use_64bit_address_(use64bitAddress), + code_factor_bits_(codeFactorBits), + current_address_(0), + current_file_(1), + current_line_(1) { + } + + private: + uint64_t FactorCodeOffset(uint64_t offset) const { + DCHECK_GE(code_factor_bits_, 0); + DCHECK_EQ((offset >> code_factor_bits_) << code_factor_bits_, offset); + return offset >> code_factor_bits_; + } + + std::vector<uint8_t, Allocator> opcodes_; + bool uses_dwarf3_features_; + bool use_64bit_address_; + int code_factor_bits_; + uint64_t current_address_; + int current_file_; + int current_line_; + + DISALLOW_COPY_AND_ASSIGN(DebugLineOpCodeWriter); +}; + +} // namespace dwarf +} // namespace art + +#endif // ART_COMPILER_DWARF_DEBUG_LINE_OPCODE_WRITER_H_ diff --git a/compiler/dwarf/dwarf_test.cc b/compiler/dwarf/dwarf_test.cc new file mode 100644 index 0000000000..ec18e96b4b --- /dev/null +++ b/compiler/dwarf/dwarf_test.cc @@ -0,0 +1,281 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dwarf_test.h" + +#include "dwarf/debug_frame_opcode_writer.h" +#include "dwarf/debug_info_entry_writer.h" +#include "dwarf/debug_line_opcode_writer.h" +#include "dwarf/headers.h" +#include "gtest/gtest.h" + +namespace art { +namespace dwarf { + +// Run the tests only on host since we need objdump. +#ifndef HAVE_ANDROID_OS + +TEST_F(DwarfTest, DebugFrame) { + const bool is64bit = false; + + // Pick offset value which would catch Uleb vs Sleb errors. + const int offset = 40000; + ASSERT_EQ(UnsignedLeb128Size(offset / 4), 2u); + ASSERT_EQ(SignedLeb128Size(offset / 4), 3u); + DW_CHECK("Data alignment factor: -4"); + const Reg reg(6); + + // Test the opcodes in the order mentioned in the spec. + // There are usually several encoding variations of each opcode. 
+ DebugFrameOpCodeWriter<> opcodes; + DW_CHECK("FDE"); + int pc = 0; + for (int i : {0, 1, 0x3F, 0x40, 0xFF, 0x100, 0xFFFF, 0x10000}) { + pc += i; + opcodes.AdvancePC(pc); + } + DW_CHECK_NEXT("DW_CFA_advance_loc: 1 to 01000001"); + DW_CHECK_NEXT("DW_CFA_advance_loc: 63 to 01000040"); + DW_CHECK_NEXT("DW_CFA_advance_loc1: 64 to 01000080"); + DW_CHECK_NEXT("DW_CFA_advance_loc1: 255 to 0100017f"); + DW_CHECK_NEXT("DW_CFA_advance_loc2: 256 to 0100027f"); + DW_CHECK_NEXT("DW_CFA_advance_loc2: 65535 to 0101027e"); + DW_CHECK_NEXT("DW_CFA_advance_loc4: 65536 to 0102027e"); + opcodes.DefCFA(reg, offset); + DW_CHECK_NEXT("DW_CFA_def_cfa: r6 (esi) ofs 40000"); + opcodes.DefCFA(reg, -offset); + DW_CHECK_NEXT("DW_CFA_def_cfa_sf: r6 (esi) ofs -40000"); + opcodes.DefCFARegister(reg); + DW_CHECK_NEXT("DW_CFA_def_cfa_register: r6 (esi)"); + opcodes.DefCFAOffset(offset); + DW_CHECK_NEXT("DW_CFA_def_cfa_offset: 40000"); + opcodes.DefCFAOffset(-offset); + DW_CHECK_NEXT("DW_CFA_def_cfa_offset_sf: -40000"); + uint8_t expr[] = { 0 }; + opcodes.DefCFAExpression(expr, arraysize(expr)); + DW_CHECK_NEXT("DW_CFA_def_cfa_expression"); + opcodes.Undefined(reg); + DW_CHECK_NEXT("DW_CFA_undefined: r6 (esi)"); + opcodes.SameValue(reg); + DW_CHECK_NEXT("DW_CFA_same_value: r6 (esi)"); + opcodes.Offset(Reg(0x3F), -offset); + // Bad register likely means that it does not exist on x86, + // but we want to test high register numbers anyway. + DW_CHECK_NEXT("DW_CFA_offset: bad register: r63 at cfa-40000"); + opcodes.Offset(Reg(0x40), -offset); + DW_CHECK_NEXT("DW_CFA_offset_extended: bad register: r64 at cfa-40000"); + opcodes.Offset(Reg(0x40), offset); + DW_CHECK_NEXT("DW_CFA_offset_extended_sf: bad register: r64 at cfa+40000"); + opcodes.ValOffset(reg, -offset); + DW_CHECK_NEXT("DW_CFA_val_offset: r6 (esi) at cfa-40000"); + opcodes.ValOffset(reg, offset); + DW_CHECK_NEXT("DW_CFA_val_offset_sf: r6 (esi) at cfa+40000"); + opcodes.Register(reg, Reg(1)); + DW_CHECK_NEXT("DW_CFA_register: r6 (esi) in r1 (ecx)"); + opcodes.Expression(reg, expr, arraysize(expr)); + DW_CHECK_NEXT("DW_CFA_expression: r6 (esi)"); + opcodes.ValExpression(reg, expr, arraysize(expr)); + DW_CHECK_NEXT("DW_CFA_val_expression: r6 (esi)"); + opcodes.Restore(Reg(0x3F)); + DW_CHECK_NEXT("DW_CFA_restore: bad register: r63"); + opcodes.Restore(Reg(0x40)); + DW_CHECK_NEXT("DW_CFA_restore_extended: bad register: r64"); + opcodes.Restore(reg); + DW_CHECK_NEXT("DW_CFA_restore: r6 (esi)"); + opcodes.RememberState(); + DW_CHECK_NEXT("DW_CFA_remember_state"); + opcodes.RestoreState(); + DW_CHECK_NEXT("DW_CFA_restore_state"); + opcodes.Nop(); + DW_CHECK_NEXT("DW_CFA_nop"); + + // Also test helpers. + opcodes.DefCFA(Reg(4), 100); // ESP + DW_CHECK_NEXT("DW_CFA_def_cfa: r4 (esp) ofs 100"); + opcodes.AdjustCFAOffset(8); + DW_CHECK_NEXT("DW_CFA_def_cfa_offset: 108"); + opcodes.RelOffset(Reg(0), 0); // push R0 + DW_CHECK_NEXT("DW_CFA_offset: r0 (eax) at cfa-108"); + opcodes.RelOffset(Reg(1), 4); // push R1 + DW_CHECK_NEXT("DW_CFA_offset: r1 (ecx) at cfa-104"); + opcodes.RelOffsetForMany(Reg(2), 8, 1 | (1 << 3), 4); // push R2 and R5 + DW_CHECK_NEXT("DW_CFA_offset: r2 (edx) at cfa-100"); + DW_CHECK_NEXT("DW_CFA_offset: r5 (ebp) at cfa-96"); + opcodes.RestoreMany(Reg(2), 1 | (1 << 3)); // pop R2 and R5 + DW_CHECK_NEXT("DW_CFA_restore: r2 (edx)"); + DW_CHECK_NEXT("DW_CFA_restore: r5 (ebp)"); + + DebugFrameOpCodeWriter<> initial_opcodes; + WriteEhFrameCIE(is64bit, Reg(is64bit ? 
16 : 8), initial_opcodes, &eh_frame_data_); + WriteEhFrameFDE(is64bit, 0, 0x01000000, 0x01000000, opcodes.data(), &eh_frame_data_); + CheckObjdumpOutput(is64bit, "-W"); +} + +// TODO: objdump seems to have trouble with 64bit CIE length. +TEST_F(DwarfTest, DISABLED_DebugFrame64) { + constexpr bool is64bit = true; + DebugFrameOpCodeWriter<> initial_opcodes; + WriteEhFrameCIE(is64bit, Reg(16), initial_opcodes, &eh_frame_data_); + DebugFrameOpCodeWriter<> opcodes; + WriteEhFrameFDE(is64bit, 0, 0x0100000000000000, 0x0200000000000000, + opcodes.data(), &eh_frame_data_); + DW_CHECK("FDE cie=00000000 pc=100000000000000..300000000000000"); + CheckObjdumpOutput(is64bit, "-W"); +} + +TEST_F(DwarfTest, DebugLine) { + const bool is64bit = false; + const int code_factor_bits = 1; + DebugLineOpCodeWriter<> opcodes(is64bit, code_factor_bits); + + std::vector<std::string> include_directories; + include_directories.push_back("/path/to/source"); + DW_CHECK("/path/to/source"); + + std::vector<FileEntry> files { + { "file0.c", 0, 1000, 2000 }, + { "file1.c", 1, 1000, 2000 }, + { "file2.c", 1, 1000, 2000 }, + }; + DW_CHECK("1\t0\t1000\t2000\tfile0.c"); + DW_CHECK_NEXT("2\t1\t1000\t2000\tfile1.c"); + DW_CHECK_NEXT("3\t1\t1000\t2000\tfile2.c"); + + DW_CHECK("Line Number Statements"); + opcodes.SetAddress(0x01000000); + DW_CHECK_NEXT("Extended opcode 2: set Address to 0x1000000"); + opcodes.AddRow(); + DW_CHECK_NEXT("Copy"); + opcodes.AdvancePC(0x01000100); + DW_CHECK_NEXT("Advance PC by 256 to 0x1000100"); + opcodes.SetFile(2); + DW_CHECK_NEXT("Set File Name to entry 2 in the File Name Table"); + opcodes.AdvanceLine(3); + DW_CHECK_NEXT("Advance Line by 2 to 3"); + opcodes.SetColumn(4); + DW_CHECK_NEXT("Set column to 4"); + opcodes.NegateStmt(); + DW_CHECK_NEXT("Set is_stmt to 0"); + opcodes.SetBasicBlock(); + DW_CHECK_NEXT("Set basic block"); + opcodes.SetPrologueEnd(); + DW_CHECK_NEXT("Set prologue_end to true"); + opcodes.SetEpilogueBegin(); + DW_CHECK_NEXT("Set epilogue_begin to true"); + opcodes.SetISA(5); + DW_CHECK_NEXT("Set ISA to 5"); + opcodes.EndSequence(); + DW_CHECK_NEXT("Extended opcode 1: End of Sequence"); + opcodes.DefineFile("file.c", 0, 1000, 2000); + DW_CHECK_NEXT("Extended opcode 3: define new File Table entry"); + DW_CHECK_NEXT("Entry\tDir\tTime\tSize\tName"); + DW_CHECK_NEXT("1\t0\t1000\t2000\tfile.c"); + + WriteDebugLineTable(include_directories, files, opcodes, &debug_line_data_); + CheckObjdumpOutput(is64bit, "-W"); +} + +// DWARF has special one byte codes which advance PC and line at the same time. 
+TEST_F(DwarfTest, DebugLineSpecialOpcodes) { + const bool is64bit = false; + const int code_factor_bits = 1; + uint32_t pc = 0x01000000; + int line = 1; + DebugLineOpCodeWriter<> opcodes(is64bit, code_factor_bits); + opcodes.SetAddress(pc); + size_t num_rows = 0; + DW_CHECK("Line Number Statements:"); + DW_CHECK("Special opcode"); + DW_CHECK("Advance PC by constant"); + DW_CHECK("Decoded dump of debug contents of section .debug_line:"); + DW_CHECK("Line number Starting address"); + for (int addr_delta = 0; addr_delta < 80; addr_delta += 2) { + for (int line_delta = 16; line_delta >= -16; --line_delta) { + pc += addr_delta; + line += line_delta; + opcodes.AddRow(pc, line); + num_rows++; + ASSERT_EQ(opcodes.CurrentAddress(), pc); + ASSERT_EQ(opcodes.CurrentLine(), line); + char expected[1024]; + sprintf(expected, "%i 0x%x", line, pc); + DW_CHECK_NEXT(expected); + } + } + EXPECT_LT(opcodes.data()->size(), num_rows * 3); + + std::vector<std::string> directories; + std::vector<FileEntry> files { { "file.c", 0, 1000, 2000 } }; // NOLINT + WriteDebugLineTable(directories, files, opcodes, &debug_line_data_); + CheckObjdumpOutput(is64bit, "-W -WL"); +} + +TEST_F(DwarfTest, DebugInfo) { + constexpr bool is64bit = false; + DebugInfoEntryWriter<> info(is64bit, &debug_abbrev_data_); + DW_CHECK("Contents of the .debug_info section:"); + info.StartTag(dwarf::DW_TAG_compile_unit, dwarf::DW_CHILDREN_yes); + DW_CHECK("Abbrev Number: 1 (DW_TAG_compile_unit)"); + info.WriteStrp(dwarf::DW_AT_producer, "Compiler name", &debug_str_data_); + DW_CHECK_NEXT("DW_AT_producer : (indirect string, offset: 0x0): Compiler name"); + info.WriteAddr(dwarf::DW_AT_low_pc, 0x01000000); + DW_CHECK_NEXT("DW_AT_low_pc : 0x1000000"); + info.WriteAddr(dwarf::DW_AT_high_pc, 0x02000000); + DW_CHECK_NEXT("DW_AT_high_pc : 0x2000000"); + info.StartTag(dwarf::DW_TAG_subprogram, dwarf::DW_CHILDREN_no); + DW_CHECK("Abbrev Number: 2 (DW_TAG_subprogram)"); + info.WriteStrp(dwarf::DW_AT_name, "Foo", &debug_str_data_); + DW_CHECK_NEXT("DW_AT_name : (indirect string, offset: 0xe): Foo"); + info.WriteAddr(dwarf::DW_AT_low_pc, 0x01010000); + DW_CHECK_NEXT("DW_AT_low_pc : 0x1010000"); + info.WriteAddr(dwarf::DW_AT_high_pc, 0x01020000); + DW_CHECK_NEXT("DW_AT_high_pc : 0x1020000"); + info.EndTag(); // DW_TAG_subprogram + info.StartTag(dwarf::DW_TAG_subprogram, dwarf::DW_CHILDREN_no); + DW_CHECK("Abbrev Number: 2 (DW_TAG_subprogram)"); + info.WriteStrp(dwarf::DW_AT_name, "Bar", &debug_str_data_); + DW_CHECK_NEXT("DW_AT_name : (indirect string, offset: 0x12): Bar"); + info.WriteAddr(dwarf::DW_AT_low_pc, 0x01020000); + DW_CHECK_NEXT("DW_AT_low_pc : 0x1020000"); + info.WriteAddr(dwarf::DW_AT_high_pc, 0x01030000); + DW_CHECK_NEXT("DW_AT_high_pc : 0x1030000"); + info.EndTag(); // DW_TAG_subprogram + info.EndTag(); // DW_TAG_compile_unit + // Test that previous list was properly terminated and empty children. + info.StartTag(dwarf::DW_TAG_compile_unit, dwarf::DW_CHILDREN_yes); + info.EndTag(); // DW_TAG_compile_unit + + // The abbrev table is just side product, but check it as well. 
+ DW_CHECK("Abbrev Number: 3 (DW_TAG_compile_unit)"); + DW_CHECK("Contents of the .debug_abbrev section:"); + DW_CHECK("1 DW_TAG_compile_unit [has children]"); + DW_CHECK_NEXT("DW_AT_producer DW_FORM_strp"); + DW_CHECK_NEXT("DW_AT_low_pc DW_FORM_addr"); + DW_CHECK_NEXT("DW_AT_high_pc DW_FORM_addr"); + DW_CHECK("2 DW_TAG_subprogram [no children]"); + DW_CHECK_NEXT("DW_AT_name DW_FORM_strp"); + DW_CHECK_NEXT("DW_AT_low_pc DW_FORM_addr"); + DW_CHECK_NEXT("DW_AT_high_pc DW_FORM_addr"); + DW_CHECK("3 DW_TAG_compile_unit [has children]"); + + dwarf::WriteDebugInfoCU(0 /* debug_abbrev_offset */, info, &debug_info_data_); + CheckObjdumpOutput(is64bit, "-W"); +} + +#endif // HAVE_ANDROID_OS + +} // namespace dwarf +} // namespace art diff --git a/compiler/dwarf/dwarf_test.h b/compiler/dwarf/dwarf_test.h new file mode 100644 index 0000000000..dd5e0c286e --- /dev/null +++ b/compiler/dwarf/dwarf_test.h @@ -0,0 +1,220 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_DWARF_DWARF_TEST_H_ +#define ART_COMPILER_DWARF_DWARF_TEST_H_ + +#include <cstring> +#include <dirent.h> +#include <memory> +#include <set> +#include <stdio.h> +#include <string> +#include <sys/types.h> + +#include "utils.h" +#include "base/unix_file/fd_file.h" +#include "common_runtime_test.h" +#include "elf_builder.h" +#include "gtest/gtest.h" +#include "os.h" + +namespace art { +namespace dwarf { + +#define DW_CHECK(substring) Check(substring, false, __FILE__, __LINE__) +#define DW_CHECK_NEXT(substring) Check(substring, true, __FILE__, __LINE__) + +class DwarfTest : public CommonRuntimeTest { + public: + static constexpr bool kPrintObjdumpOutput = false; // debugging. + + struct ExpectedLine { + std::string substring; + bool next; + const char* at_file; + int at_line; + }; + + // Check that the objdump output contains given output. + // If next is true, it must be the next line. Otherwise lines are skipped. + void Check(const char* substr, bool next, const char* at_file, int at_line) { + expected_lines_.push_back(ExpectedLine {substr, next, at_file, at_line}); + } + + static std::string GetObjdumpPath() { + const char* android_build_top = getenv("ANDROID_BUILD_TOP"); + if (android_build_top != nullptr) { + std::string host_prebuilts = std::string(android_build_top) + + "/prebuilts/gcc/linux-x86/host/"; + // Read the content of the directory. + std::set<std::string> entries; + DIR* dir = opendir(host_prebuilts.c_str()); + if (dir != nullptr) { + struct dirent* entry; + while ((entry = readdir(dir)) != nullptr) { + if (strstr(entry->d_name, "linux-glibc")) { + entries.insert(host_prebuilts + entry->d_name); + } + } + closedir(dir); + } + // Strings are sorted so the last one should be the most recent version. + if (!entries.empty()) { + std::string path = *entries.rbegin() + "/x86_64-linux/bin/objdump"; + struct stat st; + if (stat(path.c_str(), &st) == 0) { + return path; // File exists. 
+ } + } + } + ADD_FAILURE() << "Can not find prebuild objdump."; + return "objdump"; // Use the system objdump as fallback. + } + + // Pretty-print the generated DWARF data using objdump. + template<typename Elf_Word, typename Elf_Sword, typename Elf_Addr, typename Elf_Dyn, + typename Elf_Sym, typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr> + std::vector<std::string> Objdump(bool is64bit, const char* args) { + // Write simple elf file with just the DWARF sections. + class NoCode : public CodeOutput { + virtual void SetCodeOffset(size_t) { } + virtual bool Write(OutputStream*) { return true; } + } code; + ScratchFile file; + InstructionSet isa = is64bit ? kX86_64 : kX86; + ElfBuilder<Elf_Word, Elf_Sword, Elf_Addr, Elf_Dyn, + Elf_Sym, Elf_Ehdr, Elf_Phdr, Elf_Shdr> builder( + &code, file.GetFile(), isa, 0, 0, 0, 0, 0, 0, false, false); + typedef ElfRawSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr> Section; + if (!debug_info_data_.empty()) { + Section debug_info(".debug_info", SHT_PROGBITS, 0, nullptr, 0, 1, 0); + debug_info.SetBuffer(debug_info_data_); + builder.RegisterRawSection(debug_info); + } + if (!debug_abbrev_data_.empty()) { + Section debug_abbrev(".debug_abbrev", SHT_PROGBITS, 0, nullptr, 0, 1, 0); + debug_abbrev.SetBuffer(debug_abbrev_data_); + builder.RegisterRawSection(debug_abbrev); + } + if (!debug_str_data_.empty()) { + Section debug_str(".debug_str", SHT_PROGBITS, 0, nullptr, 0, 1, 0); + debug_str.SetBuffer(debug_str_data_); + builder.RegisterRawSection(debug_str); + } + if (!debug_line_data_.empty()) { + Section debug_line(".debug_line", SHT_PROGBITS, 0, nullptr, 0, 1, 0); + debug_line.SetBuffer(debug_line_data_); + builder.RegisterRawSection(debug_line); + } + if (!eh_frame_data_.empty()) { + Section eh_frame(".eh_frame", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, 4, 0); + eh_frame.SetBuffer(eh_frame_data_); + builder.RegisterRawSection(eh_frame); + } + builder.Init(); + builder.Write(); + + // Read the elf file back using objdump. + std::vector<std::string> lines; + std::string cmd = GetObjdumpPath(); + cmd = cmd + " " + args + " " + file.GetFilename() + " 2>&1"; + FILE* output = popen(cmd.data(), "r"); + char buffer[1024]; + const char* line; + while ((line = fgets(buffer, sizeof(buffer), output)) != nullptr) { + if (kPrintObjdumpOutput) { + printf("%s", line); + } + if (line[0] != '\0' && line[0] != '\n') { + EXPECT_TRUE(strstr(line, "objdump: Error:") == nullptr) << line; + EXPECT_TRUE(strstr(line, "objdump: Warning:") == nullptr) << line; + std::string str(line); + if (str.back() == '\n') { + str.pop_back(); + } + lines.push_back(str); + } + } + pclose(output); + return lines; + } + + std::vector<std::string> Objdump(bool is64bit, const char* args) { + if (is64bit) { + return Objdump<Elf64_Word, Elf64_Sword, Elf64_Addr, Elf64_Dyn, + Elf64_Sym, Elf64_Ehdr, Elf64_Phdr, Elf64_Shdr>(is64bit, args); + } else { + return Objdump<Elf32_Word, Elf32_Sword, Elf32_Addr, Elf32_Dyn, + Elf32_Sym, Elf32_Ehdr, Elf32_Phdr, Elf32_Shdr>(is64bit, args); + } + } + + // Compare objdump output to the recorded checks. 
+ void CheckObjdumpOutput(bool is64bit, const char* args) { + std::vector<std::string> actual_lines = Objdump(is64bit, args); + auto actual_line = actual_lines.begin(); + for (const ExpectedLine& expected_line : expected_lines_) { + const std::string& substring = expected_line.substring; + if (actual_line == actual_lines.end()) { + ADD_FAILURE_AT(expected_line.at_file, expected_line.at_line) << + "Expected '" << substring << "'.\n" << + "Seen end of output."; + } else if (expected_line.next) { + if (actual_line->find(substring) == std::string::npos) { + ADD_FAILURE_AT(expected_line.at_file, expected_line.at_line) << + "Expected '" << substring << "'.\n" << + "Seen '" << actual_line->data() << "'."; + } else { + // printf("Found '%s' in '%s'.\n", substring.data(), actual_line->data()); + } + actual_line++; + } else { + bool found = false; + for (auto it = actual_line; it < actual_lines.end(); it++) { + if (it->find(substring) != std::string::npos) { + actual_line = it; + found = true; + break; + } + } + if (!found) { + ADD_FAILURE_AT(expected_line.at_file, expected_line.at_line) << + "Expected '" << substring << "'.\n" << + "Not found anywhere in the rest of the output."; + } else { + // printf("Found '%s' in '%s'.\n", substring.data(), actual_line->data()); + actual_line++; + } + } + } + } + + // Buffers which are going to assembled into ELF file and passed to objdump. + std::vector<uint8_t> eh_frame_data_; + std::vector<uint8_t> debug_info_data_; + std::vector<uint8_t> debug_abbrev_data_; + std::vector<uint8_t> debug_str_data_; + std::vector<uint8_t> debug_line_data_; + + // The expected output of objdump. + std::vector<ExpectedLine> expected_lines_; +}; + +} // namespace dwarf +} // namespace art + +#endif // ART_COMPILER_DWARF_DWARF_TEST_H_ diff --git a/compiler/dwarf/headers.h b/compiler/dwarf/headers.h new file mode 100644 index 0000000000..d866b91ae7 --- /dev/null +++ b/compiler/dwarf/headers.h @@ -0,0 +1,167 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_DWARF_HEADERS_H_ +#define ART_COMPILER_DWARF_HEADERS_H_ + +#include "debug_frame_opcode_writer.h" +#include "debug_info_entry_writer.h" +#include "debug_line_opcode_writer.h" +#include "register.h" +#include "writer.h" + +namespace art { +namespace dwarf { + +// Write common information entry (CIE) to .eh_frame section. +template<typename Allocator> +void WriteEhFrameCIE(bool is64bit, Reg return_address_register, + const DebugFrameOpCodeWriter<Allocator>& opcodes, + std::vector<uint8_t>* eh_frame) { + Writer<> writer(eh_frame); + size_t cie_header_start_ = writer.data()->size(); + if (is64bit) { + // TODO: This is not related to being 64bit. + writer.PushUint32(0xffffffff); + writer.PushUint64(0); // Length placeholder. + writer.PushUint64(0); // CIE id. + } else { + writer.PushUint32(0); // Length placeholder. + writer.PushUint32(0); // CIE id. + } + writer.PushUint8(1); // Version. 
+ writer.PushString("zR"); + writer.PushUleb128(DebugFrameOpCodeWriter<Allocator>::kCodeAlignmentFactor); + writer.PushSleb128(DebugFrameOpCodeWriter<Allocator>::kDataAlignmentFactor); + writer.PushUleb128(return_address_register.num()); // ubyte in DWARF2. + writer.PushUleb128(1); // z: Augmentation data size. + if (is64bit) { + writer.PushUint8(0x04); // R: ((DW_EH_PE_absptr << 4) | DW_EH_PE_udata8). + } else { + writer.PushUint8(0x03); // R: ((DW_EH_PE_absptr << 4) | DW_EH_PE_udata4). + } + writer.PushData(opcodes.data()); + writer.Pad(is64bit ? 8 : 4); + if (is64bit) { + writer.UpdateUint64(cie_header_start_ + 4, writer.data()->size() - cie_header_start_ - 12); + } else { + writer.UpdateUint32(cie_header_start_, writer.data()->size() - cie_header_start_ - 4); + } +} + +// Write frame description entry (FDE) to .eh_frame section. +template<typename Allocator> +void WriteEhFrameFDE(bool is64bit, size_t cie_offset, + uint64_t initial_address, uint64_t address_range, + const std::vector<uint8_t, Allocator>* opcodes, + std::vector<uint8_t>* eh_frame) { + Writer<> writer(eh_frame); + size_t fde_header_start = writer.data()->size(); + if (is64bit) { + // TODO: This is not related to being 64bit. + writer.PushUint32(0xffffffff); + writer.PushUint64(0); // Length placeholder. + uint64_t cie_pointer = writer.data()->size() - cie_offset; + writer.PushUint64(cie_pointer); + } else { + writer.PushUint32(0); // Length placeholder. + uint32_t cie_pointer = writer.data()->size() - cie_offset; + writer.PushUint32(cie_pointer); + } + if (is64bit) { + writer.PushUint64(initial_address); + writer.PushUint64(address_range); + } else { + writer.PushUint32(initial_address); + writer.PushUint32(address_range); + } + writer.PushUleb128(0); // Augmentation data size. + writer.PushData(opcodes); + writer.Pad(is64bit ? 8 : 4); + if (is64bit) { + writer.UpdateUint64(fde_header_start + 4, writer.data()->size() - fde_header_start - 12); + } else { + writer.UpdateUint32(fde_header_start, writer.data()->size() - fde_header_start - 4); + } +} + +// Write compilation unit (CU) to .debug_info section. +template<typename Allocator> +void WriteDebugInfoCU(uint32_t debug_abbrev_offset, + const DebugInfoEntryWriter<Allocator>& entries, + std::vector<uint8_t>* debug_info) { + Writer<> writer(debug_info); + size_t start = writer.data()->size(); + writer.PushUint32(0); // Length placeholder. + writer.PushUint16(3); // Version. + writer.PushUint32(debug_abbrev_offset); + writer.PushUint8(entries.is64bit() ? 8 : 4); + writer.PushData(entries.data()); + writer.UpdateUint32(start, writer.data()->size() - start - 4); +} + +struct FileEntry { + std::string file_name; + int directory_index; + int modification_time; + int file_size; +}; + +// Write line table to .debug_line section. +template<typename Allocator> +void WriteDebugLineTable(const std::vector<std::string>& include_directories, + const std::vector<FileEntry>& files, + const DebugLineOpCodeWriter<Allocator>& opcodes, + std::vector<uint8_t>* debug_line) { + Writer<> writer(debug_line); + size_t header_start = writer.data()->size(); + writer.PushUint32(0); // Section-length placeholder. + // Claim DWARF-2 version even though we use some DWARF-3 features. + // DWARF-2 consumers will ignore the unknown opcodes. + // This is what clang currently does. + writer.PushUint16(2); // .debug_line version. + size_t header_length_pos = writer.data()->size(); + writer.PushUint32(0); // Header-length placeholder. 
+ writer.PushUint8(1 << opcodes.GetCodeFactorBits()); + writer.PushUint8(DebugLineOpCodeWriter<Allocator>::kDefaultIsStmt ? 1 : 0); + writer.PushInt8(DebugLineOpCodeWriter<Allocator>::kLineBase); + writer.PushUint8(DebugLineOpCodeWriter<Allocator>::kLineRange); + writer.PushUint8(DebugLineOpCodeWriter<Allocator>::kOpcodeBase); + static const int opcode_lengths[DebugLineOpCodeWriter<Allocator>::kOpcodeBase] = { + 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1 }; + for (int i = 1; i < DebugLineOpCodeWriter<Allocator>::kOpcodeBase; i++) { + writer.PushUint8(opcode_lengths[i]); + } + for (const std::string& directory : include_directories) { + writer.PushData(directory.data(), directory.size() + 1); + } + writer.PushUint8(0); // Terminate include_directories list. + for (const FileEntry& file : files) { + writer.PushData(file.file_name.data(), file.file_name.size() + 1); + writer.PushUleb128(file.directory_index); + writer.PushUleb128(file.modification_time); + writer.PushUleb128(file.file_size); + } + writer.PushUint8(0); // Terminate file list. + writer.UpdateUint32(header_length_pos, writer.data()->size() - header_length_pos - 4); + writer.PushData(opcodes.data()->data(), opcodes.data()->size()); + writer.UpdateUint32(header_start, writer.data()->size() - header_start - 4); +} + +} // namespace dwarf +} // namespace art + +#endif // ART_COMPILER_DWARF_HEADERS_H_ diff --git a/compiler/dwarf/register.h b/compiler/dwarf/register.h new file mode 100644 index 0000000000..fa666dffa9 --- /dev/null +++ b/compiler/dwarf/register.h @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_DWARF_REGISTER_H_ +#define ART_COMPILER_DWARF_REGISTER_H_ + +namespace art { +namespace dwarf { + +// Represents DWARF register. +class Reg { + public: + explicit Reg(int reg_num) : num_(reg_num) { } + int num() const { return num_; } + + // TODO: Arm S0–S31 register mapping is obsolescent. + // We should use VFP-v3/Neon D0-D31 mapping instead. + // However, D0 is aliased to pair of S0 and S1, so using that + // mapping we can not easily say S0 is spilled and S1 is not. + // There are ways around this in DWARF but they are complex. + // It would be much simpler to always spill whole D registers. + // Arm64 mapping is correct since we already do this there. + + static Reg ArmCore(int num) { return Reg(num); } + static Reg ArmFp(int num) { return Reg(64 + num); } // S0–S31. + static Reg Arm64Core(int num) { return Reg(num); } + static Reg Arm64Fp(int num) { return Reg(64 + num); } // V0-V31. + static Reg MipsCore(int num) { return Reg(num); } + static Reg Mips64Core(int num) { return Reg(num); } + static Reg X86Core(int num) { return Reg(num); } + static Reg X86Fp(int num) { return Reg(21 + num); } + static Reg X86_64Core(int num) { + static const int map[8] = {0, 2, 1, 3, 7, 6, 4, 5}; + return Reg(num < 8 ? 
map[num] : num); + } + static Reg X86_64Fp(int num) { return Reg(17 + num); } + + private: + int num_; +}; + +} // namespace dwarf +} // namespace art + +#endif // ART_COMPILER_DWARF_REGISTER_H_ diff --git a/compiler/dwarf/writer.h b/compiler/dwarf/writer.h new file mode 100644 index 0000000000..3b9c55866a --- /dev/null +++ b/compiler/dwarf/writer.h @@ -0,0 +1,173 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_DWARF_WRITER_H_ +#define ART_COMPILER_DWARF_WRITER_H_ + +#include <vector> +#include "leb128.h" +#include "base/logging.h" +#include "utils.h" + +namespace art { +namespace dwarf { + +// The base class for all DWARF writers. +template<typename Allocator = std::allocator<uint8_t>> +class Writer { + public: + void PushUint8(int value) { + DCHECK_GE(value, 0); + DCHECK_LE(value, UINT8_MAX); + data_->push_back(value & 0xff); + } + + void PushUint16(int value) { + DCHECK_GE(value, 0); + DCHECK_LE(value, UINT16_MAX); + data_->push_back((value >> 0) & 0xff); + data_->push_back((value >> 8) & 0xff); + } + + void PushUint32(uint32_t value) { + data_->push_back((value >> 0) & 0xff); + data_->push_back((value >> 8) & 0xff); + data_->push_back((value >> 16) & 0xff); + data_->push_back((value >> 24) & 0xff); + } + + void PushUint32(int value) { + DCHECK_GE(value, 0); + PushUint32(static_cast<uint32_t>(value)); + } + + void PushUint32(uint64_t value) { + DCHECK_LE(value, UINT32_MAX); + PushUint32(static_cast<uint32_t>(value)); + } + + void PushUint64(uint64_t value) { + data_->push_back((value >> 0) & 0xff); + data_->push_back((value >> 8) & 0xff); + data_->push_back((value >> 16) & 0xff); + data_->push_back((value >> 24) & 0xff); + data_->push_back((value >> 32) & 0xff); + data_->push_back((value >> 40) & 0xff); + data_->push_back((value >> 48) & 0xff); + data_->push_back((value >> 56) & 0xff); + } + + void PushInt8(int value) { + DCHECK_GE(value, INT8_MIN); + DCHECK_LE(value, INT8_MAX); + PushUint8(static_cast<uint8_t>(value)); + } + + void PushInt16(int value) { + DCHECK_GE(value, INT16_MIN); + DCHECK_LE(value, INT16_MAX); + PushUint16(static_cast<uint16_t>(value)); + } + + void PushInt32(int value) { + PushUint32(static_cast<uint32_t>(value)); + } + + void PushInt64(int64_t value) { + PushUint64(static_cast<uint64_t>(value)); + } + + // Variable-length encoders. + + void PushUleb128(uint32_t value) { + EncodeUnsignedLeb128(data_, value); + } + + void PushUleb128(int value) { + DCHECK_GE(value, 0); + EncodeUnsignedLeb128(data_, value); + } + + void PushSleb128(int value) { + EncodeSignedLeb128(data_, value); + } + + // Miscellaneous functions. 
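A small usage sketch of the writer (illustrative, not part of the patch; the byte values follow from the LEB128 definition and match the Uleb-vs-Sleb size check in the DebugFrame test above):

  #include <vector>
  #include "dwarf/writer.h"

  std::vector<uint8_t> buf;
  dwarf::Writer<> writer(&buf);
  writer.PushUleb128(10000u);  // 40000 / 4 as unsigned LEB128 -> 2 bytes: 0x90 0x4E.
  writer.PushSleb128(10000);   // Same value as signed LEB128 -> 3 bytes: 0x90 0xCE 0x00.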
+ + void PushString(const char* value) { + data_->insert(data_->end(), value, value + strlen(value) + 1); + } + + void PushData(const void* ptr, size_t size) { + const char* p = reinterpret_cast<const char*>(ptr); + data_->insert(data_->end(), p, p + size); + } + + template<typename Allocator2> + void PushData(const std::vector<uint8_t, Allocator2>* buffer) { + data_->insert(data_->end(), buffer->begin(), buffer->end()); + } + + void UpdateUint32(size_t offset, uint32_t value) { + DCHECK_LT(offset + 3, data_->size()); + (*data_)[offset + 0] = (value >> 0) & 0xFF; + (*data_)[offset + 1] = (value >> 8) & 0xFF; + (*data_)[offset + 2] = (value >> 16) & 0xFF; + (*data_)[offset + 3] = (value >> 24) & 0xFF; + } + + void UpdateUint64(size_t offset, uint64_t value) { + DCHECK_LT(offset + 7, data_->size()); + (*data_)[offset + 0] = (value >> 0) & 0xFF; + (*data_)[offset + 1] = (value >> 8) & 0xFF; + (*data_)[offset + 2] = (value >> 16) & 0xFF; + (*data_)[offset + 3] = (value >> 24) & 0xFF; + (*data_)[offset + 4] = (value >> 32) & 0xFF; + (*data_)[offset + 5] = (value >> 40) & 0xFF; + (*data_)[offset + 6] = (value >> 48) & 0xFF; + (*data_)[offset + 7] = (value >> 56) & 0xFF; + } + + void UpdateUleb128(size_t offset, uint32_t value) { + DCHECK_LE(offset + UnsignedLeb128Size(value), data_->size()); + UpdateUnsignedLeb128(data_->data() + offset, value); + } + + void Pop() { + return data_->pop_back(); + } + + void Pad(int alignment) { + DCHECK_NE(alignment, 0); + data_->resize(RoundUp(data_->size(), alignment), 0); + } + + const std::vector<uint8_t, Allocator>* data() const { + return data_; + } + + explicit Writer(std::vector<uint8_t, Allocator>* buffer) : data_(buffer) { } + + private: + std::vector<uint8_t, Allocator>* data_; + + DISALLOW_COPY_AND_ASSIGN(Writer); +}; + +} // namespace dwarf +} // namespace art + +#endif // ART_COMPILER_DWARF_WRITER_H_ diff --git a/compiler/elf_builder.h b/compiler/elf_builder.h index 9ab3602606..124ed03c21 100644 --- a/compiler/elf_builder.h +++ b/compiler/elf_builder.h @@ -40,6 +40,7 @@ class ElfSectionBuilder : public ValueObject { section_.sh_addralign = align; section_.sh_entsize = entsize; } + ElfSectionBuilder(const ElfSectionBuilder&) = default; ~ElfSectionBuilder() {} @@ -144,6 +145,7 @@ class ElfRawSectionBuilder FINAL : public ElfSectionBuilder<Elf_Word, Elf_Sword, : ElfSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr>(sec_name, type, flags, link, info, align, entsize) { } + ElfRawSectionBuilder(const ElfRawSectionBuilder&) = default; ~ElfRawSectionBuilder() {} diff --git a/compiler/elf_writer_debug.cc b/compiler/elf_writer_debug.cc new file mode 100644 index 0000000000..5e8e24b035 --- /dev/null +++ b/compiler/elf_writer_debug.cc @@ -0,0 +1,360 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "elf_writer_debug.h" + +#include "compiled_method.h" +#include "driver/compiler_driver.h" +#include "dex_file-inl.h" +#include "dwarf/headers.h" +#include "dwarf/register.h" +#include "oat_writer.h" + +namespace art { +namespace dwarf { + +static void WriteEhFrameCIE(InstructionSet isa, std::vector<uint8_t>* eh_frame) { + // Scratch registers should be marked as undefined. This tells the + // debugger that its value in the previous frame is not recoverable. + bool is64bit = Is64BitInstructionSet(isa); + switch (isa) { + case kArm: + case kThumb2: { + DebugFrameOpCodeWriter<> opcodes; + opcodes.DefCFA(Reg::ArmCore(13), 0); // R13(SP). + // core registers. + for (int reg = 0; reg < 13; reg++) { + if (reg < 4 || reg == 12) { + opcodes.Undefined(Reg::ArmCore(reg)); + } else { + opcodes.SameValue(Reg::ArmCore(reg)); + } + } + // fp registers. + for (int reg = 0; reg < 32; reg++) { + if (reg < 16) { + opcodes.Undefined(Reg::ArmFp(reg)); + } else { + opcodes.SameValue(Reg::ArmFp(reg)); + } + } + auto return_address_reg = Reg::ArmCore(14); // R14(LR). + WriteEhFrameCIE(is64bit, return_address_reg, opcodes, eh_frame); + return; + } + case kArm64: { + DebugFrameOpCodeWriter<> opcodes; + opcodes.DefCFA(Reg::Arm64Core(31), 0); // R31(SP). + // core registers. + for (int reg = 0; reg < 30; reg++) { + if (reg < 8 || reg == 16 || reg == 17) { + opcodes.Undefined(Reg::Arm64Core(reg)); + } else { + opcodes.SameValue(Reg::Arm64Core(reg)); + } + } + // fp registers. + for (int reg = 0; reg < 32; reg++) { + if (reg < 8 || reg >= 16) { + opcodes.Undefined(Reg::Arm64Fp(reg)); + } else { + opcodes.SameValue(Reg::Arm64Fp(reg)); + } + } + auto return_address_reg = Reg::Arm64Core(30); // R30(LR). + WriteEhFrameCIE(is64bit, return_address_reg, opcodes, eh_frame); + return; + } + case kMips: + case kMips64: { + DebugFrameOpCodeWriter<> opcodes; + opcodes.DefCFA(Reg::MipsCore(29), 0); // R29(SP). + // core registers. + for (int reg = 1; reg < 26; reg++) { + if (reg < 16 || reg == 24 || reg == 25) { // AT, V*, A*, T*. + opcodes.Undefined(Reg::MipsCore(reg)); + } else { + opcodes.SameValue(Reg::MipsCore(reg)); + } + } + auto return_address_reg = Reg::MipsCore(31); // R31(RA). + WriteEhFrameCIE(is64bit, return_address_reg, opcodes, eh_frame); + return; + } + case kX86: { + DebugFrameOpCodeWriter<> opcodes; + opcodes.DefCFA(Reg::X86Core(4), 4); // R4(ESP). + opcodes.Offset(Reg::X86Core(8), -4); // R8(EIP). + // core registers. + for (int reg = 0; reg < 8; reg++) { + if (reg <= 3) { + opcodes.Undefined(Reg::X86Core(reg)); + } else if (reg == 4) { + // Stack pointer. + } else { + opcodes.SameValue(Reg::X86Core(reg)); + } + } + // fp registers. + for (int reg = 0; reg < 8; reg++) { + opcodes.Undefined(Reg::X86Fp(reg)); + } + auto return_address_reg = Reg::X86Core(8); // R8(EIP). + WriteEhFrameCIE(is64bit, return_address_reg, opcodes, eh_frame); + return; + } + case kX86_64: { + DebugFrameOpCodeWriter<> opcodes; + opcodes.DefCFA(Reg::X86_64Core(4), 8); // R4(RSP). + opcodes.Offset(Reg::X86_64Core(16), -8); // R16(RIP). + // core registers. + for (int reg = 0; reg < 16; reg++) { + if (reg == 4) { + // Stack pointer. + } else if (reg < 12 && reg != 3 && reg != 5) { // except EBX and EBP. + opcodes.Undefined(Reg::X86_64Core(reg)); + } else { + opcodes.SameValue(Reg::X86_64Core(reg)); + } + } + // fp registers. 
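As an aside, the DefCFA and Offset calls in the x86-64 case above boil down to the same raw DWARF CFI bytes that the hand-written x86-64 CIE removed from elf_writer_quick.cc (further down in this diff) pushed directly. A standalone sketch, assuming the standard x86-64 DWARF register numbers (RSP = 7, RIP = 16) and the CIE data alignment factor of -8:

#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  std::vector<uint8_t> cfi;
  // DW_CFA_def_cfa: opcode 0x0c, then ULEB128 register and ULEB128 offset.
  cfi.push_back(0x0c);
  cfi.push_back(7);          // RSP.
  cfi.push_back(8);          // CFA = RSP + 8.
  // DW_CFA_offset: 0x80 | register, then a ULEB128 factored offset.
  // 1 * data_alignment(-8) = -8, i.e. the return address (RIP) at CFA-8.
  cfi.push_back(0x80 | 16);  // 0x90: RIP.
  cfi.push_back(1);
  assert((cfi == std::vector<uint8_t>{0x0c, 0x07, 0x08, 0x90, 0x01}));
  return 0;
}

Keeping this byte-level detail inside the opcode writer is the main reason the hand-rolled CIE/FDE code elsewhere in this patch can be deleted.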
+ for (int reg = 0; reg < 16; reg++) { + if (reg < 12) { + opcodes.Undefined(Reg::X86_64Fp(reg)); + } else { + opcodes.SameValue(Reg::X86_64Fp(reg)); + } + } + auto return_address_reg = Reg::X86_64Core(16); // R16(RIP). + WriteEhFrameCIE(is64bit, return_address_reg, opcodes, eh_frame); + return; + } + case kNone: + break; + } + LOG(FATAL) << "Can not write CIE frame for ISA " << isa; + UNREACHABLE(); +} + +/* + * @brief Generate the DWARF sections. + * @param oat_writer The Oat file Writer. + * @param eh_frame Call Frame Information. + * @param debug_info Compilation unit information. + * @param debug_abbrev Abbreviations used to generate dbg_info. + * @param debug_str Debug strings. + * @param debug_line Line number table. + */ +void WriteDebugSections(const CompilerDriver* compiler, + const OatWriter* oat_writer, + uint32_t text_section_offset, + std::vector<uint8_t>* eh_frame, + std::vector<uint8_t>* debug_info, + std::vector<uint8_t>* debug_abbrev, + std::vector<uint8_t>* debug_str, + std::vector<uint8_t>* debug_line) { + const std::vector<OatWriter::DebugInfo>& method_infos = oat_writer->GetMethodDebugInfo(); + const InstructionSet isa = compiler->GetInstructionSet(); + uint32_t cunit_low_pc = static_cast<uint32_t>(-1); + uint32_t cunit_high_pc = 0; + for (auto method_info : method_infos) { + cunit_low_pc = std::min(cunit_low_pc, method_info.low_pc_); + cunit_high_pc = std::max(cunit_high_pc, method_info.high_pc_); + } + + // Write .eh_frame section. + size_t cie_offset = eh_frame->size(); + WriteEhFrameCIE(isa, eh_frame); + for (const OatWriter::DebugInfo& mi : method_infos) { + const SwapVector<uint8_t>* opcodes = mi.compiled_method_->GetCFIInfo(); + if (opcodes != nullptr) { + WriteEhFrameFDE(Is64BitInstructionSet(isa), cie_offset, + text_section_offset + mi.low_pc_, mi.high_pc_ - mi.low_pc_, + opcodes, eh_frame); + } + } + + // Write .debug_info section. + size_t debug_abbrev_offset = debug_abbrev->size(); + DebugInfoEntryWriter<> info(false /* 32 bit */, debug_abbrev); + info.StartTag(DW_TAG_compile_unit, DW_CHILDREN_yes); + info.WriteStrp(DW_AT_producer, "Android dex2oat", debug_str); + info.WriteData1(DW_AT_language, DW_LANG_Java); + info.WriteAddr(DW_AT_low_pc, cunit_low_pc + text_section_offset); + info.WriteAddr(DW_AT_high_pc, cunit_high_pc + text_section_offset); + info.WriteData4(DW_AT_stmt_list, debug_line->size()); + for (auto method_info : method_infos) { + std::string method_name = PrettyMethod(method_info.dex_method_index_, + *method_info.dex_file_, true); + if (method_info.deduped_) { + // TODO We should place the DEDUPED tag on the first instance of a deduplicated symbol + // so that it will show up in a debuggerd crash report. + method_name += " [ DEDUPED ]"; + } + info.StartTag(DW_TAG_subprogram, DW_CHILDREN_no); + info.WriteStrp(DW_AT_name, method_name.data(), debug_str); + info.WriteAddr(DW_AT_low_pc, method_info.low_pc_ + text_section_offset); + info.WriteAddr(DW_AT_high_pc, method_info.high_pc_ + text_section_offset); + info.EndTag(); // DW_TAG_subprogram + } + info.EndTag(); // DW_TAG_compile_unit + WriteDebugInfoCU(debug_abbrev_offset, info, debug_info); + + // TODO: in gdb info functions <regexp> - reports Java functions, but + // source file is <unknown> because .debug_line is formed as one + // compilation unit. To fix this it is possible to generate + // a separate compilation unit for every distinct Java source. + // Each of the these compilation units can have several non-adjacent + // method ranges. + + // Write .debug_line section. 
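Before the line-table code below, it may help to see how a DWARF line program packs an address advance and a line delta into a single "special opcode" byte. The sketch uses the header constants the old hand-rolled generator in this patch used (line_base -5, line_range 14, opcode_base 13); the new DebugLineOpCodeWriter carries its own kLineBase/kLineRange/kOpcodeBase constants for the same purpose.

#include <cstdio>

// Returns the one-byte special opcode for (address advance, line delta),
// or -1 if the pair does not fit and standard opcodes must be used.
static int SpecialOpcode(int addr_advance, int line_delta) {
  constexpr int kLineBase = -5;
  constexpr int kLineRange = 14;
  constexpr int kOpcodeBase = 13;
  if (line_delta < kLineBase || line_delta >= kLineBase + kLineRange) {
    return -1;  // Needs DW_LNS_advance_line.
  }
  int opcode = (line_delta - kLineBase) + kLineRange * addr_advance + kOpcodeBase;
  return opcode <= 255 ? opcode : -1;  // Too far: needs DW_LNS_advance_pc.
}

int main() {
  // Advance the (code-factored) address by 2 and the line by +1.
  printf("special opcode = %d\n", SpecialOpcode(2, 1));  // Prints 47.
  return 0;
}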
+ std::vector<FileEntry> files; + std::unordered_map<std::string, size_t> files_map; + std::vector<std::string> directories; + std::unordered_map<std::string, size_t> directories_map; + int code_factor_bits_ = 0; + int dwarf_isa = -1; + switch (isa) { + case kArm: // arm actually means thumb2. + case kThumb2: + code_factor_bits_ = 1; // 16-bit instuctions + dwarf_isa = 1; // DW_ISA_ARM_thumb. + break; + case kArm64: + case kMips: + case kMips64: + code_factor_bits_ = 2; // 32-bit instructions + break; + case kNone: + case kX86: + case kX86_64: + break; + } + DebugLineOpCodeWriter<> opcodes(false /* 32bit */, code_factor_bits_); + opcodes.SetAddress(text_section_offset + cunit_low_pc); + if (dwarf_isa != -1) { + opcodes.SetISA(dwarf_isa); + } + for (const OatWriter::DebugInfo& mi : method_infos) { + // Addresses in the line table should be unique and increasing. + if (mi.deduped_) { + continue; + } + + struct DebugInfoCallbacks { + static bool NewPosition(void* ctx, uint32_t address, uint32_t line) { + auto* context = reinterpret_cast<DebugInfoCallbacks*>(ctx); + context->dex2line_.push_back({address, static_cast<int32_t>(line)}); + return false; + } + DefaultSrcMap dex2line_; + } debug_info_callbacks; + + const DexFile* dex = mi.dex_file_; + if (mi.code_item_ != nullptr) { + dex->DecodeDebugInfo(mi.code_item_, + (mi.access_flags_ & kAccStatic) != 0, + mi.dex_method_index_, + DebugInfoCallbacks::NewPosition, + nullptr, + &debug_info_callbacks); + } + + // Get and deduplicate directory and filename. + int file_index = 0; // 0 - primary source file of the compilation. + auto& dex_class_def = dex->GetClassDef(mi.class_def_index_); + const char* source_file = dex->GetSourceFile(dex_class_def); + if (source_file != nullptr) { + std::string file_name(source_file); + size_t file_name_slash = file_name.find_last_of('/'); + std::string class_name(dex->GetClassDescriptor(dex_class_def)); + size_t class_name_slash = class_name.find_last_of('/'); + std::string full_path(file_name); + + // Guess directory from package name. + int directory_index = 0; // 0 - current directory of the compilation. + if (file_name_slash == std::string::npos && // Just filename. + class_name.front() == 'L' && // Type descriptor for a class. + class_name_slash != std::string::npos) { // Has package name. + std::string package_name = class_name.substr(1, class_name_slash - 1); + auto it = directories_map.find(package_name); + if (it == directories_map.end()) { + directory_index = 1 + directories.size(); + directories_map.emplace(package_name, directory_index); + directories.push_back(package_name); + } else { + directory_index = it->second; + } + full_path = package_name + "/" + file_name; + } + + // Add file entry. + auto it2 = files_map.find(full_path); + if (it2 == files_map.end()) { + file_index = 1 + files.size(); + files_map.emplace(full_path, file_index); + files.push_back(FileEntry { + file_name, + directory_index, + 0, // Modification time - NA. + 0, // File size - NA. + }); + } else { + file_index = it2->second; + } + } + opcodes.SetFile(file_index); + + // Generate mapping opcodes from PC to Java lines. 
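The loop that follows composes two mappings: the compiled method's pc to dex_pc source map and the dex debug info's dex_pc to line table, skipping rows whose line number did not change. A standalone sketch of that composition (names and values are illustrative, not the ART types):

#include <cstdint>
#include <cstdio>
#include <map>
#include <utility>
#include <vector>

int main() {
  // pc -> dex_pc, as GetSrcMappingTable() provides per compiled method.
  std::vector<std::pair<uint32_t, uint32_t>> pc2dex = {{0, 0}, {4, 1}, {8, 3}};
  // dex_pc -> Java line, as decoded from the dex debug info.
  std::map<uint32_t, int> dex2line = {{0, 10}, {1, 10}, {3, 12}};
  int last_line = -1;
  for (const auto& entry : pc2dex) {
    auto it = dex2line.find(entry.second);
    if (it == dex2line.end()) {
      continue;  // No line information for this dex pc.
    }
    if (it->second != last_line) {  // Same rule as the CurrentLine() check below.
      printf("row: pc=0x%x line=%d\n", static_cast<unsigned>(entry.first), it->second);
      last_line = it->second;
    }
  }
  return 0;
}

The real code additionally emits a prologue row and flips the is_stmt flag around it so that debuggers do not place breakpoints in the prologue.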
+ const DefaultSrcMap& dex2line_map = debug_info_callbacks.dex2line_; + uint32_t low_pc = text_section_offset + mi.low_pc_; + if (file_index != 0 && !dex2line_map.empty()) { + bool first = true; + for (SrcMapElem pc2dex : mi.compiled_method_->GetSrcMappingTable()) { + uint32_t pc = pc2dex.from_; + int dex_pc = pc2dex.to_; + auto dex2line = dex2line_map.Find(static_cast<uint32_t>(dex_pc)); + if (dex2line.first) { + int line = dex2line.second; + if (first) { + first = false; + if (pc > 0) { + // Assume that any preceding code is prologue. + int first_line = dex2line_map.front().to_; + // Prologue is not a sensible place for a breakpoint. + opcodes.NegateStmt(); + opcodes.AddRow(low_pc, first_line); + opcodes.NegateStmt(); + opcodes.SetPrologueEnd(); + } + opcodes.AddRow(low_pc + pc, line); + } else if (line != opcodes.CurrentLine()) { + opcodes.AddRow(low_pc + pc, line); + } + } + } + } else { + // line 0 - instruction cannot be attributed to any source line. + opcodes.AddRow(low_pc, 0); + } + } + opcodes.AdvancePC(text_section_offset + cunit_high_pc); + opcodes.EndSequence(); + WriteDebugLineTable(directories, files, opcodes, debug_line); +} + +} // namespace dwarf +} // namespace art diff --git a/compiler/elf_writer_debug.h b/compiler/elf_writer_debug.h new file mode 100644 index 0000000000..39a99d6d38 --- /dev/null +++ b/compiler/elf_writer_debug.h @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_ELF_WRITER_DEBUG_H_ +#define ART_COMPILER_ELF_WRITER_DEBUG_H_ + +#include <vector> + +#include "oat_writer.h" + +namespace art { +namespace dwarf { + +void WriteDebugSections(const CompilerDriver* compiler, + const OatWriter* oat_writer, + uint32_t text_section_offset, + std::vector<uint8_t>* eh_frame_data, + std::vector<uint8_t>* debug_info_data, + std::vector<uint8_t>* debug_abbrev_data, + std::vector<uint8_t>* debug_str_data, + std::vector<uint8_t>* debug_line_data); + +} // namespace dwarf +} // namespace art + +#endif // ART_COMPILER_ELF_WRITER_DEBUG_H_ diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc index a822b24cde..e9af25f293 100644 --- a/compiler/elf_writer_quick.cc +++ b/compiler/elf_writer_quick.cc @@ -21,11 +21,14 @@ #include "base/logging.h" #include "base/unix_file/fd_file.h" #include "buffered_output_stream.h" +#include "compiled_method.h" +#include "dex_file-inl.h" #include "driver/compiler_driver.h" -#include "dwarf.h" +#include "driver/compiler_options.h" #include "elf_builder.h" #include "elf_file.h" #include "elf_utils.h" +#include "elf_writer_debug.h" #include "file_output_stream.h" #include "globals.h" #include "leb128.h" @@ -35,42 +38,6 @@ namespace art { -static void PushByte(std::vector<uint8_t>* buf, int data) { - buf->push_back(data & 0xff); -} - -static uint32_t PushStr(std::vector<uint8_t>* buf, const char* str, const char* def = nullptr) { - if (str == nullptr) { - str = def; - } - - uint32_t offset = buf->size(); - for (size_t i = 0; str[i] != '\0'; ++i) { - buf->push_back(str[i]); - } - buf->push_back('\0'); - return offset; -} - -static uint32_t PushStr(std::vector<uint8_t>* buf, const std::string &str) { - uint32_t offset = buf->size(); - buf->insert(buf->end(), str.begin(), str.end()); - buf->push_back('\0'); - return offset; -} - -static void UpdateWord(std::vector<uint8_t>* buf, int offset, int data) { - (*buf)[offset+0] = data; - (*buf)[offset+1] = data >> 8; - (*buf)[offset+2] = data >> 16; - (*buf)[offset+3] = data >> 24; -} - -static void PushHalf(std::vector<uint8_t>* buf, int data) { - buf->push_back(data & 0xff); - buf->push_back((data >> 8) & 0xff); -} - template <typename Elf_Word, typename Elf_Sword, typename Elf_Addr, typename Elf_Dyn, typename Elf_Sym, typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr> @@ -85,116 +52,6 @@ bool ElfWriterQuick<Elf_Word, Elf_Sword, Elf_Addr, Elf_Dyn, return elf_writer.Write(oat_writer, dex_files, android_root, is_host); } -std::vector<uint8_t>* ConstructCIEFrameX86(bool is_x86_64) { - std::vector<uint8_t>* cfi_info = new std::vector<uint8_t>; - - // Length (will be filled in later in this routine). - if (is_x86_64) { - Push32(cfi_info, 0xffffffff); // Indicates 64bit - Push32(cfi_info, 0); - Push32(cfi_info, 0); - } else { - Push32(cfi_info, 0); - } - - // CIE id: always 0. - if (is_x86_64) { - Push32(cfi_info, 0); - Push32(cfi_info, 0); - } else { - Push32(cfi_info, 0); - } - - // Version: always 1. - cfi_info->push_back(0x01); - - // Augmentation: 'zR\0' - cfi_info->push_back(0x7a); - cfi_info->push_back(0x52); - cfi_info->push_back(0x0); - - // Code alignment: 1. - EncodeUnsignedLeb128(1, cfi_info); - - // Data alignment. - if (is_x86_64) { - EncodeSignedLeb128(-8, cfi_info); - } else { - EncodeSignedLeb128(-4, cfi_info); - } - - // Return address register. - if (is_x86_64) { - // R16(RIP) - cfi_info->push_back(0x10); - } else { - // R8(EIP) - cfi_info->push_back(0x08); - } - - // Augmentation length: 1. 
- cfi_info->push_back(1); - - // Augmentation data. - if (is_x86_64) { - // 0x04 ((DW_EH_PE_absptr << 4) | DW_EH_PE_udata8). - cfi_info->push_back(0x04); - } else { - // 0x03 ((DW_EH_PE_absptr << 4) | DW_EH_PE_udata4). - cfi_info->push_back(0x03); - } - - // Initial instructions. - if (is_x86_64) { - // DW_CFA_def_cfa R7(RSP) 8. - cfi_info->push_back(0x0c); - cfi_info->push_back(0x07); - cfi_info->push_back(0x08); - - // DW_CFA_offset R16(RIP) 1 (* -8). - cfi_info->push_back(0x90); - cfi_info->push_back(0x01); - } else { - // DW_CFA_def_cfa R4(ESP) 4. - cfi_info->push_back(0x0c); - cfi_info->push_back(0x04); - cfi_info->push_back(0x04); - - // DW_CFA_offset R8(EIP) 1 (* -4). - cfi_info->push_back(0x88); - cfi_info->push_back(0x01); - } - - // Padding to a multiple of 4 - while ((cfi_info->size() & 3) != 0) { - // DW_CFA_nop is encoded as 0. - cfi_info->push_back(0); - } - - // Set the length of the CIE inside the generated bytes. - if (is_x86_64) { - uint32_t length = cfi_info->size() - 12; - UpdateWord(cfi_info, 4, length); - } else { - uint32_t length = cfi_info->size() - 4; - UpdateWord(cfi_info, 0, length); - } - return cfi_info; -} - -std::vector<uint8_t>* ConstructCIEFrame(InstructionSet isa) { - switch (isa) { - case kX86: - return ConstructCIEFrameX86(false); - case kX86_64: - return ConstructCIEFrameX86(true); - - default: - // Not implemented. - return nullptr; - } -} - class OatWriterWrapper FINAL : public CodeOutput { public: explicit OatWriterWrapper(OatWriter* oat_writer) : oat_writer_(oat_writer) {} @@ -253,7 +110,8 @@ bool ElfWriterQuick<Elf_Word, Elf_Sword, Elf_Addr, Elf_Dyn, return false; } - if (compiler_driver_->GetCompilerOptions().GetIncludeDebugSymbols()) { + if (compiler_driver_->GetCompilerOptions().GetIncludeDebugSymbols() && + !oat_writer->GetMethodDebugInfo().empty()) { WriteDebugSymbols(compiler_driver_, builder.get(), oat_writer); } @@ -273,402 +131,6 @@ bool ElfWriterQuick<Elf_Word, Elf_Sword, Elf_Addr, Elf_Dyn, return builder->Write(); } -class LineTableGenerator FINAL : public Leb128Encoder { - public: - LineTableGenerator(int line_base, int line_range, int opcode_base, - std::vector<uint8_t>* data, uintptr_t current_address, - size_t current_line) - : Leb128Encoder(data), line_base_(line_base), line_range_(line_range), - opcode_base_(opcode_base), current_address_(current_address), - current_line_(current_line), current_file_index_(0) {} - - void PutDelta(unsigned delta_addr, int delta_line) { - current_line_ += delta_line; - current_address_ += delta_addr; - - if (delta_line >= line_base_ && delta_line < line_base_ + line_range_) { - unsigned special_opcode = (delta_line - line_base_) + - (line_range_ * delta_addr) + opcode_base_; - if (special_opcode <= 255) { - PushByte(data_, special_opcode); - return; - } - } - - // generate standart opcode for address advance - if (delta_addr != 0) { - PushByte(data_, DW_LNS_advance_pc); - PushBackUnsigned(delta_addr); - } - - // generate standart opcode for line delta - if (delta_line != 0) { - PushByte(data_, DW_LNS_advance_line); - PushBackSigned(delta_line); - } - - // generate standart opcode for new LTN entry - PushByte(data_, DW_LNS_copy); - } - - void SetAddr(uintptr_t addr) { - if (current_address_ == addr) { - return; - } - - current_address_ = addr; - - PushByte(data_, 0); // extended opcode: - PushByte(data_, 1 + 4); // length: opcode_size + address_size - PushByte(data_, DW_LNE_set_address); - Push32(data_, addr); - } - - void SetLine(unsigned line) { - int delta_line = line - current_line_; - if 
(delta_line) { - current_line_ = line; - PushByte(data_, DW_LNS_advance_line); - PushBackSigned(delta_line); - } - } - - void SetFile(unsigned file_index) { - if (current_file_index_ != file_index) { - current_file_index_ = file_index; - PushByte(data_, DW_LNS_set_file); - PushBackUnsigned(file_index); - } - } - - void EndSequence() { - // End of Line Table Program - // 0(=ext), 1(len), DW_LNE_end_sequence - PushByte(data_, 0); - PushByte(data_, 1); - PushByte(data_, DW_LNE_end_sequence); - } - - private: - const int line_base_; - const int line_range_; - const int opcode_base_; - uintptr_t current_address_; - size_t current_line_; - unsigned current_file_index_; - - DISALLOW_COPY_AND_ASSIGN(LineTableGenerator); -}; - -// TODO: rewriting it using DexFile::DecodeDebugInfo needs unneeded stuff. -static void GetLineInfoForJava(const uint8_t* dbgstream, const SwapSrcMap& pc2dex, - DefaultSrcMap* result, uint32_t start_pc = 0) { - if (dbgstream == nullptr) { - return; - } - - int adjopcode; - uint32_t dex_offset = 0; - uint32_t java_line = DecodeUnsignedLeb128(&dbgstream); - - // skip parameters - for (uint32_t param_count = DecodeUnsignedLeb128(&dbgstream); param_count != 0; --param_count) { - DecodeUnsignedLeb128(&dbgstream); - } - - for (bool is_end = false; is_end == false; ) { - uint8_t opcode = *dbgstream; - dbgstream++; - switch (opcode) { - case DexFile::DBG_END_SEQUENCE: - is_end = true; - break; - - case DexFile::DBG_ADVANCE_PC: - dex_offset += DecodeUnsignedLeb128(&dbgstream); - break; - - case DexFile::DBG_ADVANCE_LINE: - java_line += DecodeSignedLeb128(&dbgstream); - break; - - case DexFile::DBG_START_LOCAL: - case DexFile::DBG_START_LOCAL_EXTENDED: - DecodeUnsignedLeb128(&dbgstream); - DecodeUnsignedLeb128(&dbgstream); - DecodeUnsignedLeb128(&dbgstream); - - if (opcode == DexFile::DBG_START_LOCAL_EXTENDED) { - DecodeUnsignedLeb128(&dbgstream); - } - break; - - case DexFile::DBG_END_LOCAL: - case DexFile::DBG_RESTART_LOCAL: - DecodeUnsignedLeb128(&dbgstream); - break; - - case DexFile::DBG_SET_PROLOGUE_END: - case DexFile::DBG_SET_EPILOGUE_BEGIN: - case DexFile::DBG_SET_FILE: - break; - - default: - adjopcode = opcode - DexFile::DBG_FIRST_SPECIAL; - dex_offset += adjopcode / DexFile::DBG_LINE_RANGE; - java_line += DexFile::DBG_LINE_BASE + (adjopcode % DexFile::DBG_LINE_RANGE); - - for (SwapSrcMap::const_iterator found = pc2dex.FindByTo(dex_offset); - found != pc2dex.end() && found->to_ == static_cast<int32_t>(dex_offset); - found++) { - result->push_back({found->from_ + start_pc, static_cast<int32_t>(java_line)}); - } - break; - } - } -} - -/* - * @brief Generate the DWARF debug_info and debug_abbrev sections - * @param oat_writer The Oat file Writer. - * @param dbg_info Compilation unit information. - * @param dbg_abbrev Abbreviations used to generate dbg_info. - * @param dbg_str Debug strings. - */ -static void FillInCFIInformation(OatWriter* oat_writer, - std::vector<uint8_t>* dbg_info, - std::vector<uint8_t>* dbg_abbrev, - std::vector<uint8_t>* dbg_str, - std::vector<uint8_t>* dbg_line, - uint32_t text_section_offset) { - const std::vector<OatWriter::DebugInfo>& method_info = oat_writer->GetCFIMethodInfo(); - - uint32_t producer_str_offset = PushStr(dbg_str, "Android dex2oat"); - - // Create the debug_abbrev section with boilerplate information. - // We only care about low_pc and high_pc right now for the compilation - // unit and methods. - - // Tag 1: Compilation unit: DW_TAG_compile_unit. 
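For readers decoding the abbreviation bytes pushed by hand below (or the equivalent table the new DebugInfoEntryWriter derives automatically), here is the same compilation-unit abbreviation spelled out numerically; the constants are the standard DWARF values, all small enough to fit single-byte ULEB128s:

#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  std::vector<uint8_t> abbrev = {
    1,           // Abbreviation code referenced from .debug_info.
    0x11,        // DW_TAG_compile_unit.
    0x01,        // DW_CHILDREN_yes.
    0x25, 0x0e,  // DW_AT_producer, DW_FORM_strp.
    0x13, 0x0b,  // DW_AT_language, DW_FORM_data1.
    0x11, 0x01,  // DW_AT_low_pc, DW_FORM_addr.
    0x12, 0x01,  // DW_AT_high_pc, DW_FORM_addr.
    0x10, 0x17,  // DW_AT_stmt_list, DW_FORM_sec_offset.
    0, 0,        // End of the attribute list.
  };
  printf("%zu bytes describe the compile-unit DIE layout\n", abbrev.size());
  return 0;
}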
- PushByte(dbg_abbrev, 1); - PushByte(dbg_abbrev, DW_TAG_compile_unit); - - // There are children (the methods). - PushByte(dbg_abbrev, DW_CHILDREN_yes); - - // DW_AT_producer DW_FORM_data1. - // REVIEW: we can get rid of dbg_str section if - // DW_FORM_string (immediate string) was used everywhere instead of - // DW_FORM_strp (ref to string from .debug_str section). - // DW_FORM_strp makes sense only if we reuse the strings. - PushByte(dbg_abbrev, DW_AT_producer); - PushByte(dbg_abbrev, DW_FORM_strp); - - // DW_LANG_Java DW_FORM_data1. - PushByte(dbg_abbrev, DW_AT_language); - PushByte(dbg_abbrev, DW_FORM_data1); - - // DW_AT_low_pc DW_FORM_addr. - PushByte(dbg_abbrev, DW_AT_low_pc); - PushByte(dbg_abbrev, DW_FORM_addr); - - // DW_AT_high_pc DW_FORM_addr. - PushByte(dbg_abbrev, DW_AT_high_pc); - PushByte(dbg_abbrev, DW_FORM_addr); - - if (dbg_line != nullptr) { - // DW_AT_stmt_list DW_FORM_sec_offset. - PushByte(dbg_abbrev, DW_AT_stmt_list); - PushByte(dbg_abbrev, DW_FORM_sec_offset); - } - - // End of DW_TAG_compile_unit. - PushHalf(dbg_abbrev, 0); - - // Tag 2: Compilation unit: DW_TAG_subprogram. - PushByte(dbg_abbrev, 2); - PushByte(dbg_abbrev, DW_TAG_subprogram); - - // There are no children. - PushByte(dbg_abbrev, DW_CHILDREN_no); - - // Name of the method. - PushByte(dbg_abbrev, DW_AT_name); - PushByte(dbg_abbrev, DW_FORM_strp); - - // DW_AT_low_pc DW_FORM_addr. - PushByte(dbg_abbrev, DW_AT_low_pc); - PushByte(dbg_abbrev, DW_FORM_addr); - - // DW_AT_high_pc DW_FORM_addr. - PushByte(dbg_abbrev, DW_AT_high_pc); - PushByte(dbg_abbrev, DW_FORM_addr); - - // End of DW_TAG_subprogram. - PushHalf(dbg_abbrev, 0); - - // Start the debug_info section with the header information - // 'unit_length' will be filled in later. - int cunit_length = dbg_info->size(); - Push32(dbg_info, 0); - - // 'version' - 3. - PushHalf(dbg_info, 3); - - // Offset into .debug_abbrev section (always 0). - Push32(dbg_info, 0); - - // Address size: 4. - PushByte(dbg_info, 4); - - // Start the description for the compilation unit. - // This uses tag 1. - PushByte(dbg_info, 1); - - // The producer is Android dex2oat. - Push32(dbg_info, producer_str_offset); - - // The language is Java. - PushByte(dbg_info, DW_LANG_Java); - - // low_pc and high_pc. - uint32_t cunit_low_pc = 0 - 1; - uint32_t cunit_high_pc = 0; - int cunit_low_pc_pos = dbg_info->size(); - Push32(dbg_info, 0); - Push32(dbg_info, 0); - - if (dbg_line == nullptr) { - for (size_t i = 0; i < method_info.size(); ++i) { - const OatWriter::DebugInfo &dbg = method_info[i]; - - cunit_low_pc = std::min(cunit_low_pc, dbg.low_pc_); - cunit_high_pc = std::max(cunit_high_pc, dbg.high_pc_); - - // Start a new TAG: subroutine (2). - PushByte(dbg_info, 2); - - // Enter name, low_pc, high_pc. - Push32(dbg_info, PushStr(dbg_str, dbg.method_name_)); - Push32(dbg_info, dbg.low_pc_ + text_section_offset); - Push32(dbg_info, dbg.high_pc_ + text_section_offset); - } - } else { - // TODO: in gdb info functions <regexp> - reports Java functions, but - // source file is <unknown> because .debug_line is formed as one - // compilation unit. To fix this it is possible to generate - // a separate compilation unit for every distinct Java source. - // Each of the these compilation units can have several non-adjacent - // method ranges. 
- - // Line number table offset - Push32(dbg_info, dbg_line->size()); - - size_t lnt_length = dbg_line->size(); - Push32(dbg_line, 0); - - PushHalf(dbg_line, 4); // LNT Version DWARF v4 => 4 - - size_t lnt_hdr_length = dbg_line->size(); - Push32(dbg_line, 0); // TODO: 64-bit uses 8-byte here - - PushByte(dbg_line, 1); // minimum_instruction_length (ubyte) - PushByte(dbg_line, 1); // maximum_operations_per_instruction (ubyte) = always 1 - PushByte(dbg_line, 1); // default_is_stmt (ubyte) - - const int8_t LINE_BASE = -5; - PushByte(dbg_line, LINE_BASE); // line_base (sbyte) - - const uint8_t LINE_RANGE = 14; - PushByte(dbg_line, LINE_RANGE); // line_range (ubyte) - - const uint8_t OPCODE_BASE = 13; - PushByte(dbg_line, OPCODE_BASE); // opcode_base (ubyte) - - // Standard_opcode_lengths (array of ubyte). - PushByte(dbg_line, 0); PushByte(dbg_line, 1); PushByte(dbg_line, 1); - PushByte(dbg_line, 1); PushByte(dbg_line, 1); PushByte(dbg_line, 0); - PushByte(dbg_line, 0); PushByte(dbg_line, 0); PushByte(dbg_line, 1); - PushByte(dbg_line, 0); PushByte(dbg_line, 0); PushByte(dbg_line, 1); - - PushByte(dbg_line, 0); // include_directories (sequence of path names) = EMPTY - - // File_names (sequence of file entries). - std::unordered_map<const char*, size_t> files; - for (size_t i = 0; i < method_info.size(); ++i) { - const OatWriter::DebugInfo &dbg = method_info[i]; - // TODO: add package directory to the file name - const char* file_name = dbg.src_file_name_ == nullptr ? "null" : dbg.src_file_name_; - auto found = files.find(file_name); - if (found == files.end()) { - size_t file_index = 1 + files.size(); - files[file_name] = file_index; - PushStr(dbg_line, file_name); - PushByte(dbg_line, 0); // include directory index = LEB128(0) - no directory - PushByte(dbg_line, 0); // modification time = LEB128(0) - NA - PushByte(dbg_line, 0); // file length = LEB128(0) - NA - } - } - PushByte(dbg_line, 0); // End of file_names. - - // Set lnt header length. - UpdateWord(dbg_line, lnt_hdr_length, dbg_line->size() - lnt_hdr_length - 4); - - // Generate Line Number Program code, one long program for all methods. - LineTableGenerator line_table_generator(LINE_BASE, LINE_RANGE, OPCODE_BASE, - dbg_line, 0, 1); - - DefaultSrcMap pc2java_map; - for (size_t i = 0; i < method_info.size(); ++i) { - const OatWriter::DebugInfo &dbg = method_info[i]; - const char* file_name = (dbg.src_file_name_ == nullptr) ? "null" : dbg.src_file_name_; - size_t file_index = files[file_name]; - DCHECK_NE(file_index, 0U) << file_name; - - cunit_low_pc = std::min(cunit_low_pc, dbg.low_pc_); - cunit_high_pc = std::max(cunit_high_pc, dbg.high_pc_); - - // Start a new TAG: subroutine (2). - PushByte(dbg_info, 2); - - // Enter name, low_pc, high_pc. - Push32(dbg_info, PushStr(dbg_str, dbg.method_name_)); - Push32(dbg_info, dbg.low_pc_ + text_section_offset); - Push32(dbg_info, dbg.high_pc_ + text_section_offset); - - GetLineInfoForJava(dbg.dbgstream_, dbg.compiled_method_->GetSrcMappingTable(), - &pc2java_map, dbg.low_pc_); - pc2java_map.DeltaFormat({dbg.low_pc_, 1}, dbg.high_pc_); - if (!pc2java_map.empty()) { - line_table_generator.SetFile(file_index); - line_table_generator.SetAddr(dbg.low_pc_ + text_section_offset); - line_table_generator.SetLine(1); - for (auto& src_map_elem : pc2java_map) { - line_table_generator.PutDelta(src_map_elem.from_, src_map_elem.to_); - } - pc2java_map.clear(); - } - } - - // End Sequence should have the highest address set. 
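The SetAddr and EndSequence calls of the removed generator (and SetAddress/EndSequence on the new DebugLineOpCodeWriter) emit DWARF extended line-number opcodes, which are introduced by a zero byte and a length. A standalone sketch of the two byte sequences, assuming a 4-byte address:

#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  std::vector<uint8_t> line;
  uint32_t addr = 0x1000;
  line.push_back(0);      // Extended opcode marker.
  line.push_back(1 + 4);  // Length: sub-opcode plus 4 address bytes.
  line.push_back(0x02);   // DW_LNE_set_address.
  for (int shift = 0; shift < 32; shift += 8) {
    line.push_back((addr >> shift) & 0xff);
  }
  line.push_back(0);      // Extended opcode marker.
  line.push_back(1);      // Length: just the sub-opcode.
  line.push_back(0x01);   // DW_LNE_end_sequence.
  assert(line.size() == 10);
  return 0;
}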
- line_table_generator.SetAddr(cunit_high_pc + text_section_offset); - line_table_generator.EndSequence(); - - // set lnt length - UpdateWord(dbg_line, lnt_length, dbg_line->size() - lnt_length - 4); - } - - // One byte terminator - PushByte(dbg_info, 0); - - // Fill in cunit's low_pc and high_pc. - UpdateWord(dbg_info, cunit_low_pc_pos, cunit_low_pc + text_section_offset); - UpdateWord(dbg_info, cunit_low_pc_pos + 4, cunit_high_pc + text_section_offset); - - // We have now walked all the methods. Fill in lengths. - UpdateWord(dbg_info, cunit_length, dbg_info->size() - cunit_length - 4); -} - template <typename Elf_Word, typename Elf_Sword, typename Elf_Addr, typename Elf_Dyn, typename Elf_Sym, typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr> @@ -678,18 +140,23 @@ static void WriteDebugSymbols(const CompilerDriver* compiler_driver, ElfBuilder<Elf_Word, Elf_Sword, Elf_Addr, Elf_Dyn, Elf_Sym, Elf_Ehdr, Elf_Phdr, Elf_Shdr>* builder, OatWriter* oat_writer) { - std::unique_ptr<std::vector<uint8_t>> cfi_info( - ConstructCIEFrame(compiler_driver->GetInstructionSet())); - - Elf_Addr text_section_address = builder->GetTextBuilder().GetSection()->sh_addr; - // Iterate over the compiled methods. - const std::vector<OatWriter::DebugInfo>& method_info = oat_writer->GetCFIMethodInfo(); + const std::vector<OatWriter::DebugInfo>& method_info = oat_writer->GetMethodDebugInfo(); ElfSymtabBuilder<Elf_Word, Elf_Sword, Elf_Addr, Elf_Sym, Elf_Shdr>* symtab = builder->GetSymtabBuilder(); for (auto it = method_info.begin(); it != method_info.end(); ++it) { - symtab->AddSymbol(it->method_name_, &builder->GetTextBuilder(), it->low_pc_, true, - it->high_pc_ - it->low_pc_, STB_GLOBAL, STT_FUNC); + std::string name = PrettyMethod(it->dex_method_index_, *it->dex_file_, true); + if (it->deduped_) { + // TODO We should place the DEDUPED tag on the first instance of a deduplicated symbol + // so that it will show up in a debuggerd crash report. + name += " [ DEDUPED ]"; + } + + uint32_t low_pc = it->low_pc_; + // Add in code delta, e.g., thumb bit 0 for Thumb2 code. + low_pc += it->compiled_method_->CodeDelta(); + symtab->AddSymbol(name, &builder->GetTextBuilder(), low_pc, + true, it->high_pc_ - it->low_pc_, STB_GLOBAL, STT_FUNC); // Conforming to aaelf, add $t mapping symbol to indicate start of a sequence of thumb2 // instructions, so that disassembler tools can correctly disassemble. @@ -697,109 +164,29 @@ static void WriteDebugSymbols(const CompilerDriver* compiler_driver, symtab->AddSymbol("$t", &builder->GetTextBuilder(), it->low_pc_ & ~1, true, 0, STB_LOCAL, STT_NOTYPE); } - - // Include CFI for compiled method, if possible. - if (cfi_info.get() != nullptr) { - DCHECK(it->compiled_method_ != nullptr); - - // Copy in the FDE, if present - const SwapVector<uint8_t>* fde = it->compiled_method_->GetCFIInfo(); - if (fde != nullptr) { - // Copy the information into cfi_info and then fix the address in the new copy. - int cur_offset = cfi_info->size(); - cfi_info->insert(cfi_info->end(), fde->begin(), fde->end()); - - bool is_64bit = *(reinterpret_cast<const uint32_t*>(fde->data())) == 0xffffffff; - - // Set the 'CIE_pointer' field. - uint64_t CIE_pointer = cur_offset + (is_64bit ? 
12 : 4); - uint64_t offset_to_update = CIE_pointer; - if (is_64bit) { - (*cfi_info)[offset_to_update+0] = CIE_pointer; - (*cfi_info)[offset_to_update+1] = CIE_pointer >> 8; - (*cfi_info)[offset_to_update+2] = CIE_pointer >> 16; - (*cfi_info)[offset_to_update+3] = CIE_pointer >> 24; - (*cfi_info)[offset_to_update+4] = CIE_pointer >> 32; - (*cfi_info)[offset_to_update+5] = CIE_pointer >> 40; - (*cfi_info)[offset_to_update+6] = CIE_pointer >> 48; - (*cfi_info)[offset_to_update+7] = CIE_pointer >> 56; - } else { - (*cfi_info)[offset_to_update+0] = CIE_pointer; - (*cfi_info)[offset_to_update+1] = CIE_pointer >> 8; - (*cfi_info)[offset_to_update+2] = CIE_pointer >> 16; - (*cfi_info)[offset_to_update+3] = CIE_pointer >> 24; - } - - // Set the 'initial_location' field. - offset_to_update += is_64bit ? 8 : 4; - if (is_64bit) { - const uint64_t quick_code_start = it->low_pc_ + text_section_address; - (*cfi_info)[offset_to_update+0] = quick_code_start; - (*cfi_info)[offset_to_update+1] = quick_code_start >> 8; - (*cfi_info)[offset_to_update+2] = quick_code_start >> 16; - (*cfi_info)[offset_to_update+3] = quick_code_start >> 24; - (*cfi_info)[offset_to_update+4] = quick_code_start >> 32; - (*cfi_info)[offset_to_update+5] = quick_code_start >> 40; - (*cfi_info)[offset_to_update+6] = quick_code_start >> 48; - (*cfi_info)[offset_to_update+7] = quick_code_start >> 56; - } else { - const uint32_t quick_code_start = it->low_pc_ + text_section_address; - (*cfi_info)[offset_to_update+0] = quick_code_start; - (*cfi_info)[offset_to_update+1] = quick_code_start >> 8; - (*cfi_info)[offset_to_update+2] = quick_code_start >> 16; - (*cfi_info)[offset_to_update+3] = quick_code_start >> 24; - } - } - } - } - - bool hasCFI = (cfi_info.get() != nullptr); - bool hasLineInfo = false; - for (auto& dbg_info : oat_writer->GetCFIMethodInfo()) { - if (dbg_info.dbgstream_ != nullptr && - !dbg_info.compiled_method_->GetSrcMappingTable().empty()) { - hasLineInfo = true; - break; - } } - if (hasLineInfo || hasCFI) { - ElfRawSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr> debug_info(".debug_info", - SHT_PROGBITS, - 0, nullptr, 0, 1, 0); - ElfRawSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr> debug_abbrev(".debug_abbrev", - SHT_PROGBITS, - 0, nullptr, 0, 1, 0); - ElfRawSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr> debug_str(".debug_str", - SHT_PROGBITS, - 0, nullptr, 0, 1, 0); - ElfRawSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr> debug_line(".debug_line", - SHT_PROGBITS, - 0, nullptr, 0, 1, 0); - - FillInCFIInformation(oat_writer, debug_info.GetBuffer(), - debug_abbrev.GetBuffer(), debug_str.GetBuffer(), - hasLineInfo ? 
debug_line.GetBuffer() : nullptr, - text_section_address); - - builder->RegisterRawSection(debug_info); - builder->RegisterRawSection(debug_abbrev); - - if (hasCFI) { - ElfRawSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr> eh_frame(".eh_frame", - SHT_PROGBITS, - SHF_ALLOC, - nullptr, 0, 4, 0); - eh_frame.SetBuffer(std::move(*cfi_info.get())); - builder->RegisterRawSection(eh_frame); - } - - if (hasLineInfo) { - builder->RegisterRawSection(debug_line); - } - - builder->RegisterRawSection(debug_str); - } + typedef ElfRawSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr> Section; + Section eh_frame(".eh_frame", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, 4, 0); + Section debug_info(".debug_info", SHT_PROGBITS, 0, nullptr, 0, 1, 0); + Section debug_abbrev(".debug_abbrev", SHT_PROGBITS, 0, nullptr, 0, 1, 0); + Section debug_str(".debug_str", SHT_PROGBITS, 0, nullptr, 0, 1, 0); + Section debug_line(".debug_line", SHT_PROGBITS, 0, nullptr, 0, 1, 0); + + dwarf::WriteDebugSections(compiler_driver, + oat_writer, + builder->GetTextBuilder().GetSection()->sh_addr, + eh_frame.GetBuffer(), + debug_info.GetBuffer(), + debug_abbrev.GetBuffer(), + debug_str.GetBuffer(), + debug_line.GetBuffer()); + + builder->RegisterRawSection(eh_frame); + builder->RegisterRawSection(debug_info); + builder->RegisterRawSection(debug_abbrev); + builder->RegisterRawSection(debug_str); + builder->RegisterRawSection(debug_line); } // Explicit instantiations diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc index c1555aa523..1ede228c4f 100644 --- a/compiler/image_writer.cc +++ b/compiler/image_writer.cc @@ -19,6 +19,7 @@ #include <sys/stat.h> #include <memory> +#include <numeric> #include <vector> #include "base/logging.h" @@ -54,8 +55,7 @@ #include "runtime.h" #include "scoped_thread_state_change.h" #include "handle_scope-inl.h" - -#include <numeric> +#include "utils/dex_cache_arrays_layout-inl.h" using ::art::mirror::ArtField; using ::art::mirror::ArtMethod; @@ -238,7 +238,7 @@ void ImageWriter::AssignImageOffset(mirror::Object* object, ImageWriter::BinSlot DCHECK(object != nullptr); DCHECK_NE(image_objects_offset_begin_, 0u); - size_t previous_bin_sizes = GetBinSizeSum(bin_slot.GetBin()); // sum sizes in [0..bin#) + size_t previous_bin_sizes = bin_slot_previous_sizes_[bin_slot.GetBin()]; size_t new_offset = image_objects_offset_begin_ + previous_bin_sizes + bin_slot.GetIndex(); DCHECK_ALIGNED(new_offset, kObjectAlignment); @@ -293,6 +293,28 @@ void ImageWriter::SetImageBinSlot(mirror::Object* object, BinSlot bin_slot) { DCHECK(IsImageBinSlotAssigned(object)); } +void ImageWriter::PrepareDexCacheArraySlots() { + ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); + ReaderMutexLock mu(Thread::Current(), *class_linker->DexLock()); + size_t dex_cache_count = class_linker->GetDexCacheCount(); + uint32_t size = 0u; + for (size_t idx = 0; idx < dex_cache_count; ++idx) { + DexCache* dex_cache = class_linker->GetDexCache(idx); + const DexFile* dex_file = dex_cache->GetDexFile(); + dex_cache_array_starts_.Put(dex_file, size); + DexCacheArraysLayout layout(dex_file); + DCHECK(layout.Valid()); + dex_cache_array_indexes_.Put(dex_cache->GetResolvedTypes(), size + layout.TypesOffset()); + dex_cache_array_indexes_.Put(dex_cache->GetResolvedMethods(), size + layout.MethodsOffset()); + dex_cache_array_indexes_.Put(dex_cache->GetResolvedFields(), size + layout.FieldsOffset()); + dex_cache_array_indexes_.Put(dex_cache->GetStrings(), size + layout.StringsOffset()); + size += layout.Size(); + } + // Set the slot size early to avoid 
DCHECK() failures in IsImageBinSlotAssigned() + // when AssignImageBinSlot() assigns their indexes out or order. + bin_slot_sizes_[kBinDexCacheArray] = size; +} + void ImageWriter::AssignImageBinSlot(mirror::Object* object) { DCHECK(object != nullptr); size_t object_size = object->SizeOf(); @@ -307,6 +329,7 @@ void ImageWriter::AssignImageBinSlot(mirror::Object* object) { // This means more pages will stay either clean or shared dirty (with zygote) and // the app will use less of its own (private) memory. Bin bin = kBinRegular; + size_t current_offset = 0u; if (kBinObjects) { // @@ -316,6 +339,12 @@ void ImageWriter::AssignImageBinSlot(mirror::Object* object) { // Memory analysis has determined that the following types of objects get dirtied // the most: // + // * Dex cache arrays are stored in a special bin. The arrays for each dex cache have + // a fixed layout which helps improve generated code (using PC-relative addressing), + // so we pre-calculate their offsets separately in PrepareDexCacheArraySlots(). + // Since these arrays are huge, most pages do not overlap other objects and it's not + // really important where they are for the clean/dirty separation. Due to their + // special PC-relative addressing, we arbitrarily keep them at the beginning. // * Class'es which are verified [their clinit runs only at runtime] // - classes in general [because their static fields get overwritten] // - initialized classes with all-final statics are unlikely to be ever dirty, @@ -376,13 +405,21 @@ void ImageWriter::AssignImageBinSlot(mirror::Object* object) { } } else if (object->GetClass<kVerifyNone>()->IsStringClass()) { bin = kBinString; // Strings are almost always immutable (except for object header). + } else if (object->IsObjectArray()) { + auto it = dex_cache_array_indexes_.find(object); + if (it != dex_cache_array_indexes_.end()) { + bin = kBinDexCacheArray; + current_offset = it->second; // Use prepared offset defined by the DexCacheLayout. + } // else bin = kBinRegular } // else bin = kBinRegular } - size_t current_offset = bin_slot_sizes_[bin]; // How many bytes the current bin is at (aligned). - // Move the current bin size up to accomodate the object we just assigned a bin slot. size_t offset_delta = RoundUp(object_size, kObjectAlignment); // 64-bit alignment - bin_slot_sizes_[bin] += offset_delta; + if (bin != kBinDexCacheArray) { + current_offset = bin_slot_sizes_[bin]; // How many bytes the current bin is at (aligned). + // Move the current bin size up to accomodate the object we just assigned a bin slot. + bin_slot_sizes_[bin] += offset_delta; + } BinSlot new_bin_slot(bin, current_offset); SetImageBinSlot(object, new_bin_slot); @@ -887,8 +924,17 @@ void ImageWriter::CalculateNewObjectOffsets() { // TODO: Image spaces only? DCHECK_LT(image_end_, image_->Size()); image_objects_offset_begin_ = image_end_; + // Prepare bin slots for dex cache arrays. + PrepareDexCacheArraySlots(); // Clear any pre-existing monitors which may have been in the monitor words, assign bin slots. heap->VisitObjects(WalkFieldsCallback, this); + // Calculate cumulative bin slot sizes. + size_t previous_sizes = 0u; + for (size_t i = 0; i != kBinSize; ++i) { + bin_slot_previous_sizes_[i] = previous_sizes; + previous_sizes += bin_slot_sizes_[i]; + } + DCHECK_EQ(previous_sizes, GetBinSizeSum()); // Transform each object's bin slot into an offset which will be used to do the final copy. 
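The bin_slot_previous_sizes_ prefix sums computed above let AssignImageOffset() turn a (bin, offset-in-bin) slot into a final image offset with a single addition per lookup. A standalone sketch of that arithmetic, with made-up sizes:

#include <cassert>
#include <cstddef>
#include <vector>

int main() {
  // Per-bin byte totals after all objects have been assigned slots
  // (values here are invented for the example).
  std::vector<size_t> bin_sizes = {4096, 512, 1024};
  std::vector<size_t> previous(bin_sizes.size(), 0);
  size_t running = 0;
  for (size_t i = 0; i < bin_sizes.size(); ++i) {
    previous[i] = running;  // Sum of all earlier bins.
    running += bin_sizes[i];
  }
  // offset = start of image objects + earlier bins + offset inside own bin.
  size_t image_objects_offset_begin = 0x2000;
  size_t bin = 2;
  size_t offset_in_bin = 64;
  size_t offset = image_objects_offset_begin + previous[bin] + offset_in_bin;
  assert(offset == 0x2000 + 4096 + 512 + 64);
  return 0;
}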
heap->VisitObjects(UnbinObjectsIntoOffsetCallback, this); DCHECK(saved_hashes_map_.empty()); // All binslot hashes should've been put into vector by now. @@ -1187,8 +1233,8 @@ size_t ImageWriter::GetBinSizeSum(ImageWriter::Bin up_to) const { ImageWriter::BinSlot::BinSlot(uint32_t lockword) : lockword_(lockword) { // These values may need to get updated if more bins are added to the enum Bin - static_assert(kBinBits == 3, "wrong number of bin bits"); - static_assert(kBinShift == 29, "wrong number of shift"); + static_assert(kBinBits == 4, "wrong number of bin bits"); + static_assert(kBinShift == 28, "wrong number of shift"); static_assert(sizeof(BinSlot) == sizeof(LockWord), "BinSlot/LockWord must have equal sizes"); DCHECK_LT(GetBin(), kBinSize); diff --git a/compiler/image_writer.h b/compiler/image_writer.h index 53f5ce4545..71044f7b6e 100644 --- a/compiler/image_writer.h +++ b/compiler/image_writer.h @@ -52,7 +52,8 @@ class ImageWriter FINAL { quick_imt_conflict_trampoline_offset_(0), quick_resolution_trampoline_offset_(0), quick_to_interpreter_bridge_offset_(0), compile_pic_(compile_pic), target_ptr_size_(InstructionSetPointerSize(compiler_driver_.GetInstructionSet())), - bin_slot_sizes_(), bin_slot_count_() { + bin_slot_sizes_(), bin_slot_previous_sizes_(), bin_slot_count_(), + string_data_array_(nullptr) { CHECK_NE(image_begin, 0U); } @@ -80,6 +81,14 @@ class ImageWriter FINAL { return reinterpret_cast<mirror::Object*>(image_begin_ + GetImageOffset(object)); } + mirror::HeapReference<mirror::Object>* GetDexCacheArrayElementImageAddress( + const DexFile* dex_file, uint32_t offset) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + auto it = dex_cache_array_starts_.find(dex_file); + DCHECK(it != dex_cache_array_starts_.end()); + return reinterpret_cast<mirror::HeapReference<mirror::Object>*>( + image_begin_ + RoundUp(sizeof(ImageHeader), kObjectAlignment) + it->second + offset); + } + uint8_t* GetOatFileBegin() const { return image_begin_ + RoundUp(image_end_, kPageSize); } @@ -101,6 +110,10 @@ class ImageWriter FINAL { // Classify different kinds of bins that objects end up getting packed into during image writing. enum Bin { + // Dex cache arrays have a special slot for PC-relative addressing. Since they are + // huge, and as such their dirtiness is not important for the clean/dirty separation, + // we arbitrarily keep them at the beginning. + kBinDexCacheArray, // Object arrays belonging to dex cache. // Likely-clean: kBinString, // [String] Almost always immutable (except for obj header). kBinArtMethodsManagedInitialized, // [ArtMethod] Not-native, and initialized. Unlikely to dirty @@ -113,7 +126,6 @@ class ImageWriter FINAL { kBinClassVerified, // Class verified, but initializers haven't been run kBinArtMethodNative, // Art method that is actually native kBinArtMethodNotInitialized, // Art method with a declaring class that wasn't initialized - // Don't care about other art methods since they don't dirty // Add more bins here if we add more segregation code. 
kBinSize, }; @@ -157,6 +169,7 @@ class ImageWriter FINAL { SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); size_t GetImageOffset(mirror::Object* object) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + void PrepareDexCacheArraySlots() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); void AssignImageBinSlot(mirror::Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); void SetImageBinSlot(mirror::Object* object, BinSlot bin_slot) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); @@ -282,6 +295,12 @@ class ImageWriter FINAL { // Memory mapped for generating the image. std::unique_ptr<MemMap> image_; + // Indexes for dex cache arrays (objects are inside of the image so that they don't move). + SafeMap<mirror::Object*, size_t> dex_cache_array_indexes_; + + // The start offsets of the dex cache arrays. + SafeMap<const DexFile*, size_t> dex_cache_array_starts_; + // Saved hashes (objects are inside of the image so that they don't move). std::vector<std::pair<mirror::Object*, uint32_t>> saved_hashes_; @@ -309,6 +328,7 @@ class ImageWriter FINAL { // Bin slot tracking for dirty object packing size_t bin_slot_sizes_[kBinSize]; // Number of bytes in a bin + size_t bin_slot_previous_sizes_[kBinSize]; // Number of bytes in previous bins. size_t bin_slot_count_[kBinSize]; // Number of objects in a bin void* string_data_array_; // The backing for the interned strings. diff --git a/compiler/jni/jni_cfi_test.cc b/compiler/jni/jni_cfi_test.cc new file mode 100644 index 0000000000..3a0d520e47 --- /dev/null +++ b/compiler/jni/jni_cfi_test.cc @@ -0,0 +1,93 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <memory> +#include <vector> + +#include "arch/instruction_set.h" +#include "cfi_test.h" +#include "gtest/gtest.h" +#include "jni/quick/calling_convention.h" +#include "utils/assembler.h" + +#include "jni/jni_cfi_test_expected.inc" + +namespace art { + +// Run the tests only on host. +#ifndef HAVE_ANDROID_OS + +class JNICFITest : public CFITest { + public: + // Enable this flag to generate the expected outputs. + static constexpr bool kGenerateExpected = false; + + void TestImpl(InstructionSet isa, const char* isa_str, + const std::vector<uint8_t>& expected_asm, + const std::vector<uint8_t>& expected_cfi) { + // Description of simple method. + const bool is_static = true; + const bool is_synchronized = false; + const char* shorty = "IIFII"; + std::unique_ptr<JniCallingConvention> jni_conv( + JniCallingConvention::Create(is_static, is_synchronized, shorty, isa)); + std::unique_ptr<ManagedRuntimeCallingConvention> mr_conv( + ManagedRuntimeCallingConvention::Create(is_static, is_synchronized, shorty, isa)); + const int frame_size(jni_conv->FrameSize()); + const std::vector<ManagedRegister>& callee_save_regs = jni_conv->CalleeSaveRegisters(); + + // Assemble the method. 
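The expected_cfi_* arrays further down are raw DWARF CFI programs and can be read by hand. A small sketch decoding the first few kThumb2 bytes (0x44 0x0E 0x1C 0x85 0x07), assuming, as holds here, that the operands are single-byte ULEB128s:

#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  std::vector<uint8_t> cfi = {0x44, 0x0E, 0x1C, 0x85, 0x07};
  for (size_t i = 0; i < cfi.size();) {
    uint8_t op = cfi[i];
    if ((op & 0xC0) == 0x40) {         // DW_CFA_advance_loc: delta in low 6 bits.
      printf("advance_loc %d\n", op & 0x3F);
      i += 1;
    } else if ((op & 0xC0) == 0x80) {  // DW_CFA_offset: register in low 6 bits.
      printf("offset r%d at factored offset %d\n", op & 0x3F, cfi[i + 1]);
      i += 2;
    } else if (op == 0x0E) {           // DW_CFA_def_cfa_offset.
      printf("def_cfa_offset %d\n", cfi[i + 1]);
      i += 2;
    } else {
      break;  // Other opcodes are not handled in this sketch.
    }
  }
  return 0;
}

With the Thumb2 CIE's data alignment factor of -4, the factored offset 7 means register 5 is saved at CFA-28, matching the commented disassembly next to the array.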
+ std::unique_ptr<Assembler> jni_asm(Assembler::Create(isa)); + jni_asm->BuildFrame(frame_size, mr_conv->MethodRegister(), + callee_save_regs, mr_conv->EntrySpills()); + jni_asm->IncreaseFrameSize(32); + jni_asm->DecreaseFrameSize(32); + jni_asm->RemoveFrame(frame_size, callee_save_regs); + jni_asm->EmitSlowPaths(); + std::vector<uint8_t> actual_asm(jni_asm->CodeSize()); + MemoryRegion code(&actual_asm[0], actual_asm.size()); + jni_asm->FinalizeInstructions(code); + ASSERT_EQ(jni_asm->cfi().GetCurrentCFAOffset(), frame_size); + const std::vector<uint8_t>& actual_cfi = *(jni_asm->cfi().data()); + + if (kGenerateExpected) { + GenerateExpected(stdout, isa, isa_str, actual_asm, actual_cfi); + } else { + EXPECT_EQ(expected_asm, actual_asm); + EXPECT_EQ(expected_cfi, actual_cfi); + } + } +}; + +#define TEST_ISA(isa) \ + TEST_F(JNICFITest, isa) { \ + std::vector<uint8_t> expected_asm(expected_asm_##isa, \ + expected_asm_##isa + arraysize(expected_asm_##isa)); \ + std::vector<uint8_t> expected_cfi(expected_cfi_##isa, \ + expected_cfi_##isa + arraysize(expected_cfi_##isa)); \ + TestImpl(isa, #isa, expected_asm, expected_cfi); \ + } + +TEST_ISA(kThumb2) +TEST_ISA(kArm64) +TEST_ISA(kX86) +TEST_ISA(kX86_64) +TEST_ISA(kMips) +TEST_ISA(kMips64) + +#endif // HAVE_ANDROID_OS + +} // namespace art diff --git a/compiler/jni/jni_cfi_test_expected.inc b/compiler/jni/jni_cfi_test_expected.inc new file mode 100644 index 0000000000..47e6f106ca --- /dev/null +++ b/compiler/jni/jni_cfi_test_expected.inc @@ -0,0 +1,505 @@ +static constexpr uint8_t expected_asm_kThumb2[] = { + 0x2D, 0xE9, 0xE0, 0x4D, 0x2D, 0xED, 0x10, 0x8A, 0x89, 0xB0, 0x00, 0x90, + 0xCD, 0xF8, 0x84, 0x10, 0x8D, 0xED, 0x22, 0x0A, 0xCD, 0xF8, 0x8C, 0x20, + 0xCD, 0xF8, 0x90, 0x30, 0x88, 0xB0, 0x08, 0xB0, 0x09, 0xB0, 0xBD, 0xEC, + 0x10, 0x8A, 0xBD, 0xE8, 0xE0, 0x8D, +}; +static constexpr uint8_t expected_cfi_kThumb2[] = { + 0x44, 0x0E, 0x1C, 0x85, 0x07, 0x86, 0x06, 0x87, 0x05, 0x88, 0x04, 0x8A, + 0x03, 0x8B, 0x02, 0x8E, 0x01, 0x44, 0x0E, 0x5C, 0x05, 0x50, 0x17, 0x05, + 0x51, 0x16, 0x05, 0x52, 0x15, 0x05, 0x53, 0x14, 0x05, 0x54, 0x13, 0x05, + 0x55, 0x12, 0x05, 0x56, 0x11, 0x05, 0x57, 0x10, 0x05, 0x58, 0x0F, 0x05, + 0x59, 0x0E, 0x05, 0x5A, 0x0D, 0x05, 0x5B, 0x0C, 0x05, 0x5C, 0x0B, 0x05, + 0x5D, 0x0A, 0x05, 0x5E, 0x09, 0x05, 0x5F, 0x08, 0x42, 0x0E, 0x80, 0x01, + 0x54, 0x0E, 0xA0, 0x01, 0x42, 0x0E, 0x80, 0x01, 0x0A, 0x42, 0x0E, 0x5C, + 0x44, 0x0E, 0x1C, 0x06, 0x50, 0x06, 0x51, 0x06, 0x52, 0x06, 0x53, 0x06, + 0x54, 0x06, 0x55, 0x06, 0x56, 0x06, 0x57, 0x06, 0x58, 0x06, 0x59, 0x06, + 0x5A, 0x06, 0x5B, 0x06, 0x5C, 0x06, 0x5D, 0x06, 0x5E, 0x06, 0x5F, 0x44, + 0x0B, 0x0E, 0x80, 0x01, +}; +// 0x00000000: push {r5, r6, r7, r8, r10, r11, lr} +// 0x00000004: .cfi_def_cfa_offset: 28 +// 0x00000004: .cfi_offset: r5 at cfa-28 +// 0x00000004: .cfi_offset: r6 at cfa-24 +// 0x00000004: .cfi_offset: r7 at cfa-20 +// 0x00000004: .cfi_offset: r8 at cfa-16 +// 0x00000004: .cfi_offset: r10 at cfa-12 +// 0x00000004: .cfi_offset: r11 at cfa-8 +// 0x00000004: .cfi_offset: r14 at cfa-4 +// 0x00000004: vpush.f32 {s16-s31} +// 0x00000008: .cfi_def_cfa_offset: 92 +// 0x00000008: .cfi_offset_extended: r80 at cfa-92 +// 0x00000008: .cfi_offset_extended: r81 at cfa-88 +// 0x00000008: .cfi_offset_extended: r82 at cfa-84 +// 0x00000008: .cfi_offset_extended: r83 at cfa-80 +// 0x00000008: .cfi_offset_extended: r84 at cfa-76 +// 0x00000008: .cfi_offset_extended: r85 at cfa-72 +// 0x00000008: .cfi_offset_extended: r86 at cfa-68 +// 0x00000008: .cfi_offset_extended: r87 at cfa-64 +// 
0x00000008: .cfi_offset_extended: r88 at cfa-60 +// 0x00000008: .cfi_offset_extended: r89 at cfa-56 +// 0x00000008: .cfi_offset_extended: r90 at cfa-52 +// 0x00000008: .cfi_offset_extended: r91 at cfa-48 +// 0x00000008: .cfi_offset_extended: r92 at cfa-44 +// 0x00000008: .cfi_offset_extended: r93 at cfa-40 +// 0x00000008: .cfi_offset_extended: r94 at cfa-36 +// 0x00000008: .cfi_offset_extended: r95 at cfa-32 +// 0x00000008: sub sp, sp, #36 +// 0x0000000a: .cfi_def_cfa_offset: 128 +// 0x0000000a: str r0, [sp, #0] +// 0x0000000c: str.w r1, [sp, #132] +// 0x00000010: vstr.f32 s0, [sp, #136] +// 0x00000014: str.w r2, [sp, #140] +// 0x00000018: str.w r3, [sp, #144] +// 0x0000001c: sub sp, sp, #32 +// 0x0000001e: .cfi_def_cfa_offset: 160 +// 0x0000001e: add sp, sp, #32 +// 0x00000020: .cfi_def_cfa_offset: 128 +// 0x00000020: .cfi_remember_state +// 0x00000020: add sp, sp, #36 +// 0x00000022: .cfi_def_cfa_offset: 92 +// 0x00000022: vpop.f32 {s16-s31} +// 0x00000026: .cfi_def_cfa_offset: 28 +// 0x00000026: .cfi_restore_extended: r80 +// 0x00000026: .cfi_restore_extended: r81 +// 0x00000026: .cfi_restore_extended: r82 +// 0x00000026: .cfi_restore_extended: r83 +// 0x00000026: .cfi_restore_extended: r84 +// 0x00000026: .cfi_restore_extended: r85 +// 0x00000026: .cfi_restore_extended: r86 +// 0x00000026: .cfi_restore_extended: r87 +// 0x00000026: .cfi_restore_extended: r88 +// 0x00000026: .cfi_restore_extended: r89 +// 0x00000026: .cfi_restore_extended: r90 +// 0x00000026: .cfi_restore_extended: r91 +// 0x00000026: .cfi_restore_extended: r92 +// 0x00000026: .cfi_restore_extended: r93 +// 0x00000026: .cfi_restore_extended: r94 +// 0x00000026: .cfi_restore_extended: r95 +// 0x00000026: pop {r5, r6, r7, r8, r10, r11, pc} +// 0x0000002a: .cfi_restore_state +// 0x0000002a: .cfi_def_cfa_offset: 128 + +static constexpr uint8_t expected_asm_kArm64[] = { + 0xFF, 0x03, 0x03, 0xD1, 0xFE, 0x5F, 0x00, 0xF9, 0xFD, 0x5B, 0x00, 0xF9, + 0xFC, 0x57, 0x00, 0xF9, 0xFB, 0x53, 0x00, 0xF9, 0xFA, 0x4F, 0x00, 0xF9, + 0xF9, 0x4B, 0x00, 0xF9, 0xF8, 0x47, 0x00, 0xF9, 0xF7, 0x43, 0x00, 0xF9, + 0xF6, 0x3F, 0x00, 0xF9, 0xF5, 0x3B, 0x00, 0xF9, 0xF4, 0x37, 0x00, 0xF9, + 0xEF, 0x33, 0x00, 0xFD, 0xEE, 0x2F, 0x00, 0xFD, 0xED, 0x2B, 0x00, 0xFD, + 0xEC, 0x27, 0x00, 0xFD, 0xEB, 0x23, 0x00, 0xFD, 0xEA, 0x1F, 0x00, 0xFD, + 0xE9, 0x1B, 0x00, 0xFD, 0xE8, 0x17, 0x00, 0xFD, 0xF5, 0x03, 0x12, 0xAA, + 0xE0, 0x03, 0x00, 0xB9, 0xE1, 0xC7, 0x00, 0xB9, 0xE0, 0xCB, 0x00, 0xBD, + 0xE2, 0xCF, 0x00, 0xB9, 0xE3, 0xD3, 0x00, 0xB9, 0xFF, 0x83, 0x00, 0xD1, + 0xFF, 0x83, 0x00, 0x91, 0xF2, 0x03, 0x15, 0xAA, 0xFE, 0x5F, 0x40, 0xF9, + 0xFD, 0x5B, 0x40, 0xF9, 0xFC, 0x57, 0x40, 0xF9, 0xFB, 0x53, 0x40, 0xF9, + 0xFA, 0x4F, 0x40, 0xF9, 0xF9, 0x4B, 0x40, 0xF9, 0xF8, 0x47, 0x40, 0xF9, + 0xF7, 0x43, 0x40, 0xF9, 0xF6, 0x3F, 0x40, 0xF9, 0xF5, 0x3B, 0x40, 0xF9, + 0xF4, 0x37, 0x40, 0xF9, 0xEF, 0x33, 0x40, 0xFD, 0xEE, 0x2F, 0x40, 0xFD, + 0xED, 0x2B, 0x40, 0xFD, 0xEC, 0x27, 0x40, 0xFD, 0xEB, 0x23, 0x40, 0xFD, + 0xEA, 0x1F, 0x40, 0xFD, 0xE9, 0x1B, 0x40, 0xFD, 0xE8, 0x17, 0x40, 0xFD, + 0xFF, 0x03, 0x03, 0x91, 0xC0, 0x03, 0x5F, 0xD6, +}; +static constexpr uint8_t expected_cfi_kArm64[] = { + 0x44, 0x0E, 0xC0, 0x01, 0x44, 0x9E, 0x02, 0x44, 0x9D, 0x04, 0x44, 0x9C, + 0x06, 0x44, 0x9B, 0x08, 0x44, 0x9A, 0x0A, 0x44, 0x99, 0x0C, 0x44, 0x98, + 0x0E, 0x44, 0x97, 0x10, 0x44, 0x96, 0x12, 0x44, 0x95, 0x14, 0x44, 0x94, + 0x16, 0x44, 0x05, 0x4F, 0x18, 0x44, 0x05, 0x4E, 0x1A, 0x44, 0x05, 0x4D, + 0x1C, 0x44, 0x05, 0x4C, 0x1E, 0x44, 0x05, 0x4B, 0x20, 0x44, 0x05, 0x4A, + 0x22, 0x44, 0x05, 0x49, 
0x24, 0x44, 0x05, 0x48, 0x26, 0x5C, 0x0E, 0xE0, + 0x01, 0x44, 0x0E, 0xC0, 0x01, 0x0A, 0x48, 0xDE, 0x44, 0xDD, 0x44, 0xDC, + 0x44, 0xDB, 0x44, 0xDA, 0x44, 0xD9, 0x44, 0xD8, 0x44, 0xD7, 0x44, 0xD6, + 0x44, 0xD5, 0x44, 0xD4, 0x44, 0x06, 0x4F, 0x44, 0x06, 0x4E, 0x44, 0x06, + 0x4D, 0x44, 0x06, 0x4C, 0x44, 0x06, 0x4B, 0x44, 0x06, 0x4A, 0x44, 0x06, + 0x49, 0x44, 0x06, 0x48, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0xC0, 0x01, +}; +// 0x00000000: sub sp, sp, #0xc0 (192) +// 0x00000004: .cfi_def_cfa_offset: 192 +// 0x00000004: str lr, [sp, #184] +// 0x00000008: .cfi_offset: r30 at cfa-8 +// 0x00000008: str x29, [sp, #176] +// 0x0000000c: .cfi_offset: r29 at cfa-16 +// 0x0000000c: str x28, [sp, #168] +// 0x00000010: .cfi_offset: r28 at cfa-24 +// 0x00000010: str x27, [sp, #160] +// 0x00000014: .cfi_offset: r27 at cfa-32 +// 0x00000014: str x26, [sp, #152] +// 0x00000018: .cfi_offset: r26 at cfa-40 +// 0x00000018: str x25, [sp, #144] +// 0x0000001c: .cfi_offset: r25 at cfa-48 +// 0x0000001c: str x24, [sp, #136] +// 0x00000020: .cfi_offset: r24 at cfa-56 +// 0x00000020: str x23, [sp, #128] +// 0x00000024: .cfi_offset: r23 at cfa-64 +// 0x00000024: str x22, [sp, #120] +// 0x00000028: .cfi_offset: r22 at cfa-72 +// 0x00000028: str x21, [sp, #112] +// 0x0000002c: .cfi_offset: r21 at cfa-80 +// 0x0000002c: str x20, [sp, #104] +// 0x00000030: .cfi_offset: r20 at cfa-88 +// 0x00000030: str d15, [sp, #96] +// 0x00000034: .cfi_offset_extended: r79 at cfa-96 +// 0x00000034: str d14, [sp, #88] +// 0x00000038: .cfi_offset_extended: r78 at cfa-104 +// 0x00000038: str d13, [sp, #80] +// 0x0000003c: .cfi_offset_extended: r77 at cfa-112 +// 0x0000003c: str d12, [sp, #72] +// 0x00000040: .cfi_offset_extended: r76 at cfa-120 +// 0x00000040: str d11, [sp, #64] +// 0x00000044: .cfi_offset_extended: r75 at cfa-128 +// 0x00000044: str d10, [sp, #56] +// 0x00000048: .cfi_offset_extended: r74 at cfa-136 +// 0x00000048: str d9, [sp, #48] +// 0x0000004c: .cfi_offset_extended: r73 at cfa-144 +// 0x0000004c: str d8, [sp, #40] +// 0x00000050: .cfi_offset_extended: r72 at cfa-152 +// 0x00000050: mov x21, tr +// 0x00000054: str w0, [sp] +// 0x00000058: str w1, [sp, #196] +// 0x0000005c: str s0, [sp, #200] +// 0x00000060: str w2, [sp, #204] +// 0x00000064: str w3, [sp, #208] +// 0x00000068: sub sp, sp, #0x20 (32) +// 0x0000006c: .cfi_def_cfa_offset: 224 +// 0x0000006c: add sp, sp, #0x20 (32) +// 0x00000070: .cfi_def_cfa_offset: 192 +// 0x00000070: .cfi_remember_state +// 0x00000070: mov tr, x21 +// 0x00000074: ldr lr, [sp, #184] +// 0x00000078: .cfi_restore: r30 +// 0x00000078: ldr x29, [sp, #176] +// 0x0000007c: .cfi_restore: r29 +// 0x0000007c: ldr x28, [sp, #168] +// 0x00000080: .cfi_restore: r28 +// 0x00000080: ldr x27, [sp, #160] +// 0x00000084: .cfi_restore: r27 +// 0x00000084: ldr x26, [sp, #152] +// 0x00000088: .cfi_restore: r26 +// 0x00000088: ldr x25, [sp, #144] +// 0x0000008c: .cfi_restore: r25 +// 0x0000008c: ldr x24, [sp, #136] +// 0x00000090: .cfi_restore: r24 +// 0x00000090: ldr x23, [sp, #128] +// 0x00000094: .cfi_restore: r23 +// 0x00000094: ldr x22, [sp, #120] +// 0x00000098: .cfi_restore: r22 +// 0x00000098: ldr x21, [sp, #112] +// 0x0000009c: .cfi_restore: r21 +// 0x0000009c: ldr x20, [sp, #104] +// 0x000000a0: .cfi_restore: r20 +// 0x000000a0: ldr d15, [sp, #96] +// 0x000000a4: .cfi_restore_extended: r79 +// 0x000000a4: ldr d14, [sp, #88] +// 0x000000a8: .cfi_restore_extended: r78 +// 0x000000a8: ldr d13, [sp, #80] +// 0x000000ac: .cfi_restore_extended: r77 +// 0x000000ac: ldr d12, [sp, #72] +// 0x000000b0: 
.cfi_restore_extended: r76 +// 0x000000b0: ldr d11, [sp, #64] +// 0x000000b4: .cfi_restore_extended: r75 +// 0x000000b4: ldr d10, [sp, #56] +// 0x000000b8: .cfi_restore_extended: r74 +// 0x000000b8: ldr d9, [sp, #48] +// 0x000000bc: .cfi_restore_extended: r73 +// 0x000000bc: ldr d8, [sp, #40] +// 0x000000c0: .cfi_restore_extended: r72 +// 0x000000c0: add sp, sp, #0xc0 (192) +// 0x000000c4: .cfi_def_cfa_offset: 0 +// 0x000000c4: ret +// 0x000000c8: .cfi_restore_state +// 0x000000c8: .cfi_def_cfa_offset: 192 + +static constexpr uint8_t expected_asm_kX86[] = { + 0x57, 0x56, 0x55, 0x83, 0xC4, 0xE4, 0x50, 0x89, 0x4C, 0x24, 0x34, 0xF3, + 0x0F, 0x11, 0x44, 0x24, 0x38, 0x89, 0x54, 0x24, 0x3C, 0x89, 0x5C, 0x24, + 0x40, 0x83, 0xC4, 0xE0, 0x83, 0xC4, 0x20, 0x83, 0xC4, 0x20, 0x5D, 0x5E, + 0x5F, 0xC3, +}; +static constexpr uint8_t expected_cfi_kX86[] = { + 0x41, 0x0E, 0x08, 0x87, 0x02, 0x41, 0x0E, 0x0C, 0x86, 0x03, 0x41, 0x0E, + 0x10, 0x85, 0x04, 0x43, 0x0E, 0x2C, 0x41, 0x0E, 0x30, 0x55, 0x0E, 0x50, + 0x43, 0x0E, 0x30, 0x0A, 0x43, 0x0E, 0x10, 0x41, 0x0E, 0x0C, 0xC5, 0x41, + 0x0E, 0x08, 0xC6, 0x41, 0x0E, 0x04, 0xC7, 0x41, 0x0B, 0x0E, 0x30, +}; +// 0x00000000: push edi +// 0x00000001: .cfi_def_cfa_offset: 8 +// 0x00000001: .cfi_offset: r7 at cfa-8 +// 0x00000001: push esi +// 0x00000002: .cfi_def_cfa_offset: 12 +// 0x00000002: .cfi_offset: r6 at cfa-12 +// 0x00000002: push ebp +// 0x00000003: .cfi_def_cfa_offset: 16 +// 0x00000003: .cfi_offset: r5 at cfa-16 +// 0x00000003: add esp, -28 +// 0x00000006: .cfi_def_cfa_offset: 44 +// 0x00000006: push eax +// 0x00000007: .cfi_def_cfa_offset: 48 +// 0x00000007: mov [esp + 52], ecx +// 0x0000000b: movss [esp + 56], xmm0 +// 0x00000011: mov [esp + 60], edx +// 0x00000015: mov [esp + 64], ebx +// 0x00000019: add esp, -32 +// 0x0000001c: .cfi_def_cfa_offset: 80 +// 0x0000001c: add esp, 32 +// 0x0000001f: .cfi_def_cfa_offset: 48 +// 0x0000001f: .cfi_remember_state +// 0x0000001f: add esp, 32 +// 0x00000022: .cfi_def_cfa_offset: 16 +// 0x00000022: pop ebp +// 0x00000023: .cfi_def_cfa_offset: 12 +// 0x00000023: .cfi_restore: r5 +// 0x00000023: pop esi +// 0x00000024: .cfi_def_cfa_offset: 8 +// 0x00000024: .cfi_restore: r6 +// 0x00000024: pop edi +// 0x00000025: .cfi_def_cfa_offset: 4 +// 0x00000025: .cfi_restore: r7 +// 0x00000025: ret +// 0x00000026: .cfi_restore_state +// 0x00000026: .cfi_def_cfa_offset: 48 + +static constexpr uint8_t expected_asm_kX86_64[] = { + 0x41, 0x57, 0x41, 0x56, 0x41, 0x55, 0x41, 0x54, 0x55, 0x53, 0x48, 0x83, + 0xEC, 0x48, 0xF2, 0x44, 0x0F, 0x11, 0x7C, 0x24, 0x40, 0xF2, 0x44, 0x0F, + 0x11, 0x74, 0x24, 0x38, 0xF2, 0x44, 0x0F, 0x11, 0x6C, 0x24, 0x30, 0xF2, + 0x44, 0x0F, 0x11, 0x64, 0x24, 0x28, 0x89, 0x3C, 0x24, 0x89, 0xB4, 0x24, + 0x84, 0x00, 0x00, 0x00, 0xF3, 0x0F, 0x11, 0x84, 0x24, 0x88, 0x00, 0x00, + 0x00, 0x89, 0x94, 0x24, 0x8C, 0x00, 0x00, 0x00, 0x89, 0x8C, 0x24, 0x90, + 0x00, 0x00, 0x00, 0x48, 0x83, 0xC4, 0xE0, 0x48, 0x83, 0xC4, 0x20, 0xF2, + 0x44, 0x0F, 0x10, 0x64, 0x24, 0x28, 0xF2, 0x44, 0x0F, 0x10, 0x6C, 0x24, + 0x30, 0xF2, 0x44, 0x0F, 0x10, 0x74, 0x24, 0x38, 0xF2, 0x44, 0x0F, 0x10, + 0x7C, 0x24, 0x40, 0x48, 0x83, 0xC4, 0x48, 0x5B, 0x5D, 0x41, 0x5C, 0x41, + 0x5D, 0x41, 0x5E, 0x41, 0x5F, 0xC3, +}; +static constexpr uint8_t expected_cfi_kX86_64[] = { + 0x42, 0x0E, 0x10, 0x8F, 0x04, 0x42, 0x0E, 0x18, 0x8E, 0x06, 0x42, 0x0E, + 0x20, 0x8D, 0x08, 0x42, 0x0E, 0x28, 0x8C, 0x0A, 0x41, 0x0E, 0x30, 0x86, + 0x0C, 0x41, 0x0E, 0x38, 0x83, 0x0E, 0x44, 0x0E, 0x80, 0x01, 0x47, 0xA0, + 0x10, 0x47, 0x9F, 0x12, 0x47, 0x9E, 0x14, 0x47, 0x9D, 0x16, 0x65, 
0x0E, + 0xA0, 0x01, 0x44, 0x0E, 0x80, 0x01, 0x0A, 0x47, 0xDD, 0x47, 0xDE, 0x47, + 0xDF, 0x47, 0xE0, 0x44, 0x0E, 0x38, 0x41, 0x0E, 0x30, 0xC3, 0x41, 0x0E, + 0x28, 0xC6, 0x42, 0x0E, 0x20, 0xCC, 0x42, 0x0E, 0x18, 0xCD, 0x42, 0x0E, + 0x10, 0xCE, 0x42, 0x0E, 0x08, 0xCF, 0x41, 0x0B, 0x0E, 0x80, 0x01, +}; +// 0x00000000: push r15 +// 0x00000002: .cfi_def_cfa_offset: 16 +// 0x00000002: .cfi_offset: r15 at cfa-16 +// 0x00000002: push r14 +// 0x00000004: .cfi_def_cfa_offset: 24 +// 0x00000004: .cfi_offset: r14 at cfa-24 +// 0x00000004: push r13 +// 0x00000006: .cfi_def_cfa_offset: 32 +// 0x00000006: .cfi_offset: r13 at cfa-32 +// 0x00000006: push r12 +// 0x00000008: .cfi_def_cfa_offset: 40 +// 0x00000008: .cfi_offset: r12 at cfa-40 +// 0x00000008: push rbp +// 0x00000009: .cfi_def_cfa_offset: 48 +// 0x00000009: .cfi_offset: r6 at cfa-48 +// 0x00000009: push rbx +// 0x0000000a: .cfi_def_cfa_offset: 56 +// 0x0000000a: .cfi_offset: r3 at cfa-56 +// 0x0000000a: subq rsp, 72 +// 0x0000000e: .cfi_def_cfa_offset: 128 +// 0x0000000e: movsd [rsp + 64], xmm15 +// 0x00000015: .cfi_offset: r32 at cfa-64 +// 0x00000015: movsd [rsp + 56], xmm14 +// 0x0000001c: .cfi_offset: r31 at cfa-72 +// 0x0000001c: movsd [rsp + 48], xmm13 +// 0x00000023: .cfi_offset: r30 at cfa-80 +// 0x00000023: movsd [rsp + 40], xmm12 +// 0x0000002a: .cfi_offset: r29 at cfa-88 +// 0x0000002a: mov [rsp], edi +// 0x0000002d: mov [rsp + 132], esi +// 0x00000034: movss [rsp + 136], xmm0 +// 0x0000003d: mov [rsp + 140], edx +// 0x00000044: mov [rsp + 144], ecx +// 0x0000004b: addq rsp, -32 +// 0x0000004f: .cfi_def_cfa_offset: 160 +// 0x0000004f: addq rsp, 32 +// 0x00000053: .cfi_def_cfa_offset: 128 +// 0x00000053: .cfi_remember_state +// 0x00000053: movsd xmm12, [rsp + 40] +// 0x0000005a: .cfi_restore: r29 +// 0x0000005a: movsd xmm13, [rsp + 48] +// 0x00000061: .cfi_restore: r30 +// 0x00000061: movsd xmm14, [rsp + 56] +// 0x00000068: .cfi_restore: r31 +// 0x00000068: movsd xmm15, [rsp + 64] +// 0x0000006f: .cfi_restore: r32 +// 0x0000006f: addq rsp, 72 +// 0x00000073: .cfi_def_cfa_offset: 56 +// 0x00000073: pop rbx +// 0x00000074: .cfi_def_cfa_offset: 48 +// 0x00000074: .cfi_restore: r3 +// 0x00000074: pop rbp +// 0x00000075: .cfi_def_cfa_offset: 40 +// 0x00000075: .cfi_restore: r6 +// 0x00000075: pop r12 +// 0x00000077: .cfi_def_cfa_offset: 32 +// 0x00000077: .cfi_restore: r12 +// 0x00000077: pop r13 +// 0x00000079: .cfi_def_cfa_offset: 24 +// 0x00000079: .cfi_restore: r13 +// 0x00000079: pop r14 +// 0x0000007b: .cfi_def_cfa_offset: 16 +// 0x0000007b: .cfi_restore: r14 +// 0x0000007b: pop r15 +// 0x0000007d: .cfi_def_cfa_offset: 8 +// 0x0000007d: .cfi_restore: r15 +// 0x0000007d: ret +// 0x0000007e: .cfi_restore_state +// 0x0000007e: .cfi_def_cfa_offset: 128 + +static constexpr uint8_t expected_asm_kMips[] = { + 0xC0, 0xFF, 0xBD, 0x27, 0x3C, 0x00, 0xBF, 0xAF, 0x38, 0x00, 0xB8, 0xAF, + 0x34, 0x00, 0xAF, 0xAF, 0x30, 0x00, 0xAE, 0xAF, 0x2C, 0x00, 0xAD, 0xAF, + 0x28, 0x00, 0xAC, 0xAF, 0x24, 0x00, 0xAB, 0xAF, 0x20, 0x00, 0xAA, 0xAF, + 0x1C, 0x00, 0xA9, 0xAF, 0x18, 0x00, 0xA8, 0xAF, 0x00, 0x00, 0xA4, 0xAF, + 0x44, 0x00, 0xA5, 0xAF, 0x48, 0x00, 0xA6, 0xAF, 0x4C, 0x00, 0xA7, 0xAF, + 0xE0, 0xFF, 0xBD, 0x27, 0x20, 0x00, 0xBD, 0x27, 0x18, 0x00, 0xA8, 0x8F, + 0x1C, 0x00, 0xA9, 0x8F, 0x20, 0x00, 0xAA, 0x8F, 0x24, 0x00, 0xAB, 0x8F, + 0x28, 0x00, 0xAC, 0x8F, 0x2C, 0x00, 0xAD, 0x8F, 0x30, 0x00, 0xAE, 0x8F, + 0x34, 0x00, 0xAF, 0x8F, 0x38, 0x00, 0xB8, 0x8F, 0x3C, 0x00, 0xBF, 0x8F, + 0x40, 0x00, 0xBD, 0x27, 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00, +}; 
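The expected_cfi_* arrays in this file are raw DWARF call-frame instruction streams; the interleaved // comments are their pretty-printed form. As a rough illustration only (not part of the generated expectations), the sketch below decodes the opcodes that dominate these tables, assuming single-byte ULEB128 operands and a code alignment factor of 1, which holds for this data; the pretty-printer additionally scales register save offsets by the CIE's data alignment factor.

// Illustrative decoder (not part of the patch); handles only the opcodes used here.
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  // First bytes of expected_cfi_kMips below.
  const std::vector<uint8_t> cfi = {0x44, 0x0E, 0x40, 0x44, 0x9F, 0x01, 0x44, 0x98, 0x02};
  for (size_t i = 0; i < cfi.size();) {
    const uint8_t op = cfi[i++];
    if ((op & 0xC0) == 0x40) {
      std::printf("DW_CFA_advance_loc %d\n", op & 0x3F);           // Advance by N code units.
    } else if ((op & 0xC0) == 0x80) {
      std::printf("DW_CFA_offset r%d %d\n", op & 0x3F, cfi[i++]);  // Register saved at factored offset.
    } else if ((op & 0xC0) == 0xC0) {
      std::printf("DW_CFA_restore r%d\n", op & 0x3F);
    } else if (op == 0x0E) {
      std::printf("DW_CFA_def_cfa_offset %d\n", cfi[i++]);
    } else if (op == 0x0A) {
      std::printf("DW_CFA_remember_state\n");
    } else if (op == 0x0B) {
      std::printf("DW_CFA_restore_state\n");
    } else {
      std::printf("(unhandled opcode 0x%02X)\n", static_cast<unsigned>(op));
      break;
    }
  }
  return 0;
}

Fed the first bytes of expected_cfi_kMips below, it prints advance_loc 4, def_cfa_offset 64, advance_loc 4, offset r31 1, advance_loc 4, offset r24 2, which corresponds to the ".cfi_def_cfa_offset: 64", "r31 at cfa-4" and "r24 at cfa-8" comments that follow the array.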
+static constexpr uint8_t expected_cfi_kMips[] = { + 0x44, 0x0E, 0x40, 0x44, 0x9F, 0x01, 0x44, 0x98, 0x02, 0x44, 0x8F, 0x03, + 0x44, 0x8E, 0x04, 0x44, 0x8D, 0x05, 0x44, 0x8C, 0x06, 0x44, 0x8B, 0x07, + 0x44, 0x8A, 0x08, 0x44, 0x89, 0x09, 0x44, 0x88, 0x0A, 0x54, 0x0E, 0x60, + 0x44, 0x0E, 0x40, 0x0A, 0x44, 0xC8, 0x44, 0xC9, 0x44, 0xCA, 0x44, 0xCB, + 0x44, 0xCC, 0x44, 0xCD, 0x44, 0xCE, 0x44, 0xCF, 0x44, 0xD8, 0x44, 0xDF, + 0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40, +}; +// 0x00000000: addiu r29, r29, -64 +// 0x00000004: .cfi_def_cfa_offset: 64 +// 0x00000004: sw r31, +60(r29) +// 0x00000008: .cfi_offset: r31 at cfa-4 +// 0x00000008: sw r24, +56(r29) +// 0x0000000c: .cfi_offset: r24 at cfa-8 +// 0x0000000c: sw r15, +52(r29) +// 0x00000010: .cfi_offset: r15 at cfa-12 +// 0x00000010: sw r14, +48(r29) +// 0x00000014: .cfi_offset: r14 at cfa-16 +// 0x00000014: sw r13, +44(r29) +// 0x00000018: .cfi_offset: r13 at cfa-20 +// 0x00000018: sw r12, +40(r29) +// 0x0000001c: .cfi_offset: r12 at cfa-24 +// 0x0000001c: sw r11, +36(r29) +// 0x00000020: .cfi_offset: r11 at cfa-28 +// 0x00000020: sw r10, +32(r29) +// 0x00000024: .cfi_offset: r10 at cfa-32 +// 0x00000024: sw r9, +28(r29) +// 0x00000028: .cfi_offset: r9 at cfa-36 +// 0x00000028: sw r8, +24(r29) +// 0x0000002c: .cfi_offset: r8 at cfa-40 +// 0x0000002c: sw r4, +0(r29) +// 0x00000030: sw r5, +68(r29) +// 0x00000034: sw r6, +72(r29) +// 0x00000038: sw r7, +76(r29) +// 0x0000003c: addiu r29, r29, -32 +// 0x00000040: .cfi_def_cfa_offset: 96 +// 0x00000040: addiu r29, r29, 32 +// 0x00000044: .cfi_def_cfa_offset: 64 +// 0x00000044: .cfi_remember_state +// 0x00000044: lw r8, +24(r29) +// 0x00000048: .cfi_restore: r8 +// 0x00000048: lw r9, +28(r29) +// 0x0000004c: .cfi_restore: r9 +// 0x0000004c: lw r10, +32(r29) +// 0x00000050: .cfi_restore: r10 +// 0x00000050: lw r11, +36(r29) +// 0x00000054: .cfi_restore: r11 +// 0x00000054: lw r12, +40(r29) +// 0x00000058: .cfi_restore: r12 +// 0x00000058: lw r13, +44(r29) +// 0x0000005c: .cfi_restore: r13 +// 0x0000005c: lw r14, +48(r29) +// 0x00000060: .cfi_restore: r14 +// 0x00000060: lw r15, +52(r29) +// 0x00000064: .cfi_restore: r15 +// 0x00000064: lw r24, +56(r29) +// 0x00000068: .cfi_restore: r24 +// 0x00000068: lw r31, +60(r29) +// 0x0000006c: .cfi_restore: r31 +// 0x0000006c: addiu r29, r29, 64 +// 0x00000070: .cfi_def_cfa_offset: 0 +// 0x00000070: jalr r0, r31 +// 0x00000074: nop +// 0x00000078: .cfi_restore_state +// 0x00000078: .cfi_def_cfa_offset: 64 + +static constexpr uint8_t expected_asm_kMips64[] = { + 0xA0, 0xFF, 0xBD, 0x67, 0x58, 0x00, 0xBF, 0xFF, 0x50, 0x00, 0xBE, 0xFF, + 0x48, 0x00, 0xBC, 0xFF, 0x40, 0x00, 0xB7, 0xFF, 0x38, 0x00, 0xB6, 0xFF, + 0x30, 0x00, 0xB5, 0xFF, 0x28, 0x00, 0xB4, 0xFF, 0x20, 0x00, 0xB3, 0xFF, + 0x18, 0x00, 0xB2, 0xFF, 0x00, 0x00, 0xA4, 0xAF, 0x64, 0x00, 0xA5, 0xAF, + 0x68, 0x00, 0xAE, 0xE7, 0x6C, 0x00, 0xA7, 0xAF, 0x70, 0x00, 0xA8, 0xAF, + 0xE0, 0xFF, 0xBD, 0x67, 0x20, 0x00, 0xBD, 0x67, 0x18, 0x00, 0xB2, 0xDF, + 0x20, 0x00, 0xB3, 0xDF, 0x28, 0x00, 0xB4, 0xDF, 0x30, 0x00, 0xB5, 0xDF, + 0x38, 0x00, 0xB6, 0xDF, 0x40, 0x00, 0xB7, 0xDF, 0x48, 0x00, 0xBC, 0xDF, + 0x50, 0x00, 0xBE, 0xDF, 0x58, 0x00, 0xBF, 0xDF, 0x60, 0x00, 0xBD, 0x67, + 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00, +}; +static constexpr uint8_t expected_cfi_kMips64[] = { + 0x44, 0x0E, 0x60, 0x44, 0x9F, 0x02, 0x44, 0x9E, 0x04, 0x44, 0x9C, 0x06, + 0x44, 0x97, 0x08, 0x44, 0x96, 0x0A, 0x44, 0x95, 0x0C, 0x44, 0x94, 0x0E, + 0x44, 0x93, 0x10, 0x44, 0x92, 0x12, 0x58, 0x0E, 0x80, 0x01, 0x44, 0x0E, + 0x60, 0x0A, 0x44, 0xD2, 
0x44, 0xD3, 0x44, 0xD4, 0x44, 0xD5, 0x44, 0xD6, + 0x44, 0xD7, 0x44, 0xDC, 0x44, 0xDE, 0x44, 0xDF, 0x44, 0x0E, 0x00, 0x48, + 0x0B, 0x0E, 0x60, +}; +// 0x00000000: daddiu r29, r29, -96 +// 0x00000004: .cfi_def_cfa_offset: 96 +// 0x00000004: sd r31, +88(r29) +// 0x00000008: .cfi_offset: r31 at cfa-8 +// 0x00000008: sd r30, +80(r29) +// 0x0000000c: .cfi_offset: r30 at cfa-16 +// 0x0000000c: sd r28, +72(r29) +// 0x00000010: .cfi_offset: r28 at cfa-24 +// 0x00000010: sd r23, +64(r29) +// 0x00000014: .cfi_offset: r23 at cfa-32 +// 0x00000014: sd r22, +56(r29) +// 0x00000018: .cfi_offset: r22 at cfa-40 +// 0x00000018: sd r21, +48(r29) +// 0x0000001c: .cfi_offset: r21 at cfa-48 +// 0x0000001c: sd r20, +40(r29) +// 0x00000020: .cfi_offset: r20 at cfa-56 +// 0x00000020: sd r19, +32(r29) +// 0x00000024: .cfi_offset: r19 at cfa-64 +// 0x00000024: sd r18, +24(r29) +// 0x00000028: .cfi_offset: r18 at cfa-72 +// 0x00000028: sw r4, +0(r29) +// 0x0000002c: sw r5, +100(r29) +// 0x00000030: swc1 f14, +104(r29) +// 0x00000034: sw r7, +108(r29) +// 0x00000038: sw r8, +112(r29) +// 0x0000003c: daddiu r29, r29, -32 +// 0x00000040: .cfi_def_cfa_offset: 128 +// 0x00000040: daddiu r29, r29, 32 +// 0x00000044: .cfi_def_cfa_offset: 96 +// 0x00000044: .cfi_remember_state +// 0x00000044: ld r18, +24(r29) +// 0x00000048: .cfi_restore: r18 +// 0x00000048: ld r19, +32(r29) +// 0x0000004c: .cfi_restore: r19 +// 0x0000004c: ld r20, +40(r29) +// 0x00000050: .cfi_restore: r20 +// 0x00000050: ld r21, +48(r29) +// 0x00000054: .cfi_restore: r21 +// 0x00000054: ld r22, +56(r29) +// 0x00000058: .cfi_restore: r22 +// 0x00000058: ld r23, +64(r29) +// 0x0000005c: .cfi_restore: r23 +// 0x0000005c: ld r28, +72(r29) +// 0x00000060: .cfi_restore: r28 +// 0x00000060: ld r30, +80(r29) +// 0x00000064: .cfi_restore: r30 +// 0x00000064: ld r31, +88(r29) +// 0x00000068: .cfi_restore: r31 +// 0x00000068: daddiu r29, r29, 96 +// 0x0000006c: .cfi_def_cfa_offset: 0 +// 0x0000006c: jr r31 +// 0x00000070: nop +// 0x00000074: .cfi_restore_state +// 0x00000074: .cfi_def_cfa_offset: 96 + diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc index 2d9e03a718..8a14038074 100644 --- a/compiler/jni/quick/jni_compiler.cc +++ b/compiler/jni/quick/jni_compiler.cc @@ -28,6 +28,7 @@ #include "compiled_method.h" #include "dex_file-inl.h" #include "driver/compiler_driver.h" +#include "driver/compiler_options.h" #include "entrypoints/quick/quick_entrypoints.h" #include "jni_env_ext.h" #include "mirror/art_method.h" @@ -93,7 +94,7 @@ CompiledMethod* ArtJniCompileMethodInternal(CompilerDriver* driver, // Assembler that holds generated instructions std::unique_ptr<Assembler> jni_asm(Assembler::Create(instruction_set)); - jni_asm->InitializeFrameDescriptionEntry(); + jni_asm->cfi().SetEnabled(driver->GetCompilerOptions().GetIncludeDebugSymbols()); // Offsets into data structures // TODO: if cross compiling these offsets are for the host not the target @@ -105,6 +106,7 @@ CompiledMethod* ArtJniCompileMethodInternal(CompilerDriver* driver, const size_t frame_size(main_jni_conv->FrameSize()); const std::vector<ManagedRegister>& callee_save_regs = main_jni_conv->CalleeSaveRegisters(); __ BuildFrame(frame_size, mr_conv->MethodRegister(), callee_save_regs, mr_conv->EntrySpills()); + DCHECK_EQ(jni_asm->cfi().GetCurrentCFAOffset(), static_cast<int>(frame_size)); // 2. Set up the HandleScope mr_conv->ResetIterator(FrameOffset(frame_size)); @@ -424,7 +426,9 @@ CompiledMethod* ArtJniCompileMethodInternal(CompilerDriver* driver, // 16. 
Remove activation - need to restore callee save registers since the GC may have changed // them. + DCHECK_EQ(jni_asm->cfi().GetCurrentCFAOffset(), static_cast<int>(frame_size)); __ RemoveFrame(frame_size, callee_save_regs); + DCHECK_EQ(jni_asm->cfi().GetCurrentCFAOffset(), static_cast<int>(frame_size)); // 17. Finalize code generation __ EmitSlowPaths(); @@ -432,19 +436,19 @@ CompiledMethod* ArtJniCompileMethodInternal(CompilerDriver* driver, std::vector<uint8_t> managed_code(cs); MemoryRegion code(&managed_code[0], managed_code.size()); __ FinalizeInstructions(code); - jni_asm->FinalizeFrameDescriptionEntry(); - std::vector<uint8_t>* fde(jni_asm->GetFrameDescriptionEntry()); - ArrayRef<const uint8_t> cfi_ref; - if (fde != nullptr) { - cfi_ref = ArrayRef<const uint8_t>(*fde); - } - return CompiledMethod::SwapAllocCompiledMethodCFI(driver, - instruction_set, - ArrayRef<const uint8_t>(managed_code), - frame_size, - main_jni_conv->CoreSpillMask(), - main_jni_conv->FpSpillMask(), - cfi_ref); + + return CompiledMethod::SwapAllocCompiledMethod(driver, + instruction_set, + ArrayRef<const uint8_t>(managed_code), + frame_size, + main_jni_conv->CoreSpillMask(), + main_jni_conv->FpSpillMask(), + nullptr, // src_mapping_table. + ArrayRef<const uint8_t>(), // mapping_table. + ArrayRef<const uint8_t>(), // vmap_table. + ArrayRef<const uint8_t>(), // native_gc_map. + ArrayRef<const uint8_t>(*jni_asm->cfi().data()), + ArrayRef<const LinkerPatch>()); } // Copy a single parameter from the managed to the JNI calling convention diff --git a/compiler/jni/quick/mips64/calling_convention_mips64.cc b/compiler/jni/quick/mips64/calling_convention_mips64.cc index 17325d6d49..d446867d32 100644 --- a/compiler/jni/quick/mips64/calling_convention_mips64.cc +++ b/compiler/jni/quick/mips64/calling_convention_mips64.cc @@ -126,25 +126,20 @@ const ManagedRegisterEntrySpills& Mips64ManagedRuntimeCallingConvention::EntrySp Mips64JniCallingConvention::Mips64JniCallingConvention(bool is_static, bool is_synchronized, const char* shorty) : JniCallingConvention(is_static, is_synchronized, shorty, kFramePointerSize) { - callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S0)); - callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S1)); callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S2)); callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S3)); callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S4)); callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S5)); callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S6)); callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S7)); - callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(GP)); - callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(SP)); callee_save_regs_.push_back(Mips64ManagedRegister::FromGpuRegister(S8)); } uint32_t Mips64JniCallingConvention::CoreSpillMask() const { // Compute spill mask to agree with callee saves initialized in the constructor uint32_t result = 0; - result = 1 << S0 | 1 << S1 | 1 << S2 | 1 << S3 | 1 << S4 | 1 << S5 | 1 << S6 | - 1 << S7 | 1 << GP | 1 << SP | 1 << S8; + result = 1 << S2 | 1 << S3 | 1 << S4 | 1 << S5 | 1 << S6 | 1 << S7 | 1 << GP | 1 << S8 | 1 << RA; return result; } diff --git a/compiler/linker/arm/relative_patcher_arm_base.cc b/compiler/linker/arm/relative_patcher_arm_base.cc new file mode 100644 index 0000000000..ceace824ea --- /dev/null +++ 
b/compiler/linker/arm/relative_patcher_arm_base.cc @@ -0,0 +1,182 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "linker/arm/relative_patcher_arm_base.h" + +#include "compiled_method.h" +#include "oat.h" +#include "output_stream.h" + +namespace art { +namespace linker { + +uint32_t ArmBaseRelativePatcher::ReserveSpace(uint32_t offset, + const CompiledMethod* compiled_method, + MethodReference method_ref) { + return ReserveSpaceInternal(offset, compiled_method, method_ref, 0u); +} + +uint32_t ArmBaseRelativePatcher::ReserveSpaceEnd(uint32_t offset) { + // NOTE: The final thunk can be reserved from InitCodeMethodVisitor::EndClass() while it + // may be written early by WriteCodeMethodVisitor::VisitMethod() for a deduplicated chunk + // of code. To avoid any alignment discrepancies for the final chunk, we always align the + // offset after reserving of writing any chunk. + uint32_t aligned_offset = CompiledMethod::AlignCode(offset, instruction_set_); + bool needs_thunk = ReserveSpaceProcessPatches(aligned_offset, MethodReference(nullptr, 0u), + aligned_offset); + if (needs_thunk) { + thunk_locations_.push_back(aligned_offset); + offset = CompiledMethod::AlignCode(aligned_offset + thunk_code_.size(), instruction_set_); + } + return offset; +} + +uint32_t ArmBaseRelativePatcher::WriteThunks(OutputStream* out, uint32_t offset) { + if (current_thunk_to_write_ == thunk_locations_.size()) { + return offset; + } + uint32_t aligned_offset = CompiledMethod::AlignCode(offset, instruction_set_); + if (UNLIKELY(aligned_offset == thunk_locations_[current_thunk_to_write_])) { + ++current_thunk_to_write_; + uint32_t aligned_code_delta = aligned_offset - offset; + if (aligned_code_delta != 0u && !WriteCodeAlignment(out, aligned_code_delta)) { + return 0u; + } + if (UNLIKELY(!WriteRelCallThunk(out, ArrayRef<const uint8_t>(thunk_code_)))) { + return 0u; + } + uint32_t thunk_end_offset = aligned_offset + thunk_code_.size(); + // Align after writing chunk, see the ReserveSpace() above. 
+ offset = CompiledMethod::AlignCode(thunk_end_offset, instruction_set_); + aligned_code_delta = offset - thunk_end_offset; + if (aligned_code_delta != 0u && !WriteCodeAlignment(out, aligned_code_delta)) { + return 0u; + } + } + return offset; +} + +ArmBaseRelativePatcher::ArmBaseRelativePatcher(RelativePatcherTargetProvider* provider, + InstructionSet instruction_set, + std::vector<uint8_t> thunk_code, + uint32_t max_positive_displacement, + uint32_t max_negative_displacement) + : provider_(provider), instruction_set_(instruction_set), thunk_code_(thunk_code), + max_positive_displacement_(max_positive_displacement), + max_negative_displacement_(max_negative_displacement), + thunk_locations_(), current_thunk_to_write_(0u), unprocessed_patches_() { +} + +uint32_t ArmBaseRelativePatcher::ReserveSpaceInternal(uint32_t offset, + const CompiledMethod* compiled_method, + MethodReference method_ref, + uint32_t max_extra_space) { + DCHECK(compiled_method->GetQuickCode() != nullptr); + uint32_t quick_code_size = compiled_method->GetQuickCode()->size(); + uint32_t quick_code_offset = compiled_method->AlignCode(offset) + sizeof(OatQuickMethodHeader); + uint32_t next_aligned_offset = compiled_method->AlignCode(quick_code_offset + quick_code_size); + // Adjust for extra space required by the subclass. + next_aligned_offset = compiled_method->AlignCode(next_aligned_offset + max_extra_space); + // TODO: ignore unprocessed patches targeting this method if they can reach quick_code_offset. + // We need the MethodReference for that. + if (!unprocessed_patches_.empty() && + next_aligned_offset - unprocessed_patches_.front().second > max_positive_displacement_) { + bool needs_thunk = ReserveSpaceProcessPatches(quick_code_offset, method_ref, + next_aligned_offset); + if (needs_thunk) { + // A single thunk will cover all pending patches. + unprocessed_patches_.clear(); + uint32_t thunk_location = compiled_method->AlignCode(offset); + thunk_locations_.push_back(thunk_location); + offset = CompiledMethod::AlignCode(thunk_location + thunk_code_.size(), instruction_set_); + } + } + for (const LinkerPatch& patch : compiled_method->GetPatches()) { + if (patch.Type() == kLinkerPatchCallRelative) { + unprocessed_patches_.emplace_back(patch.TargetMethod(), + quick_code_offset + patch.LiteralOffset()); + } + } + return offset; +} + +uint32_t ArmBaseRelativePatcher::CalculateDisplacement(uint32_t patch_offset, + uint32_t target_offset) { + // Unsigned arithmetic with its well-defined overflow behavior is just fine here. + uint32_t displacement = target_offset - patch_offset; + // NOTE: With unsigned arithmetic we do mean to use && rather than || below. + if (displacement > max_positive_displacement_ && displacement < -max_negative_displacement_) { + // Unwritten thunks have higher offsets, check if it's within range. + DCHECK(current_thunk_to_write_ == thunk_locations_.size() || + thunk_locations_[current_thunk_to_write_] > patch_offset); + if (current_thunk_to_write_ != thunk_locations_.size() && + thunk_locations_[current_thunk_to_write_] - patch_offset < max_positive_displacement_) { + displacement = thunk_locations_[current_thunk_to_write_] - patch_offset; + } else { + // We must have a previous thunk then. 
+ DCHECK_NE(current_thunk_to_write_, 0u); + DCHECK_LT(thunk_locations_[current_thunk_to_write_ - 1], patch_offset); + displacement = thunk_locations_[current_thunk_to_write_ - 1] - patch_offset; + DCHECK(displacement >= -max_negative_displacement_); + } + } + return displacement; +} + +bool ArmBaseRelativePatcher::ReserveSpaceProcessPatches(uint32_t quick_code_offset, + MethodReference method_ref, + uint32_t next_aligned_offset) { + // Process as many patches as possible, stop only on unresolved targets or calls too far back. + while (!unprocessed_patches_.empty()) { + MethodReference patch_ref = unprocessed_patches_.front().first; + uint32_t patch_offset = unprocessed_patches_.front().second; + DCHECK(thunk_locations_.empty() || thunk_locations_.back() <= patch_offset); + if (patch_ref.dex_file == method_ref.dex_file && + patch_ref.dex_method_index == method_ref.dex_method_index) { + DCHECK_GT(quick_code_offset, patch_offset); + if (quick_code_offset - patch_offset > max_positive_displacement_) { + return true; + } + } else { + auto result = provider_->FindMethodOffset(patch_ref); + if (!result.first) { + // If still unresolved, check if we have a thunk within range. + if (thunk_locations_.empty() || + patch_offset - thunk_locations_.back() > max_negative_displacement_) { + return next_aligned_offset - patch_offset > max_positive_displacement_; + } + } else { + uint32_t target_offset = result.second - CompiledCode::CodeDelta(instruction_set_); + if (target_offset >= patch_offset) { + DCHECK_LE(target_offset - patch_offset, max_positive_displacement_); + } else { + // When calling back, check if we have a thunk that's closer than the actual target. + if (!thunk_locations_.empty()) { + target_offset = std::max(target_offset, thunk_locations_.back()); + } + if (patch_offset - target_offset > max_negative_displacement_) { + return true; + } + } + } + } + unprocessed_patches_.pop_front(); + } + return false; +} + +} // namespace linker +} // namespace art diff --git a/compiler/linker/arm/relative_patcher_arm_base.h b/compiler/linker/arm/relative_patcher_arm_base.h new file mode 100644 index 0000000000..f80dd962ce --- /dev/null +++ b/compiler/linker/arm/relative_patcher_arm_base.h @@ -0,0 +1,69 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_ARM_BASE_H_ +#define ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_ARM_BASE_H_ + +#include <deque> + +#include "linker/relative_patcher.h" +#include "method_reference.h" + +namespace art { +namespace linker { + +class ArmBaseRelativePatcher : public RelativePatcher { + public: + uint32_t ReserveSpace(uint32_t offset, const CompiledMethod* compiled_method, + MethodReference method_ref) OVERRIDE; + uint32_t ReserveSpaceEnd(uint32_t offset) OVERRIDE; + uint32_t WriteThunks(OutputStream* out, uint32_t offset) OVERRIDE; + + protected: + ArmBaseRelativePatcher(RelativePatcherTargetProvider* provider, + InstructionSet instruction_set, std::vector<uint8_t> thunk_code, + uint32_t max_positive_displacement, uint32_t max_negative_displacement); + + uint32_t ReserveSpaceInternal(uint32_t offset, const CompiledMethod* compiled_method, + MethodReference method_ref, uint32_t max_extra_space); + uint32_t CalculateDisplacement(uint32_t patch_offset, uint32_t target_offset); + + private: + bool ReserveSpaceProcessPatches(uint32_t quick_code_offset, MethodReference method_ref, + uint32_t next_aligned_offset); + + RelativePatcherTargetProvider* const provider_; + const InstructionSet instruction_set_; + const std::vector<uint8_t> thunk_code_; + const uint32_t max_positive_displacement_; + const uint32_t max_negative_displacement_; + std::vector<uint32_t> thunk_locations_; + size_t current_thunk_to_write_; + + // ReserveSpace() tracks unprocessed patches. + typedef std::pair<MethodReference, uint32_t> UnprocessedPatch; + std::deque<UnprocessedPatch> unprocessed_patches_; + + friend class Arm64RelativePatcherTest; + friend class Thumb2RelativePatcherTest; + + DISALLOW_COPY_AND_ASSIGN(ArmBaseRelativePatcher); +}; + +} // namespace linker +} // namespace art + +#endif // ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_ARM_BASE_H_ diff --git a/compiler/linker/arm/relative_patcher_thumb2.cc b/compiler/linker/arm/relative_patcher_thumb2.cc new file mode 100644 index 0000000000..b17cbca2d2 --- /dev/null +++ b/compiler/linker/arm/relative_patcher_thumb2.cc @@ -0,0 +1,118 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "linker/arm/relative_patcher_thumb2.h" + +#include "compiled_method.h" +#include "mirror/art_method.h" +#include "utils/arm/assembler_thumb2.h" + +namespace art { +namespace linker { + +Thumb2RelativePatcher::Thumb2RelativePatcher(RelativePatcherTargetProvider* provider) + : ArmBaseRelativePatcher(provider, kThumb2, CompileThunkCode(), + kMaxPositiveDisplacement, kMaxNegativeDisplacement) { +} + +void Thumb2RelativePatcher::PatchCall(std::vector<uint8_t>* code, uint32_t literal_offset, + uint32_t patch_offset, uint32_t target_offset) { + DCHECK_LE(literal_offset + 4u, code->size()); + DCHECK_EQ(literal_offset & 1u, 0u); + DCHECK_EQ(patch_offset & 1u, 0u); + DCHECK_EQ(target_offset & 1u, 1u); // Thumb2 mode bit. 
+ uint32_t displacement = CalculateDisplacement(patch_offset, target_offset & ~1u); + displacement -= kPcDisplacement; // The base PC is at the end of the 4-byte patch. + DCHECK_EQ(displacement & 1u, 0u); + DCHECK((displacement >> 24) == 0u || (displacement >> 24) == 255u); // 25-bit signed. + uint32_t signbit = (displacement >> 31) & 0x1; + uint32_t i1 = (displacement >> 23) & 0x1; + uint32_t i2 = (displacement >> 22) & 0x1; + uint32_t imm10 = (displacement >> 12) & 0x03ff; + uint32_t imm11 = (displacement >> 1) & 0x07ff; + uint32_t j1 = i1 ^ (signbit ^ 1); + uint32_t j2 = i2 ^ (signbit ^ 1); + uint32_t value = (signbit << 26) | (j1 << 13) | (j2 << 11) | (imm10 << 16) | imm11; + value |= 0xf000d000; // BL + + // Check that we're just overwriting an existing BL. + DCHECK_EQ(GetInsn32(code, literal_offset) & 0xf800d000, 0xf000d000); + // Write the new BL. + SetInsn32(code, literal_offset, value); +} + +void Thumb2RelativePatcher::PatchDexCacheReference(std::vector<uint8_t>* code, + const LinkerPatch& patch, + uint32_t patch_offset, + uint32_t target_offset) { + uint32_t literal_offset = patch.LiteralOffset(); + uint32_t pc_literal_offset = patch.PcInsnOffset(); + uint32_t pc_base = patch_offset + (pc_literal_offset - literal_offset) + 4u /* PC adjustment */; + uint32_t diff = target_offset - pc_base; + + uint32_t insn = GetInsn32(code, literal_offset); + DCHECK_EQ(insn & 0xff7ff0ffu, 0xf2400000u); // MOVW/MOVT, unpatched (imm16 == 0). + uint32_t diff16 = ((insn & 0x00800000u) != 0u) ? (diff >> 16) : (diff & 0xffffu); + uint32_t imm4 = (diff16 >> 12) & 0xfu; + uint32_t imm = (diff16 >> 11) & 0x1u; + uint32_t imm3 = (diff16 >> 8) & 0x7u; + uint32_t imm8 = diff16 & 0xffu; + insn = (insn & 0xfbf08f00u) | (imm << 26) | (imm4 << 16) | (imm3 << 12) | imm8; + SetInsn32(code, literal_offset, insn); +} + +std::vector<uint8_t> Thumb2RelativePatcher::CompileThunkCode() { + // The thunk just uses the entry point in the ArtMethod. This works even for calls + // to the generic JNI and interpreter trampolines. 
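+ // Concretely, the thunk is two instructions: a load of the quick entry point
+ // straight into the PC from the ArtMethod in R0 (the managed calling convention
+ // passes the called method there), followed by a BKPT that is never reached:
+ //   ldr pc, [r0, #<entry point offset>]
+ //   bkpt #0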
+ arm::Thumb2Assembler assembler; + assembler.LoadFromOffset( + arm::kLoadWord, arm::PC, arm::R0, + mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value()); + assembler.bkpt(0); + std::vector<uint8_t> thunk_code(assembler.CodeSize()); + MemoryRegion code(thunk_code.data(), thunk_code.size()); + assembler.FinalizeInstructions(code); + return thunk_code; +} + +void Thumb2RelativePatcher::SetInsn32(std::vector<uint8_t>* code, uint32_t offset, uint32_t value) { + DCHECK_LE(offset + 4u, code->size()); + DCHECK_EQ(offset & 1u, 0u); + uint8_t* addr = &(*code)[offset]; + addr[0] = (value >> 16) & 0xff; + addr[1] = (value >> 24) & 0xff; + addr[2] = (value >> 0) & 0xff; + addr[3] = (value >> 8) & 0xff; +} + +uint32_t Thumb2RelativePatcher::GetInsn32(ArrayRef<const uint8_t> code, uint32_t offset) { + DCHECK_LE(offset + 4u, code.size()); + DCHECK_EQ(offset & 1u, 0u); + const uint8_t* addr = &code[offset]; + return + (static_cast<uint32_t>(addr[0]) << 16) + + (static_cast<uint32_t>(addr[1]) << 24) + + (static_cast<uint32_t>(addr[2]) << 0)+ + (static_cast<uint32_t>(addr[3]) << 8); +} + +template <typename Alloc> +uint32_t Thumb2RelativePatcher::GetInsn32(std::vector<uint8_t, Alloc>* code, uint32_t offset) { + return GetInsn32(ArrayRef<const uint8_t>(*code), offset); +} + +} // namespace linker +} // namespace art diff --git a/compiler/linker/arm/relative_patcher_thumb2.h b/compiler/linker/arm/relative_patcher_thumb2.h new file mode 100644 index 0000000000..2d474c2db0 --- /dev/null +++ b/compiler/linker/arm/relative_patcher_thumb2.h @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_THUMB2_H_ +#define ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_THUMB2_H_ + +#include "linker/arm/relative_patcher_arm_base.h" + +namespace art { +namespace linker { + +class Thumb2RelativePatcher FINAL : public ArmBaseRelativePatcher { + public: + explicit Thumb2RelativePatcher(RelativePatcherTargetProvider* provider); + + void PatchCall(std::vector<uint8_t>* code, uint32_t literal_offset, + uint32_t patch_offset, uint32_t target_offset) OVERRIDE; + void PatchDexCacheReference(std::vector<uint8_t>* code, const LinkerPatch& patch, + uint32_t patch_offset, uint32_t target_offset) OVERRIDE; + + private: + static std::vector<uint8_t> CompileThunkCode(); + + void SetInsn32(std::vector<uint8_t>* code, uint32_t offset, uint32_t value); + static uint32_t GetInsn32(ArrayRef<const uint8_t> code, uint32_t offset); + + template <typename Alloc> + static uint32_t GetInsn32(std::vector<uint8_t, Alloc>* code, uint32_t offset); + + // PC displacement from patch location; Thumb2 PC is always at instruction address + 4. + static constexpr int32_t kPcDisplacement = 4; + + // Maximum positive and negative displacement measured from the patch location. 
+ // (Signed 25 bit displacement with the last bit 0 has range [-2^24, 2^24-2] measured from + // the Thumb2 PC pointing right after the BL, i.e. 4 bytes later than the patch location.) + static constexpr uint32_t kMaxPositiveDisplacement = (1u << 24) - 2 + kPcDisplacement; + static constexpr uint32_t kMaxNegativeDisplacement = (1u << 24) - kPcDisplacement; + + DISALLOW_COPY_AND_ASSIGN(Thumb2RelativePatcher); +}; + +} // namespace linker +} // namespace art + +#endif // ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_THUMB2_H_ diff --git a/compiler/linker/arm/relative_patcher_thumb2_test.cc b/compiler/linker/arm/relative_patcher_thumb2_test.cc new file mode 100644 index 0000000000..a057a4cf16 --- /dev/null +++ b/compiler/linker/arm/relative_patcher_thumb2_test.cc @@ -0,0 +1,351 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "linker/relative_patcher_test.h" +#include "linker/arm/relative_patcher_thumb2.h" + +namespace art { +namespace linker { + +class Thumb2RelativePatcherTest : public RelativePatcherTest { + public: + Thumb2RelativePatcherTest() : RelativePatcherTest(kThumb2, "default") { } + + protected: + static const uint8_t kCallRawCode[]; + static const ArrayRef<const uint8_t> kCallCode; + static const uint8_t kNopRawCode[]; + static const ArrayRef<const uint8_t> kNopCode; + + // Branches within range [-256, 256) can be created from these by adding the low 8 bits. + static constexpr uint32_t kBlPlus0 = 0xf000f800; + static constexpr uint32_t kBlMinus256 = 0xf7ffff00; + + // Special BL values. + static constexpr uint32_t kBlPlusMax = 0xf3ffd7ff; + static constexpr uint32_t kBlMinusMax = 0xf400d000; + + bool Create2MethodsWithGap(const ArrayRef<const uint8_t>& method1_code, + const ArrayRef<const LinkerPatch>& method1_patches, + const ArrayRef<const uint8_t>& method3_code, + const ArrayRef<const LinkerPatch>& method3_patches, + uint32_t distance_without_thunks) { + CHECK_EQ(distance_without_thunks % kArmAlignment, 0u); + const uint32_t method1_offset = + CompiledCode::AlignCode(kTrampolineSize, kThumb2) + sizeof(OatQuickMethodHeader); + AddCompiledMethod(MethodRef(1u), method1_code, method1_patches); + + // We want to put the method3 at a very precise offset. + const uint32_t method3_offset = method1_offset + distance_without_thunks; + CHECK(IsAligned<kArmAlignment>(method3_offset - sizeof(OatQuickMethodHeader))); + + // Calculate size of method2 so that we put method3 at the correct place. 
+ const uint32_t method2_offset = + CompiledCode::AlignCode(method1_offset + method1_code.size(), kThumb2) + + sizeof(OatQuickMethodHeader); + const uint32_t method2_size = (method3_offset - sizeof(OatQuickMethodHeader) - method2_offset); + std::vector<uint8_t> method2_raw_code(method2_size); + ArrayRef<const uint8_t> method2_code(method2_raw_code); + AddCompiledMethod(MethodRef(2u), method2_code, ArrayRef<const LinkerPatch>()); + + AddCompiledMethod(MethodRef(3u), method3_code, method3_patches); + + Link(); + + // Check assumptions. + CHECK_EQ(GetMethodOffset(1), method1_offset); + CHECK_EQ(GetMethodOffset(2), method2_offset); + auto result3 = method_offset_map_.FindMethodOffset(MethodRef(3)); + CHECK(result3.first); + // There may be a thunk before method2. + if (result3.second == method3_offset + 1 /* thumb mode */) { + return false; // No thunk. + } else { + uint32_t aligned_thunk_size = CompiledCode::AlignCode(ThunkSize(), kThumb2); + CHECK_EQ(result3.second, method3_offset + aligned_thunk_size + 1 /* thumb mode */); + return true; // Thunk present. + } + } + + uint32_t GetMethodOffset(uint32_t method_idx) { + auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx)); + CHECK(result.first); + CHECK_NE(result.second & 1u, 0u); + return result.second - 1 /* thumb mode */; + } + + uint32_t ThunkSize() { + return static_cast<Thumb2RelativePatcher*>(patcher_.get())->thunk_code_.size(); + } + + bool CheckThunk(uint32_t thunk_offset) { + Thumb2RelativePatcher* patcher = static_cast<Thumb2RelativePatcher*>(patcher_.get()); + ArrayRef<const uint8_t> expected_code(patcher->thunk_code_); + if (output_.size() < thunk_offset + expected_code.size()) { + LOG(ERROR) << "output_.size() == " << output_.size() << " < " + << "thunk_offset + expected_code.size() == " << (thunk_offset + expected_code.size()); + return false; + } + ArrayRef<const uint8_t> linked_code(&output_[thunk_offset], expected_code.size()); + if (linked_code == expected_code) { + return true; + } + // Log failure info. 
+ DumpDiff(expected_code, linked_code); + return false; + } + + std::vector<uint8_t> GenNopsAndBl(size_t num_nops, uint32_t bl) { + std::vector<uint8_t> result; + result.reserve(num_nops * 2u + 4u); + for (size_t i = 0; i != num_nops; ++i) { + result.push_back(0x00); + result.push_back(0xbf); + } + result.push_back(static_cast<uint8_t>(bl >> 16)); + result.push_back(static_cast<uint8_t>(bl >> 24)); + result.push_back(static_cast<uint8_t>(bl)); + result.push_back(static_cast<uint8_t>(bl >> 8)); + return result; + } + + void TestDexCachereference(uint32_t dex_cache_arrays_begin, uint32_t element_offset) { + dex_cache_arrays_begin_ = dex_cache_arrays_begin; + static const uint8_t raw_code[] = { + 0x40, 0xf2, 0x00, 0x00, // MOVW r0, #0 (placeholder) + 0xc0, 0xf2, 0x00, 0x00, // MOVT r0, #0 (placeholder) + 0x78, 0x44, // ADD r0, pc + }; + constexpr uint32_t pc_insn_offset = 8u; + const ArrayRef<const uint8_t> code(raw_code); + LinkerPatch patches[] = { + LinkerPatch::DexCacheArrayPatch(0u, nullptr, pc_insn_offset, element_offset), + LinkerPatch::DexCacheArrayPatch(4u, nullptr, pc_insn_offset, element_offset), + }; + AddCompiledMethod(MethodRef(1u), code, ArrayRef<const LinkerPatch>(patches)); + Link(); + + uint32_t method1_offset = GetMethodOffset(1u); + uint32_t pc_base_offset = method1_offset + pc_insn_offset + 4u /* PC adjustment */; + uint32_t diff = dex_cache_arrays_begin_ + element_offset - pc_base_offset; + // Distribute the bits of the diff between the MOVW and MOVT: + uint32_t diffw = diff & 0xffffu; + uint32_t difft = diff >> 16; + uint32_t movw = 0xf2400000u | // MOVW r0, #0 (placeholder), + ((diffw & 0xf000u) << (16 - 12)) | // move imm4 from bits 12-15 to bits 16-19, + ((diffw & 0x0800u) << (26 - 11)) | // move imm from bit 11 to bit 26, + ((diffw & 0x0700u) << (12 - 8)) | // move imm3 from bits 8-10 to bits 12-14, + ((diffw & 0x00ffu)); // keep imm8 at bits 0-7. + uint32_t movt = 0xf2c00000u | // MOVT r0, #0 (placeholder), + ((difft & 0xf000u) << (16 - 12)) | // move imm4 from bits 12-15 to bits 16-19, + ((difft & 0x0800u) << (26 - 11)) | // move imm from bit 11 to bit 26, + ((difft & 0x0700u) << (12 - 8)) | // move imm3 from bits 8-10 to bits 12-14, + ((difft & 0x00ffu)); // keep imm8 at bits 0-7. 
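+ // For example, diff == 0x12345678 gives diffw == 0x5678 and difft == 0x1234; each
+ // half is scattered into the imm4/imm/imm3/imm8 fields of the MOVW/MOVT encodings
+ // above, mirroring what Thumb2RelativePatcher::PatchDexCacheReference does when it
+ // patches the placeholder instructions.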
+ const uint8_t expected_code[] = { + static_cast<uint8_t>(movw >> 16), static_cast<uint8_t>(movw >> 24), + static_cast<uint8_t>(movw >> 0), static_cast<uint8_t>(movw >> 8), + static_cast<uint8_t>(movt >> 16), static_cast<uint8_t>(movt >> 24), + static_cast<uint8_t>(movt >> 0), static_cast<uint8_t>(movt >> 8), + 0x78, 0x44, + }; + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); + } +}; + +const uint8_t Thumb2RelativePatcherTest::kCallRawCode[] = { + 0x00, 0xf0, 0x00, 0xf8 +}; + +const ArrayRef<const uint8_t> Thumb2RelativePatcherTest::kCallCode(kCallRawCode); + +const uint8_t Thumb2RelativePatcherTest::kNopRawCode[] = { + 0x00, 0xbf +}; + +const ArrayRef<const uint8_t> Thumb2RelativePatcherTest::kNopCode(kNopRawCode); + +TEST_F(Thumb2RelativePatcherTest, CallSelf) { + LinkerPatch patches[] = { + LinkerPatch::RelativeCodePatch(0u, nullptr, 1u), + }; + AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches)); + Link(); + + static const uint8_t expected_code[] = { + 0xff, 0xf7, 0xfe, 0xff + }; + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); +} + +TEST_F(Thumb2RelativePatcherTest, CallOther) { + LinkerPatch method1_patches[] = { + LinkerPatch::RelativeCodePatch(0u, nullptr, 2u), + }; + AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(method1_patches)); + LinkerPatch method2_patches[] = { + LinkerPatch::RelativeCodePatch(0u, nullptr, 1u), + }; + AddCompiledMethod(MethodRef(2u), kCallCode, ArrayRef<const LinkerPatch>(method2_patches)); + Link(); + + uint32_t method1_offset = GetMethodOffset(1u); + uint32_t method2_offset = GetMethodOffset(2u); + uint32_t diff_after = method2_offset - (method1_offset + 4u /* PC adjustment */); + ASSERT_EQ(diff_after & 1u, 0u); + ASSERT_LT(diff_after >> 1, 1u << 8); // Simple encoding, (diff_after >> 1) fits into 8 bits. + static const uint8_t method1_expected_code[] = { + 0x00, 0xf0, static_cast<uint8_t>(diff_after >> 1), 0xf8 + }; + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(method1_expected_code))); + uint32_t diff_before = method1_offset - (method2_offset + 4u /* PC adjustment */); + ASSERT_EQ(diff_before & 1u, 0u); + ASSERT_GE(diff_before, -1u << 9); // Simple encoding, -256 <= (diff >> 1) < 0. + auto method2_expected_code = GenNopsAndBl(0u, kBlMinus256 | ((diff_before >> 1) & 0xffu)); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(2u), ArrayRef<const uint8_t>(method2_expected_code))); +} + +TEST_F(Thumb2RelativePatcherTest, CallTrampoline) { + LinkerPatch patches[] = { + LinkerPatch::RelativeCodePatch(0u, nullptr, 2u), + }; + AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches)); + Link(); + + uint32_t method1_offset = GetMethodOffset(1u); + uint32_t diff = kTrampolineOffset - (method1_offset + 4u); + ASSERT_EQ(diff & 1u, 0u); + ASSERT_GE(diff, -1u << 9); // Simple encoding, -256 <= (diff >> 1) < 0 (checked as unsigned). + auto expected_code = GenNopsAndBl(0u, kBlMinus256 | ((diff >> 1) & 0xffu)); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); +} + +TEST_F(Thumb2RelativePatcherTest, CallOtherAlmostTooFarAfter) { + auto method1_raw_code = GenNopsAndBl(3u, kBlPlus0); + constexpr uint32_t bl_offset_in_method1 = 3u * 2u; // After NOPs. 
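+ // The displacement constants in the tests below follow from the header: a Thumb2 BL
+ // reaches at most 16 * MB - 2 + 4 bytes forward and 16 * MB - 4 bytes backward from
+ // the patch location (kMaxPositiveDisplacement / kMaxNegativeDisplacement), because
+ // the PC used as the branch base is 4 bytes past the patched instruction.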
+ ArrayRef<const uint8_t> method1_code(method1_raw_code); + ASSERT_EQ(bl_offset_in_method1 + 4u, method1_code.size()); + LinkerPatch method1_patches[] = { + LinkerPatch::RelativeCodePatch(bl_offset_in_method1, nullptr, 3u), + }; + + constexpr uint32_t max_positive_disp = 16 * MB - 2u + 4u /* PC adjustment */; + bool thunk_in_gap = Create2MethodsWithGap(method1_code, method1_patches, + kNopCode, ArrayRef<const LinkerPatch>(), + bl_offset_in_method1 + max_positive_disp); + ASSERT_FALSE(thunk_in_gap); // There should be no thunk. + + // Check linked code. + auto expected_code = GenNopsAndBl(3u, kBlPlusMax); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); +} + +TEST_F(Thumb2RelativePatcherTest, CallOtherAlmostTooFarBefore) { + auto method3_raw_code = GenNopsAndBl(2u, kBlPlus0); + constexpr uint32_t bl_offset_in_method3 = 2u * 2u; // After NOPs. + ArrayRef<const uint8_t> method3_code(method3_raw_code); + ASSERT_EQ(bl_offset_in_method3 + 4u, method3_code.size()); + LinkerPatch method3_patches[] = { + LinkerPatch::RelativeCodePatch(bl_offset_in_method3, nullptr, 1u), + }; + + constexpr uint32_t just_over_max_negative_disp = 16 * MB - 4u /* PC adjustment */; + bool thunk_in_gap = Create2MethodsWithGap(kNopCode, ArrayRef<const LinkerPatch>(), + method3_code, method3_patches, + just_over_max_negative_disp - bl_offset_in_method3); + ASSERT_FALSE(thunk_in_gap); // There should be no thunk. + + // Check linked code. + auto expected_code = GenNopsAndBl(2u, kBlMinusMax); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(3u), ArrayRef<const uint8_t>(expected_code))); +} + +TEST_F(Thumb2RelativePatcherTest, CallOtherJustTooFarAfter) { + auto method1_raw_code = GenNopsAndBl(2u, kBlPlus0); + constexpr uint32_t bl_offset_in_method1 = 2u * 2u; // After NOPs. + ArrayRef<const uint8_t> method1_code(method1_raw_code); + ASSERT_EQ(bl_offset_in_method1 + 4u, method1_code.size()); + LinkerPatch method1_patches[] = { + LinkerPatch::RelativeCodePatch(bl_offset_in_method1, nullptr, 3u), + }; + + constexpr uint32_t just_over_max_positive_disp = 16 * MB + 4u /* PC adjustment */; + bool thunk_in_gap = Create2MethodsWithGap(method1_code, method1_patches, + kNopCode, ArrayRef<const LinkerPatch>(), + bl_offset_in_method1 + just_over_max_positive_disp); + ASSERT_TRUE(thunk_in_gap); + + uint32_t method1_offset = GetMethodOffset(1u); + uint32_t method3_offset = GetMethodOffset(3u); + uint32_t method3_header_offset = method3_offset - sizeof(OatQuickMethodHeader); + ASSERT_TRUE(IsAligned<kArmAlignment>(method3_header_offset)); + uint32_t thunk_offset = method3_header_offset - CompiledCode::AlignCode(ThunkSize(), kThumb2); + ASSERT_TRUE(IsAligned<kArmAlignment>(thunk_offset)); + uint32_t diff = thunk_offset - (method1_offset + bl_offset_in_method1 + 4u /* PC adjustment */); + ASSERT_EQ(diff & 1u, 0u); + ASSERT_GE(diff, 16 * MB - (1u << 9)); // Simple encoding, unknown bits fit into the low 8 bits. + auto expected_code = GenNopsAndBl(2u, 0xf3ffd700 | ((diff >> 1) & 0xffu)); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); + CheckThunk(thunk_offset); +} + +TEST_F(Thumb2RelativePatcherTest, CallOtherJustTooFarBefore) { + auto method3_raw_code = GenNopsAndBl(3u, kBlPlus0); + constexpr uint32_t bl_offset_in_method3 = 3u * 2u; // After NOPs. 
+ ArrayRef<const uint8_t> method3_code(method3_raw_code); + ASSERT_EQ(bl_offset_in_method3 + 4u, method3_code.size()); + LinkerPatch method3_patches[] = { + LinkerPatch::RelativeCodePatch(bl_offset_in_method3, nullptr, 1u), + }; + + constexpr uint32_t just_over_max_negative_disp = 16 * MB + 2 - 4u /* PC adjustment */; + bool thunk_in_gap = Create2MethodsWithGap(kNopCode, ArrayRef<const LinkerPatch>(), + method3_code, method3_patches, + just_over_max_negative_disp - bl_offset_in_method3); + ASSERT_FALSE(thunk_in_gap); // There should be a thunk but it should be after the method2. + + // Check linked code. + uint32_t method3_offset = GetMethodOffset(3u); + uint32_t thunk_offset = CompiledCode::AlignCode(method3_offset + method3_code.size(), kThumb2); + uint32_t diff = thunk_offset - (method3_offset + bl_offset_in_method3 + 4u /* PC adjustment */); + ASSERT_EQ(diff & 1u, 0u); + ASSERT_LT(diff >> 1, 1u << 8); // Simple encoding, (diff >> 1) fits into 8 bits. + auto expected_code = GenNopsAndBl(3u, kBlPlus0 | ((diff >> 1) & 0xffu)); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(3u), ArrayRef<const uint8_t>(expected_code))); + EXPECT_TRUE(CheckThunk(thunk_offset)); +} + +TEST_F(Thumb2RelativePatcherTest, DexCacheReferenceImm8) { + TestDexCachereference(0x00ff0000u, 0x00fcu); + ASSERT_LT(GetMethodOffset(1u), 0xfcu); +} + +TEST_F(Thumb2RelativePatcherTest, DexCacheReferenceImm3) { + TestDexCachereference(0x02ff0000u, 0x05fcu); + ASSERT_LT(GetMethodOffset(1u), 0xfcu); +} + +TEST_F(Thumb2RelativePatcherTest, DexCacheReferenceImm) { + TestDexCachereference(0x08ff0000u, 0x08fcu); + ASSERT_LT(GetMethodOffset(1u), 0xfcu); +} + +TEST_F(Thumb2RelativePatcherTest, DexCacheReferenceimm4) { + TestDexCachereference(0xd0ff0000u, 0x60fcu); + ASSERT_LT(GetMethodOffset(1u), 0xfcu); +} + +} // namespace linker +} // namespace art diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc new file mode 100644 index 0000000000..72ddf07089 --- /dev/null +++ b/compiler/linker/arm64/relative_patcher_arm64.cc @@ -0,0 +1,322 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "linker/arm64/relative_patcher_arm64.h" + +#include "arch/arm64/instruction_set_features_arm64.h" +#include "compiled_method.h" +#include "driver/compiler_driver.h" +#include "mirror/art_method.h" +#include "utils/arm64/assembler_arm64.h" +#include "oat.h" +#include "output_stream.h" + +namespace art { +namespace linker { + +Arm64RelativePatcher::Arm64RelativePatcher(RelativePatcherTargetProvider* provider, + const Arm64InstructionSetFeatures* features) + : ArmBaseRelativePatcher(provider, kArm64, CompileThunkCode(), + kMaxPositiveDisplacement, kMaxNegativeDisplacement), + fix_cortex_a53_843419_(features->NeedFixCortexA53_843419()), + reserved_adrp_thunks_(0u), + processed_adrp_thunks_(0u) { + if (fix_cortex_a53_843419_) { + adrp_thunk_locations_.reserve(16u); + current_method_thunks_.reserve(16u * kAdrpThunkSize); + } +} + +uint32_t Arm64RelativePatcher::ReserveSpace(uint32_t offset, + const CompiledMethod* compiled_method, + MethodReference method_ref) { + if (!fix_cortex_a53_843419_) { + DCHECK(adrp_thunk_locations_.empty()); + return ReserveSpaceInternal(offset, compiled_method, method_ref, 0u); + } + + // Add thunks for previous method if any. + if (reserved_adrp_thunks_ != adrp_thunk_locations_.size()) { + size_t num_adrp_thunks = adrp_thunk_locations_.size() - reserved_adrp_thunks_; + offset = CompiledMethod::AlignCode(offset, kArm64) + kAdrpThunkSize * num_adrp_thunks; + reserved_adrp_thunks_ = adrp_thunk_locations_.size(); + } + + // Count the number of ADRP insns as the upper bound on the number of thunks needed + // and use it to reserve space for other linker patches. + size_t num_adrp = 0u; + DCHECK(compiled_method != nullptr); + for (const LinkerPatch& patch : compiled_method->GetPatches()) { + if (patch.Type() == kLinkerPatchDexCacheArray && + patch.LiteralOffset() == patch.PcInsnOffset()) { // ADRP patch + ++num_adrp; + } + } + offset = ReserveSpaceInternal(offset, compiled_method, method_ref, kAdrpThunkSize * num_adrp); + if (num_adrp == 0u) { + return offset; + } + + // Now that we have the actual offset where the code will be placed, locate the ADRP insns + // that actually require the thunk. + uint32_t quick_code_offset = compiled_method->AlignCode(offset) + sizeof(OatQuickMethodHeader); + ArrayRef<const uint8_t> code(*compiled_method->GetQuickCode()); + uint32_t thunk_offset = compiled_method->AlignCode(quick_code_offset + code.size()); + DCHECK(compiled_method != nullptr); + for (const LinkerPatch& patch : compiled_method->GetPatches()) { + if (patch.Type() == kLinkerPatchDexCacheArray && + patch.LiteralOffset() == patch.PcInsnOffset()) { // ADRP patch + uint32_t patch_offset = quick_code_offset + patch.LiteralOffset(); + if (NeedsErratum843419Thunk(code, patch.LiteralOffset(), patch_offset)) { + adrp_thunk_locations_.emplace_back(patch_offset, thunk_offset); + thunk_offset += kAdrpThunkSize; + } + } + } + return offset; +} + +uint32_t Arm64RelativePatcher::ReserveSpaceEnd(uint32_t offset) { + if (!fix_cortex_a53_843419_) { + DCHECK(adrp_thunk_locations_.empty()); + } else { + // Add thunks for the last method if any. 
+ if (reserved_adrp_thunks_ != adrp_thunk_locations_.size()) { + size_t num_adrp_thunks = adrp_thunk_locations_.size() - reserved_adrp_thunks_; + offset = CompiledMethod::AlignCode(offset, kArm64) + kAdrpThunkSize * num_adrp_thunks; + reserved_adrp_thunks_ = adrp_thunk_locations_.size(); + } + } + return ArmBaseRelativePatcher::ReserveSpaceEnd(offset); +} + +uint32_t Arm64RelativePatcher::WriteThunks(OutputStream* out, uint32_t offset) { + if (fix_cortex_a53_843419_) { + if (!current_method_thunks_.empty()) { + uint32_t aligned_offset = CompiledMethod::AlignCode(offset, kArm64); + if (kIsDebugBuild) { + CHECK(IsAligned<kAdrpThunkSize>(current_method_thunks_.size())); + size_t num_thunks = current_method_thunks_.size() / kAdrpThunkSize; + CHECK_LE(num_thunks, processed_adrp_thunks_); + for (size_t i = 0u; i != num_thunks; ++i) { + const auto& entry = adrp_thunk_locations_[processed_adrp_thunks_ - num_thunks + i]; + CHECK_EQ(entry.second, aligned_offset + i * kAdrpThunkSize); + } + } + uint32_t aligned_code_delta = aligned_offset - offset; + if (aligned_code_delta != 0u && !WriteCodeAlignment(out, aligned_code_delta)) { + return 0u; + } + if (!WriteMiscThunk(out, ArrayRef<const uint8_t>(current_method_thunks_))) { + return 0u; + } + offset = aligned_offset + current_method_thunks_.size(); + current_method_thunks_.clear(); + } + } + return ArmBaseRelativePatcher::WriteThunks(out, offset); +} + +void Arm64RelativePatcher::PatchCall(std::vector<uint8_t>* code, uint32_t literal_offset, + uint32_t patch_offset, uint32_t target_offset) { + DCHECK_LE(literal_offset + 4u, code->size()); + DCHECK_EQ(literal_offset & 3u, 0u); + DCHECK_EQ(patch_offset & 3u, 0u); + DCHECK_EQ(target_offset & 3u, 0u); + uint32_t displacement = CalculateDisplacement(patch_offset, target_offset & ~1u); + DCHECK_EQ(displacement & 3u, 0u); + DCHECK((displacement >> 27) == 0u || (displacement >> 27) == 31u); // 28-bit signed. + uint32_t insn = (displacement & 0x0fffffffu) >> 2; + insn |= 0x94000000; // BL + + // Check that we're just overwriting an existing BL. + DCHECK_EQ(GetInsn(code, literal_offset) & 0xfc000000u, 0x94000000u); + // Write the new BL. + SetInsn(code, literal_offset, insn); +} + +void Arm64RelativePatcher::PatchDexCacheReference(std::vector<uint8_t>* code, + const LinkerPatch& patch, + uint32_t patch_offset, + uint32_t target_offset) { + DCHECK_EQ(patch_offset & 3u, 0u); + DCHECK_EQ(target_offset & 3u, 0u); + uint32_t literal_offset = patch.LiteralOffset(); + uint32_t insn = GetInsn(code, literal_offset); + uint32_t pc_insn_offset = patch.PcInsnOffset(); + uint32_t disp = target_offset - ((patch_offset - literal_offset + pc_insn_offset) & ~0xfffu); + if (literal_offset == pc_insn_offset) { + // Check it's an ADRP with imm == 0 (unset). + DCHECK_EQ((insn & 0xffffffe0u), 0x90000000u) + << literal_offset << ", " << pc_insn_offset << ", 0x" << std::hex << insn; + if (fix_cortex_a53_843419_ && processed_adrp_thunks_ != adrp_thunk_locations_.size() && + adrp_thunk_locations_[processed_adrp_thunks_].first == patch_offset) { + DCHECK(NeedsErratum843419Thunk(ArrayRef<const uint8_t>(*code), + literal_offset, patch_offset)); + uint32_t thunk_offset = adrp_thunk_locations_[processed_adrp_thunks_].second; + uint32_t adrp_disp = target_offset - (thunk_offset & ~0xfffu); + uint32_t adrp = PatchAdrp(insn, adrp_disp); + + uint32_t out_disp = thunk_offset - patch_offset; + DCHECK_EQ(out_disp & 3u, 0u); + DCHECK((out_disp >> 27) == 0u || (out_disp >> 27) == 31u); // 28-bit signed. 
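The branch encodings being built here, like the BL rewritten in PatchCall() above, all fold a 28-bit signed byte displacement into the low 26 bits of an A64 B/BL instruction. A minimal standalone sketch of that arithmetic, using the same 0x94000000 (BL) and 0x14000000 (B) opcode constants; the helper names are invented for this illustration and are not part of the patch:

    #include <cassert>
    #include <cstdint>

    // Fold a word-aligned byte displacement into an A64 B/BL and recover it again.
    uint32_t EncodeBranch(uint32_t opcode, uint32_t disp) {  // opcode: 0x94000000u (BL) or 0x14000000u (B).
      assert((disp & 3u) == 0u);                             // Branch offsets are 4-byte aligned.
      assert((disp >> 27) == 0u || (disp >> 27) == 31u);     // Must fit in signed 28 bits.
      return opcode | ((disp & 0x0fffffffu) >> 2);           // imm26 occupies bits 0-25.
    }

    int32_t DecodeBranchDisp(uint32_t insn) {
      // Drop the 6 opcode bits, sign-extend the 26-bit immediate, scale words back to bytes.
      return (static_cast<int32_t>(insn << 6) >> 6) * 4;
    }

For example, EncodeBranch(0x94000000u, 8u) yields 0x94000002u, and DecodeBranchDisp(0x94000002u) returns 8.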
+ insn = (out_disp & 0x0fffffffu) >> 2;
+ insn |= 0x14000000; // B <thunk>
+
+ uint32_t back_disp = -out_disp;
+ DCHECK_EQ(back_disp & 3u, 0u);
+ DCHECK((back_disp >> 27) == 0u || (back_disp >> 27) == 31u); // 28-bit signed.
+ uint32_t b_back = (back_disp & 0x0fffffffu) >> 2;
+ b_back |= 0x14000000; // B <back>
+ size_t thunks_code_offset = current_method_thunks_.size();
+ current_method_thunks_.resize(thunks_code_offset + kAdrpThunkSize);
+ SetInsn(&current_method_thunks_, thunks_code_offset, adrp);
+ SetInsn(&current_method_thunks_, thunks_code_offset + 4u, b_back);
+ static_assert(kAdrpThunkSize == 2 * 4u, "thunk has 2 instructions");
+
+ processed_adrp_thunks_ += 1u;
+ } else {
+ insn = PatchAdrp(insn, disp);
+ }
+ // Write the new ADRP (or B to the erratum 843419 thunk).
+ SetInsn(code, literal_offset, insn);
+ } else {
+ DCHECK_EQ(insn & 0xfffffc00, 0xb9400000); // LDR 32-bit with imm12 == 0 (unset).
+ if (kIsDebugBuild) {
+ uint32_t adrp = GetInsn(code, pc_insn_offset);
+ if ((adrp & 0x9f000000u) != 0x90000000u) {
+ CHECK(fix_cortex_a53_843419_);
+ CHECK_EQ(adrp & 0xfc000000u, 0x14000000u); // B <thunk>
+ CHECK(IsAligned<kAdrpThunkSize>(current_method_thunks_.size()));
+ size_t num_thunks = current_method_thunks_.size() / kAdrpThunkSize;
+ CHECK_LE(num_thunks, processed_adrp_thunks_);
+ uint32_t b_offset = patch_offset - literal_offset + pc_insn_offset;
+ for (size_t i = processed_adrp_thunks_ - num_thunks; ; ++i) {
+ CHECK_NE(i, processed_adrp_thunks_);
+ if (adrp_thunk_locations_[i].first == b_offset) {
+ size_t idx = num_thunks - (processed_adrp_thunks_ - i);
+ adrp = GetInsn(&current_method_thunks_, idx * kAdrpThunkSize);
+ break;
+ }
+ }
+ }
+ CHECK_EQ(adrp & 0x9f00001fu, // Check that pc_insn_offset points
+ 0x90000000 | ((insn >> 5) & 0x1fu)); // to ADRP with matching register.
+ }
+ uint32_t imm12 = (disp & 0xfffu) >> 2;
+ insn = (insn & ~(0xfffu << 10)) | (imm12 << 10);
+ SetInsn(code, literal_offset, insn);
+ }
+}
+
+std::vector<uint8_t> Arm64RelativePatcher::CompileThunkCode() {
+ // The thunk just uses the entry point in the ArtMethod. This works even for calls
+ // to the generic JNI and interpreter trampolines.
+ arm64::Arm64Assembler assembler;
+ Offset offset(mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+ kArm64PointerSize).Int32Value());
+ assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0));
+ // Ensure we emit the literal pool.
+ assembler.EmitSlowPaths();
+ std::vector<uint8_t> thunk_code(assembler.CodeSize());
+ MemoryRegion code(thunk_code.data(), thunk_code.size());
+ assembler.FinalizeInstructions(code);
+ return thunk_code;
+}
+
+uint32_t Arm64RelativePatcher::PatchAdrp(uint32_t adrp, uint32_t disp) {
+ return (adrp & 0x9f00001fu) | // Clear offset bits, keep ADRP with destination reg.
+ // Bottom 12 bits are ignored, the next 2 lowest bits are encoded in bits 29-30.
+ ((disp & 0x00003000u) << (29 - 12)) |
+ // The next 16 bits are encoded in bits 5-22.
+ ((disp & 0xffffc000u) >> (12 + 2 - 5)) |
+ // Since the target_offset is based on the beginning of the oat file and the
+ // image space precedes the oat file, the target_offset into image space will
+ // be negative yet passed as uint32_t. Therefore we limit the displacement
+ // to +-2GiB (rather than the maximum +-4GiB) and determine the sign bit from
+ // the highest bit of the displacement. This is encoded in bit 23.
+ ((disp & 0x80000000u) >> (31 - 23)); +} + +bool Arm64RelativePatcher::NeedsErratum843419Thunk(ArrayRef<const uint8_t> code, + uint32_t literal_offset, + uint32_t patch_offset) { + DCHECK_EQ(patch_offset & 0x3u, 0u); + if ((patch_offset & 0xff8) == 0xff8) { // ...ff8 or ...ffc + uint32_t adrp = GetInsn(code, literal_offset); + DCHECK_EQ(adrp & 0xff000000, 0x90000000); + uint32_t next_offset = patch_offset + 4u; + uint32_t next_insn = GetInsn(code, literal_offset + 4u); + + // Below we avoid patching sequences where the adrp is followed by a load which can easily + // be proved to be aligned. + + // First check if the next insn is the LDR using the result of the ADRP. + // LDR <Wt>, [<Xn>, #pimm], where <Xn> == ADRP destination reg. + if ((next_insn & 0xffc00000) == 0xb9400000 && + (((next_insn >> 5) ^ adrp) & 0x1f) == 0) { + return false; + } + + // LDR <Wt>, <label> is always aligned and thus it doesn't cause boundary crossing. + if ((next_insn & 0xff000000) == 0x18000000) { + return false; + } + + // LDR <Xt>, <label> is aligned iff the pc + displacement is a multiple of 8. + if ((next_insn & 0xff000000) == 0x58000000) { + bool is_aligned_load = (((next_offset >> 2) ^ (next_insn >> 5)) & 1) == 0; + return !is_aligned_load; + } + + // LDR <Wt>, [SP, #<pimm>] and LDR <Xt>, [SP, #<pimm>] are always aligned loads, as SP is + // guaranteed to be 128-bits aligned and <pimm> is multiple of the load size. + if ((next_insn & 0xbfc003e0) == 0xb94003e0) { + return false; + } + return true; + } + return false; +} + +void Arm64RelativePatcher::SetInsn(std::vector<uint8_t>* code, uint32_t offset, uint32_t value) { + DCHECK_LE(offset + 4u, code->size()); + DCHECK_EQ(offset & 3u, 0u); + uint8_t* addr = &(*code)[offset]; + addr[0] = (value >> 0) & 0xff; + addr[1] = (value >> 8) & 0xff; + addr[2] = (value >> 16) & 0xff; + addr[3] = (value >> 24) & 0xff; +} + +uint32_t Arm64RelativePatcher::GetInsn(ArrayRef<const uint8_t> code, uint32_t offset) { + DCHECK_LE(offset + 4u, code.size()); + DCHECK_EQ(offset & 3u, 0u); + const uint8_t* addr = &code[offset]; + return + (static_cast<uint32_t>(addr[0]) << 0) + + (static_cast<uint32_t>(addr[1]) << 8) + + (static_cast<uint32_t>(addr[2]) << 16)+ + (static_cast<uint32_t>(addr[3]) << 24); +} + +template <typename Alloc> +uint32_t Arm64RelativePatcher::GetInsn(std::vector<uint8_t, Alloc>* code, uint32_t offset) { + return GetInsn(ArrayRef<const uint8_t>(*code), offset); +} + +} // namespace linker +} // namespace art diff --git a/compiler/linker/arm64/relative_patcher_arm64.h b/compiler/linker/arm64/relative_patcher_arm64.h new file mode 100644 index 0000000000..2d07e75c85 --- /dev/null +++ b/compiler/linker/arm64/relative_patcher_arm64.h @@ -0,0 +1,74 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_LINKER_ARM64_RELATIVE_PATCHER_ARM64_H_ +#define ART_COMPILER_LINKER_ARM64_RELATIVE_PATCHER_ARM64_H_ + +#include "linker/arm/relative_patcher_arm_base.h" +#include "utils/array_ref.h" + +namespace art { +namespace linker { + +class Arm64RelativePatcher FINAL : public ArmBaseRelativePatcher { + public: + Arm64RelativePatcher(RelativePatcherTargetProvider* provider, + const Arm64InstructionSetFeatures* features); + + uint32_t ReserveSpace(uint32_t offset, const CompiledMethod* compiled_method, + MethodReference method_ref) OVERRIDE; + uint32_t ReserveSpaceEnd(uint32_t offset) OVERRIDE; + uint32_t WriteThunks(OutputStream* out, uint32_t offset) OVERRIDE; + void PatchCall(std::vector<uint8_t>* code, uint32_t literal_offset, + uint32_t patch_offset, uint32_t target_offset) OVERRIDE; + void PatchDexCacheReference(std::vector<uint8_t>* code, const LinkerPatch& patch, + uint32_t patch_offset, uint32_t target_offset) OVERRIDE; + + private: + static std::vector<uint8_t> CompileThunkCode(); + static uint32_t PatchAdrp(uint32_t adrp, uint32_t disp); + + static bool NeedsErratum843419Thunk(ArrayRef<const uint8_t> code, uint32_t literal_offset, + uint32_t patch_offset); + void SetInsn(std::vector<uint8_t>* code, uint32_t offset, uint32_t value); + static uint32_t GetInsn(ArrayRef<const uint8_t> code, uint32_t offset); + + template <typename Alloc> + static uint32_t GetInsn(std::vector<uint8_t, Alloc>* code, uint32_t offset); + + // Maximum positive and negative displacement measured from the patch location. + // (Signed 28 bit displacement with the last bit 0 has range [-2^27, 2^27-4] measured from + // the ARM64 PC pointing to the BL.) + static constexpr uint32_t kMaxPositiveDisplacement = (1u << 27) - 4u; + static constexpr uint32_t kMaxNegativeDisplacement = (1u << 27); + + // The ADRP thunk for erratum 843419 is 2 instructions, i.e. 8 bytes. + static constexpr uint32_t kAdrpThunkSize = 8u; + + const bool fix_cortex_a53_843419_; + // Map original patch_offset to thunk offset. + std::vector<std::pair<uint32_t, uint32_t>> adrp_thunk_locations_; + size_t reserved_adrp_thunks_; + size_t processed_adrp_thunks_; + std::vector<uint8_t> current_method_thunks_; + + DISALLOW_COPY_AND_ASSIGN(Arm64RelativePatcher); +}; + +} // namespace linker +} // namespace art + +#endif // ART_COMPILER_LINKER_ARM64_RELATIVE_PATCHER_ARM64_H_ diff --git a/compiler/linker/arm64/relative_patcher_arm64_test.cc b/compiler/linker/arm64/relative_patcher_arm64_test.cc new file mode 100644 index 0000000000..21f93672ad --- /dev/null +++ b/compiler/linker/arm64/relative_patcher_arm64_test.cc @@ -0,0 +1,582 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "linker/relative_patcher_test.h" +#include "linker/arm64/relative_patcher_arm64.h" + +namespace art { +namespace linker { + +class Arm64RelativePatcherTest : public RelativePatcherTest { + public: + explicit Arm64RelativePatcherTest(const std::string& variant) + : RelativePatcherTest(kArm64, variant) { } + + protected: + static const uint8_t kCallRawCode[]; + static const ArrayRef<const uint8_t> kCallCode; + static const uint8_t kNopRawCode[]; + static const ArrayRef<const uint8_t> kNopCode; + + // All branches can be created from kBlPlus0 or kBPlus0 by adding the low 26 bits. + static constexpr uint32_t kBlPlus0 = 0x94000000u; + static constexpr uint32_t kBPlus0 = 0x14000000u; + + // Special BL values. + static constexpr uint32_t kBlPlusMax = 0x95ffffffu; + static constexpr uint32_t kBlMinusMax = 0x96000000u; + + // LDUR x2, [sp, #4], i.e. unaligned load crossing 64-bit boundary (assuming aligned sp). + static constexpr uint32_t kLdurInsn = 0xf840405fu; + + // LDR w12, <label> and LDR x12, <label>. Bits 5-23 contain label displacement in 4-byte units. + static constexpr uint32_t kLdrWPcRelInsn = 0x1800000cu; + static constexpr uint32_t kLdrXPcRelInsn = 0x5800000cu; + + // LDR w13, [SP, #<pimm>] and LDR x13, [SP, #<pimm>]. Bits 10-21 contain displacement from SP + // in units of 4-bytes (for 32-bit load) or 8-bytes (for 64-bit load). + static constexpr uint32_t kLdrWSpRelInsn = 0xb94003edu; + static constexpr uint32_t kLdrXSpRelInsn = 0xf94003edu; + + uint32_t Create2MethodsWithGap(const ArrayRef<const uint8_t>& method1_code, + const ArrayRef<const LinkerPatch>& method1_patches, + const ArrayRef<const uint8_t>& last_method_code, + const ArrayRef<const LinkerPatch>& last_method_patches, + uint32_t distance_without_thunks) { + CHECK_EQ(distance_without_thunks % kArm64Alignment, 0u); + const uint32_t method1_offset = + CompiledCode::AlignCode(kTrampolineSize, kArm64) + sizeof(OatQuickMethodHeader); + AddCompiledMethod(MethodRef(1u), method1_code, method1_patches); + const uint32_t gap_start = + CompiledCode::AlignCode(method1_offset + method1_code.size(), kArm64); + + // We want to put the method3 at a very precise offset. + const uint32_t last_method_offset = method1_offset + distance_without_thunks; + const uint32_t gap_end = last_method_offset - sizeof(OatQuickMethodHeader); + CHECK(IsAligned<kArm64Alignment>(gap_end)); + + // Fill the gap with intermediate methods in chunks of 2MiB and the last in [2MiB, 4MiB). + // (This allows deduplicating the small chunks to avoid using 256MiB of memory for +-128MiB + // offsets by this test.) + uint32_t method_idx = 2u; + constexpr uint32_t kSmallChunkSize = 2 * MB; + std::vector<uint8_t> gap_code; + size_t gap_size = gap_end - gap_start; + for (; gap_size >= 2u * kSmallChunkSize; gap_size -= kSmallChunkSize) { + uint32_t chunk_code_size = kSmallChunkSize - sizeof(OatQuickMethodHeader); + gap_code.resize(chunk_code_size, 0u); + AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(gap_code), + ArrayRef<const LinkerPatch>()); + method_idx += 1u; + } + uint32_t chunk_code_size = gap_size - sizeof(OatQuickMethodHeader); + gap_code.resize(chunk_code_size, 0u); + AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(gap_code), + ArrayRef<const LinkerPatch>()); + method_idx += 1u; + + // Add the last method and link + AddCompiledMethod(MethodRef(method_idx), last_method_code, last_method_patches); + Link(); + + // Check assumptions. 
+ CHECK_EQ(GetMethodOffset(1), method1_offset); + auto last_result = method_offset_map_.FindMethodOffset(MethodRef(method_idx)); + CHECK(last_result.first); + // There may be a thunk before method2. + if (last_result.second != last_method_offset) { + // Thunk present. Check that there's only one. + uint32_t aligned_thunk_size = CompiledCode::AlignCode(ThunkSize(), kArm64); + CHECK_EQ(last_result.second, last_method_offset + aligned_thunk_size); + } + return method_idx; + } + + uint32_t GetMethodOffset(uint32_t method_idx) { + auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx)); + CHECK(result.first); + CHECK_EQ(result.second & 3u, 0u); + return result.second; + } + + uint32_t ThunkSize() { + return static_cast<Arm64RelativePatcher*>(patcher_.get())->thunk_code_.size(); + } + + bool CheckThunk(uint32_t thunk_offset) { + Arm64RelativePatcher* patcher = static_cast<Arm64RelativePatcher*>(patcher_.get()); + ArrayRef<const uint8_t> expected_code(patcher->thunk_code_); + if (output_.size() < thunk_offset + expected_code.size()) { + LOG(ERROR) << "output_.size() == " << output_.size() << " < " + << "thunk_offset + expected_code.size() == " << (thunk_offset + expected_code.size()); + return false; + } + ArrayRef<const uint8_t> linked_code(&output_[thunk_offset], expected_code.size()); + if (linked_code == expected_code) { + return true; + } + // Log failure info. + DumpDiff(expected_code, linked_code); + return false; + } + + std::vector<uint8_t> GenNopsAndBl(size_t num_nops, uint32_t bl) { + std::vector<uint8_t> result; + result.reserve(num_nops * 4u + 4u); + for (size_t i = 0; i != num_nops; ++i) { + result.insert(result.end(), kNopCode.begin(), kNopCode.end()); + } + result.push_back(static_cast<uint8_t>(bl)); + result.push_back(static_cast<uint8_t>(bl >> 8)); + result.push_back(static_cast<uint8_t>(bl >> 16)); + result.push_back(static_cast<uint8_t>(bl >> 24)); + return result; + } + + std::vector<uint8_t> GenNopsAndAdrpLdr(size_t num_nops, + uint32_t method_offset, uint32_t target_offset) { + std::vector<uint8_t> result; + result.reserve(num_nops * 4u + 8u); + for (size_t i = 0; i != num_nops; ++i) { + result.insert(result.end(), kNopCode.begin(), kNopCode.end()); + } + DCHECK_EQ(method_offset & 3u, 0u); + DCHECK_EQ(target_offset & 3u, 0u); + uint32_t adrp_offset = method_offset + num_nops * 4u; + uint32_t disp = target_offset - (adrp_offset & ~0xfffu); + DCHECK_EQ(disp & 3u, 0u); + uint32_t ldr = 0xb9400001 | // LDR w1, [x0, #(imm12 * 2)] + ((disp & 0xfffu) << (10 - 2)); // imm12 = ((disp & 0xfffu) >> 2) is at bit 10. + uint32_t adrp = 0x90000000 | // ADRP x0, +SignExtend(immhi:immlo:Zeros(12), 64) + ((disp & 0x3000u) << (29 - 12)) | // immlo = ((disp & 0x3000u) >> 12) is at bit 29, + ((disp & 0xffffc000) >> (14 - 5)) | // immhi = (disp >> 14) is at bit 5, + // We take the sign bit from the disp, limiting disp to +- 2GiB. + ((disp & 0x80000000) >> (31 - 23)); // sign bit in immhi is at bit 23. 
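The adrp value assembled just above, like PatchAdrp() earlier, follows the A64 ADRP layout: immlo sits in bits 29-30, immhi in bits 5-23, and the displacement is treated as a signed 32-bit page offset (±2GiB). A small illustrative helper, not part of the patch, that performs the same packing:

    #include <cstdint>

    // Pack a page displacement (target page - PC page) into ADRP <Xrd>, taking the
    // sign from bit 31 of disp, which limits the reach to +/-2GiB as noted above.
    uint32_t MakeAdrp(uint32_t rd, uint32_t disp) {
      return 0x90000000u | (rd & 0x1fu)           // ADRP opcode bits + destination register.
          | ((disp & 0x00003000u) << (29 - 12))   // disp bits 12-13 -> immlo (bits 29-30).
          | ((disp & 0xffffc000u) >> (14 - 5))    // disp bits 14-31 -> bits 5-22 of immhi.
          | ((disp & 0x80000000u) >> (31 - 23));  // Sign bit replicated into bit 23.
    }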
+ result.push_back(static_cast<uint8_t>(adrp)); + result.push_back(static_cast<uint8_t>(adrp >> 8)); + result.push_back(static_cast<uint8_t>(adrp >> 16)); + result.push_back(static_cast<uint8_t>(adrp >> 24)); + result.push_back(static_cast<uint8_t>(ldr)); + result.push_back(static_cast<uint8_t>(ldr >> 8)); + result.push_back(static_cast<uint8_t>(ldr >> 16)); + result.push_back(static_cast<uint8_t>(ldr >> 24)); + return result; + } + + void TestNopsAdrpLdr(size_t num_nops, uint32_t dex_cache_arrays_begin, uint32_t element_offset) { + dex_cache_arrays_begin_ = dex_cache_arrays_begin; + auto code = GenNopsAndAdrpLdr(num_nops, 0u, 0u); // Unpatched. + LinkerPatch patches[] = { + LinkerPatch::DexCacheArrayPatch(num_nops * 4u , nullptr, num_nops * 4u, element_offset), + LinkerPatch::DexCacheArrayPatch(num_nops * 4u + 4u, nullptr, num_nops * 4u, element_offset), + }; + AddCompiledMethod(MethodRef(1u), ArrayRef<const uint8_t>(code), + ArrayRef<const LinkerPatch>(patches)); + Link(); + + uint32_t method1_offset = GetMethodOffset(1u); + uint32_t target_offset = dex_cache_arrays_begin_ + element_offset; + auto expected_code = GenNopsAndAdrpLdr(num_nops, method1_offset, target_offset); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); + } + + void InsertInsn(std::vector<uint8_t>* code, size_t pos, uint32_t insn) { + CHECK_LE(pos, code->size()); + const uint8_t insn_code[] = { + static_cast<uint8_t>(insn), static_cast<uint8_t>(insn >> 8), + static_cast<uint8_t>(insn >> 16), static_cast<uint8_t>(insn >> 24), + }; + static_assert(sizeof(insn_code) == 4u, "Invalid sizeof(insn_code)."); + code->insert(code->begin() + pos, insn_code, insn_code + sizeof(insn_code)); + } + + void PrepareNopsAdrpInsn2Ldr(size_t num_nops, uint32_t insn2, + uint32_t dex_cache_arrays_begin, uint32_t element_offset) { + dex_cache_arrays_begin_ = dex_cache_arrays_begin; + auto code = GenNopsAndAdrpLdr(num_nops, 0u, 0u); // Unpatched. 
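Each dex cache array reference is patched in two places, as the paired DexCacheArrayPatch entries above show: the ADRP receives the page displacement and the dependent LDR receives the low 12 bits. A toy restatement of the LDR half, mirroring the imm12 update in PatchDexCacheReference(); the function name is made up for this sketch:

    #include <cstdint>

    // Plug the low 12 bits of the displacement into a 32-bit "LDR <Wt>, [<Xn>, #imm]"
    // whose offset field is still zero; the byte offset is scaled by the 4-byte access size.
    uint32_t PatchLdrImm12(uint32_t ldr, uint32_t disp) {
      uint32_t imm12 = (disp & 0xfffu) >> 2;           // Page offset in 4-byte units.
      return (ldr & ~(0xfffu << 10)) | (imm12 << 10);  // imm12 lives in bits 10-21.
    }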
+ InsertInsn(&code, num_nops * 4u + 4u, insn2); + LinkerPatch patches[] = { + LinkerPatch::DexCacheArrayPatch(num_nops * 4u , nullptr, num_nops * 4u, element_offset), + LinkerPatch::DexCacheArrayPatch(num_nops * 4u + 8u, nullptr, num_nops * 4u, element_offset), + }; + AddCompiledMethod(MethodRef(1u), ArrayRef<const uint8_t>(code), + ArrayRef<const LinkerPatch>(patches)); + Link(); + } + + void TestNopsAdrpInsn2Ldr(size_t num_nops, uint32_t insn2, + uint32_t dex_cache_arrays_begin, uint32_t element_offset) { + PrepareNopsAdrpInsn2Ldr(num_nops, insn2, dex_cache_arrays_begin, element_offset); + + uint32_t method1_offset = GetMethodOffset(1u); + uint32_t target_offset = dex_cache_arrays_begin_ + element_offset; + auto expected_code = GenNopsAndAdrpLdr(num_nops, method1_offset, target_offset); + InsertInsn(&expected_code, num_nops * 4u + 4u, insn2); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); + } + + void TestNopsAdrpInsn2LdrHasThunk(size_t num_nops, uint32_t insn2, + uint32_t dex_cache_arrays_begin, uint32_t element_offset) { + PrepareNopsAdrpInsn2Ldr(num_nops, insn2, dex_cache_arrays_begin, element_offset); + + uint32_t method1_offset = GetMethodOffset(1u); + CHECK(!compiled_method_refs_.empty()); + CHECK_EQ(compiled_method_refs_[0].dex_method_index, 1u); + CHECK_EQ(compiled_method_refs_.size(), compiled_methods_.size()); + uint32_t method1_size = compiled_methods_[0]->GetQuickCode()->size(); + uint32_t thunk_offset = CompiledCode::AlignCode(method1_offset + method1_size, kArm64); + uint32_t b_diff = thunk_offset - (method1_offset + num_nops * 4u); + ASSERT_EQ(b_diff & 3u, 0u); + ASSERT_LT(b_diff, 128 * MB); + uint32_t b_out = kBPlus0 + ((b_diff >> 2) & 0x03ffffffu); + uint32_t b_in = kBPlus0 + ((-b_diff >> 2) & 0x03ffffffu); + + uint32_t target_offset = dex_cache_arrays_begin_ + element_offset; + auto expected_code = GenNopsAndAdrpLdr(num_nops, method1_offset, target_offset); + InsertInsn(&expected_code, num_nops * 4u + 4u, insn2); + // Replace adrp with bl. 
+ expected_code.erase(expected_code.begin() + num_nops * 4u, + expected_code.begin() + num_nops * 4u + 4u); + InsertInsn(&expected_code, num_nops * 4u, b_out); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); + + auto expected_thunk_code = GenNopsAndAdrpLdr(0u, thunk_offset, target_offset); + ASSERT_EQ(expected_thunk_code.size(), 8u); + expected_thunk_code.erase(expected_thunk_code.begin() + 4u, expected_thunk_code.begin() + 8u); + InsertInsn(&expected_thunk_code, 4u, b_in); + ASSERT_EQ(expected_thunk_code.size(), 8u); + + uint32_t thunk_size = ThunkSize(); + ASSERT_EQ(thunk_offset + thunk_size, output_.size()); + ASSERT_EQ(thunk_size, expected_thunk_code.size()); + ArrayRef<const uint8_t> thunk_code(&output_[thunk_offset], thunk_size); + if (ArrayRef<const uint8_t>(expected_thunk_code) != thunk_code) { + DumpDiff(ArrayRef<const uint8_t>(expected_thunk_code), thunk_code); + FAIL(); + } + } + + void TestAdrpInsn2Ldr(uint32_t insn2, uint32_t adrp_offset, bool has_thunk, + uint32_t dex_cache_arrays_begin, uint32_t element_offset) { + uint32_t method1_offset = + CompiledCode::AlignCode(kTrampolineSize, kArm64) + sizeof(OatQuickMethodHeader); + ASSERT_LT(method1_offset, adrp_offset); + ASSERT_EQ(adrp_offset & 3u, 0u); + uint32_t num_nops = (adrp_offset - method1_offset) / 4u; + if (has_thunk) { + TestNopsAdrpInsn2LdrHasThunk(num_nops, insn2, dex_cache_arrays_begin, element_offset); + } else { + TestNopsAdrpInsn2Ldr(num_nops, insn2, dex_cache_arrays_begin, element_offset); + } + ASSERT_EQ(method1_offset, GetMethodOffset(1u)); // If this fails, num_nops is wrong. + } + + void TestAdrpLdurLdr(uint32_t adrp_offset, bool has_thunk, + uint32_t dex_cache_arrays_begin, uint32_t element_offset) { + TestAdrpInsn2Ldr(kLdurInsn, adrp_offset, has_thunk, dex_cache_arrays_begin, element_offset); + } + + void TestAdrpLdrPcRelLdr(uint32_t pcrel_ldr_insn, int32_t pcrel_disp, + uint32_t adrp_offset, bool has_thunk, + uint32_t dex_cache_arrays_begin, uint32_t element_offset) { + ASSERT_LT(pcrel_disp, 0x100000); + ASSERT_GE(pcrel_disp, -0x100000); + ASSERT_EQ(pcrel_disp & 0x3, 0); + uint32_t insn2 = pcrel_ldr_insn | (((static_cast<uint32_t>(pcrel_disp) >> 2) & 0x7ffffu) << 5); + TestAdrpInsn2Ldr(insn2, adrp_offset, has_thunk, dex_cache_arrays_begin, element_offset); + } + + void TestAdrpLdrSpRelLdr(uint32_t sprel_ldr_insn, uint32_t sprel_disp_in_load_units, + uint32_t adrp_offset, bool has_thunk, + uint32_t dex_cache_arrays_begin, uint32_t element_offset) { + ASSERT_LT(sprel_disp_in_load_units, 0x1000u); + uint32_t insn2 = sprel_ldr_insn | ((sprel_disp_in_load_units & 0xfffu) << 10); + TestAdrpInsn2Ldr(insn2, adrp_offset, has_thunk, dex_cache_arrays_begin, element_offset); + } +}; + +const uint8_t Arm64RelativePatcherTest::kCallRawCode[] = { + 0x00, 0x00, 0x00, 0x94 +}; + +const ArrayRef<const uint8_t> Arm64RelativePatcherTest::kCallCode(kCallRawCode); + +const uint8_t Arm64RelativePatcherTest::kNopRawCode[] = { + 0x1f, 0x20, 0x03, 0xd5 +}; + +const ArrayRef<const uint8_t> Arm64RelativePatcherTest::kNopCode(kNopRawCode); + +class Arm64RelativePatcherTestDefault : public Arm64RelativePatcherTest { + public: + Arm64RelativePatcherTestDefault() : Arm64RelativePatcherTest("default") { } +}; + +class Arm64RelativePatcherTestDenver64 : public Arm64RelativePatcherTest { + public: + Arm64RelativePatcherTestDenver64() : Arm64RelativePatcherTest("denver64") { } +}; + +TEST_F(Arm64RelativePatcherTestDefault, CallSelf) { + LinkerPatch patches[] = { + LinkerPatch::RelativeCodePatch(0u, 
nullptr, 1u), + }; + AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches)); + Link(); + + static const uint8_t expected_code[] = { + 0x00, 0x00, 0x00, 0x94 + }; + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); +} + +TEST_F(Arm64RelativePatcherTestDefault, CallOther) { + LinkerPatch method1_patches[] = { + LinkerPatch::RelativeCodePatch(0u, nullptr, 2u), + }; + AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(method1_patches)); + LinkerPatch method2_patches[] = { + LinkerPatch::RelativeCodePatch(0u, nullptr, 1u), + }; + AddCompiledMethod(MethodRef(2u), kCallCode, ArrayRef<const LinkerPatch>(method2_patches)); + Link(); + + uint32_t method1_offset = GetMethodOffset(1u); + uint32_t method2_offset = GetMethodOffset(2u); + uint32_t diff_after = method2_offset - method1_offset; + ASSERT_EQ(diff_after & 3u, 0u); + ASSERT_LT(diff_after >> 2, 1u << 8); // Simple encoding, (diff_after >> 2) fits into 8 bits. + static const uint8_t method1_expected_code[] = { + static_cast<uint8_t>(diff_after >> 2), 0x00, 0x00, 0x94 + }; + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(method1_expected_code))); + uint32_t diff_before = method1_offset - method2_offset; + ASSERT_EQ(diff_before & 3u, 0u); + ASSERT_GE(diff_before, -1u << 27); + auto method2_expected_code = GenNopsAndBl(0u, kBlPlus0 | ((diff_before >> 2) & 0x03ffffffu)); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(2u), ArrayRef<const uint8_t>(method2_expected_code))); +} + +TEST_F(Arm64RelativePatcherTestDefault, CallTrampoline) { + LinkerPatch patches[] = { + LinkerPatch::RelativeCodePatch(0u, nullptr, 2u), + }; + AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches)); + Link(); + + uint32_t method1_offset = GetMethodOffset(1u); + uint32_t diff = kTrampolineOffset - method1_offset; + ASSERT_EQ(diff & 1u, 0u); + ASSERT_GE(diff, -1u << 9); // Simple encoding, -256 <= (diff >> 1) < 0 (checked as unsigned). + auto expected_code = GenNopsAndBl(0u, kBlPlus0 | ((diff >> 2) & 0x03ffffffu)); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); +} + +TEST_F(Arm64RelativePatcherTestDefault, CallOtherAlmostTooFarAfter) { + auto method1_raw_code = GenNopsAndBl(1u, kBlPlus0); + constexpr uint32_t bl_offset_in_method1 = 1u * 4u; // After NOPs. + ArrayRef<const uint8_t> method1_code(method1_raw_code); + ASSERT_EQ(bl_offset_in_method1 + 4u, method1_code.size()); + uint32_t expected_last_method_idx = 65; // Based on 2MiB chunks in Create2MethodsWithGap(). + LinkerPatch method1_patches[] = { + LinkerPatch::RelativeCodePatch(bl_offset_in_method1, nullptr, expected_last_method_idx), + }; + + constexpr uint32_t max_positive_disp = 128 * MB - 4u; + uint32_t last_method_idx = Create2MethodsWithGap(method1_code, method1_patches, + kNopCode, ArrayRef<const LinkerPatch>(), + bl_offset_in_method1 + max_positive_disp); + ASSERT_EQ(expected_last_method_idx, last_method_idx); + + uint32_t method1_offset = GetMethodOffset(1u); + uint32_t last_method_offset = GetMethodOffset(last_method_idx); + ASSERT_EQ(method1_offset + bl_offset_in_method1 + max_positive_disp, last_method_offset); + + // Check linked code. 
+ auto expected_code = GenNopsAndBl(1u, kBlPlusMax); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); +} + +TEST_F(Arm64RelativePatcherTestDefault, CallOtherAlmostTooFarBefore) { + auto last_method_raw_code = GenNopsAndBl(0u, kBlPlus0); + constexpr uint32_t bl_offset_in_last_method = 0u * 4u; // After NOPs. + ArrayRef<const uint8_t> last_method_code(last_method_raw_code); + ASSERT_EQ(bl_offset_in_last_method + 4u, last_method_code.size()); + LinkerPatch last_method_patches[] = { + LinkerPatch::RelativeCodePatch(bl_offset_in_last_method, nullptr, 1u), + }; + + constexpr uint32_t max_negative_disp = 128 * MB; + uint32_t last_method_idx = Create2MethodsWithGap(kNopCode, ArrayRef<const LinkerPatch>(), + last_method_code, last_method_patches, + max_negative_disp - bl_offset_in_last_method); + uint32_t method1_offset = GetMethodOffset(1u); + uint32_t last_method_offset = GetMethodOffset(last_method_idx); + ASSERT_EQ(method1_offset, last_method_offset + bl_offset_in_last_method - max_negative_disp); + + // Check linked code. + auto expected_code = GenNopsAndBl(0u, kBlMinusMax); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(last_method_idx), + ArrayRef<const uint8_t>(expected_code))); +} + +TEST_F(Arm64RelativePatcherTestDefault, CallOtherJustTooFarAfter) { + auto method1_raw_code = GenNopsAndBl(0u, kBlPlus0); + constexpr uint32_t bl_offset_in_method1 = 0u * 4u; // After NOPs. + ArrayRef<const uint8_t> method1_code(method1_raw_code); + ASSERT_EQ(bl_offset_in_method1 + 4u, method1_code.size()); + uint32_t expected_last_method_idx = 65; // Based on 2MiB chunks in Create2MethodsWithGap(). + LinkerPatch method1_patches[] = { + LinkerPatch::RelativeCodePatch(bl_offset_in_method1, nullptr, expected_last_method_idx), + }; + + constexpr uint32_t just_over_max_positive_disp = 128 * MB; + uint32_t last_method_idx = Create2MethodsWithGap( + method1_code, method1_patches, kNopCode, ArrayRef<const LinkerPatch>(), + bl_offset_in_method1 + just_over_max_positive_disp); + ASSERT_EQ(expected_last_method_idx, last_method_idx); + + uint32_t method1_offset = GetMethodOffset(1u); + uint32_t last_method_offset = GetMethodOffset(last_method_idx); + uint32_t last_method_header_offset = last_method_offset - sizeof(OatQuickMethodHeader); + ASSERT_TRUE(IsAligned<kArm64Alignment>(last_method_header_offset)); + uint32_t thunk_offset = last_method_header_offset - CompiledCode::AlignCode(ThunkSize(), kArm64); + ASSERT_TRUE(IsAligned<kArm64Alignment>(thunk_offset)); + uint32_t diff = thunk_offset - (method1_offset + bl_offset_in_method1); + ASSERT_EQ(diff & 3u, 0u); + ASSERT_LT(diff, 128 * MB); + auto expected_code = GenNopsAndBl(0u, kBlPlus0 | (diff >> 2)); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); + CheckThunk(thunk_offset); +} + +TEST_F(Arm64RelativePatcherTestDefault, CallOtherJustTooFarBefore) { + auto last_method_raw_code = GenNopsAndBl(1u, kBlPlus0); + constexpr uint32_t bl_offset_in_last_method = 1u * 4u; // After NOPs. 
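The AlmostTooFar/JustTooFar cases above (and the one continuing below) pin down the exact BL reach the patcher assumes: kMaxPositiveDisplacement = (1 << 27) - 4 and kMaxNegativeDisplacement = 1 << 27 from relative_patcher_arm64.h. A condensed sketch of the resulting range check, a simplification that ignores unresolved targets and thunk bookkeeping:

    #include <cstdint>

    // Can a direct BL at 'patch_offset' reach 'target_offset', or is a thunk needed?
    bool NeedsCallThunk(uint32_t patch_offset, uint32_t target_offset) {
      constexpr uint32_t kMaxPositiveDisplacement = (1u << 27) - 4u;  // 128 MiB - 4.
      constexpr uint32_t kMaxNegativeDisplacement = (1u << 27);       // 128 MiB.
      if (target_offset >= patch_offset) {
        return target_offset - patch_offset > kMaxPositiveDisplacement;
      }
      return patch_offset - target_offset > kMaxNegativeDisplacement;
    }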
+ ArrayRef<const uint8_t> last_method_code(last_method_raw_code); + ASSERT_EQ(bl_offset_in_last_method + 4u, last_method_code.size()); + LinkerPatch last_method_patches[] = { + LinkerPatch::RelativeCodePatch(bl_offset_in_last_method, nullptr, 1u), + }; + + constexpr uint32_t just_over_max_negative_disp = 128 * MB + 4; + uint32_t last_method_idx = Create2MethodsWithGap( + kNopCode, ArrayRef<const LinkerPatch>(), last_method_code, last_method_patches, + just_over_max_negative_disp - bl_offset_in_last_method); + uint32_t method1_offset = GetMethodOffset(1u); + uint32_t last_method_offset = GetMethodOffset(last_method_idx); + ASSERT_EQ(method1_offset, + last_method_offset + bl_offset_in_last_method - just_over_max_negative_disp); + + // Check linked code. + uint32_t thunk_offset = + CompiledCode::AlignCode(last_method_offset + last_method_code.size(), kArm64); + uint32_t diff = thunk_offset - (last_method_offset + bl_offset_in_last_method); + ASSERT_EQ(diff & 3u, 0u); + ASSERT_LT(diff, 128 * MB); + auto expected_code = GenNopsAndBl(1u, kBlPlus0 | (diff >> 2)); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(last_method_idx), + ArrayRef<const uint8_t>(expected_code))); + EXPECT_TRUE(CheckThunk(thunk_offset)); +} + +TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference1) { + TestNopsAdrpLdr(0u, 0x12345678u, 0x1234u); +} + +TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference2) { + TestNopsAdrpLdr(0u, -0x12345678u, 0x4444u); +} + +TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference3) { + TestNopsAdrpLdr(0u, 0x12345000u, 0x3ffcu); +} + +TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference4) { + TestNopsAdrpLdr(0u, 0x12345000u, 0x4000u); +} + +TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference0xff4) { + TestAdrpLdurLdr(0xff4u, false, 0x12345678u, 0x1234u); +} + +TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference0xff8) { + TestAdrpLdurLdr(0xff8u, true, 0x12345678u, 0x1234u); +} + +TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference0xffc) { + TestAdrpLdurLdr(0xffcu, true, 0x12345678u, 0x1234u); +} + +TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference0x1000) { + TestAdrpLdurLdr(0x1000u, false, 0x12345678u, 0x1234u); +} + +TEST_F(Arm64RelativePatcherTestDenver64, DexCacheReference0xff4) { + TestAdrpLdurLdr(0xff4u, false, 0x12345678u, 0x1234u); +} + +TEST_F(Arm64RelativePatcherTestDenver64, DexCacheReference0xff8) { + TestAdrpLdurLdr(0xff8u, false, 0x12345678u, 0x1234u); +} + +TEST_F(Arm64RelativePatcherTestDenver64, DexCacheReference0xffc) { + TestAdrpLdurLdr(0xffcu, false, 0x12345678u, 0x1234u); +} + +TEST_F(Arm64RelativePatcherTestDenver64, DexCacheReference0x1000) { + TestAdrpLdurLdr(0x1000u, false, 0x12345678u, 0x1234u); +} + +#define TEST_FOR_OFFSETS(test, disp1, disp2) \ + test(0xff4u, disp1) test(0xff8u, disp1) test(0xffcu, disp1) test(0x1000u, disp1) \ + test(0xff4u, disp2) test(0xff8u, disp2) test(0xffcu, disp2) test(0x1000u, disp2) + +// LDR <Wt>, <label> is always aligned. We should never have to use a fixup. +#define LDRW_PCREL_TEST(adrp_offset, disp) \ + TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference ## adrp_offset ## WPcRel ## disp) { \ + TestAdrpLdrPcRelLdr(kLdrWPcRelInsn, disp, adrp_offset, false, 0x12345678u, 0x1234u); \ + } + +TEST_FOR_OFFSETS(LDRW_PCREL_TEST, 0x1234, 0x1238) + +// LDR <Xt>, <label> is aligned when offset + displacement is a multiple of 8. 
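The LDRX_PCREL_TEST macro that follows encodes exactly that condition. Spelled out as a plain function, the thunk trigger is roughly the following; this is a condensed restatement of what NeedsErratum843419Thunk() and the macro below check, written only for illustration:

    #include <cstdint>

    // The Cortex-A53 erratum 843419 thunk is only expected when the ADRP sits in the
    // last two words of a 4KiB page and the following LDR <Xt>, <label> is not
    // 8-byte aligned.
    bool ExpectErratumThunk(uint32_t adrp_offset, int32_t ldr_disp) {
      bool adrp_at_page_end = (adrp_offset & 0xff8u) == 0xff8u;  // ...ff8 or ...ffc.
      bool unaligned_load =
          ((adrp_offset + 4u + static_cast<uint32_t>(ldr_disp)) & 7u) != 0u;
      return adrp_at_page_end && unaligned_load;
    }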
+#define LDRX_PCREL_TEST(adrp_offset, disp) \ + TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference ## adrp_offset ## XPcRel ## disp) { \ + bool unaligned = ((adrp_offset + 4u + static_cast<uint32_t>(disp)) & 7u) != 0; \ + bool has_thunk = (adrp_offset == 0xff8u || adrp_offset == 0xffcu) && unaligned; \ + TestAdrpLdrPcRelLdr(kLdrXPcRelInsn, disp, adrp_offset, has_thunk, 0x12345678u, 0x1234u); \ + } + +TEST_FOR_OFFSETS(LDRX_PCREL_TEST, 0x1234, 0x1238) + +// LDR <Wt>, [SP, #<pimm>] and LDR <Xt>, [SP, #<pimm>] are always aligned. No fixup needed. +#define LDRW_SPREL_TEST(adrp_offset, disp) \ + TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference ## adrp_offset ## WSpRel ## disp) { \ + TestAdrpLdrSpRelLdr(kLdrWSpRelInsn, disp >> 2, adrp_offset, false, 0x12345678u, 0x1234u); \ + } + +TEST_FOR_OFFSETS(LDRW_SPREL_TEST, 0, 4) + +#define LDRX_SPREL_TEST(adrp_offset, disp) \ + TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference ## adrp_offset ## XSpRel ## disp) { \ + TestAdrpLdrSpRelLdr(kLdrXSpRelInsn, disp >> 3, adrp_offset, false, 0x12345678u, 0x1234u); \ + } + +TEST_FOR_OFFSETS(LDRX_SPREL_TEST, 0, 8) + +} // namespace linker +} // namespace art diff --git a/compiler/linker/relative_patcher.cc b/compiler/linker/relative_patcher.cc new file mode 100644 index 0000000000..89aed956aa --- /dev/null +++ b/compiler/linker/relative_patcher.cc @@ -0,0 +1,113 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "linker/relative_patcher.h" + +#include "linker/arm/relative_patcher_thumb2.h" +#include "linker/arm64/relative_patcher_arm64.h" +#include "linker/x86/relative_patcher_x86.h" +#include "linker/x86_64/relative_patcher_x86_64.h" +#include "output_stream.h" + +namespace art { +namespace linker { + +std::unique_ptr<RelativePatcher> RelativePatcher::Create( + InstructionSet instruction_set, const InstructionSetFeatures* features, + RelativePatcherTargetProvider* provider) { + class RelativePatcherNone FINAL : public RelativePatcher { + public: + RelativePatcherNone() { } + + uint32_t ReserveSpace(uint32_t offset, + const CompiledMethod* compiled_method ATTRIBUTE_UNUSED, + MethodReference method_ref ATTRIBUTE_UNUSED) OVERRIDE { + return offset; // No space reserved; no patches expected. + } + + uint32_t ReserveSpaceEnd(uint32_t offset) OVERRIDE { + return offset; // No space reserved; no patches expected. + } + + uint32_t WriteThunks(OutputStream* out ATTRIBUTE_UNUSED, uint32_t offset) OVERRIDE { + return offset; // No thunks added; no patches expected. 
+ } + + void PatchCall(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, + uint32_t literal_offset ATTRIBUTE_UNUSED, + uint32_t patch_offset ATTRIBUTE_UNUSED, + uint32_t target_offset ATTRIBUTE_UNUSED) OVERRIDE { + LOG(FATAL) << "Unexpected relative call patch."; + } + + virtual void PatchDexCacheReference(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, + const LinkerPatch& patch ATTRIBUTE_UNUSED, + uint32_t patch_offset ATTRIBUTE_UNUSED, + uint32_t target_offset ATTRIBUTE_UNUSED) { + LOG(FATAL) << "Unexpected relative dex cache array patch."; + } + + private: + DISALLOW_COPY_AND_ASSIGN(RelativePatcherNone); + }; + + switch (instruction_set) { + case kX86: + return std::unique_ptr<RelativePatcher>(new X86RelativePatcher()); + case kX86_64: + return std::unique_ptr<RelativePatcher>(new X86_64RelativePatcher()); + case kArm: + // Fall through: we generate Thumb2 code for "arm". + case kThumb2: + return std::unique_ptr<RelativePatcher>(new Thumb2RelativePatcher(provider)); + case kArm64: + return std::unique_ptr<RelativePatcher>( + new Arm64RelativePatcher(provider, features->AsArm64InstructionSetFeatures())); + default: + return std::unique_ptr<RelativePatcher>(new RelativePatcherNone); + } +} + +bool RelativePatcher::WriteCodeAlignment(OutputStream* out, uint32_t aligned_code_delta) { + static const uint8_t kPadding[] = { + 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u + }; + DCHECK_LE(aligned_code_delta, sizeof(kPadding)); + if (UNLIKELY(!out->WriteFully(kPadding, aligned_code_delta))) { + return false; + } + size_code_alignment_ += aligned_code_delta; + return true; +} + +bool RelativePatcher::WriteRelCallThunk(OutputStream* out, const ArrayRef<const uint8_t>& thunk) { + if (UNLIKELY(!out->WriteFully(thunk.data(), thunk.size()))) { + return false; + } + size_relative_call_thunks_ += thunk.size(); + return true; +} + +bool RelativePatcher::WriteMiscThunk(OutputStream* out, const ArrayRef<const uint8_t>& thunk) { + if (UNLIKELY(!out->WriteFully(thunk.data(), thunk.size()))) { + return false; + } + size_misc_thunks_ += thunk.size(); + return true; +} + +} // namespace linker +} // namespace art diff --git a/compiler/linker/relative_patcher.h b/compiler/linker/relative_patcher.h new file mode 100644 index 0000000000..8a9f3f8364 --- /dev/null +++ b/compiler/linker/relative_patcher.h @@ -0,0 +1,126 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_LINKER_RELATIVE_PATCHER_H_ +#define ART_COMPILER_LINKER_RELATIVE_PATCHER_H_ + +#include <vector> + +#include "arch/instruction_set.h" +#include "arch/instruction_set_features.h" +#include "base/macros.h" +#include "method_reference.h" +#include "utils/array_ref.h" + +namespace art { + +class CompiledMethod; +class LinkerPatch; +class OutputStream; + +namespace linker { + +/** + * @class RelativePatcherTargetProvider + * @brief Interface for providing method offsets for relative call targets. 
+ */ +class RelativePatcherTargetProvider { + public: + /** + * Find the offset of the target method of a relative call if known. + * + * The process of assigning target method offsets includes calls to the relative patcher's + * ReserveSpace() which in turn can use FindMethodOffset() to determine if a method already + * has an offset assigned and, if so, what's that offset. If the offset has not yet been + * assigned or if it's too far for the particular architecture's relative call, + * ReserveSpace() may need to allocate space for a special dispatch thunk. + * + * @param ref the target method of the relative call. + * @return true in the first element of the pair if the method was found, false otherwise; + * if found, the second element specifies the offset. + */ + virtual std::pair<bool, uint32_t> FindMethodOffset(MethodReference ref) = 0; + + protected: + virtual ~RelativePatcherTargetProvider() { } +}; + +/** + * @class RelativePatcher + * @brief Interface for architecture-specific link-time patching of PC-relative references. + */ +class RelativePatcher { + public: + static std::unique_ptr<RelativePatcher> Create( + InstructionSet instruction_set, const InstructionSetFeatures* features, + RelativePatcherTargetProvider* provider); + + virtual ~RelativePatcher() { } + + uint32_t CodeAlignmentSize() const { + return size_code_alignment_; + } + + uint32_t RelativeCallThunksSize() const { + return size_relative_call_thunks_; + } + + uint32_t MiscThunksSize() const { + return size_misc_thunks_; + } + + // Reserve space for thunks if needed before a method, return adjusted offset. + virtual uint32_t ReserveSpace(uint32_t offset, const CompiledMethod* compiled_method, + MethodReference method_ref) = 0; + + // Reserve space for thunks if needed after the last method, return adjusted offset. + virtual uint32_t ReserveSpaceEnd(uint32_t offset) = 0; + + // Write relative call thunks if needed, return adjusted offset. + virtual uint32_t WriteThunks(OutputStream* out, uint32_t offset) = 0; + + // Patch method code. The input displacement is relative to the patched location, + // the patcher may need to adjust it if the correct base is different. + virtual void PatchCall(std::vector<uint8_t>* code, uint32_t literal_offset, + uint32_t patch_offset, uint32_t target_offset) = 0; + + // Patch a reference to a dex cache location. 
+ virtual void PatchDexCacheReference(std::vector<uint8_t>* code, const LinkerPatch& patch, + uint32_t patch_offset, uint32_t target_offset) = 0; + + protected: + RelativePatcher() + : size_code_alignment_(0u), + size_relative_call_thunks_(0u), + size_misc_thunks_(0u) { + } + + bool WriteCodeAlignment(OutputStream* out, uint32_t aligned_code_delta); + bool WriteRelCallThunk(OutputStream* out, const ArrayRef<const uint8_t>& thunk); + bool WriteMiscThunk(OutputStream* out, const ArrayRef<const uint8_t>& thunk); + + private: + uint32_t size_code_alignment_; + uint32_t size_relative_call_thunks_; + uint32_t size_misc_thunks_; + + DISALLOW_COPY_AND_ASSIGN(RelativePatcher); +}; + +} // namespace linker +} // namespace art + +#endif // ART_COMPILER_LINKER_RELATIVE_PATCHER_H_ diff --git a/compiler/linker/relative_patcher_test.h b/compiler/linker/relative_patcher_test.h new file mode 100644 index 0000000000..70630f366f --- /dev/null +++ b/compiler/linker/relative_patcher_test.h @@ -0,0 +1,255 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_LINKER_RELATIVE_PATCHER_TEST_H_ +#define ART_COMPILER_LINKER_RELATIVE_PATCHER_TEST_H_ + +#include "arch/instruction_set.h" +#include "arch/instruction_set_features.h" +#include "base/macros.h" +#include "compiled_method.h" +#include "dex/quick/dex_file_to_method_inliner_map.h" +#include "dex/verification_results.h" +#include "driver/compiler_driver.h" +#include "driver/compiler_options.h" +#include "globals.h" +#include "gtest/gtest.h" +#include "linker/relative_patcher.h" +#include "method_reference.h" +#include "oat.h" +#include "utils/array_ref.h" +#include "vector_output_stream.h" + +namespace art { +namespace linker { + +// Base class providing infrastructure for architecture-specific tests. 
+class RelativePatcherTest : public testing::Test { + protected: + RelativePatcherTest(InstructionSet instruction_set, const std::string& variant) + : compiler_options_(), + verification_results_(&compiler_options_), + inliner_map_(), + driver_(&compiler_options_, &verification_results_, &inliner_map_, + Compiler::kQuick, instruction_set, nullptr, + false, nullptr, nullptr, 1u, + false, false, "", nullptr, -1, ""), + error_msg_(), + instruction_set_(instruction_set), + features_(InstructionSetFeatures::FromVariant(instruction_set, variant, &error_msg_)), + method_offset_map_(), + patcher_(RelativePatcher::Create(instruction_set, features_.get(), &method_offset_map_)), + dex_cache_arrays_begin_(0u), + compiled_method_refs_(), + compiled_methods_(), + patched_code_(), + output_(), + out_("test output stream", &output_) { + CHECK(error_msg_.empty()) << instruction_set << "/" << variant; + patched_code_.reserve(16 * KB); + } + + MethodReference MethodRef(uint32_t method_idx) { + CHECK_NE(method_idx, 0u); + return MethodReference(nullptr, method_idx); + } + + void AddCompiledMethod(MethodReference method_ref, + const ArrayRef<const uint8_t>& code, + const ArrayRef<const LinkerPatch>& patches) { + compiled_method_refs_.push_back(method_ref); + compiled_methods_.emplace_back(new CompiledMethod( + &driver_, instruction_set_, code, + 0u, 0u, 0u, nullptr, ArrayRef<const uint8_t>(), ArrayRef<const uint8_t>(), + ArrayRef<const uint8_t>(), ArrayRef<const uint8_t>(), + patches)); + } + + void Link() { + // Reserve space. + static_assert(kTrampolineOffset == 0u, "Unexpected trampoline offset."); + uint32_t offset = kTrampolineSize; + size_t idx = 0u; + for (auto& compiled_method : compiled_methods_) { + offset = patcher_->ReserveSpace(offset, compiled_method.get(), compiled_method_refs_[idx]); + + uint32_t aligned_offset = compiled_method->AlignCode(offset); + uint32_t aligned_code_delta = aligned_offset - offset; + offset += aligned_code_delta; + + offset += sizeof(OatQuickMethodHeader); + uint32_t quick_code_offset = offset + compiled_method->CodeDelta(); + const auto& code = *compiled_method->GetQuickCode(); + offset += code.size(); + + method_offset_map_.map.Put(compiled_method_refs_[idx], quick_code_offset); + ++idx; + } + offset = patcher_->ReserveSpaceEnd(offset); + uint32_t output_size = offset; + output_.reserve(output_size); + + // Write data. 
+ DCHECK(output_.empty()); + uint8_t dummy_trampoline[kTrampolineSize]; + memset(dummy_trampoline, 0, sizeof(dummy_trampoline)); + out_.WriteFully(dummy_trampoline, kTrampolineSize); + offset = kTrampolineSize; + static const uint8_t kPadding[] = { + 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u + }; + uint8_t dummy_header[sizeof(OatQuickMethodHeader)]; + memset(dummy_header, 0, sizeof(dummy_header)); + for (auto& compiled_method : compiled_methods_) { + offset = patcher_->WriteThunks(&out_, offset); + + uint32_t aligned_offset = compiled_method->AlignCode(offset); + uint32_t aligned_code_delta = aligned_offset - offset; + CHECK_LE(aligned_code_delta, sizeof(kPadding)); + out_.WriteFully(kPadding, aligned_code_delta); + offset += aligned_code_delta; + + out_.WriteFully(dummy_header, sizeof(OatQuickMethodHeader)); + offset += sizeof(OatQuickMethodHeader); + ArrayRef<const uint8_t> code(*compiled_method->GetQuickCode()); + if (!compiled_method->GetPatches().empty()) { + patched_code_.assign(code.begin(), code.end()); + code = ArrayRef<const uint8_t>(patched_code_); + for (const LinkerPatch& patch : compiled_method->GetPatches()) { + if (patch.Type() == kLinkerPatchCallRelative) { + auto result = method_offset_map_.FindMethodOffset(patch.TargetMethod()); + uint32_t target_offset = + result.first ? result.second : kTrampolineOffset + compiled_method->CodeDelta(); + patcher_->PatchCall(&patched_code_, patch.LiteralOffset(), + offset + patch.LiteralOffset(), target_offset); + } else if (patch.Type() == kLinkerPatchDexCacheArray) { + uint32_t target_offset = dex_cache_arrays_begin_ + patch.TargetDexCacheElementOffset(); + patcher_->PatchDexCacheReference(&patched_code_, patch, + offset + patch.LiteralOffset(), target_offset); + } else { + LOG(FATAL) << "Bad patch type."; + } + } + } + out_.WriteFully(&code[0], code.size()); + offset += code.size(); + } + offset = patcher_->WriteThunks(&out_, offset); + CHECK_EQ(offset, output_size); + CHECK_EQ(output_.size(), output_size); + } + + bool CheckLinkedMethod(MethodReference method_ref, const ArrayRef<const uint8_t>& expected_code) { + // Sanity check: original code size must match linked_code.size(). + size_t idx = 0u; + for (auto ref : compiled_method_refs_) { + if (ref.dex_file == method_ref.dex_file && + ref.dex_method_index == method_ref.dex_method_index) { + break; + } + ++idx; + } + CHECK_NE(idx, compiled_method_refs_.size()); + CHECK_EQ(compiled_methods_[idx]->GetQuickCode()->size(), expected_code.size()); + + auto result = method_offset_map_.FindMethodOffset(method_ref); + CHECK(result.first); // Must have been linked. + size_t offset = result.second - compiled_methods_[idx]->CodeDelta(); + CHECK_LT(offset, output_.size()); + CHECK_LE(offset + expected_code.size(), output_.size()); + ArrayRef<const uint8_t> linked_code(&output_[offset], expected_code.size()); + if (linked_code == expected_code) { + return true; + } + // Log failure info. 
+ DumpDiff(expected_code, linked_code);
+ return false;
+ }
+
+ void DumpDiff(const ArrayRef<const uint8_t>& expected_code,
+ const ArrayRef<const uint8_t>& linked_code) {
+ std::ostringstream expected_hex;
+ std::ostringstream linked_hex;
+ std::ostringstream diff_indicator;
+ static const char digits[] = "0123456789abcdef";
+ bool found_diff = false;
+ for (size_t i = 0; i != expected_code.size(); ++i) {
+ expected_hex << " " << digits[expected_code[i] >> 4] << digits[expected_code[i] & 0xf];
+ linked_hex << " " << digits[linked_code[i] >> 4] << digits[linked_code[i] & 0xf];
+ if (!found_diff) {
+ found_diff = (expected_code[i] != linked_code[i]);
+ diff_indicator << (found_diff ? " ^^" : " ");
+ }
+ }
+ CHECK(found_diff);
+ std::string expected_hex_str = expected_hex.str();
+ std::string linked_hex_str = linked_hex.str();
+ std::string diff_indicator_str = diff_indicator.str();
+ if (diff_indicator_str.length() > 60) {
+ CHECK_EQ(diff_indicator_str.length() % 3u, 0u);
+ size_t remove = diff_indicator_str.length() / 3 - 5;
+ std::ostringstream oss;
+ oss << "[stripped " << remove << "]";
+ std::string replacement = oss.str();
+ expected_hex_str.replace(0u, remove * 3u, replacement);
+ linked_hex_str.replace(0u, remove * 3u, replacement);
+ diff_indicator_str.replace(0u, remove * 3u, replacement);
+ }
+ LOG(ERROR) << "diff expected_code linked_code";
+ LOG(ERROR) << "<" << expected_hex_str;
+ LOG(ERROR) << ">" << linked_hex_str;
+ LOG(ERROR) << " " << diff_indicator_str;
+ }
+
+ // Map method reference to assigned offset.
+ // Wrap the map in a class implementing linker::RelativePatcherTargetProvider.
+ class MethodOffsetMap FINAL : public linker::RelativePatcherTargetProvider {
+ public:
+ std::pair<bool, uint32_t> FindMethodOffset(MethodReference ref) OVERRIDE {
+ auto it = map.find(ref);
+ if (it == map.end()) {
+ return std::pair<bool, uint32_t>(false, 0u);
+ } else {
+ return std::pair<bool, uint32_t>(true, it->second);
+ }
+ }
+ SafeMap<MethodReference, uint32_t, MethodReferenceComparator> map;
+ };
+
+ static const uint32_t kTrampolineSize = 4u;
+ static const uint32_t kTrampolineOffset = 0u;
+
+ CompilerOptions compiler_options_;
+ VerificationResults verification_results_;
+ DexFileToMethodInlinerMap inliner_map_;
+ CompilerDriver driver_; // Needed for constructing CompiledMethod.
+ std::string error_msg_;
+ InstructionSet instruction_set_;
+ std::unique_ptr<const InstructionSetFeatures> features_;
+ MethodOffsetMap method_offset_map_;
+ std::unique_ptr<RelativePatcher> patcher_;
+ uint32_t dex_cache_arrays_begin_;
+ std::vector<MethodReference> compiled_method_refs_;
+ std::vector<std::unique_ptr<CompiledMethod>> compiled_methods_;
+ std::vector<uint8_t> patched_code_;
+ std::vector<uint8_t> output_;
+ VectorOutputStream out_;
+};
+
+} // namespace linker
+} // namespace art
+
+#endif // ART_COMPILER_LINKER_RELATIVE_PATCHER_TEST_H_
diff --git a/compiler/linker/x86/relative_patcher_x86.cc b/compiler/linker/x86/relative_patcher_x86.cc
new file mode 100644
index 0000000000..315585d9e7
--- /dev/null
+++ b/compiler/linker/x86/relative_patcher_x86.cc
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "linker/x86/relative_patcher_x86.h" + +#include "compiled_method.h" + +namespace art { +namespace linker { + +void X86RelativePatcher::PatchDexCacheReference(std::vector<uint8_t>* code, + const LinkerPatch& patch, + uint32_t patch_offset, + uint32_t target_offset) { + uint32_t anchor_literal_offset = patch.PcInsnOffset(); + uint32_t literal_offset = patch.LiteralOffset(); + + // Check that the anchor points to pop in a "call +0; pop <reg>" sequence. + DCHECK_GE(anchor_literal_offset, 5u); + DCHECK_LT(anchor_literal_offset, code->size()); + DCHECK_EQ((*code)[anchor_literal_offset - 5u], 0xe8u); + DCHECK_EQ((*code)[anchor_literal_offset - 4u], 0x00u); + DCHECK_EQ((*code)[anchor_literal_offset - 3u], 0x00u); + DCHECK_EQ((*code)[anchor_literal_offset - 2u], 0x00u); + DCHECK_EQ((*code)[anchor_literal_offset - 1u], 0x00u); + DCHECK_EQ((*code)[anchor_literal_offset] & 0xf8u, 0x58u); + + // Check that the patched data contains kDummy32BitOffset. + constexpr int kDummy32BitOffset = 256; // Must match X86Mir2Lir::kDummy32BitOffset. + DCHECK_LE(literal_offset, code->size()); + DCHECK_EQ((*code)[literal_offset + 0u], static_cast<uint8_t>(kDummy32BitOffset >> 0)); + DCHECK_EQ((*code)[literal_offset + 1u], static_cast<uint8_t>(kDummy32BitOffset >> 8)); + DCHECK_EQ((*code)[literal_offset + 2u], static_cast<uint8_t>(kDummy32BitOffset >> 16)); + DCHECK_EQ((*code)[literal_offset + 3u], static_cast<uint8_t>(kDummy32BitOffset >> 24)); + + // Apply patch. + uint32_t anchor_offset = patch_offset - literal_offset + anchor_literal_offset; + uint32_t diff = target_offset - anchor_offset; + (*code)[literal_offset + 0u] = static_cast<uint8_t>(diff >> 0); + (*code)[literal_offset + 1u] = static_cast<uint8_t>(diff >> 8); + (*code)[literal_offset + 2u] = static_cast<uint8_t>(diff >> 16); + (*code)[literal_offset + 3u] = static_cast<uint8_t>(diff >> 24); +} + +} // namespace linker +} // namespace art diff --git a/compiler/linker/x86/relative_patcher_x86.h b/compiler/linker/x86/relative_patcher_x86.h new file mode 100644 index 0000000000..0c881f00ba --- /dev/null +++ b/compiler/linker/x86/relative_patcher_x86.h @@ -0,0 +1,36 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_LINKER_X86_RELATIVE_PATCHER_X86_H_ +#define ART_COMPILER_LINKER_X86_RELATIVE_PATCHER_X86_H_ + +#include "linker/x86/relative_patcher_x86_base.h" + +namespace art { +namespace linker { + +class X86RelativePatcher FINAL : public X86BaseRelativePatcher { + public: + X86RelativePatcher() { } + + void PatchDexCacheReference(std::vector<uint8_t>* code, const LinkerPatch& patch, + uint32_t patch_offset, uint32_t target_offset) OVERRIDE; +}; + +} // namespace linker +} // namespace art + +#endif // ART_COMPILER_LINKER_X86_RELATIVE_PATCHER_X86_H_ diff --git a/compiler/linker/x86/relative_patcher_x86_base.cc b/compiler/linker/x86/relative_patcher_x86_base.cc new file mode 100644 index 0000000000..bc285a7849 --- /dev/null +++ b/compiler/linker/x86/relative_patcher_x86_base.cc @@ -0,0 +1,49 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "linker/x86/relative_patcher_x86_base.h" + +namespace art { +namespace linker { + +uint32_t X86BaseRelativePatcher::ReserveSpace( + uint32_t offset, + const CompiledMethod* compiled_method ATTRIBUTE_UNUSED, + MethodReference method_ref ATTRIBUTE_UNUSED) { + return offset; // No space reserved; no limit on relative call distance. +} + +uint32_t X86BaseRelativePatcher::ReserveSpaceEnd(uint32_t offset) { + return offset; // No space reserved; no limit on relative call distance. +} + +uint32_t X86BaseRelativePatcher::WriteThunks(OutputStream* out ATTRIBUTE_UNUSED, uint32_t offset) { + return offset; // No thunks added; no limit on relative call distance. +} + +void X86BaseRelativePatcher::PatchCall(std::vector<uint8_t>* code, uint32_t literal_offset, + uint32_t patch_offset, uint32_t target_offset) { + DCHECK_LE(literal_offset + 4u, code->size()); + // Unsigned arithmetic with its well-defined overflow behavior is just fine here. + uint32_t displacement = target_offset - patch_offset; + displacement -= kPcDisplacement; // The base PC is at the end of the 4-byte patch. + + typedef __attribute__((__aligned__(1))) int32_t unaligned_int32_t; + reinterpret_cast<unaligned_int32_t*>(&(*code)[literal_offset])[0] = displacement; +} + +} // namespace linker +} // namespace art diff --git a/compiler/linker/x86/relative_patcher_x86_base.h b/compiler/linker/x86/relative_patcher_x86_base.h new file mode 100644 index 0000000000..9200709398 --- /dev/null +++ b/compiler/linker/x86/relative_patcher_x86_base.h @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_LINKER_X86_RELATIVE_PATCHER_X86_BASE_H_ +#define ART_COMPILER_LINKER_X86_RELATIVE_PATCHER_X86_BASE_H_ + +#include "linker/relative_patcher.h" + +namespace art { +namespace linker { + +class X86BaseRelativePatcher : public RelativePatcher { + public: + uint32_t ReserveSpace(uint32_t offset, + const CompiledMethod* compiled_method, + MethodReference method_ref) OVERRIDE; + uint32_t ReserveSpaceEnd(uint32_t offset) OVERRIDE; + uint32_t WriteThunks(OutputStream* out, uint32_t offset) OVERRIDE; + void PatchCall(std::vector<uint8_t>* code, uint32_t literal_offset, + uint32_t patch_offset, uint32_t target_offset) OVERRIDE; + + protected: + X86BaseRelativePatcher() { } + + // PC displacement from patch location; the base address of x86/x86-64 relative + // calls and x86-64 RIP-relative addressing is the PC of the next instruction and + // the patch location is 4 bytes earlier. + static constexpr int32_t kPcDisplacement = 4; + + private: + DISALLOW_COPY_AND_ASSIGN(X86BaseRelativePatcher); +}; + +} // namespace linker +} // namespace art + +#endif // ART_COMPILER_LINKER_X86_RELATIVE_PATCHER_X86_BASE_H_ diff --git a/compiler/linker/x86/relative_patcher_x86_test.cc b/compiler/linker/x86/relative_patcher_x86_test.cc new file mode 100644 index 0000000000..7acc33004a --- /dev/null +++ b/compiler/linker/x86/relative_patcher_x86_test.cc @@ -0,0 +1,135 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "linker/relative_patcher_test.h" +#include "linker/x86/relative_patcher_x86.h" + +namespace art { +namespace linker { + +class X86RelativePatcherTest : public RelativePatcherTest { + public: + X86RelativePatcherTest() : RelativePatcherTest(kX86, "default") { } + + protected: + static const uint8_t kCallRawCode[]; + static const ArrayRef<const uint8_t> kCallCode; + + uint32_t GetMethodOffset(uint32_t method_idx) { + auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx)); + CHECK(result.first); + return result.second; + } +}; + +const uint8_t X86RelativePatcherTest::kCallRawCode[] = { + 0xe8, 0x00, 0x01, 0x00, 0x00 +}; + +const ArrayRef<const uint8_t> X86RelativePatcherTest::kCallCode(kCallRawCode); + +TEST_F(X86RelativePatcherTest, CallSelf) { + LinkerPatch patches[] = { + LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 1u), + }; + AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches)); + Link(); + + static const uint8_t expected_code[] = { + 0xe8, 0xfb, 0xff, 0xff, 0xff + }; + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); +} + +TEST_F(X86RelativePatcherTest, CallOther) { + LinkerPatch method1_patches[] = { + LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 2u), + }; + AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(method1_patches)); + LinkerPatch method2_patches[] = { + LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 1u), + }; + AddCompiledMethod(MethodRef(2u), kCallCode, ArrayRef<const LinkerPatch>(method2_patches)); + Link(); + + uint32_t method1_offset = GetMethodOffset(1u); + uint32_t method2_offset = GetMethodOffset(2u); + uint32_t diff_after = method2_offset - (method1_offset + kCallCode.size() /* PC adjustment */); + static const uint8_t method1_expected_code[] = { + 0xe8, + static_cast<uint8_t>(diff_after), static_cast<uint8_t>(diff_after >> 8), + static_cast<uint8_t>(diff_after >> 16), static_cast<uint8_t>(diff_after >> 24) + }; + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(method1_expected_code))); + uint32_t diff_before = method1_offset - (method2_offset + kCallCode.size() /* PC adjustment */); + static const uint8_t method2_expected_code[] = { + 0xe8, + static_cast<uint8_t>(diff_before), static_cast<uint8_t>(diff_before >> 8), + static_cast<uint8_t>(diff_before >> 16), static_cast<uint8_t>(diff_before >> 24) + }; + EXPECT_TRUE(CheckLinkedMethod(MethodRef(2u), ArrayRef<const uint8_t>(method2_expected_code))); +} + +TEST_F(X86RelativePatcherTest, CallTrampoline) { + LinkerPatch patches[] = { + LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 2u), + }; + AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches)); + Link(); + + auto result = method_offset_map_.FindMethodOffset(MethodRef(1)); + ASSERT_TRUE(result.first); + uint32_t diff = kTrampolineOffset - (result.second + kCallCode.size()); + static const uint8_t expected_code[] = { + 0xe8, + static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8), + static_cast<uint8_t>(diff >> 16), static_cast<uint8_t>(diff >> 24) + }; + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); +} + +TEST_F(X86RelativePatcherTest, DexCacheReference) { + dex_cache_arrays_begin_ = 0x12345678; + constexpr size_t kElementOffset = 0x1234; + static const uint8_t raw_code[] = { + 0xe8, 0x00, 0x00, 0x00, 0x00, // call +0 + 0x5b, // pop ebx + 0x8b, 0x83, 0x00, 0x01, 0x00, 0x00, // mov eax, 
[ebx + 256 (kDummy32BitValue)] + }; + constexpr uint32_t anchor_offset = 5u; // After call +0. + ArrayRef<const uint8_t> code(raw_code); + LinkerPatch patches[] = { + LinkerPatch::DexCacheArrayPatch(code.size() - 4u, nullptr, anchor_offset, kElementOffset), + }; + AddCompiledMethod(MethodRef(1u), code, ArrayRef<const LinkerPatch>(patches)); + Link(); + + auto result = method_offset_map_.FindMethodOffset(MethodRef(1u)); + ASSERT_TRUE(result.first); + uint32_t diff = + dex_cache_arrays_begin_ + kElementOffset - (result.second + anchor_offset); + static const uint8_t expected_code[] = { + 0xe8, 0x00, 0x00, 0x00, 0x00, // call +0 + 0x5b, // pop ebx + 0x8b, 0x83, // mov eax, [ebx + diff] + static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8), + static_cast<uint8_t>(diff >> 16), static_cast<uint8_t>(diff >> 24) + }; + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); +} + +} // namespace linker +} // namespace art diff --git a/compiler/linker/x86_64/relative_patcher_x86_64.cc b/compiler/linker/x86_64/relative_patcher_x86_64.cc new file mode 100644 index 0000000000..598f3ac4a8 --- /dev/null +++ b/compiler/linker/x86_64/relative_patcher_x86_64.cc @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "linker/x86_64/relative_patcher_x86_64.h" + +#include "compiled_method.h" + +namespace art { +namespace linker { + +void X86_64RelativePatcher::PatchDexCacheReference(std::vector<uint8_t>* code, + const LinkerPatch& patch, + uint32_t patch_offset, uint32_t target_offset) { + DCHECK_LE(patch.LiteralOffset() + 4u, code->size()); + // Unsigned arithmetic with its well-defined overflow behavior is just fine here. + uint32_t displacement = target_offset - patch_offset; + displacement -= kPcDisplacement; // The base PC is at the end of the 4-byte patch. + + typedef __attribute__((__aligned__(1))) int32_t unaligned_int32_t; + reinterpret_cast<unaligned_int32_t*>(&(*code)[patch.LiteralOffset()])[0] = displacement; +} + +} // namespace linker +} // namespace art diff --git a/compiler/linker/x86_64/relative_patcher_x86_64.h b/compiler/linker/x86_64/relative_patcher_x86_64.h new file mode 100644 index 0000000000..af687b4a2f --- /dev/null +++ b/compiler/linker/x86_64/relative_patcher_x86_64.h @@ -0,0 +1,36 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_LINKER_X86_64_RELATIVE_PATCHER_X86_64_H_ +#define ART_COMPILER_LINKER_X86_64_RELATIVE_PATCHER_X86_64_H_ + +#include "linker/x86/relative_patcher_x86_base.h" + +namespace art { +namespace linker { + +class X86_64RelativePatcher FINAL : public X86BaseRelativePatcher { + public: + X86_64RelativePatcher() { } + + void PatchDexCacheReference(std::vector<uint8_t>* code, const LinkerPatch& patch, + uint32_t patch_offset, uint32_t target_offset) OVERRIDE; +}; + +} // namespace linker +} // namespace art + +#endif // ART_COMPILER_LINKER_X86_64_RELATIVE_PATCHER_X86_64_H_ diff --git a/compiler/linker/x86_64/relative_patcher_x86_64_test.cc b/compiler/linker/x86_64/relative_patcher_x86_64_test.cc new file mode 100644 index 0000000000..36e0f01a50 --- /dev/null +++ b/compiler/linker/x86_64/relative_patcher_x86_64_test.cc @@ -0,0 +1,136 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "linker/relative_patcher_test.h" +#include "linker/x86_64/relative_patcher_x86_64.h" + +namespace art { +namespace linker { + +class X86_64RelativePatcherTest : public RelativePatcherTest { + public: + X86_64RelativePatcherTest() : RelativePatcherTest(kX86_64, "default") { } + + protected: + static const uint8_t kCallRawCode[]; + static const ArrayRef<const uint8_t> kCallCode; + static const uint8_t kDexCacheLoadRawCode[]; + static const ArrayRef<const uint8_t> kDexCacheLoadCode; + + uint32_t GetMethodOffset(uint32_t method_idx) { + auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx)); + CHECK(result.first); + return result.second; + } +}; + +const uint8_t X86_64RelativePatcherTest::kCallRawCode[] = { + 0xe8, 0x00, 0x01, 0x00, 0x00 +}; + +const ArrayRef<const uint8_t> X86_64RelativePatcherTest::kCallCode(kCallRawCode); + +const uint8_t X86_64RelativePatcherTest::kDexCacheLoadRawCode[] = { + 0x8b, 0x05, // mov eax, [rip + <offset>] + 0x00, 0x01, 0x00, 0x00 +}; + +const ArrayRef<const uint8_t> X86_64RelativePatcherTest::kDexCacheLoadCode( + kDexCacheLoadRawCode); + +TEST_F(X86_64RelativePatcherTest, CallSelf) { + LinkerPatch patches[] = { + LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 1u), + }; + AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches)); + Link(); + + static const uint8_t expected_code[] = { + 0xe8, 0xfb, 0xff, 0xff, 0xff + }; + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); +} + +TEST_F(X86_64RelativePatcherTest, CallOther) { + LinkerPatch method1_patches[] = { + LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 2u), + }; + AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(method1_patches)); + LinkerPatch method2_patches[] = { + LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 1u), + }; + AddCompiledMethod(MethodRef(2u), kCallCode, ArrayRef<const LinkerPatch>(method2_patches)); + Link(); + + uint32_t method1_offset = GetMethodOffset(1u); + uint32_t 
method2_offset = GetMethodOffset(2u); + uint32_t diff_after = method2_offset - (method1_offset + kCallCode.size() /* PC adjustment */); + static const uint8_t method1_expected_code[] = { + 0xe8, + static_cast<uint8_t>(diff_after), static_cast<uint8_t>(diff_after >> 8), + static_cast<uint8_t>(diff_after >> 16), static_cast<uint8_t>(diff_after >> 24) + }; + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(method1_expected_code))); + uint32_t diff_before = method1_offset - (method2_offset + kCallCode.size() /* PC adjustment */); + static const uint8_t method2_expected_code[] = { + 0xe8, + static_cast<uint8_t>(diff_before), static_cast<uint8_t>(diff_before >> 8), + static_cast<uint8_t>(diff_before >> 16), static_cast<uint8_t>(diff_before >> 24) + }; + EXPECT_TRUE(CheckLinkedMethod(MethodRef(2u), ArrayRef<const uint8_t>(method2_expected_code))); +} + +TEST_F(X86_64RelativePatcherTest, CallTrampoline) { + LinkerPatch patches[] = { + LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 2u), + }; + AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches)); + Link(); + + auto result = method_offset_map_.FindMethodOffset(MethodRef(1u)); + ASSERT_TRUE(result.first); + uint32_t diff = kTrampolineOffset - (result.second + kCallCode.size()); + static const uint8_t expected_code[] = { + 0xe8, + static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8), + static_cast<uint8_t>(diff >> 16), static_cast<uint8_t>(diff >> 24) + }; + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); +} + +TEST_F(X86_64RelativePatcherTest, DexCacheReference) { + dex_cache_arrays_begin_ = 0x12345678; + constexpr size_t kElementOffset = 0x1234; + LinkerPatch patches[] = { + LinkerPatch::DexCacheArrayPatch(kDexCacheLoadCode.size() - 4u, nullptr, 0u, kElementOffset), + }; + AddCompiledMethod(MethodRef(1u), kDexCacheLoadCode, ArrayRef<const LinkerPatch>(patches)); + Link(); + + auto result = method_offset_map_.FindMethodOffset(MethodRef(1u)); + ASSERT_TRUE(result.first); + uint32_t diff = + dex_cache_arrays_begin_ + kElementOffset - (result.second + kDexCacheLoadCode.size()); + static const uint8_t expected_code[] = { + 0x8b, 0x05, + static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8), + static_cast<uint8_t>(diff >> 16), static_cast<uint8_t>(diff >> 24) + }; + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); +} + +} // namespace linker +} // namespace art diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc index afd39e8874..989b04fa36 100644 --- a/compiler/oat_test.cc +++ b/compiler/oat_test.cc @@ -17,11 +17,14 @@ #include "arch/instruction_set_features.h" #include "class_linker.h" #include "common_compiler_test.h" +#include "compiled_method.h" #include "compiler.h" #include "dex/pass_manager.h" #include "dex/quick/dex_file_to_method_inliner_map.h" #include "dex/quick_compiler_callbacks.h" #include "dex/verification_results.h" +#include "driver/compiler_driver.h" +#include "driver/compiler_options.h" #include "entrypoints/quick/quick_entrypoints.h" #include "mirror/art_method-inl.h" #include "mirror/class-inl.h" @@ -173,7 +176,7 @@ TEST_F(OatTest, OatHeaderSizeCheck) { EXPECT_EQ(72U, sizeof(OatHeader)); EXPECT_EQ(4U, sizeof(OatMethodOffsets)); EXPECT_EQ(28U, sizeof(OatQuickMethodHeader)); - EXPECT_EQ(91 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints)); + EXPECT_EQ(92 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints)); } TEST_F(OatTest, 
OatHeaderIsValid) { diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc index b3bb438bac..5b4cc54858 100644 --- a/compiler/oat_writer.cc +++ b/compiler/oat_writer.cc @@ -18,16 +18,21 @@ #include <zlib.h> +#include "arch/arm64/instruction_set_features_arm64.h" #include "base/allocator.h" #include "base/bit_vector.h" #include "base/stl_util.h" #include "base/unix_file/fd_file.h" #include "class_linker.h" #include "compiled_class.h" +#include "compiled_method.h" #include "dex_file-inl.h" #include "dex/verification_results.h" +#include "driver/compiler_driver.h" +#include "driver/compiler_options.h" #include "gc/space/space.h" #include "image_writer.h" +#include "linker/relative_patcher.h" #include "mirror/art_method-inl.h" #include "mirror/array.h" #include "mirror/class_loader.h" @@ -37,352 +42,10 @@ #include "safe_map.h" #include "scoped_thread_state_change.h" #include "handle_scope-inl.h" -#include "utils/arm/assembler_thumb2.h" -#include "utils/arm64/assembler_arm64.h" #include "verifier/method_verifier.h" namespace art { -class OatWriter::RelativeCallPatcher { - public: - virtual ~RelativeCallPatcher() { } - - // Reserve space for relative call thunks if needed, return adjusted offset. - // After all methods have been processed it's call one last time with compiled_method == nullptr. - virtual uint32_t ReserveSpace(uint32_t offset, const CompiledMethod* compiled_method) = 0; - - // Write relative call thunks if needed, return adjusted offset. - virtual uint32_t WriteThunks(OutputStream* out, uint32_t offset) = 0; - - // Patch method code. The input displacement is relative to the patched location, - // the patcher may need to adjust it if the correct base is different. - virtual void Patch(std::vector<uint8_t>* code, uint32_t literal_offset, uint32_t patch_offset, - uint32_t target_offset) = 0; - - protected: - RelativeCallPatcher() { } - - private: - DISALLOW_COPY_AND_ASSIGN(RelativeCallPatcher); -}; - -class OatWriter::NoRelativeCallPatcher FINAL : public RelativeCallPatcher { - public: - NoRelativeCallPatcher() { } - - uint32_t ReserveSpace(uint32_t offset, - const CompiledMethod* compiled_method ATTRIBUTE_UNUSED) OVERRIDE { - return offset; // No space reserved; no patches expected. - } - - uint32_t WriteThunks(OutputStream* out ATTRIBUTE_UNUSED, uint32_t offset) OVERRIDE { - return offset; // No thunks added; no patches expected. - } - - void Patch(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, uint32_t literal_offset ATTRIBUTE_UNUSED, - uint32_t patch_offset ATTRIBUTE_UNUSED, - uint32_t target_offset ATTRIBUTE_UNUSED) OVERRIDE { - LOG(FATAL) << "Unexpected relative patch."; - } - - private: - DISALLOW_COPY_AND_ASSIGN(NoRelativeCallPatcher); -}; - -class OatWriter::X86RelativeCallPatcher FINAL : public RelativeCallPatcher { - public: - X86RelativeCallPatcher() { } - - uint32_t ReserveSpace(uint32_t offset, - const CompiledMethod* compiled_method ATTRIBUTE_UNUSED) OVERRIDE { - return offset; // No space reserved; no limit on relative call distance. - } - - uint32_t WriteThunks(OutputStream* out ATTRIBUTE_UNUSED, uint32_t offset) OVERRIDE { - return offset; // No thunks added; no limit on relative call distance. - } - - void Patch(std::vector<uint8_t>* code, uint32_t literal_offset, uint32_t patch_offset, - uint32_t target_offset) OVERRIDE { - DCHECK_LE(literal_offset + 4u, code->size()); - // Unsigned arithmetic with its well-defined overflow behavior is just fine here. 
- uint32_t displacement = target_offset - patch_offset; - displacement -= kPcDisplacement; // The base PC is at the end of the 4-byte patch. - - typedef __attribute__((__aligned__(1))) int32_t unaligned_int32_t; - reinterpret_cast<unaligned_int32_t*>(&(*code)[literal_offset])[0] = displacement; - } - - private: - // PC displacement from patch location; x86 PC for relative calls points to the next - // instruction and the patch location is 4 bytes earlier. - static constexpr int32_t kPcDisplacement = 4; - - DISALLOW_COPY_AND_ASSIGN(X86RelativeCallPatcher); -}; - -class OatWriter::ArmBaseRelativeCallPatcher : public RelativeCallPatcher { - public: - ArmBaseRelativeCallPatcher(OatWriter* writer, - InstructionSet instruction_set, std::vector<uint8_t> thunk_code, - uint32_t max_positive_displacement, uint32_t max_negative_displacement) - : writer_(writer), instruction_set_(instruction_set), thunk_code_(thunk_code), - max_positive_displacement_(max_positive_displacement), - max_negative_displacement_(max_negative_displacement), - thunk_locations_(), current_thunk_to_write_(0u), unprocessed_patches_() { - } - - uint32_t ReserveSpace(uint32_t offset, const CompiledMethod* compiled_method) OVERRIDE { - // NOTE: The final thunk can be reserved from InitCodeMethodVisitor::EndClass() while it - // may be written early by WriteCodeMethodVisitor::VisitMethod() for a deduplicated chunk - // of code. To avoid any alignment discrepancies for the final chunk, we always align the - // offset after reserving of writing any chunk. - if (UNLIKELY(compiled_method == nullptr)) { - uint32_t aligned_offset = CompiledMethod::AlignCode(offset, instruction_set_); - bool needs_thunk = ReserveSpaceProcessPatches(aligned_offset); - if (needs_thunk) { - thunk_locations_.push_back(aligned_offset); - offset = CompiledMethod::AlignCode(aligned_offset + thunk_code_.size(), instruction_set_); - } - return offset; - } - DCHECK(compiled_method->GetQuickCode() != nullptr); - uint32_t quick_code_size = compiled_method->GetQuickCode()->size(); - uint32_t quick_code_offset = compiled_method->AlignCode(offset) + sizeof(OatQuickMethodHeader); - uint32_t next_aligned_offset = compiled_method->AlignCode(quick_code_offset + quick_code_size); - if (!unprocessed_patches_.empty() && - next_aligned_offset - unprocessed_patches_.front().second > max_positive_displacement_) { - bool needs_thunk = ReserveSpaceProcessPatches(next_aligned_offset); - if (needs_thunk) { - // A single thunk will cover all pending patches. 
- unprocessed_patches_.clear(); - uint32_t thunk_location = compiled_method->AlignCode(offset); - thunk_locations_.push_back(thunk_location); - offset = CompiledMethod::AlignCode(thunk_location + thunk_code_.size(), instruction_set_); - } - } - for (const LinkerPatch& patch : compiled_method->GetPatches()) { - if (patch.Type() == kLinkerPatchCallRelative) { - unprocessed_patches_.emplace_back(patch.TargetMethod(), - quick_code_offset + patch.LiteralOffset()); - } - } - return offset; - } - - uint32_t WriteThunks(OutputStream* out, uint32_t offset) OVERRIDE { - if (current_thunk_to_write_ == thunk_locations_.size()) { - return offset; - } - uint32_t aligned_offset = CompiledMethod::AlignCode(offset, instruction_set_); - if (UNLIKELY(aligned_offset == thunk_locations_[current_thunk_to_write_])) { - ++current_thunk_to_write_; - uint32_t aligned_code_delta = aligned_offset - offset; - if (aligned_code_delta != 0u && !writer_->WriteCodeAlignment(out, aligned_code_delta)) { - return 0u; - } - if (!out->WriteFully(thunk_code_.data(), thunk_code_.size())) { - return 0u; - } - writer_->size_relative_call_thunks_ += thunk_code_.size(); - uint32_t thunk_end_offset = aligned_offset + thunk_code_.size(); - // Align after writing chunk, see the ReserveSpace() above. - offset = CompiledMethod::AlignCode(thunk_end_offset, instruction_set_); - aligned_code_delta = offset - thunk_end_offset; - if (aligned_code_delta != 0u && !writer_->WriteCodeAlignment(out, aligned_code_delta)) { - return 0u; - } - } - return offset; - } - - protected: - uint32_t CalculateDisplacement(uint32_t patch_offset, uint32_t target_offset) { - // Unsigned arithmetic with its well-defined overflow behavior is just fine here. - uint32_t displacement = target_offset - patch_offset; - // NOTE: With unsigned arithmetic we do mean to use && rather than || below. - if (displacement > max_positive_displacement_ && displacement < -max_negative_displacement_) { - // Unwritten thunks have higher offsets, check if it's within range. - DCHECK(current_thunk_to_write_ == thunk_locations_.size() || - thunk_locations_[current_thunk_to_write_] > patch_offset); - if (current_thunk_to_write_ != thunk_locations_.size() && - thunk_locations_[current_thunk_to_write_] - patch_offset < max_positive_displacement_) { - displacement = thunk_locations_[current_thunk_to_write_] - patch_offset; - } else { - // We must have a previous thunk then. - DCHECK_NE(current_thunk_to_write_, 0u); - DCHECK_LT(thunk_locations_[current_thunk_to_write_ - 1], patch_offset); - displacement = thunk_locations_[current_thunk_to_write_ - 1] - patch_offset; - DCHECK(displacement >= -max_negative_displacement_); - } - } - return displacement; - } - - private: - bool ReserveSpaceProcessPatches(uint32_t next_aligned_offset) { - // Process as many patches as possible, stop only on unresolved targets or calls too far back. - while (!unprocessed_patches_.empty()) { - uint32_t patch_offset = unprocessed_patches_.front().second; - auto it = writer_->method_offset_map_.find(unprocessed_patches_.front().first); - if (it == writer_->method_offset_map_.end()) { - // If still unresolved, check if we have a thunk within range. 
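The removed CalculateDisplacement() above (this logic now lives in the dedicated linker/ patchers) falls back to a previously reserved thunk when the real callee is outside the branch range. A simplified, self-contained sketch of that decision; the names and the linear search are illustrative only, the real code tracks the single relevant thunk incrementally.

#include <cstdint>
#include <vector>

// If the callee is outside the branch range, route the call through a reachable thunk.
// `thunk_locations` is sorted by offset; `max_pos`/`max_neg` bound the branch range.
uint32_t ChooseBranchTarget(uint32_t patch_offset, uint32_t target_offset,
                            const std::vector<uint32_t>& thunk_locations,
                            uint32_t max_pos, uint32_t max_neg) {
  uint32_t displacement = target_offset - patch_offset;  // Unsigned wrap-around is intended.
  if (displacement <= max_pos || displacement >= 0u - max_neg) {
    return target_offset;  // The direct branch fits.
  }
  for (uint32_t thunk : thunk_locations) {
    uint32_t thunk_displacement = thunk - patch_offset;
    if (thunk_displacement <= max_pos || thunk_displacement >= 0u - max_neg) {
      return thunk;  // First reachable thunk; ReserveSpace() is what guarantees one exists.
    }
  }
  return target_offset;  // Unreachable if thunks were reserved correctly.
}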
- DCHECK(thunk_locations_.empty() || thunk_locations_.back() <= patch_offset); - if (thunk_locations_.empty() || - patch_offset - thunk_locations_.back() > max_negative_displacement_) { - return next_aligned_offset - patch_offset > max_positive_displacement_; - } - } else if (it->second >= patch_offset) { - DCHECK_LE(it->second - patch_offset, max_positive_displacement_); - } else { - // When calling back, check if we have a thunk that's closer than the actual target. - uint32_t target_offset = (thunk_locations_.empty() || it->second > thunk_locations_.back()) - ? it->second - : thunk_locations_.back(); - DCHECK_GT(patch_offset, target_offset); - if (patch_offset - target_offset > max_negative_displacement_) { - return true; - } - } - unprocessed_patches_.pop_front(); - } - return false; - } - - OatWriter* const writer_; - const InstructionSet instruction_set_; - const std::vector<uint8_t> thunk_code_; - const uint32_t max_positive_displacement_; - const uint32_t max_negative_displacement_; - std::vector<uint32_t> thunk_locations_; - size_t current_thunk_to_write_; - - // ReserveSpace() tracks unprocessed patches. - typedef std::pair<MethodReference, uint32_t> UnprocessedPatch; - std::deque<UnprocessedPatch> unprocessed_patches_; - - DISALLOW_COPY_AND_ASSIGN(ArmBaseRelativeCallPatcher); -}; - -class OatWriter::Thumb2RelativeCallPatcher FINAL : public ArmBaseRelativeCallPatcher { - public: - explicit Thumb2RelativeCallPatcher(OatWriter* writer) - : ArmBaseRelativeCallPatcher(writer, kThumb2, CompileThunkCode(), - kMaxPositiveDisplacement, kMaxNegativeDisplacement) { - } - - void Patch(std::vector<uint8_t>* code, uint32_t literal_offset, uint32_t patch_offset, - uint32_t target_offset) OVERRIDE { - DCHECK_LE(literal_offset + 4u, code->size()); - DCHECK_EQ(literal_offset & 1u, 0u); - DCHECK_EQ(patch_offset & 1u, 0u); - DCHECK_EQ(target_offset & 1u, 1u); // Thumb2 mode bit. - uint32_t displacement = CalculateDisplacement(patch_offset, target_offset & ~1u); - displacement -= kPcDisplacement; // The base PC is at the end of the 4-byte patch. - DCHECK_EQ(displacement & 1u, 0u); - DCHECK((displacement >> 24) == 0u || (displacement >> 24) == 255u); // 25-bit signed. - uint32_t signbit = (displacement >> 31) & 0x1; - uint32_t i1 = (displacement >> 23) & 0x1; - uint32_t i2 = (displacement >> 22) & 0x1; - uint32_t imm10 = (displacement >> 12) & 0x03ff; - uint32_t imm11 = (displacement >> 1) & 0x07ff; - uint32_t j1 = i1 ^ (signbit ^ 1); - uint32_t j2 = i2 ^ (signbit ^ 1); - uint32_t value = (signbit << 26) | (j1 << 13) | (j2 << 11) | (imm10 << 16) | imm11; - value |= 0xf000d000; // BL - - uint8_t* addr = &(*code)[literal_offset]; - // Check that we're just overwriting an existing BL. - DCHECK_EQ(addr[1] & 0xf8, 0xf0); - DCHECK_EQ(addr[3] & 0xd0, 0xd0); - // Write the new BL. - addr[0] = (value >> 16) & 0xff; - addr[1] = (value >> 24) & 0xff; - addr[2] = (value >> 0) & 0xff; - addr[3] = (value >> 8) & 0xff; - } - - private: - static std::vector<uint8_t> CompileThunkCode() { - // The thunk just uses the entry point in the ArtMethod. This works even for calls - // to the generic JNI and interpreter trampolines. 
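The bit-splitting above packs a 25-bit, 2-byte-aligned displacement into the S, imm10, J1, J2 and imm11 fields of the 32-bit Thumb2 BL encoding. A small helper that inverts it can make the J1/J2 trick easier to follow; this is illustrative only and operates on the assembled 32-bit value (opcode bits included) before it is split into the two little-endian half-words.

#include <cstdint>

// Recover the signed displacement from a 32-bit Thumb2 BL value laid out as above:
// S at bit 26, imm10 at bits 16..25, J1 at bit 13, J2 at bit 11, imm11 at bits 0..10.
int32_t DecodeThumb2BlDisplacement(uint32_t value) {
  uint32_t signbit = (value >> 26) & 1u;
  uint32_t j1 = (value >> 13) & 1u;
  uint32_t j2 = (value >> 11) & 1u;
  uint32_t i1 = (j1 ^ signbit) ^ 1u;  // Inverse of j1 = i1 ^ (signbit ^ 1).
  uint32_t i2 = (j2 ^ signbit) ^ 1u;
  uint32_t imm10 = (value >> 16) & 0x3ffu;
  uint32_t imm11 = value & 0x7ffu;
  uint32_t imm = (signbit << 24) | (i1 << 23) | (i2 << 22) | (imm10 << 12) | (imm11 << 1);
  if ((imm & (1u << 24)) != 0u) {
    imm |= ~((1u << 25) - 1u);  // Sign-extend the 25-bit result.
  }
  return static_cast<int32_t>(imm);
}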
- arm::Thumb2Assembler assembler; - assembler.LoadFromOffset( - arm::kLoadWord, arm::PC, arm::R0, - mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value()); - assembler.bkpt(0); - std::vector<uint8_t> thunk_code(assembler.CodeSize()); - MemoryRegion code(thunk_code.data(), thunk_code.size()); - assembler.FinalizeInstructions(code); - return thunk_code; - } - - // PC displacement from patch location; Thumb2 PC is always at instruction address + 4. - static constexpr int32_t kPcDisplacement = 4; - - // Maximum positive and negative displacement measured from the patch location. - // (Signed 25 bit displacement with the last bit 0 has range [-2^24, 2^24-2] measured from - // the Thumb2 PC pointing right after the BL, i.e. 4 bytes later than the patch location.) - static constexpr uint32_t kMaxPositiveDisplacement = (1u << 24) - 2 + kPcDisplacement; - static constexpr uint32_t kMaxNegativeDisplacement = (1u << 24) - kPcDisplacement; - - DISALLOW_COPY_AND_ASSIGN(Thumb2RelativeCallPatcher); -}; - -class OatWriter::Arm64RelativeCallPatcher FINAL : public ArmBaseRelativeCallPatcher { - public: - explicit Arm64RelativeCallPatcher(OatWriter* writer) - : ArmBaseRelativeCallPatcher(writer, kArm64, CompileThunkCode(), - kMaxPositiveDisplacement, kMaxNegativeDisplacement) { - } - - void Patch(std::vector<uint8_t>* code, uint32_t literal_offset, uint32_t patch_offset, - uint32_t target_offset) OVERRIDE { - DCHECK_LE(literal_offset + 4u, code->size()); - DCHECK_EQ(literal_offset & 3u, 0u); - DCHECK_EQ(patch_offset & 3u, 0u); - DCHECK_EQ(target_offset & 3u, 0u); - uint32_t displacement = CalculateDisplacement(patch_offset, target_offset & ~1u); - DCHECK_EQ(displacement & 3u, 0u); - DCHECK((displacement >> 27) == 0u || (displacement >> 27) == 31u); // 28-bit signed. - uint32_t value = (displacement & 0x0fffffffu) >> 2; - value |= 0x94000000; // BL - - uint8_t* addr = &(*code)[literal_offset]; - // Check that we're just overwriting an existing BL. - DCHECK_EQ(addr[3] & 0xfc, 0x94); - // Write the new BL. - addr[0] = (value >> 0) & 0xff; - addr[1] = (value >> 8) & 0xff; - addr[2] = (value >> 16) & 0xff; - addr[3] = (value >> 24) & 0xff; - } - - private: - static std::vector<uint8_t> CompileThunkCode() { - // The thunk just uses the entry point in the ArtMethod. This works even for calls - // to the generic JNI and interpreter trampolines. - arm64::Arm64Assembler assembler; - Offset offset(mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset( - kArm64PointerSize).Int32Value()); - assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0)); - // Ensure we emit the literal pool. - assembler.EmitSlowPaths(); - std::vector<uint8_t> thunk_code(assembler.CodeSize()); - MemoryRegion code(thunk_code.data(), thunk_code.size()); - assembler.FinalizeInstructions(code); - return thunk_code; - } - - // Maximum positive and negative displacement measured from the patch location. - // (Signed 28 bit displacement with the last bit 0 has range [-2^27, 2^27-4] measured from - // the ARM64 PC pointing to the BL.) 
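For ARM64 the same patch is much simpler: BL takes a 26-bit word-count immediate, so the 28-bit byte displacement is shifted right by two and OR-ed into opcode 0x94000000. A compact equivalent of the arithmetic above, with illustrative names only:

#include <cassert>
#include <cstdint>

// Encode an ARM64 BL with the given byte displacement from the branch instruction.
// The displacement must be 4-byte aligned and within the signed 28-bit range.
uint32_t EncodeArm64Bl(int32_t displacement) {
  assert((displacement & 3) == 0);
  assert(displacement >= -(1 << 27) && displacement < (1 << 27));
  return 0x94000000u | ((static_cast<uint32_t>(displacement) >> 2) & 0x03ffffffu);
}

The removed DCHECKs above express the same range constraint by requiring the top five bits of the displacement to be all zeros or all ones.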
- static constexpr uint32_t kMaxPositiveDisplacement = (1u << 27) - 4u; - static constexpr uint32_t kMaxNegativeDisplacement = (1u << 27); - - DISALLOW_COPY_AND_ASSIGN(Arm64RelativeCallPatcher); -}; - #define DCHECK_OFFSET() \ DCHECK_EQ(static_cast<off_t>(file_offset + relative_offset), out->Seek(0, kSeekCurrent)) \ << "file_offset=" << file_offset << " relative_offset=" << relative_offset @@ -427,6 +90,7 @@ OatWriter::OatWriter(const std::vector<const DexFile*>& dex_files, size_code_(0), size_code_alignment_(0), size_relative_call_thunks_(0), + size_misc_thunks_(0), size_mapping_table_(0), size_vmap_table_(0), size_gc_map_(0), @@ -442,23 +106,10 @@ OatWriter::OatWriter(const std::vector<const DexFile*>& dex_files, method_offset_map_() { CHECK(key_value_store != nullptr); - switch (compiler_driver_->GetInstructionSet()) { - case kX86: - case kX86_64: - relative_call_patcher_.reset(new X86RelativeCallPatcher); - break; - case kArm: - // Fall through: we generate Thumb2 code for "arm". - case kThumb2: - relative_call_patcher_.reset(new Thumb2RelativeCallPatcher(this)); - break; - case kArm64: - relative_call_patcher_.reset(new Arm64RelativeCallPatcher(this)); - break; - default: - relative_call_patcher_.reset(new NoRelativeCallPatcher); - break; - } + InstructionSet instruction_set = compiler_driver_->GetInstructionSet(); + const InstructionSetFeatures* features = compiler_driver_->GetInstructionSetFeatures(); + relative_patcher_ = linker::RelativePatcher::Create(instruction_set, features, + &method_offset_map_); size_t offset; { @@ -706,7 +357,7 @@ class OatWriter::InitCodeMethodVisitor : public OatDexMethodVisitor { bool EndClass() { OatDexMethodVisitor::EndClass(); if (oat_class_index_ == writer_->oat_classes_.size()) { - offset_ = writer_->relative_call_patcher_->ReserveSpace(offset_, nullptr); + offset_ = writer_->relative_patcher_->ReserveSpaceEnd(offset_); } return true; } @@ -722,36 +373,36 @@ class OatWriter::InitCodeMethodVisitor : public OatDexMethodVisitor { const SwapVector<uint8_t>* quick_code = compiled_method->GetQuickCode(); CHECK(quick_code != nullptr); - offset_ = writer_->relative_call_patcher_->ReserveSpace(offset_, compiled_method); - offset_ = compiled_method->AlignCode(offset_); - DCHECK_ALIGNED_PARAM(offset_, - GetInstructionSetAlignment(compiled_method->GetInstructionSet())); uint32_t code_size = quick_code->size() * sizeof(uint8_t); CHECK_NE(code_size, 0U); uint32_t thumb_offset = compiled_method->CodeDelta(); - quick_code_offset = offset_ + sizeof(OatQuickMethodHeader) + thumb_offset; - - bool deduped = false; // Deduplicate code arrays. 
+ bool deduped = false; auto lb = dedupe_map_.lower_bound(compiled_method); if (lb != dedupe_map_.end() && !dedupe_map_.key_comp()(compiled_method, lb->first)) { quick_code_offset = lb->second; deduped = true; } else { + offset_ = writer_->relative_patcher_->ReserveSpace( + offset_, compiled_method, MethodReference(dex_file_, it.GetMemberIndex())); + offset_ = compiled_method->AlignCode(offset_); + DCHECK_ALIGNED_PARAM(offset_, + GetInstructionSetAlignment(compiled_method->GetInstructionSet())); + quick_code_offset = offset_ + sizeof(OatQuickMethodHeader) + thumb_offset; dedupe_map_.PutBefore(lb, compiled_method, quick_code_offset); } MethodReference method_ref(dex_file_, it.GetMemberIndex()); - auto method_lb = writer_->method_offset_map_.lower_bound(method_ref); - if (method_lb != writer_->method_offset_map_.end() && - !writer_->method_offset_map_.key_comp()(method_ref, method_lb->first)) { + auto method_lb = writer_->method_offset_map_.map.lower_bound(method_ref); + if (method_lb != writer_->method_offset_map_.map.end() && + !writer_->method_offset_map_.map.key_comp()(method_ref, method_lb->first)) { // TODO: Should this be a hard failure? LOG(WARNING) << "Multiple definitions of " << PrettyMethod(method_ref.dex_method_index, *method_ref.dex_file) << ((method_lb->second != quick_code_offset) ? "; OFFSET MISMATCH" : ""); } else { - writer_->method_offset_map_.PutBefore(method_lb, method_ref, quick_code_offset); + writer_->method_offset_map_.map.PutBefore(method_lb, method_ref, quick_code_offset); } // Update quick method header. @@ -790,7 +441,7 @@ class OatWriter::InitCodeMethodVisitor : public OatDexMethodVisitor { if (!compiled_method->GetPatches().empty()) { uintptr_t base_loc = offset_ - code_size - writer_->oat_header_->GetExecutableOffset(); for (const LinkerPatch& patch : compiled_method->GetPatches()) { - if (patch.Type() != kLinkerPatchCallRelative) { + if (!patch.IsPcRelative()) { writer_->absolute_patch_locations_.push_back(base_loc + patch.LiteralOffset()); } } @@ -799,22 +450,18 @@ class OatWriter::InitCodeMethodVisitor : public OatDexMethodVisitor { if (writer_->compiler_driver_->GetCompilerOptions().GetIncludeDebugSymbols()) { // Record debug information for this function if we are doing that. - - std::string name = PrettyMethod(it.GetMemberIndex(), *dex_file_, true); - if (deduped) { - // TODO We should place the DEDUPED tag on the first instance of a deduplicated symbol - // so that it will show up in a debuggerd crash report. - name += " [ DEDUPED ]"; - } - const uint32_t quick_code_start = quick_code_offset - - writer_->oat_header_->GetExecutableOffset(); - const DexFile::CodeItem *code_item = it.GetMethodCodeItem(); - writer_->method_info_.push_back(DebugInfo(name, - dex_file_->GetSourceFile(dex_file_->GetClassDef(class_def_index_)), - quick_code_start, quick_code_start + code_size, - code_item == nullptr ? 
nullptr : dex_file_->GetDebugInfoStream(code_item), - compiled_method)); + writer_->oat_header_->GetExecutableOffset() - thumb_offset; + writer_->method_info_.push_back(DebugInfo { + dex_file_, + class_def_index_, + it.GetMemberIndex(), + it.GetMethodAccessFlags(), + it.GetMethodCodeItem(), + deduped, + quick_code_start, + quick_code_start + code_size, + compiled_method}); } if (kIsDebugBuild) { @@ -851,6 +498,37 @@ class OatWriter::InitCodeMethodVisitor : public OatDexMethodVisitor { } private: + struct CodeOffsetsKeyComparator { + bool operator()(const CompiledMethod* lhs, const CompiledMethod* rhs) const { + if (lhs->GetQuickCode() != rhs->GetQuickCode()) { + return lhs->GetQuickCode() < rhs->GetQuickCode(); + } + // If the code is the same, all other fields are likely to be the same as well. + if (UNLIKELY(lhs->GetMappingTable() != rhs->GetMappingTable())) { + return lhs->GetMappingTable() < rhs->GetMappingTable(); + } + if (UNLIKELY(lhs->GetVmapTable() != rhs->GetVmapTable())) { + return lhs->GetVmapTable() < rhs->GetVmapTable(); + } + if (UNLIKELY(lhs->GetGcMap() != rhs->GetGcMap())) { + return lhs->GetGcMap() < rhs->GetGcMap(); + } + const auto& lhs_patches = lhs->GetPatches(); + const auto& rhs_patches = rhs->GetPatches(); + if (UNLIKELY(lhs_patches.size() != rhs_patches.size())) { + return lhs_patches.size() < rhs_patches.size(); + } + auto rit = rhs_patches.begin(); + for (const LinkerPatch& lpatch : lhs_patches) { + if (UNLIKELY(!(lpatch == *rit))) { + return lpatch < *rit; + } + ++rit; + } + return false; + } + }; + // Deduplication is already done on a pointer basis by the compiler driver, // so we can simply compare the pointers to find out if things are duplicated. SafeMap<const CompiledMethod*, uint32_t, CodeOffsetsKeyComparator> dedupe_map_; @@ -978,7 +656,7 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { bool result = OatDexMethodVisitor::EndClass(); if (oat_class_index_ == writer_->oat_classes_.size()) { DCHECK(result); // OatDexMethodVisitor::EndClass() never fails. - offset_ = writer_->relative_call_patcher_->WriteThunks(out_, offset_); + offset_ = writer_->relative_patcher_->WriteThunks(out_, offset_); if (UNLIKELY(offset_ == 0u)) { PLOG(ERROR) << "Failed to write final relative call thunks"; result = false; @@ -1000,33 +678,32 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { if (quick_code != nullptr) { // Need a wrapper if we create a copy for patching. ArrayRef<const uint8_t> wrapped(*quick_code); - - offset_ = writer_->relative_call_patcher_->WriteThunks(out, offset_); - if (offset_ == 0u) { - ReportWriteFailure("relative call thunk", it); - return false; - } - uint32_t aligned_offset = compiled_method->AlignCode(offset_); - uint32_t aligned_code_delta = aligned_offset - offset_; - if (aligned_code_delta != 0) { - if (!writer_->WriteCodeAlignment(out, aligned_code_delta)) { - ReportWriteFailure("code alignment padding", it); - return false; - } - offset_ += aligned_code_delta; - DCHECK_OFFSET_(); - } - DCHECK_ALIGNED_PARAM(offset_, - GetInstructionSetAlignment(compiled_method->GetInstructionSet())); uint32_t code_size = quick_code->size() * sizeof(uint8_t); CHECK_NE(code_size, 0U); // Deduplicate code arrays. 
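The CodeOffsetsKeyComparator that moved into InitCodeMethodVisitor above orders CompiledMethods by the identity of their data arrays rather than by their contents, which is sound because, as its comment notes, the compiler driver has already deduplicated the underlying storage. A stripped-down sketch of the same lower_bound/key_comp pattern, with placeholder types standing in for CompiledMethod:

#include <cstdint>
#include <map>
#include <vector>

// Pointer-identity deduplication: two entries sharing the same code array get the
// same assigned offset. `Method` and `AssignOffset` are placeholders for this sketch.
struct Method { const std::vector<uint8_t>* code; };

struct ByCodePointer {
  bool operator()(const Method* lhs, const Method* rhs) const {
    return lhs->code < rhs->code;  // Compare storage identity, not contents.
  }
};

uint32_t AssignOffset(std::map<const Method*, uint32_t, ByCodePointer>* dedupe_map,
                      const Method* m, uint32_t* next_offset) {
  auto lb = dedupe_map->lower_bound(m);
  if (lb != dedupe_map->end() && !dedupe_map->key_comp()(m, lb->first)) {
    return lb->second;                    // Duplicate: reuse the earlier offset.
  }
  uint32_t offset = *next_offset;
  *next_offset += static_cast<uint32_t>(m->code->size());
  dedupe_map->insert(lb, {m, offset});    // The hint reuses the lower_bound work.
  return offset;
}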
const OatMethodOffsets& method_offsets = oat_class->method_offsets_[method_offsets_index_]; - DCHECK(method_offsets.code_offset_ < offset_ || method_offsets.code_offset_ == - offset_ + sizeof(OatQuickMethodHeader) + compiled_method->CodeDelta()) - << PrettyMethod(it.GetMemberIndex(), *dex_file_); if (method_offsets.code_offset_ >= offset_) { + offset_ = writer_->relative_patcher_->WriteThunks(out, offset_); + if (offset_ == 0u) { + ReportWriteFailure("relative call thunk", it); + return false; + } + uint32_t aligned_offset = compiled_method->AlignCode(offset_); + uint32_t aligned_code_delta = aligned_offset - offset_; + if (aligned_code_delta != 0) { + if (!writer_->WriteCodeAlignment(out, aligned_code_delta)) { + ReportWriteFailure("code alignment padding", it); + return false; + } + offset_ += aligned_code_delta; + DCHECK_OFFSET_(); + } + DCHECK_ALIGNED_PARAM(offset_, + GetInstructionSetAlignment(compiled_method->GetInstructionSet())); + DCHECK_EQ(method_offsets.code_offset_, + offset_ + sizeof(OatQuickMethodHeader) + compiled_method->CodeDelta()) + << PrettyMethod(it.GetMemberIndex(), *dex_file_); const OatQuickMethodHeader& method_header = oat_class->method_headers_[method_offsets_index_]; writer_->oat_header_->UpdateChecksum(&method_header, sizeof(method_header)); @@ -1039,15 +716,21 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { DCHECK_OFFSET_(); if (!compiled_method->GetPatches().empty()) { - patched_code_ = std::vector<uint8_t>(quick_code->begin(), quick_code->end()); + patched_code_.assign(quick_code->begin(), quick_code->end()); wrapped = ArrayRef<const uint8_t>(patched_code_); for (const LinkerPatch& patch : compiled_method->GetPatches()) { if (patch.Type() == kLinkerPatchCallRelative) { // NOTE: Relative calls across oat files are not supported. uint32_t target_offset = GetTargetOffset(patch); uint32_t literal_offset = patch.LiteralOffset(); - writer_->relative_call_patcher_->Patch(&patched_code_, literal_offset, + writer_->relative_patcher_->PatchCall(&patched_code_, literal_offset, offset_ + literal_offset, target_offset); + } else if (patch.Type() == kLinkerPatchDexCacheArray) { + uint32_t target_offset = GetDexCacheOffset(patch); + uint32_t literal_offset = patch.LiteralOffset(); + writer_->relative_patcher_->PatchDexCacheReference(&patched_code_, patch, + offset_ + literal_offset, + target_offset); } else if (patch.Type() == kLinkerPatchCall) { uint32_t target_offset = GetTargetOffset(patch); PatchCodeAddress(&patched_code_, patch.LiteralOffset(), target_offset); @@ -1102,9 +785,9 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { } uint32_t GetTargetOffset(const LinkerPatch& patch) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - auto target_it = writer_->method_offset_map_.find(patch.TargetMethod()); + auto target_it = writer_->method_offset_map_.map.find(patch.TargetMethod()); uint32_t target_offset = - (target_it != writer_->method_offset_map_.end()) ? target_it->second : 0u; + (target_it != writer_->method_offset_map_.map.end()) ? target_it->second : 0u; // If there's no compiled code, point to the correct trampoline. 
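The new kLinkerPatchDexCacheArray branch above dispatches to the per-architecture PatchDexCacheReference() implementations added earlier in this patch. On 32-bit x86 there is no RIP-relative addressing, so the compiled code materializes a base register with a "call +0; pop <reg>" pair and the patcher rewrites the 32-bit displacement of the following memory operand against that anchor. A self-contained sketch of the arithmetic (names are illustrative; a little-endian host is assumed):

#include <cstdint>
#include <cstring>
#include <vector>

// Rewrite the 32-bit displacement of an [anchor-register + imm32] operand so that
// anchor + imm32 == target. `anchor_literal_offset` is the in-method offset of the
// 'pop' that materializes the anchor (the return address of the preceding 'call +0').
void PatchX86DexCacheReference(std::vector<uint8_t>* code,
                               uint32_t literal_offset,         // Offset of the imm32 in `code`.
                               uint32_t anchor_literal_offset,  // Offset of the anchor in `code`.
                               uint32_t patch_offset,           // File offset of the imm32.
                               uint32_t target_offset) {        // File offset of the dex cache element.
  uint32_t anchor_offset = patch_offset - literal_offset + anchor_literal_offset;
  uint32_t diff = target_offset - anchor_offset;
  std::memcpy(code->data() + literal_offset, &diff, 4u);
}

This is the same computation the X86RelativePatcher performs earlier in this patch, and it is what the X86 DexCacheReference test checks with its "mov eax, [ebx + diff]" expectation.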
if (UNLIKELY(target_offset == 0)) { mirror::ArtMethod* target = GetTargetMethod(patch); @@ -1134,6 +817,18 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { return type; } + uint32_t GetDexCacheOffset(const LinkerPatch& patch) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + if (writer_->image_writer_ != nullptr) { + auto* element = writer_->image_writer_->GetDexCacheArrayElementImageAddress( + patch.TargetDexCacheDexFile(), patch.TargetDexCacheElementOffset()); + const uint8_t* oat_data = writer_->image_writer_->GetOatFileBegin() + file_offset_; + return reinterpret_cast<const uint8_t*>(element) - oat_data; + } else { + LOG(FATAL) << "Unimplemented."; + UNREACHABLE(); + } + } + void PatchObjectAddress(std::vector<uint8_t>* code, uint32_t offset, mirror::Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { // NOTE: Direct method pointers across oat files don't use linker patches. However, direct @@ -1472,6 +1167,7 @@ bool OatWriter::Write(OutputStream* out) { DO_STAT(size_code_); DO_STAT(size_code_alignment_); DO_STAT(size_relative_call_thunks_); + DO_STAT(size_misc_thunks_); DO_STAT(size_mapping_table_); DO_STAT(size_vmap_table_); DO_STAT(size_gc_map_); @@ -1630,6 +1326,10 @@ size_t OatWriter::WriteCodeDexFiles(OutputStream* out, #undef VISIT + size_code_alignment_ += relative_patcher_->CodeAlignmentSize(); + size_relative_call_thunks_ += relative_patcher_->RelativeCallThunksSize(); + size_misc_thunks_ += relative_patcher_->MiscThunksSize(); + return relative_offset; } @@ -1645,6 +1345,15 @@ bool OatWriter::WriteCodeAlignment(OutputStream* out, uint32_t aligned_code_delt return true; } +std::pair<bool, uint32_t> OatWriter::MethodOffsetMap::FindMethodOffset(MethodReference ref) { + auto it = map.find(ref); + if (it == map.end()) { + return std::pair<bool, uint32_t>(false, 0u); + } else { + return std::pair<bool, uint32_t>(true, it->second); + } +} + OatWriter::OatDexFile::OatDexFile(size_t offset, const DexFile& dex_file) { offset_ = offset; const std::string& location(dex_file.GetLocation()); diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h index fd2ccae4a5..51bc9b4483 100644 --- a/compiler/oat_writer.h +++ b/compiler/oat_writer.h @@ -21,7 +21,7 @@ #include <cstddef> #include <memory> -#include "driver/compiler_driver.h" +#include "linker/relative_patcher.h" // For linker::RelativePatcherTargetProvider. #include "mem_map.h" #include "method_reference.h" #include "oat.h" @@ -32,8 +32,10 @@ namespace art { class BitVector; class CompiledMethod; +class CompilerDriver; class ImageWriter; class OutputStream; +class TimingLogger; // OatHeader variable length with count of D OatDexFiles // @@ -113,25 +115,25 @@ class OatWriter { ~OatWriter(); struct DebugInfo { - DebugInfo(const std::string& method_name, const char* src_file_name, - uint32_t low_pc, uint32_t high_pc, const uint8_t* dbgstream, - CompiledMethod* compiled_method) - : method_name_(method_name), src_file_name_(src_file_name), - low_pc_(low_pc), high_pc_(high_pc), dbgstream_(dbgstream), - compiled_method_(compiled_method) { - } - std::string method_name_; // Note: this name is a pretty-printed name. 
- const char* src_file_name_; - uint32_t low_pc_; - uint32_t high_pc_; - const uint8_t* dbgstream_; + const DexFile* dex_file_; + size_t class_def_index_; + uint32_t dex_method_index_; + uint32_t access_flags_; + const DexFile::CodeItem *code_item_; + bool deduped_; + uint32_t low_pc_; + uint32_t high_pc_; CompiledMethod* compiled_method_; }; - const std::vector<DebugInfo>& GetCFIMethodInfo() const { + const std::vector<DebugInfo>& GetMethodDebugInfo() const { return method_info_; } + const CompilerDriver* GetCompilerDriver() { + return compiler_driver_; + } + private: // The DataAccess classes are helper classes that provide access to members related to // a given map, i.e. GC map, mapping table or vmap table. By abstracting these away @@ -312,6 +314,7 @@ class OatWriter { uint32_t size_code_; uint32_t size_code_alignment_; uint32_t size_relative_call_thunks_; + uint32_t size_misc_thunks_; uint32_t size_mapping_table_; uint32_t size_vmap_table_; uint32_t size_gc_map_; @@ -325,50 +328,19 @@ class OatWriter { uint32_t size_oat_class_method_bitmaps_; uint32_t size_oat_class_method_offsets_; - class RelativeCallPatcher; - class NoRelativeCallPatcher; - class X86RelativeCallPatcher; - class ArmBaseRelativeCallPatcher; - class Thumb2RelativeCallPatcher; - class Arm64RelativeCallPatcher; - - std::unique_ptr<RelativeCallPatcher> relative_call_patcher_; + std::unique_ptr<linker::RelativePatcher> relative_patcher_; // The locations of absolute patches relative to the start of the executable section. std::vector<uintptr_t> absolute_patch_locations_; - SafeMap<MethodReference, uint32_t, MethodReferenceComparator> method_offset_map_; - - struct CodeOffsetsKeyComparator { - bool operator()(const CompiledMethod* lhs, const CompiledMethod* rhs) const { - if (lhs->GetQuickCode() != rhs->GetQuickCode()) { - return lhs->GetQuickCode() < rhs->GetQuickCode(); - } - // If the code is the same, all other fields are likely to be the same as well. - if (UNLIKELY(lhs->GetMappingTable() != rhs->GetMappingTable())) { - return lhs->GetMappingTable() < rhs->GetMappingTable(); - } - if (UNLIKELY(lhs->GetVmapTable() != rhs->GetVmapTable())) { - return lhs->GetVmapTable() < rhs->GetVmapTable(); - } - if (UNLIKELY(lhs->GetGcMap() != rhs->GetGcMap())) { - return lhs->GetGcMap() < rhs->GetGcMap(); - } - const auto& lhs_patches = lhs->GetPatches(); - const auto& rhs_patches = rhs->GetPatches(); - if (UNLIKELY(lhs_patches.size() != rhs_patches.size())) { - return lhs_patches.size() < rhs_patches.size(); - } - auto rit = rhs_patches.begin(); - for (const LinkerPatch& lpatch : lhs_patches) { - if (UNLIKELY(!(lpatch == *rit))) { - return lpatch < *rit; - } - ++rit; - } - return false; - } + // Map method reference to assigned offset. + // Wrap the map in a class implementing linker::RelativePatcherTargetProvider. 
+ class MethodOffsetMap FINAL : public linker::RelativePatcherTargetProvider { + public: + std::pair<bool, uint32_t> FindMethodOffset(MethodReference ref) OVERRIDE; + SafeMap<MethodReference, uint32_t, MethodReferenceComparator> map; }; + MethodOffsetMap method_offset_map_; DISALLOW_COPY_AND_ASSIGN(OatWriter); }; diff --git a/compiler/optimizing/boolean_simplifier.cc b/compiler/optimizing/boolean_simplifier.cc index ab77505b6f..be432c5a20 100644 --- a/compiler/optimizing/boolean_simplifier.cc +++ b/compiler/optimizing/boolean_simplifier.cc @@ -59,7 +59,8 @@ static HInstruction* GetOppositeCondition(HInstruction* cond) { return new (allocator) HGreaterThan(lhs, rhs); } else if (cond->IsGreaterThan()) { return new (allocator) HLessThanOrEqual(lhs, rhs); - } else if (cond->IsGreaterThanOrEqual()) { + } else { + DCHECK(cond->IsGreaterThanOrEqual()); return new (allocator) HLessThan(lhs, rhs); } } else if (cond->IsIntConstant()) { @@ -70,10 +71,11 @@ static HInstruction* GetOppositeCondition(HInstruction* cond) { DCHECK(int_const->IsOne()); return graph->GetIntConstant(0); } + } else { + // General case when 'cond' is another instruction of type boolean. + // Negate with 'cond == 0'. + return new (allocator) HEqual(cond, graph->GetIntConstant(0)); } - - // TODO: b/19992954 - return nullptr; } void HBooleanSimplifier::Run() { @@ -105,10 +107,6 @@ void HBooleanSimplifier::Run() { HInstruction* replacement; if (NegatesCondition(true_value, false_value)) { replacement = GetOppositeCondition(if_condition); - if (replacement == nullptr) { - // Something we could not handle. - continue; - } if (replacement->GetBlock() == nullptr) { block->InsertInstructionBefore(replacement, if_instruction); } diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index 1d167949f4..6511120794 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -239,7 +239,6 @@ class ValueBound : public ValueObject { *underflow = true; return Min(); } - return ValueBound(instruction_, new_constant); } private: @@ -443,9 +442,31 @@ class MonotonicValueRange : public ValueRange { class BCEVisitor : public HGraphVisitor { public: + // The least number of bounds checks that should be eliminated by triggering + // the deoptimization technique. + static constexpr size_t kThresholdForAddingDeoptimize = 2; + + // Very large constant index is considered as an anomaly. This is a threshold + // beyond which we don't bother to apply the deoptimization technique since + // it's likely some AIOOBE will be thrown. + static constexpr int32_t kMaxConstantForAddingDeoptimize = INT_MAX - 1024 * 1024; + explicit BCEVisitor(HGraph* graph) : HGraphVisitor(graph), - maps_(graph->GetBlocks().Size()) {} + maps_(graph->GetBlocks().Size()), + need_to_revisit_block_(false) {} + + void VisitBasicBlock(HBasicBlock* block) OVERRIDE { + first_constant_index_bounds_check_map_.clear(); + HGraphVisitor::VisitBasicBlock(block); + if (need_to_revisit_block_) { + AddComparesWithDeoptimization(block); + need_to_revisit_block_ = false; + first_constant_index_bounds_check_map_.clear(); + GetValueRangeMap(block)->clear(); + HGraphVisitor::VisitBasicBlock(block); + } + } private: // Return the map of proven value ranges at the beginning of a basic block. 
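The boolean_simplifier change above removes the "could not handle" path: when the value being negated is a general boolean rather than a comparison, its negation is now materialized as an equality with zero, so GetOppositeCondition() always returns an instruction. A plain C++ analogue of the overall rewrite, illustrative only (the real pass transforms HInstructions, not source):

#include <cassert>

// An if/else that only selects the constants true/false collapses into one boolean
// expression; for a general boolean condition the negation is written as (cond == 0),
// which is the new fallback case above.
bool BeforeRewrite(int cond) { if (cond != 0) { return false; } else { return true; } }
bool AfterRewrite(int cond)  { return cond == 0; }

int main() {
  for (int cond : {0, 1, 42}) {
    assert(BeforeRewrite(cond) == AfterRewrite(cond));
  }
  return 0;
}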
@@ -701,9 +722,26 @@ class BCEVisitor : public HGraphVisitor { } } + if (first_constant_index_bounds_check_map_.find(array_length->GetId()) == + first_constant_index_bounds_check_map_.end()) { + // Remember the first bounds check against array_length of a constant index. + // That bounds check instruction has an associated HEnvironment where we + // may add an HDeoptimize to eliminate bounds checks of constant indices + // against array_length. + first_constant_index_bounds_check_map_.Put(array_length->GetId(), bounds_check); + } else { + // We've seen it at least twice. It's beneficial to introduce a compare with + // deoptimization fallback to eliminate the bounds checks. + need_to_revisit_block_ = true; + } + // Once we have an array access like 'array[5] = 1', we record array.length >= 6. // We currently don't do it for non-constant index since a valid array[i] can't prove // a valid array[i-1] yet due to the lower bound side. + if (constant == INT_MAX) { + // INT_MAX as an index will definitely throw AIOOBE. + return; + } ValueBound lower = ValueBound(nullptr, constant + 1); ValueBound upper = ValueBound::Max(); ValueRange* range = new (GetGraph()->GetArena()) @@ -938,8 +976,90 @@ class BCEVisitor : public HGraphVisitor { } } + void VisitDeoptimize(HDeoptimize* deoptimize) { + // Right now it's only HLessThanOrEqual. + DCHECK(deoptimize->InputAt(0)->IsLessThanOrEqual()); + HLessThanOrEqual* less_than_or_equal = deoptimize->InputAt(0)->AsLessThanOrEqual(); + HInstruction* instruction = less_than_or_equal->InputAt(0); + if (instruction->IsArrayLength()) { + HInstruction* constant = less_than_or_equal->InputAt(1); + DCHECK(constant->IsIntConstant()); + DCHECK(constant->AsIntConstant()->GetValue() <= kMaxConstantForAddingDeoptimize); + ValueBound lower = ValueBound(nullptr, constant->AsIntConstant()->GetValue() + 1); + ValueRange* range = new (GetGraph()->GetArena()) + ValueRange(GetGraph()->GetArena(), lower, ValueBound::Max()); + GetValueRangeMap(deoptimize->GetBlock())->Overwrite(instruction->GetId(), range); + } + } + + void AddCompareWithDeoptimization(HInstruction* array_length, + HIntConstant* const_instr, + HBasicBlock* block) { + DCHECK(array_length->IsArrayLength()); + ValueRange* range = LookupValueRange(array_length, block); + ValueBound lower_bound = range->GetLower(); + DCHECK(lower_bound.IsConstant()); + DCHECK(const_instr->GetValue() <= kMaxConstantForAddingDeoptimize); + DCHECK_EQ(lower_bound.GetConstant(), const_instr->GetValue() + 1); + + // If array_length is less than lower_const, deoptimize. 
+ HBoundsCheck* bounds_check = first_constant_index_bounds_check_map_.Get( + array_length->GetId())->AsBoundsCheck(); + HCondition* cond = new (GetGraph()->GetArena()) HLessThanOrEqual(array_length, const_instr); + HDeoptimize* deoptimize = new (GetGraph()->GetArena()) + HDeoptimize(cond, bounds_check->GetDexPc()); + block->InsertInstructionBefore(cond, bounds_check); + block->InsertInstructionBefore(deoptimize, bounds_check); + deoptimize->CopyEnvironmentFrom(bounds_check->GetEnvironment()); + } + + void AddComparesWithDeoptimization(HBasicBlock* block) { + for (ArenaSafeMap<int, HBoundsCheck*>::iterator it = + first_constant_index_bounds_check_map_.begin(); + it != first_constant_index_bounds_check_map_.end(); + ++it) { + HBoundsCheck* bounds_check = it->second; + HArrayLength* array_length = bounds_check->InputAt(1)->AsArrayLength(); + HIntConstant* lower_bound_const_instr = nullptr; + int32_t lower_bound_const = INT_MIN; + size_t counter = 0; + // Count the constant indexing for which bounds checks haven't + // been removed yet. + for (HUseIterator<HInstruction*> it2(array_length->GetUses()); + !it2.Done(); + it2.Advance()) { + HInstruction* user = it2.Current()->GetUser(); + if (user->GetBlock() == block && + user->IsBoundsCheck() && + user->AsBoundsCheck()->InputAt(0)->IsIntConstant()) { + DCHECK_EQ(array_length, user->AsBoundsCheck()->InputAt(1)); + HIntConstant* const_instr = user->AsBoundsCheck()->InputAt(0)->AsIntConstant(); + if (const_instr->GetValue() > lower_bound_const) { + lower_bound_const = const_instr->GetValue(); + lower_bound_const_instr = const_instr; + } + counter++; + } + } + if (counter >= kThresholdForAddingDeoptimize && + lower_bound_const_instr->GetValue() <= kMaxConstantForAddingDeoptimize) { + AddCompareWithDeoptimization(array_length, lower_bound_const_instr, block); + } + } + } + std::vector<std::unique_ptr<ArenaSafeMap<int, ValueRange*>>> maps_; + // Map an HArrayLength instruction's id to the first HBoundsCheck instruction in + // a block that checks a constant index against that HArrayLength. + SafeMap<int, HBoundsCheck*> first_constant_index_bounds_check_map_; + + // For the block, there is at least one HArrayLength instruction for which there + // is more than one bounds check instruction with constant indexing. And it's + // beneficial to add a compare instruction that has deoptimization fallback and + // eliminate those bounds checks. + bool need_to_revisit_block_; + DISALLOW_COPY_AND_ASSIGN(BCEVisitor); }; diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc index b3653fe903..75cf1cf063 100644 --- a/compiler/optimizing/bounds_check_elimination_test.cc +++ b/compiler/optimizing/bounds_check_elimination_test.cc @@ -284,9 +284,9 @@ TEST(BoundsCheckEliminationTest, UnderflowArrayBoundsElimination) { ASSERT_FALSE(IsRemoved(bounds_check)); } -// array[5] = 1; // Can't eliminate. -// array[4] = 1; // Can eliminate. // array[6] = 1; // Can't eliminate. +// array[5] = 1; // Can eliminate. +// array[4] = 1; // Can eliminate. 
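A minimal standalone example (hypothetical numbers; GuardPasses is an illustrative helper, not ART code) of the property the compare-with-deoptimization relies on: once a single array_length <= max_constant test has been passed without deoptimizing, every bounds check with a constant index no larger than max_constant is provably redundant.

#include <cassert>
#include <cstdint>

// Models the HLessThanOrEqual + HDeoptimize pair: returns true when execution
// may continue, i.e. when deoptimization is NOT taken.
bool GuardPasses(int32_t array_length, int32_t max_constant) {
  return !(array_length <= max_constant);
}

int main() {
  const int32_t length = 10;        // array.length at run time
  const int32_t max_constant = 6;   // largest constant index seen in the block
  if (GuardPasses(length, max_constant)) {
    for (int32_t c = 0; c <= max_constant; ++c) {
      assert(c < length);  // each constant-index bounds check is now redundant
    }
  }
  return 0;
}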
TEST(BoundsCheckEliminationTest, ConstantArrayBoundsElimination) { ArenaPool pool; ArenaAllocator allocator(&pool); @@ -311,35 +311,35 @@ TEST(BoundsCheckEliminationTest, ConstantArrayBoundsElimination) { HNullCheck* null_check = new (&allocator) HNullCheck(parameter, 0); HArrayLength* array_length = new (&allocator) HArrayLength(null_check); - HBoundsCheck* bounds_check5 = new (&allocator) - HBoundsCheck(constant_5, array_length, 0); + HBoundsCheck* bounds_check6 = new (&allocator) + HBoundsCheck(constant_6, array_length, 0); HInstruction* array_set = new (&allocator) HArraySet( - null_check, bounds_check5, constant_1, Primitive::kPrimInt, 0); + null_check, bounds_check6, constant_1, Primitive::kPrimInt, 0); block->AddInstruction(null_check); block->AddInstruction(array_length); - block->AddInstruction(bounds_check5); + block->AddInstruction(bounds_check6); block->AddInstruction(array_set); null_check = new (&allocator) HNullCheck(parameter, 0); array_length = new (&allocator) HArrayLength(null_check); - HBoundsCheck* bounds_check4 = new (&allocator) - HBoundsCheck(constant_4, array_length, 0); + HBoundsCheck* bounds_check5 = new (&allocator) + HBoundsCheck(constant_5, array_length, 0); array_set = new (&allocator) HArraySet( - null_check, bounds_check4, constant_1, Primitive::kPrimInt, 0); + null_check, bounds_check5, constant_1, Primitive::kPrimInt, 0); block->AddInstruction(null_check); block->AddInstruction(array_length); - block->AddInstruction(bounds_check4); + block->AddInstruction(bounds_check5); block->AddInstruction(array_set); null_check = new (&allocator) HNullCheck(parameter, 0); array_length = new (&allocator) HArrayLength(null_check); - HBoundsCheck* bounds_check6 = new (&allocator) - HBoundsCheck(constant_6, array_length, 0); + HBoundsCheck* bounds_check4 = new (&allocator) + HBoundsCheck(constant_4, array_length, 0); array_set = new (&allocator) HArraySet( - null_check, bounds_check6, constant_1, Primitive::kPrimInt, 0); + null_check, bounds_check4, constant_1, Primitive::kPrimInt, 0); block->AddInstruction(null_check); block->AddInstruction(array_length); - block->AddInstruction(bounds_check6); + block->AddInstruction(bounds_check4); block->AddInstruction(array_set); block->AddInstruction(new (&allocator) HGoto()); @@ -353,9 +353,9 @@ TEST(BoundsCheckEliminationTest, ConstantArrayBoundsElimination) { RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination(graph); bounds_check_elimination.Run(); - ASSERT_FALSE(IsRemoved(bounds_check5)); - ASSERT_TRUE(IsRemoved(bounds_check4)); ASSERT_FALSE(IsRemoved(bounds_check6)); + ASSERT_TRUE(IsRemoved(bounds_check5)); + ASSERT_TRUE(IsRemoved(bounds_check4)); } // for (int i=initial; i<array.length; i+=increment) { array[i] = 10; } diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index 2cdd5af9f3..a912d4ccc4 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -23,6 +23,7 @@ #include "dex_instruction.h" #include "dex_instruction-inl.h" #include "driver/compiler_driver-inl.h" +#include "driver/compiler_options.h" #include "mirror/art_field.h" #include "mirror/art_field-inl.h" #include "mirror/class_loader.h" @@ -230,8 +231,7 @@ void HGraphBuilder::MaybeRecordStat(MethodCompilationStat compilation_stat) { } } -bool HGraphBuilder::SkipCompilation(size_t number_of_dex_instructions, - size_t number_of_blocks ATTRIBUTE_UNUSED, +bool HGraphBuilder::SkipCompilation(const DexFile::CodeItem& code_item, size_t number_of_branches) { const CompilerOptions& 
compiler_options = compiler_driver_->GetCompilerOptions(); CompilerOptions::CompilerFilter compiler_filter = compiler_options.GetCompilerFilter(); @@ -239,19 +239,20 @@ bool HGraphBuilder::SkipCompilation(size_t number_of_dex_instructions, return false; } - if (compiler_options.IsHugeMethod(number_of_dex_instructions)) { + if (compiler_options.IsHugeMethod(code_item.insns_size_in_code_units_)) { VLOG(compiler) << "Skip compilation of huge method " << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_) - << ": " << number_of_dex_instructions << " dex instructions"; + << ": " << code_item.insns_size_in_code_units_ << " code units"; MaybeRecordStat(MethodCompilationStat::kNotCompiledHugeMethod); return true; } // If it's large and contains no branches, it's likely to be machine generated initialization. - if (compiler_options.IsLargeMethod(number_of_dex_instructions) && (number_of_branches == 0)) { + if (compiler_options.IsLargeMethod(code_item.insns_size_in_code_units_) + && (number_of_branches == 0)) { VLOG(compiler) << "Skip compilation of large method with no branch " << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_) - << ": " << number_of_dex_instructions << " dex instructions"; + << ": " << code_item.insns_size_in_code_units_ << " code units"; MaybeRecordStat(MethodCompilationStat::kNotCompiledLargeMethodNoBranches); return true; } @@ -278,18 +279,14 @@ bool HGraphBuilder::BuildGraph(const DexFile::CodeItem& code_item) { // Compute the number of dex instructions, blocks, and branches. We will // check these values against limits given to the compiler. - size_t number_of_dex_instructions = 0; - size_t number_of_blocks = 0; size_t number_of_branches = 0; // To avoid splitting blocks, we compute ahead of time the instructions that // start a new block, and create these blocks. - ComputeBranchTargets( - code_ptr, code_end, &number_of_dex_instructions, &number_of_blocks, &number_of_branches); + ComputeBranchTargets(code_ptr, code_end, &number_of_branches); // Note that the compiler driver is null when unit testing. - if ((compiler_driver_ != nullptr) - && SkipCompilation(number_of_dex_instructions, number_of_blocks, number_of_branches)) { + if ((compiler_driver_ != nullptr) && SkipCompilation(code_item, number_of_branches)) { return false; } @@ -355,8 +352,6 @@ void HGraphBuilder::MaybeUpdateCurrentBlock(size_t index) { void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, const uint16_t* code_end, - size_t* number_of_dex_instructions, - size_t* number_of_blocks, size_t* number_of_branches) { branch_targets_.SetSize(code_end - code_ptr); @@ -369,7 +364,6 @@ void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, // the locations these instructions branch to. 
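A minimal sketch of the filtering rule above; the threshold constants here are assumptions for the example, the real values come from CompilerOptions::IsHugeMethod and IsLargeMethod. A method is skipped when it is huge, or when it is merely large but contains no branches at all, a shape typical of machine-generated initializers.

#include <cstddef>

// Assumed example thresholds, in dex code units; not the real defaults.
constexpr size_t kHugeMethodCodeUnits = 10000;
constexpr size_t kLargeMethodCodeUnits = 600;

bool ShouldSkipCompilation(size_t insns_size_in_code_units, size_t number_of_branches) {
  if (insns_size_in_code_units > kHugeMethodCodeUnits) {
    return true;  // huge method
  }
  if (insns_size_in_code_units > kLargeMethodCodeUnits && number_of_branches == 0) {
    return true;  // large method with no branches
  }
  return false;
}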
uint32_t dex_pc = 0; while (code_ptr < code_end) { - (*number_of_dex_instructions)++; const Instruction& instruction = *Instruction::At(code_ptr); if (instruction.IsBranch()) { (*number_of_branches)++; @@ -378,14 +372,12 @@ void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, if (FindBlockStartingAt(target) == nullptr) { block = new (arena_) HBasicBlock(graph_, target); branch_targets_.Put(target, block); - (*number_of_blocks)++; } dex_pc += instruction.SizeInCodeUnits(); code_ptr += instruction.SizeInCodeUnits(); if ((code_ptr < code_end) && (FindBlockStartingAt(dex_pc) == nullptr)) { block = new (arena_) HBasicBlock(graph_, dex_pc); branch_targets_.Put(dex_pc, block); - (*number_of_blocks)++; } } else if (instruction.IsSwitch()) { SwitchTable table(instruction, dex_pc, instruction.Opcode() == Instruction::SPARSE_SWITCH); @@ -403,14 +395,12 @@ void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, if (FindBlockStartingAt(target) == nullptr) { block = new (arena_) HBasicBlock(graph_, target); branch_targets_.Put(target, block); - (*number_of_blocks)++; } // The next case gets its own block. if (i < num_entries) { block = new (arena_) HBasicBlock(graph_, target); branch_targets_.Put(table.GetDexPcForIndex(i), block); - (*number_of_blocks)++; } } @@ -420,7 +410,6 @@ void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, if ((code_ptr < code_end) && (FindBlockStartingAt(dex_pc) == nullptr)) { block = new (arena_) HBasicBlock(graph_, dex_pc); branch_targets_.Put(dex_pc, block); - (*number_of_blocks)++; } } else { code_ptr += instruction.SizeInCodeUnits(); diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h index 6a0738a7b9..dc6d97eb0c 100644 --- a/compiler/optimizing/builder.h +++ b/compiler/optimizing/builder.h @@ -90,8 +90,6 @@ class HGraphBuilder : public ValueObject { // branches. void ComputeBranchTargets(const uint16_t* start, const uint16_t* end, - size_t* number_of_dex_instructions, - size_t* number_of_block, size_t* number_of_branches); void MaybeUpdateCurrentBlock(size_t index); HBasicBlock* FindBlockStartingAt(int32_t index) const; @@ -217,9 +215,7 @@ class HGraphBuilder : public ValueObject { HInstruction* value, int32_t case_value_int, int32_t target_offset, uint32_t dex_pc); - bool SkipCompilation(size_t number_of_dex_instructions, - size_t number_of_blocks, - size_t number_of_branches); + bool SkipCompilation(const DexFile::CodeItem& code_item, size_t number_of_branches); void MaybeRecordStat(MethodCompilationStat compilation_stat); diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index bd6e943bf0..8736374306 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -82,6 +82,7 @@ void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline) HGraphVisitor* instruction_visitor = GetInstructionVisitor(); DCHECK_EQ(current_block_index_, 0u); GenerateFrameEntry(); + DCHECK_EQ(GetAssembler()->cfi().GetCurrentCFAOffset(), static_cast<int>(frame_size_)); for (size_t e = block_order_->Size(); current_block_index_ < e; ++current_block_index_) { HBasicBlock* block = block_order_->Get(current_block_index_); // Don't generate code for an empty block. 
Its predecessors will branch to its successor @@ -132,7 +133,6 @@ size_t CodeGenerator::FindFreeEntry(bool* array, size_t length) { } LOG(FATAL) << "Could not find a register in baseline register allocator"; UNREACHABLE(); - return -1; } size_t CodeGenerator::FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t length) { @@ -145,7 +145,6 @@ size_t CodeGenerator::FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t l } LOG(FATAL) << "Could not find a register in baseline register allocator"; UNREACHABLE(); - return -1; } void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots, @@ -378,10 +377,14 @@ CodeGenerator* CodeGenerator::Create(HGraph* graph, case kMips: return nullptr; case kX86: { - return new x86::CodeGeneratorX86(graph, compiler_options); + return new x86::CodeGeneratorX86(graph, + *isa_features.AsX86InstructionSetFeatures(), + compiler_options); } case kX86_64: { - return new x86_64::CodeGeneratorX86_64(graph, compiler_options); + return new x86_64::CodeGeneratorX86_64(graph, + *isa_features.AsX86_64InstructionSetFeatures(), + compiler_options); } default: return nullptr; @@ -413,7 +416,16 @@ void CodeGenerator::BuildNativeGCMap( } } -void CodeGenerator::BuildMappingTable(std::vector<uint8_t>* data, DefaultSrcMap* src_map) const { +void CodeGenerator::BuildSourceMap(DefaultSrcMap* src_map) const { + for (size_t i = 0; i < pc_infos_.Size(); i++) { + struct PcInfo pc_info = pc_infos_.Get(i); + uint32_t pc2dex_offset = pc_info.native_pc; + int32_t pc2dex_dalvik_offset = pc_info.dex_pc; + src_map->push_back(SrcMapElem({pc2dex_offset, pc2dex_dalvik_offset})); + } +} + +void CodeGenerator::BuildMappingTable(std::vector<uint8_t>* data) const { uint32_t pc2dex_data_size = 0u; uint32_t pc2dex_entries = pc_infos_.Size(); uint32_t pc2dex_offset = 0u; @@ -423,19 +435,12 @@ void CodeGenerator::BuildMappingTable(std::vector<uint8_t>* data, DefaultSrcMap* uint32_t dex2pc_offset = 0u; int32_t dex2pc_dalvik_offset = 0; - if (src_map != nullptr) { - src_map->reserve(pc2dex_entries); - } - for (size_t i = 0; i < pc2dex_entries; i++) { struct PcInfo pc_info = pc_infos_.Get(i); pc2dex_data_size += UnsignedLeb128Size(pc_info.native_pc - pc2dex_offset); pc2dex_data_size += SignedLeb128Size(pc_info.dex_pc - pc2dex_dalvik_offset); pc2dex_offset = pc_info.native_pc; pc2dex_dalvik_offset = pc_info.dex_pc; - if (src_map != nullptr) { - src_map->push_back(SrcMapElem({pc2dex_offset, pc2dex_dalvik_offset})); - } } // Walk over the blocks and find which ones correspond to catch block entries. 
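A minimal sketch (plain structs instead of ART's PcInfo/SrcMapElem) of what the new BuildSourceMap does on its own: it emits one (native pc, dex pc) pair per recorded PcInfo, leaving the LEB128-encoded mapping table entirely to BuildMappingTable.

#include <cstdint>
#include <vector>

struct PcInfoSketch { uint32_t native_pc; uint32_t dex_pc; };
struct SrcMapEntry { uint32_t from_native_pc; int32_t to_dex_pc; };

std::vector<SrcMapEntry> BuildSourceMapSketch(const std::vector<PcInfoSketch>& pc_infos) {
  std::vector<SrcMapEntry> src_map;
  src_map.reserve(pc_infos.size());
  for (const PcInfoSketch& info : pc_infos) {
    src_map.push_back({info.native_pc, static_cast<int32_t>(info.dex_pc)});
  }
  return src_map;
}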
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 07ca6b1ccf..b888aca264 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -205,7 +205,8 @@ class CodeGenerator { slow_paths_.Add(slow_path); } - void BuildMappingTable(std::vector<uint8_t>* vector, DefaultSrcMap* src_map) const; + void BuildSourceMap(DefaultSrcMap* src_map) const; + void BuildMappingTable(std::vector<uint8_t>* vector) const; void BuildVMapTable(std::vector<uint8_t>* vector) const; void BuildNativeGCMap( std::vector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const; @@ -425,6 +426,8 @@ class CodeGenerator { StackMapStream stack_map_stream_; + friend class OptimizingCFITest; + DISALLOW_COPY_AND_ASSIGN(CodeGenerator); }; diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 1f95041a92..a799a519c0 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -287,6 +287,26 @@ class TypeCheckSlowPathARM : public SlowPathCodeARM { DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARM); }; +class DeoptimizationSlowPathARM : public SlowPathCodeARM { + public: + explicit DeoptimizationSlowPathARM(HInstruction* instruction) + : instruction_(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, instruction_->GetLocations()); + DCHECK(instruction_->IsDeoptimize()); + HDeoptimize* deoptimize = instruction_->AsDeoptimize(); + uint32_t dex_pc = deoptimize->GetDexPc(); + CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); + arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this); + } + + private: + HInstruction* const instruction_; + DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM); +}; + #undef __ #undef __ @@ -493,6 +513,14 @@ void CodeGeneratorARM::ComputeSpillMask() { } } +static dwarf::Reg DWARFReg(Register reg) { + return dwarf::Reg::ArmCore(static_cast<int>(reg)); +} + +static dwarf::Reg DWARFReg(SRegister reg) { + return dwarf::Reg::ArmFp(static_cast<int>(reg)); +} + void CodeGeneratorARM::GenerateFrameEntry() { bool skip_overflow_check = IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm); @@ -511,12 +539,19 @@ void CodeGeneratorARM::GenerateFrameEntry() { // PC is in the list of callee-save to mimic Quick, but we need to push // LR at entry instead. 
- __ PushList((core_spill_mask_ & (~(1 << PC))) | 1 << LR); + uint32_t push_mask = (core_spill_mask_ & (~(1 << PC))) | 1 << LR; + __ PushList(push_mask); + __ cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(push_mask)); + __ cfi().RelOffsetForMany(DWARFReg(Register(0)), 0, push_mask, kArmWordSize); if (fpu_spill_mask_ != 0) { SRegister start_register = SRegister(LeastSignificantBit(fpu_spill_mask_)); __ vpushs(start_register, POPCOUNT(fpu_spill_mask_)); + __ cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(fpu_spill_mask_)); + __ cfi().RelOffsetForMany(DWARFReg(SRegister(0)), 0, fpu_spill_mask_, kArmWordSize); } - __ AddConstant(SP, -(GetFrameSize() - FrameEntrySpillSize())); + int adjust = GetFrameSize() - FrameEntrySpillSize(); + __ AddConstant(SP, -adjust); + __ cfi().AdjustCFAOffset(adjust); __ StoreToOffset(kStoreWord, R0, SP, 0); } @@ -525,10 +560,14 @@ void CodeGeneratorARM::GenerateFrameExit() { __ bx(LR); return; } - __ AddConstant(SP, GetFrameSize() - FrameEntrySpillSize()); + int adjust = GetFrameSize() - FrameEntrySpillSize(); + __ AddConstant(SP, adjust); + __ cfi().AdjustCFAOffset(-adjust); if (fpu_spill_mask_ != 0) { SRegister start_register = SRegister(LeastSignificantBit(fpu_spill_mask_)); __ vpops(start_register, POPCOUNT(fpu_spill_mask_)); + __ cfi().AdjustCFAOffset(-kArmPointerSize * POPCOUNT(fpu_spill_mask_)); + __ cfi().RestoreMany(DWARFReg(SRegister(0)), fpu_spill_mask_); } __ PopList(core_spill_mask_); } @@ -542,7 +581,6 @@ Location CodeGeneratorARM::GetStackLocation(HLoadLocal* load) const { case Primitive::kPrimLong: case Primitive::kPrimDouble: return Location::DoubleStackSlot(GetStackSlot(load->GetLocal())); - break; case Primitive::kPrimInt: case Primitive::kPrimNot: @@ -555,10 +593,11 @@ Location CodeGeneratorARM::GetStackLocation(HLoadLocal* load) const { case Primitive::kPrimShort: case Primitive::kPrimVoid: LOG(FATAL) << "Unexpected type " << load->GetType(); + UNREACHABLE(); } LOG(FATAL) << "Unreachable"; - return Location(); + UNREACHABLE(); } Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) { @@ -663,7 +702,6 @@ Location InvokeDexCallingConventionVisitor::GetReturnLocation(Primitive::Type ty return Location(); } UNREACHABLE(); - return Location(); } void CodeGeneratorARM::Move32(Location destination, Location source) { @@ -887,24 +925,17 @@ void InstructionCodeGeneratorARM::VisitExit(HExit* exit) { UNUSED(exit); } -void LocationsBuilderARM::VisitIf(HIf* if_instr) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall); - HInstruction* cond = if_instr->InputAt(0); - if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { - locations->SetInAt(0, Location::RequiresRegister()); - } -} - -void InstructionCodeGeneratorARM::VisitIf(HIf* if_instr) { - HInstruction* cond = if_instr->InputAt(0); +void InstructionCodeGeneratorARM::GenerateTestAndBranch(HInstruction* instruction, + Label* true_target, + Label* false_target, + Label* always_true_target) { + HInstruction* cond = instruction->InputAt(0); if (cond->IsIntConstant()) { // Constant condition, statically compared against 1. 
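A minimal model (CfiSketch is a hypothetical class with only the two calls used here) of the bookkeeping the cfi() calls above perform for the ARM prologue: every stack adjustment moves the CFA offset, and every saved register records the slot it was pushed to, so that by the end of GenerateFrameEntry the tracked CFA offset equals the frame size, which is exactly what the DCHECK added in CompileInternal verifies.

#include <cassert>
#include <utility>
#include <vector>

class CfiSketch {
 public:
  void AdjustCFAOffset(int delta) { cfa_offset_ += delta; }
  // Records that DWARF register `reg` was saved `offset` bytes above the
  // current stack pointer, i.e. at CFA - cfa_offset_ + offset.
  void RelOffset(int reg, int offset) { saves_.push_back({reg, offset - cfa_offset_}); }
  int GetCurrentCFAOffset() const { return cfa_offset_; }

 private:
  int cfa_offset_ = 0;
  std::vector<std::pair<int, int>> saves_;  // (reg, offset relative to CFA)
};

int main() {
  const int kWord = 4;
  const int frame_size = 64;  // assumed example frame size
  CfiSketch cfi;
  // push {r4-r11, lr}: nine words, lowest-numbered register at the lowest address.
  const int pushed[] = {4, 5, 6, 7, 8, 9, 10, 11, 14};
  cfi.AdjustCFAOffset(9 * kWord);
  for (int i = 0; i < 9; ++i) cfi.RelOffset(pushed[i], i * kWord);
  // sub sp, sp, #adjust for spill slots and outgoing arguments.
  cfi.AdjustCFAOffset(frame_size - 9 * kWord);
  assert(cfi.GetCurrentCFAOffset() == frame_size);
  return 0;
}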
int32_t cond_value = cond->AsIntConstant()->GetValue(); if (cond_value == 1) { - if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), - if_instr->IfTrueSuccessor())) { - __ b(codegen_->GetLabelOf(if_instr->IfTrueSuccessor())); + if (always_true_target != nullptr) { + __ b(always_true_target); } return; } else { @@ -913,10 +944,10 @@ void InstructionCodeGeneratorARM::VisitIf(HIf* if_instr) { } else { if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { // Condition has been materialized, compare the output to 0 - DCHECK(if_instr->GetLocations()->InAt(0).IsRegister()); - __ cmp(if_instr->GetLocations()->InAt(0).AsRegister<Register>(), + DCHECK(instruction->GetLocations()->InAt(0).IsRegister()); + __ cmp(instruction->GetLocations()->InAt(0).AsRegister<Register>(), ShifterOperand(0)); - __ b(codegen_->GetLabelOf(if_instr->IfTrueSuccessor()), NE); + __ b(true_target, NE); } else { // Condition has not been materialized, use its inputs as the // comparison and its condition as the branch condition. @@ -938,16 +969,55 @@ void InstructionCodeGeneratorARM::VisitIf(HIf* if_instr) { __ cmp(left, ShifterOperand(temp)); } } - __ b(codegen_->GetLabelOf(if_instr->IfTrueSuccessor()), - ARMCondition(cond->AsCondition()->GetCondition())); + __ b(true_target, ARMCondition(cond->AsCondition()->GetCondition())); } } - if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), - if_instr->IfFalseSuccessor())) { - __ b(codegen_->GetLabelOf(if_instr->IfFalseSuccessor())); + if (false_target != nullptr) { + __ b(false_target); + } +} + +void LocationsBuilderARM::VisitIf(HIf* if_instr) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall); + HInstruction* cond = if_instr->InputAt(0); + if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { + locations->SetInAt(0, Location::RequiresRegister()); + } +} + +void InstructionCodeGeneratorARM::VisitIf(HIf* if_instr) { + Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor()); + Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor()); + Label* always_true_target = true_target; + if (codegen_->GoesToNextBlock(if_instr->GetBlock(), + if_instr->IfTrueSuccessor())) { + always_true_target = nullptr; + } + if (codegen_->GoesToNextBlock(if_instr->GetBlock(), + if_instr->IfFalseSuccessor())) { + false_target = nullptr; + } + GenerateTestAndBranch(if_instr, true_target, false_target, always_true_target); +} + +void LocationsBuilderARM::VisitDeoptimize(HDeoptimize* deoptimize) { + LocationSummary* locations = new (GetGraph()->GetArena()) + LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); + HInstruction* cond = deoptimize->InputAt(0); + DCHECK(cond->IsCondition()); + if (cond->AsCondition()->NeedsMaterialization()) { + locations->SetInAt(0, Location::RequiresRegister()); } } +void InstructionCodeGeneratorARM::VisitDeoptimize(HDeoptimize* deoptimize) { + SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) + DeoptimizationSlowPathARM(deoptimize); + codegen_->AddSlowPath(slow_path); + Label* slow_path_entry = slow_path->GetEntryLabel(); + GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry); +} void LocationsBuilderARM::VisitCondition(HCondition* comp) { LocationSummary* locations = @@ -1139,7 +1209,10 @@ void LocationsBuilderARM::VisitReturnVoid(HReturnVoid* ret) { void InstructionCodeGeneratorARM::VisitReturnVoid(HReturnVoid* ret) { UNUSED(ret); + __ cfi().RememberState(); codegen_->GenerateFrameExit(); + __ 
cfi().RestoreState(); + __ cfi().DefCFAOffset(codegen_->GetFrameSize()); } void LocationsBuilderARM::VisitReturn(HReturn* ret) { @@ -1150,7 +1223,10 @@ void LocationsBuilderARM::VisitReturn(HReturn* ret) { void InstructionCodeGeneratorARM::VisitReturn(HReturn* ret) { UNUSED(ret); + __ cfi().RememberState(); codegen_->GenerateFrameExit(); + __ cfi().RestoreState(); + __ cfi().DefCFAOffset(codegen_->GetFrameSize()); } void LocationsBuilderARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index bcdea7a639..06f425ea21 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -188,6 +188,10 @@ class InstructionCodeGeneratorARM : public HGraphVisitor { void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); void GenerateImplicitNullCheck(HNullCheck* instruction); void GenerateExplicitNullCheck(HNullCheck* instruction); + void GenerateTestAndBranch(HInstruction* instruction, + Label* true_target, + Label* false_target, + Label* always_true_target); ArmAssembler* const assembler_; CodeGeneratorARM* const codegen_; diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 32ada3837e..5fe8adc86a 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -352,6 +352,26 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 { DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARM64); }; +class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 { + public: + explicit DeoptimizationSlowPathARM64(HInstruction* instruction) + : instruction_(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, instruction_->GetLocations()); + DCHECK(instruction_->IsDeoptimize()); + HDeoptimize* deoptimize = instruction_->AsDeoptimize(); + uint32_t dex_pc = deoptimize->GetDexPc(); + CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); + arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this); + } + + private: + HInstruction* const instruction_; + DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM64); +}; + #undef __ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) { @@ -445,18 +465,65 @@ void CodeGeneratorARM64::GenerateFrameEntry() { // ... : reserved frame space. // sp[0] : current method. 
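A minimal model of why each Return/ReturnVoid above brackets the frame teardown with RememberState()/RestoreState() and then re-establishes the frame's CFA offset: unwind info is a state machine over the instruction stream, so code laid out after an early return must still be described as sitting inside a full frame. The class below is a sketch of the DW_CFA_remember_state/DW_CFA_restore_state push/pop behaviour, not the ART writer.

#include <cassert>
#include <vector>

class CfiStateSketch {
 public:
  void DefCFAOffset(int offset) { cfa_offset_ = offset; }
  void AdjustCFAOffset(int delta) { cfa_offset_ += delta; }
  void RememberState() { saved_.push_back(cfa_offset_); }                   // DW_CFA_remember_state
  void RestoreState() { cfa_offset_ = saved_.back(); saved_.pop_back(); }   // DW_CFA_restore_state
  int cfa_offset() const { return cfa_offset_; }

 private:
  int cfa_offset_ = 0;
  std::vector<int> saved_;
};

int main() {
  CfiStateSketch cfi;
  cfi.DefCFAOffset(64);        // frame fully set up (assumed example size)
  cfi.RememberState();
  cfi.AdjustCFAOffset(-64);    // frame exit emitted for an early return
  cfi.RestoreState();          // instructions after the return are still in the frame
  assert(cfi.cfa_offset() == 64);
  return 0;
}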
__ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex)); - __ PokeCPURegList(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize()); - __ PokeCPURegList(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize()); + GetAssembler()->cfi().AdjustCFAOffset(frame_size); + SpillRegisters(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize()); + SpillRegisters(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize()); } } void CodeGeneratorARM64::GenerateFrameExit() { if (!HasEmptyFrame()) { int frame_size = GetFrameSize(); - __ PeekCPURegList(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize()); - __ PeekCPURegList(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize()); + UnspillRegisters(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize()); + UnspillRegisters(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize()); __ Drop(frame_size); + GetAssembler()->cfi().AdjustCFAOffset(-frame_size); + } +} + +static inline dwarf::Reg DWARFReg(CPURegister reg) { + if (reg.IsFPRegister()) { + return dwarf::Reg::Arm64Fp(reg.code()); + } else { + DCHECK_LT(reg.code(), 31u); // X0 - X30. + return dwarf::Reg::Arm64Core(reg.code()); + } +} + +void CodeGeneratorARM64::SpillRegisters(vixl::CPURegList registers, int offset) { + int size = registers.RegisterSizeInBytes(); + while (registers.Count() >= 2) { + const CPURegister& dst0 = registers.PopLowestIndex(); + const CPURegister& dst1 = registers.PopLowestIndex(); + __ Stp(dst0, dst1, MemOperand(__ StackPointer(), offset)); + GetAssembler()->cfi().RelOffset(DWARFReg(dst0), offset); + GetAssembler()->cfi().RelOffset(DWARFReg(dst1), offset + size); + offset += 2 * size; } + if (!registers.IsEmpty()) { + const CPURegister& dst0 = registers.PopLowestIndex(); + __ Str(dst0, MemOperand(__ StackPointer(), offset)); + GetAssembler()->cfi().RelOffset(DWARFReg(dst0), offset); + } + DCHECK(registers.IsEmpty()); +} + +void CodeGeneratorARM64::UnspillRegisters(vixl::CPURegList registers, int offset) { + int size = registers.RegisterSizeInBytes(); + while (registers.Count() >= 2) { + const CPURegister& dst0 = registers.PopLowestIndex(); + const CPURegister& dst1 = registers.PopLowestIndex(); + __ Ldp(dst0, dst1, MemOperand(__ StackPointer(), offset)); + GetAssembler()->cfi().Restore(DWARFReg(dst0)); + GetAssembler()->cfi().Restore(DWARFReg(dst1)); + offset += 2 * size; + } + if (!registers.IsEmpty()) { + const CPURegister& dst0 = registers.PopLowestIndex(); + __ Ldr(dst0, MemOperand(__ StackPointer(), offset)); + GetAssembler()->cfi().Restore(DWARFReg(dst0)); + } + DCHECK(registers.IsEmpty()); } void CodeGeneratorARM64::Bind(HBasicBlock* block) { @@ -1611,25 +1678,18 @@ void InstructionCodeGeneratorARM64::VisitGoto(HGoto* got) { } } -void LocationsBuilderARM64::VisitIf(HIf* if_instr) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr); - HInstruction* cond = if_instr->InputAt(0); - if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { - locations->SetInAt(0, Location::RequiresRegister()); - } -} - -void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) { - HInstruction* cond = if_instr->InputAt(0); +void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruction, + vixl::Label* true_target, + vixl::Label* false_target, + vixl::Label* always_true_target) { + HInstruction* cond = instruction->InputAt(0); HCondition* condition = cond->AsCondition(); - vixl::Label* true_target = 
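A minimal sketch (a plain vector of DWARF register numbers instead of vixl::CPURegList) of the spill layout SpillRegisters produces above: registers are stored in pairs with stp, so the second register of each pair is recorded one slot above the first, and a lone trailing register falls back to a single str.

#include <cstddef>
#include <vector>

struct SpillSlot { int dwarf_reg; int offset; };

std::vector<SpillSlot> LayoutSpills(const std::vector<int>& regs, int offset, int slot_size) {
  std::vector<SpillSlot> slots;
  size_t i = 0;
  for (; i + 1 < regs.size(); i += 2) {
    // stp reg[i], reg[i+1], [sp, #offset]
    slots.push_back({regs[i], offset});                  // cfi().RelOffset(reg[i], offset)
    slots.push_back({regs[i + 1], offset + slot_size});  // cfi().RelOffset(reg[i+1], offset + size)
    offset += 2 * slot_size;
  }
  if (i < regs.size()) {
    // str reg[i], [sp, #offset] for the leftover register
    slots.push_back({regs[i], offset});
  }
  return slots;
}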
codegen_->GetLabelOf(if_instr->IfTrueSuccessor()); - vixl::Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor()); if (cond->IsIntConstant()) { int32_t cond_value = cond->AsIntConstant()->GetValue(); if (cond_value == 1) { - if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), if_instr->IfTrueSuccessor())) { - __ B(true_target); + if (always_true_target != nullptr) { + __ B(always_true_target); } return; } else { @@ -1637,31 +1697,87 @@ void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) { } } else if (!cond->IsCondition() || condition->NeedsMaterialization()) { // The condition instruction has been materialized, compare the output to 0. - Location cond_val = if_instr->GetLocations()->InAt(0); + Location cond_val = instruction->GetLocations()->InAt(0); DCHECK(cond_val.IsRegister()); - __ Cbnz(InputRegisterAt(if_instr, 0), true_target); + __ Cbnz(InputRegisterAt(instruction, 0), true_target); } else { // The condition instruction has not been materialized, use its inputs as // the comparison and its condition as the branch condition. Register lhs = InputRegisterAt(condition, 0); Operand rhs = InputOperandAt(condition, 1); Condition arm64_cond = ARM64Condition(condition->GetCondition()); - if ((arm64_cond == eq || arm64_cond == ne) && rhs.IsImmediate() && (rhs.immediate() == 0)) { - if (arm64_cond == eq) { - __ Cbz(lhs, true_target); - } else { - __ Cbnz(lhs, true_target); + if ((arm64_cond != gt && arm64_cond != le) && rhs.IsImmediate() && (rhs.immediate() == 0)) { + switch (arm64_cond) { + case eq: + __ Cbz(lhs, true_target); + break; + case ne: + __ Cbnz(lhs, true_target); + break; + case lt: + // Test the sign bit and branch accordingly. + __ Tbnz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, true_target); + break; + case ge: + // Test the sign bit and branch accordingly. + __ Tbz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, true_target); + break; + default: + // Without the `static_cast` the compiler throws an error for + // `-Werror=sign-promo`. 
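A minimal standalone check of the identity the lt/ge cases above rely on when the right-hand side is the constant zero: for a two's-complement value, x < 0 holds exactly when the sign bit (bit kWRegSize - 1 for a W register) is set, and x >= 0 exactly when it is clear, so the compare-and-branch folds into a single tbnz or tbz.

#include <cassert>
#include <cstdint>

bool SignBitSet(int32_t x) {
  return (static_cast<uint32_t>(x) >> 31) != 0;  // bit 31 == kWRegSize - 1
}

int main() {
  const int32_t samples[] = {INT32_MIN, -1, 0, 1, INT32_MAX};
  for (int32_t x : samples) {
    assert((x < 0) == SignBitSet(x));    // lt against 0 -> Tbnz on the sign bit
    assert((x >= 0) == !SignBitSet(x));  // ge against 0 -> Tbz on the sign bit
  }
  return 0;
}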
+ LOG(FATAL) << "Unexpected condition: " << static_cast<int>(arm64_cond); } } else { __ Cmp(lhs, rhs); __ B(arm64_cond, true_target); } } - if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), if_instr->IfFalseSuccessor())) { + if (false_target != nullptr) { __ B(false_target); } } +void LocationsBuilderARM64::VisitIf(HIf* if_instr) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr); + HInstruction* cond = if_instr->InputAt(0); + if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { + locations->SetInAt(0, Location::RequiresRegister()); + } +} + +void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) { + vixl::Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor()); + vixl::Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor()); + vixl::Label* always_true_target = true_target; + if (codegen_->GoesToNextBlock(if_instr->GetBlock(), + if_instr->IfTrueSuccessor())) { + always_true_target = nullptr; + } + if (codegen_->GoesToNextBlock(if_instr->GetBlock(), + if_instr->IfFalseSuccessor())) { + false_target = nullptr; + } + GenerateTestAndBranch(if_instr, true_target, false_target, always_true_target); +} + +void LocationsBuilderARM64::VisitDeoptimize(HDeoptimize* deoptimize) { + LocationSummary* locations = new (GetGraph()->GetArena()) + LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); + HInstruction* cond = deoptimize->InputAt(0); + DCHECK(cond->IsCondition()); + if (cond->AsCondition()->NeedsMaterialization()) { + locations->SetInAt(0, Location::RequiresRegister()); + } +} + +void InstructionCodeGeneratorARM64::VisitDeoptimize(HDeoptimize* deoptimize) { + SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) + DeoptimizationSlowPathARM64(deoptimize); + codegen_->AddSlowPath(slow_path); + vixl::Label* slow_path_entry = slow_path->GetEntryLabel(); + GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry); +} + void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); @@ -2349,8 +2465,11 @@ void LocationsBuilderARM64::VisitReturn(HReturn* instruction) { void InstructionCodeGeneratorARM64::VisitReturn(HReturn* instruction) { UNUSED(instruction); + GetAssembler()->cfi().RememberState(); codegen_->GenerateFrameExit(); __ Ret(); + GetAssembler()->cfi().RestoreState(); + GetAssembler()->cfi().DefCFAOffset(codegen_->GetFrameSize()); } void LocationsBuilderARM64::VisitReturnVoid(HReturnVoid* instruction) { @@ -2359,8 +2478,11 @@ void LocationsBuilderARM64::VisitReturnVoid(HReturnVoid* instruction) { void InstructionCodeGeneratorARM64::VisitReturnVoid(HReturnVoid* instruction) { UNUSED(instruction); + GetAssembler()->cfi().RememberState(); codegen_->GenerateFrameExit(); __ Ret(); + GetAssembler()->cfi().RestoreState(); + GetAssembler()->cfi().DefCFAOffset(codegen_->GetFrameSize()); } void LocationsBuilderARM64::VisitShl(HShl* shl) { diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 2c624d2926..9430e31037 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -23,8 +23,8 @@ #include "nodes.h" #include "parallel_move_resolver.h" #include "utils/arm64/assembler_arm64.h" -#include "a64/disasm-a64.h" -#include "a64/macro-assembler-a64.h" +#include "vixl/a64/disasm-a64.h" +#include "vixl/a64/macro-assembler-a64.h" #include 
"arch/arm64/quick_method_frame_info_arm64.h" namespace art { @@ -165,6 +165,10 @@ class InstructionCodeGeneratorARM64 : public HGraphVisitor { void HandleShift(HBinaryOperation* instr); void GenerateImplicitNullCheck(HNullCheck* instruction); void GenerateExplicitNullCheck(HNullCheck* instruction); + void GenerateTestAndBranch(HInstruction* instruction, + vixl::Label* true_target, + vixl::Label* false_target, + vixl::Label* always_true_target); Arm64Assembler* const assembler_; CodeGeneratorARM64* const codegen_; @@ -223,6 +227,8 @@ class CodeGeneratorARM64 : public CodeGenerator { void GenerateFrameEntry() OVERRIDE; void GenerateFrameExit() OVERRIDE; + void SpillRegisters(vixl::CPURegList registers, int offset); + void UnspillRegisters(vixl::CPURegList registers, int offset); vixl::CPURegList GetFramePreservedCoreRegisters() const { return vixl::CPURegList(vixl::CPURegister::kRegister, vixl::kXRegSize, diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 007e25ab4a..a6fb07fa98 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -52,7 +52,7 @@ class NullCheckSlowPathX86 : public SlowPathCodeX86 { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { __ Bind(GetEntryLabel()); __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pThrowNullPointer))); - codegen->RecordPcInfo(instruction_, instruction_->GetDexPc()); + RecordPcInfo(codegen, instruction_, instruction_->GetDexPc()); } private: @@ -67,7 +67,7 @@ class DivZeroCheckSlowPathX86 : public SlowPathCodeX86 { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { __ Bind(GetEntryLabel()); __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pThrowDivZero))); - codegen->RecordPcInfo(instruction_, instruction_->GetDexPc()); + RecordPcInfo(codegen, instruction_, instruction_->GetDexPc()); } private: @@ -116,7 +116,7 @@ class BoundsCheckSlowPathX86 : public SlowPathCodeX86 { length_location_, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pThrowArrayBounds))); - codegen->RecordPcInfo(instruction_, instruction_->GetDexPc()); + RecordPcInfo(codegen, instruction_, instruction_->GetDexPc()); } private: @@ -137,7 +137,7 @@ class SuspendCheckSlowPathX86 : public SlowPathCodeX86 { __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, instruction_->GetLocations()); __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pTestSuspend))); - codegen->RecordPcInfo(instruction_, instruction_->GetDexPc()); + RecordPcInfo(codegen, instruction_, instruction_->GetDexPc()); RestoreLiveRegisters(codegen, instruction_->GetLocations()); if (successor_ == nullptr) { __ jmp(GetReturnLabel()); @@ -295,6 +295,27 @@ class TypeCheckSlowPathX86 : public SlowPathCodeX86 { DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86); }; +class DeoptimizationSlowPathX86 : public SlowPathCodeX86 { + public: + explicit DeoptimizationSlowPathX86(HInstruction* instruction) + : instruction_(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, instruction_->GetLocations()); + __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pDeoptimize))); + // No need to restore live registers. 
+ DCHECK(instruction_->IsDeoptimize()); + HDeoptimize* deoptimize = instruction_->AsDeoptimize(); + uint32_t dex_pc = deoptimize->GetDexPc(); + codegen->RecordPcInfo(instruction_, dex_pc, this); + } + + private: + HInstruction* const instruction_; + DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86); +}; + #undef __ #define __ reinterpret_cast<X86Assembler*>(GetAssembler())-> @@ -340,7 +361,9 @@ size_t CodeGeneratorX86::RestoreFloatingPointRegister(size_t stack_index, uint32 return GetFloatingPointSpillSlotSize(); } -CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, const CompilerOptions& compiler_options) +CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, + const X86InstructionSetFeatures& isa_features, + const CompilerOptions& compiler_options) : CodeGenerator(graph, kNumberOfCpuRegisters, kNumberOfXmmRegisters, @@ -353,7 +376,8 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, const CompilerOptions& compile block_labels_(graph->GetArena(), 0), location_builder_(graph, this), instruction_visitor_(graph, this), - move_resolver_(graph->GetArena(), this) { + move_resolver_(graph->GetArena(), this), + isa_features_(isa_features) { // Use a fake return address register to mimic Quick. AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister)); } @@ -436,7 +460,12 @@ InstructionCodeGeneratorX86::InstructionCodeGeneratorX86(HGraph* graph, CodeGene assembler_(codegen->GetAssembler()), codegen_(codegen) {} +static dwarf::Reg DWARFReg(Register reg) { + return dwarf::Reg::X86Core(static_cast<int>(reg)); +} + void CodeGeneratorX86::GenerateFrameEntry() { + __ cfi().SetCurrentCFAOffset(kX86WordSize); // return address __ Bind(&frame_entry_label_); bool skip_overflow_check = IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86); @@ -455,10 +484,14 @@ void CodeGeneratorX86::GenerateFrameEntry() { Register reg = kCoreCalleeSaves[i]; if (allocated_registers_.ContainsCoreRegister(reg)) { __ pushl(reg); + __ cfi().AdjustCFAOffset(kX86WordSize); + __ cfi().RelOffset(DWARFReg(reg), 0); } } - __ subl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize())); + int adjust = GetFrameSize() - FrameEntrySpillSize(); + __ subl(ESP, Immediate(adjust)); + __ cfi().AdjustCFAOffset(adjust); __ movl(Address(ESP, kCurrentMethodStackOffset), EAX); } @@ -467,12 +500,16 @@ void CodeGeneratorX86::GenerateFrameExit() { return; } - __ addl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize())); + int adjust = GetFrameSize() - FrameEntrySpillSize(); + __ addl(ESP, Immediate(adjust)); + __ cfi().AdjustCFAOffset(-adjust); for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) { Register reg = kCoreCalleeSaves[i]; if (allocated_registers_.ContainsCoreRegister(reg)) { __ popl(reg); + __ cfi().AdjustCFAOffset(-static_cast<int>(kX86WordSize)); + __ cfi().Restore(DWARFReg(reg)); } } } @@ -491,7 +528,6 @@ Location CodeGeneratorX86::GetStackLocation(HLoadLocal* load) const { case Primitive::kPrimLong: case Primitive::kPrimDouble: return Location::DoubleStackSlot(GetStackSlot(load->GetLocal())); - break; case Primitive::kPrimInt: case Primitive::kPrimNot: @@ -504,10 +540,11 @@ Location CodeGeneratorX86::GetStackLocation(HLoadLocal* load) const { case Primitive::kPrimShort: case Primitive::kPrimVoid: LOG(FATAL) << "Unexpected type " << load->GetType(); + UNREACHABLE(); } LOG(FATAL) << "Unreachable"; - return Location(); + UNREACHABLE(); } Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) { @@ -785,24 +822,17 @@ void InstructionCodeGeneratorX86::VisitExit(HExit* exit) 
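A minimal worked example of the x86 tracking above; the frame size and the single pushed callee-save are assumptions made for the arithmetic. The call instruction has already pushed the return address, so the CFA offset starts at one word even before the prologue runs, which is what SetCurrentCFAOffset(kX86WordSize) records; each pushl then both moves the CFA and pins the saved register, and the final subl brings the tracked offset up to the full frame size.

#include <cassert>

int main() {
  const int kWord = 4;
  const int frame_size = 32;              // assumed example frame size
  const int spill_size = 2 * kWord;       // assumed: return-address slot + one pushed callee-save

  int cfa_offset = kWord;                 // SetCurrentCFAOffset(kX86WordSize): return address
  cfa_offset += kWord;                    // pushl EBP: AdjustCFAOffset(kX86WordSize)
                                          // cfi().RelOffset(DWARFReg(EBP), 0)
  cfa_offset += frame_size - spill_size;  // subl ESP, adjust: AdjustCFAOffset(adjust)
  assert(cfa_offset == frame_size);       // matches the DCHECK after GenerateFrameEntry
  return 0;
}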
{ UNUSED(exit); } -void LocationsBuilderX86::VisitIf(HIf* if_instr) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall); - HInstruction* cond = if_instr->InputAt(0); - if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { - locations->SetInAt(0, Location::Any()); - } -} - -void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) { - HInstruction* cond = if_instr->InputAt(0); +void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instruction, + Label* true_target, + Label* false_target, + Label* always_true_target) { + HInstruction* cond = instruction->InputAt(0); if (cond->IsIntConstant()) { // Constant condition, statically compared against 1. int32_t cond_value = cond->AsIntConstant()->GetValue(); if (cond_value == 1) { - if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), - if_instr->IfTrueSuccessor())) { - __ jmp(codegen_->GetLabelOf(if_instr->IfTrueSuccessor())); + if (always_true_target != nullptr) { + __ jmp(always_true_target); } return; } else { @@ -815,20 +845,19 @@ void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) { // evaluated just before the if, we don't need to evaluate it // again. bool eflags_set = cond->IsCondition() - && cond->AsCondition()->IsBeforeWhenDisregardMoves(if_instr); + && cond->AsCondition()->IsBeforeWhenDisregardMoves(instruction); if (materialized) { if (!eflags_set) { // Materialized condition, compare against 0. - Location lhs = if_instr->GetLocations()->InAt(0); + Location lhs = instruction->GetLocations()->InAt(0); if (lhs.IsRegister()) { __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>()); } else { __ cmpl(Address(ESP, lhs.GetStackIndex()), Immediate(0)); } - __ j(kNotEqual, codegen_->GetLabelOf(if_instr->IfTrueSuccessor())); + __ j(kNotEqual, true_target); } else { - __ j(X86Condition(cond->AsCondition()->GetCondition()), - codegen_->GetLabelOf(if_instr->IfTrueSuccessor())); + __ j(X86Condition(cond->AsCondition()->GetCondition()), true_target); } } else { Location lhs = cond->GetLocations()->InAt(0); @@ -847,14 +876,54 @@ void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) { } else { __ cmpl(lhs.AsRegister<Register>(), Address(ESP, rhs.GetStackIndex())); } - __ j(X86Condition(cond->AsCondition()->GetCondition()), - codegen_->GetLabelOf(if_instr->IfTrueSuccessor())); + __ j(X86Condition(cond->AsCondition()->GetCondition()), true_target); } } - if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), - if_instr->IfFalseSuccessor())) { - __ jmp(codegen_->GetLabelOf(if_instr->IfFalseSuccessor())); + if (false_target != nullptr) { + __ jmp(false_target); + } +} + +void LocationsBuilderX86::VisitIf(HIf* if_instr) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall); + HInstruction* cond = if_instr->InputAt(0); + if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { + locations->SetInAt(0, Location::Any()); + } +} + +void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) { + Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor()); + Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor()); + Label* always_true_target = true_target; + if (codegen_->GoesToNextBlock(if_instr->GetBlock(), + if_instr->IfTrueSuccessor())) { + always_true_target = nullptr; + } + if (codegen_->GoesToNextBlock(if_instr->GetBlock(), + if_instr->IfFalseSuccessor())) { + false_target = nullptr; } + GenerateTestAndBranch(if_instr, true_target, 
false_target, always_true_target); +} + +void LocationsBuilderX86::VisitDeoptimize(HDeoptimize* deoptimize) { + LocationSummary* locations = new (GetGraph()->GetArena()) + LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); + HInstruction* cond = deoptimize->InputAt(0); + DCHECK(cond->IsCondition()); + if (cond->AsCondition()->NeedsMaterialization()) { + locations->SetInAt(0, Location::Any()); + } +} + +void InstructionCodeGeneratorX86::VisitDeoptimize(HDeoptimize* deoptimize) { + SlowPathCodeX86* slow_path = new (GetGraph()->GetArena()) + DeoptimizationSlowPathX86(deoptimize); + codegen_->AddSlowPath(slow_path); + Label* slow_path_entry = slow_path->GetEntryLabel(); + GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry); } void LocationsBuilderX86::VisitLocal(HLocal* local) { @@ -1047,8 +1116,11 @@ void LocationsBuilderX86::VisitReturnVoid(HReturnVoid* ret) { void InstructionCodeGeneratorX86::VisitReturnVoid(HReturnVoid* ret) { UNUSED(ret); + __ cfi().RememberState(); codegen_->GenerateFrameExit(); __ ret(); + __ cfi().RestoreState(); + __ cfi().DefCFAOffset(codegen_->GetFrameSize()); } void LocationsBuilderX86::VisitReturn(HReturn* ret) { @@ -1106,12 +1178,15 @@ void InstructionCodeGeneratorX86::VisitReturn(HReturn* ret) { LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType(); } } + __ cfi().RememberState(); codegen_->GenerateFrameExit(); __ ret(); + __ cfi().RestoreState(); + __ cfi().DefCFAOffset(codegen_->GetFrameSize()); } void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { - IntrinsicLocationsBuilderX86 intrinsic(GetGraph()->GetArena()); + IntrinsicLocationsBuilderX86 intrinsic(codegen_); if (intrinsic.TryDispatch(invoke)) { return; } @@ -2637,16 +2712,16 @@ void LocationsBuilderX86::HandleShift(HBinaryOperation* op) { switch (op->GetResultType()) { case Primitive::kPrimInt: { - locations->SetInAt(0, Location::RequiresRegister()); - // The shift count needs to be in CL. + locations->SetInAt(0, Location::Any()); + // The shift count needs to be in CL or a constant. locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, op->InputAt(1))); locations->SetOut(Location::SameAsFirstInput()); break; } case Primitive::kPrimLong: { locations->SetInAt(0, Location::RequiresRegister()); - // The shift count needs to be in CL. - locations->SetInAt(1, Location::RegisterLocation(ECX)); + // The shift count needs to be in CL or a constant. 
+ locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, op->InputAt(1))); locations->SetOut(Location::SameAsFirstInput()); break; } @@ -2665,38 +2740,87 @@ void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) { switch (op->GetResultType()) { case Primitive::kPrimInt: { - Register first_reg = first.AsRegister<Register>(); - if (second.IsRegister()) { - Register second_reg = second.AsRegister<Register>(); - DCHECK_EQ(ECX, second_reg); - if (op->IsShl()) { - __ shll(first_reg, second_reg); - } else if (op->IsShr()) { - __ sarl(first_reg, second_reg); + if (first.IsRegister()) { + Register first_reg = first.AsRegister<Register>(); + if (second.IsRegister()) { + Register second_reg = second.AsRegister<Register>(); + DCHECK_EQ(ECX, second_reg); + if (op->IsShl()) { + __ shll(first_reg, second_reg); + } else if (op->IsShr()) { + __ sarl(first_reg, second_reg); + } else { + __ shrl(first_reg, second_reg); + } } else { - __ shrl(first_reg, second_reg); + int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue; + if (shift == 0) { + return; + } + Immediate imm(shift); + if (op->IsShl()) { + __ shll(first_reg, imm); + } else if (op->IsShr()) { + __ sarl(first_reg, imm); + } else { + __ shrl(first_reg, imm); + } } } else { - Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue); - if (op->IsShl()) { - __ shll(first_reg, imm); - } else if (op->IsShr()) { - __ sarl(first_reg, imm); + DCHECK(first.IsStackSlot()) << first; + Address addr(ESP, first.GetStackIndex()); + if (second.IsRegister()) { + Register second_reg = second.AsRegister<Register>(); + DCHECK_EQ(ECX, second_reg); + if (op->IsShl()) { + __ shll(addr, second_reg); + } else if (op->IsShr()) { + __ sarl(addr, second_reg); + } else { + __ shrl(addr, second_reg); + } } else { - __ shrl(first_reg, imm); + int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue; + if (shift == 0) { + return; + } + Immediate imm(shift); + if (op->IsShl()) { + __ shll(addr, imm); + } else if (op->IsShr()) { + __ sarl(addr, imm); + } else { + __ shrl(addr, imm); + } } } + break; } case Primitive::kPrimLong: { - Register second_reg = second.AsRegister<Register>(); - DCHECK_EQ(ECX, second_reg); - if (op->IsShl()) { - GenerateShlLong(first, second_reg); - } else if (op->IsShr()) { - GenerateShrLong(first, second_reg); + if (second.IsRegister()) { + Register second_reg = second.AsRegister<Register>(); + DCHECK_EQ(ECX, second_reg); + if (op->IsShl()) { + GenerateShlLong(first, second_reg); + } else if (op->IsShr()) { + GenerateShrLong(first, second_reg); + } else { + GenerateUShrLong(first, second_reg); + } } else { - GenerateUShrLong(first, second_reg); + // Shift by a constant. + int shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftValue; + // Nothing to do if the shift is 0, as the input is already the output. + if (shift != 0) { + if (op->IsShl()) { + GenerateShlLong(first, shift); + } else if (op->IsShr()) { + GenerateShrLong(first, shift); + } else { + GenerateUShrLong(first, shift); + } + } } break; } @@ -2705,6 +2829,26 @@ void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) { } } +void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, int shift) { + Register low = loc.AsRegisterPairLow<Register>(); + Register high = loc.AsRegisterPairHigh<Register>(); + if (shift == 32) { + // Shift by 32 is easy. High gets low, and low gets 0. 
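A minimal reference implementation, checked against the native 64-bit operators, of the constant-shift decomposition the new GenerateShlLong/GenerateUShrLong overloads perform on a (low, high) register pair: shift == 32 just moves one half, shift > 32 additionally shifts the moved half by shift - 32, and 1..31 combines bits from both halves the way shld/shrd do. The arithmetic GenerateShrLong differs only in sign-filling the high half with sarl.

#include <cassert>
#include <cstdint>

struct Halves { uint32_t low; uint32_t high; };

Halves ShlLong(Halves v, int shift) {
  if (shift == 32) return {0u, v.low};                    // high = low, low = 0
  if (shift > 32) return {0u, v.low << (shift - 32)};     // movl high, low; shll high; xorl low
  return {v.low << shift,                                 // shll low, #shift
          (v.high << shift) | (v.low >> (32 - shift))};   // shld high, low, #shift
}

Halves UShrLong(Halves v, int shift) {
  if (shift == 32) return {v.high, 0u};                   // low = high, high = 0
  if (shift > 32) return {v.high >> (shift - 32), 0u};    // movl low, high; shrl low; xorl high
  return {(v.low >> shift) | (v.high << (32 - shift)),    // shrd low, high, #shift
          v.high >> shift};                               // shrl high, #shift
}

int main() {
  const uint64_t x = 0x123456789abcdef0ULL;
  const Halves v{static_cast<uint32_t>(x), static_cast<uint32_t>(x >> 32)};
  for (int s = 1; s < 64; ++s) {
    const Halves shl = ShlLong(v, s);
    const Halves ushr = UShrLong(v, s);
    assert(((static_cast<uint64_t>(shl.high) << 32) | shl.low) == (x << s));
    assert(((static_cast<uint64_t>(ushr.high) << 32) | ushr.low) == (x >> s));
  }
  return 0;
}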
+ codegen_->EmitParallelMoves( + loc.ToLow(), loc.ToHigh(), + Location::ConstantLocation(GetGraph()->GetIntConstant(0)), loc.ToLow()); + } else if (shift > 32) { + // Low part becomes 0. High part is low part << (shift-32). + __ movl(high, low); + __ shll(high, Immediate(shift - 32)); + __ xorl(low, low); + } else { + // Between 1 and 31. + __ shld(high, low, Immediate(shift)); + __ shll(low, Immediate(shift)); + } +} + void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, Register shifter) { Label done; __ shld(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>(), shifter); @@ -2716,6 +2860,27 @@ void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, Register __ Bind(&done); } +void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, int shift) { + Register low = loc.AsRegisterPairLow<Register>(); + Register high = loc.AsRegisterPairHigh<Register>(); + if (shift == 32) { + // Need to copy the sign. + DCHECK_NE(low, high); + __ movl(low, high); + __ sarl(high, Immediate(31)); + } else if (shift > 32) { + DCHECK_NE(low, high); + // High part becomes sign. Low part is shifted by shift - 32. + __ movl(low, high); + __ sarl(high, Immediate(31)); + __ shrl(low, Immediate(shift - 32)); + } else { + // Between 1 and 31. + __ shrd(low, high, Immediate(shift)); + __ sarl(high, Immediate(shift)); + } +} + void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, Register shifter) { Label done; __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter); @@ -2727,6 +2892,26 @@ void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, Register __ Bind(&done); } +void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, int shift) { + Register low = loc.AsRegisterPairLow<Register>(); + Register high = loc.AsRegisterPairHigh<Register>(); + if (shift == 32) { + // Shift by 32 is easy. Low gets high, and high gets 0. + codegen_->EmitParallelMoves( + loc.ToHigh(), loc.ToLow(), + Location::ConstantLocation(GetGraph()->GetIntConstant(0)), loc.ToHigh()); + } else if (shift > 32) { + // Low part is high >> (shift - 32). High part becomes 0. + __ movl(low, high); + __ shrl(low, Immediate(shift - 32)); + __ xorl(high, high); + } else { + // Between 1 and 31. + __ shrd(low, high, Immediate(shift)); + __ shrl(high, Immediate(shift)); + } +} + void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, Register shifter) { Label done; __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter); @@ -3301,7 +3486,7 @@ void InstructionCodeGeneratorX86::GenerateExplicitNullCheck(HNullCheck* instruct Location obj = locations->InAt(0); if (obj.IsRegister()) { - __ cmpl(obj.AsRegister<Register>(), Immediate(0)); + __ testl(obj.AsRegister<Register>(), obj.AsRegister<Register>()); } else if (obj.IsStackSlot()) { __ cmpl(Address(ESP, obj.GetStackIndex()), Immediate(0)); } else { @@ -3487,7 +3672,13 @@ void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) { // Ensure the value is in a byte register. 
locations->SetInAt(2, Location::ByteRegisterOrConstant(EAX, instruction->InputAt(2))); } else { - locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2))); + bool is_fp_type = (value_type == Primitive::kPrimFloat) + || (value_type == Primitive::kPrimDouble); + if (is_fp_type) { + locations->SetInAt(2, Location::RequiresFpuRegister()); + } else { + locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2))); + } } // Temporary registers for the write barrier. if (needs_write_barrier) { @@ -3766,23 +3957,43 @@ X86Assembler* ParallelMoveResolverX86::GetAssembler() const { } void ParallelMoveResolverX86::MoveMemoryToMemory32(int dst, int src) { - ScratchRegisterScope ensure_scratch( - this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters()); - Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister()); - int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0; - __ movl(temp_reg, Address(ESP, src + stack_offset)); - __ movl(Address(ESP, dst + stack_offset), temp_reg); + ScratchRegisterScope possible_scratch( + this, kNoRegister, codegen_->GetNumberOfCoreRegisters()); + int temp = possible_scratch.GetRegister(); + if (temp == kNoRegister) { + // Use the stack. + __ pushl(Address(ESP, src)); + __ popl(Address(ESP, dst)); + } else { + Register temp_reg = static_cast<Register>(temp); + __ movl(temp_reg, Address(ESP, src)); + __ movl(Address(ESP, dst), temp_reg); + } } void ParallelMoveResolverX86::MoveMemoryToMemory64(int dst, int src) { - ScratchRegisterScope ensure_scratch( - this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters()); - Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister()); - int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0; - __ movl(temp_reg, Address(ESP, src + stack_offset)); - __ movl(Address(ESP, dst + stack_offset), temp_reg); - __ movl(temp_reg, Address(ESP, src + stack_offset + kX86WordSize)); - __ movl(Address(ESP, dst + stack_offset + kX86WordSize), temp_reg); + ScratchRegisterScope possible_scratch( + this, kNoRegister, codegen_->GetNumberOfCoreRegisters()); + int temp = possible_scratch.GetRegister(); + if (temp == kNoRegister) { + // Use the stack instead. + // Push src low word. + __ pushl(Address(ESP, src)); + // Push src high word. Stack offset = 4. + __ pushl(Address(ESP, src + 4 /* offset */ + kX86WordSize /* high */)); + + // Pop into dst high word. Stack offset = 8. + // Pop with ESP address uses the 'after increment' value of ESP. + __ popl(Address(ESP, dst + 4 /* offset */ + kX86WordSize /* high */)); + // Finally dst low word. Stack offset = 4. + __ popl(Address(ESP, dst)); + } else { + Register temp_reg = static_cast<Register>(temp); + __ movl(temp_reg, Address(ESP, src)); + __ movl(Address(ESP, dst), temp_reg); + __ movl(temp_reg, Address(ESP, src + kX86WordSize)); + __ movl(Address(ESP, dst + kX86WordSize), temp_reg); + } } void ParallelMoveResolverX86::EmitMove(size_t index) { @@ -3847,10 +4058,18 @@ void ParallelMoveResolverX86::EmitMove(size_t index) { __ xorps(dest, dest); } else { ScratchRegisterScope ensure_scratch( - this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters()); - Register temp = static_cast<Register>(ensure_scratch.GetRegister()); - __ movl(temp, Immediate(value)); - __ movd(dest, temp); + this, kNoRegister, codegen_->GetNumberOfCoreRegisters()); + int temp_reg = ensure_scratch.GetRegister(); + if (temp_reg == kNoRegister) { + // Avoid spilling/restoring a scratch register by using the stack. 
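  // Illustrative note, not from the patch: the stack fallbacks above rely on two
  // x86 details. Each pushl drops ESP by kX86WordSize, so an operand that was at
  // [ESP + off] before a push is at [ESP + off + 4] after it, and a popl with an
  // ESP-relative destination forms its address with the already-incremented ESP.
  // Worked example for MoveMemoryToMemory64 with src = 8, dst = 16 (offsets from
  // the original ESP):
  //   pushl [ESP + 8]            // src low; ESP is now 4 lower
  //   pushl [ESP + 8 + 4 + 4]    // src high: +4 for the push, +4 for the high word
  //   popl  [ESP + 16 + 4 + 4]   // dst high: address uses ESP after its increment
  //   popl  [ESP + 16]           // dst low: ESP is back at its original value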
+ __ pushl(Immediate(value)); + __ movss(dest, Address(ESP, 0)); + __ addl(ESP, Immediate(4)); + } else { + Register temp = static_cast<Register>(temp_reg); + __ movl(temp, Immediate(value)); + __ movd(dest, temp); + } } } else { DCHECK(destination.IsStackSlot()) << destination; @@ -3899,42 +4118,96 @@ void ParallelMoveResolverX86::EmitMove(size_t index) { } } -void ParallelMoveResolverX86::Exchange(Register reg, int mem) { - Register suggested_scratch = reg == EAX ? EBX : EAX; - ScratchRegisterScope ensure_scratch( - this, reg, suggested_scratch, codegen_->GetNumberOfCoreRegisters()); +void ParallelMoveResolverX86::Exchange(Register reg1, Register reg2) { + // Prefer to avoid xchg as it isn't speedy on smaller processors. + ScratchRegisterScope possible_scratch( + this, reg1, codegen_->GetNumberOfCoreRegisters()); + int temp_reg = possible_scratch.GetRegister(); + if (temp_reg == kNoRegister || temp_reg == reg2) { + __ pushl(reg1); + __ movl(reg1, reg2); + __ popl(reg2); + } else { + Register temp = static_cast<Register>(temp_reg); + __ movl(temp, reg1); + __ movl(reg1, reg2); + __ movl(reg2, temp); + } +} - int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0; - __ movl(static_cast<Register>(ensure_scratch.GetRegister()), Address(ESP, mem + stack_offset)); - __ movl(Address(ESP, mem + stack_offset), reg); - __ movl(reg, static_cast<Register>(ensure_scratch.GetRegister())); +void ParallelMoveResolverX86::Exchange(Register reg, int mem) { + ScratchRegisterScope possible_scratch( + this, reg, codegen_->GetNumberOfCoreRegisters()); + int temp_reg = possible_scratch.GetRegister(); + if (temp_reg == kNoRegister) { + __ pushl(Address(ESP, mem)); + __ movl(Address(ESP, mem + kX86WordSize), reg); + __ popl(reg); + } else { + Register temp = static_cast<Register>(temp_reg); + __ movl(temp, Address(ESP, mem)); + __ movl(Address(ESP, mem), reg); + __ movl(reg, temp); + } } void ParallelMoveResolverX86::Exchange32(XmmRegister reg, int mem) { - ScratchRegisterScope ensure_scratch( - this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters()); - - Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister()); - int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0; - __ movl(temp_reg, Address(ESP, mem + stack_offset)); - __ movss(Address(ESP, mem + stack_offset), reg); - __ movd(reg, temp_reg); + ScratchRegisterScope possible_scratch( + this, kNoRegister, codegen_->GetNumberOfCoreRegisters()); + int temp_reg = possible_scratch.GetRegister(); + if (temp_reg == kNoRegister) { + __ pushl(Address(ESP, mem)); + __ movss(Address(ESP, mem + kX86WordSize), reg); + __ movss(reg, Address(ESP, 0)); + __ addl(ESP, Immediate(kX86WordSize)); + } else { + Register temp = static_cast<Register>(temp_reg); + __ movl(temp, Address(ESP, mem)); + __ movss(Address(ESP, mem), reg); + __ movd(reg, temp); + } } void ParallelMoveResolverX86::Exchange(int mem1, int mem2) { - ScratchRegisterScope ensure_scratch1( - this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters()); - - Register suggested_scratch = ensure_scratch1.GetRegister() == EAX ? EBX : EAX; - ScratchRegisterScope ensure_scratch2( - this, ensure_scratch1.GetRegister(), suggested_scratch, codegen_->GetNumberOfCoreRegisters()); - - int stack_offset = ensure_scratch1.IsSpilled() ? kX86WordSize : 0; - stack_offset += ensure_scratch2.IsSpilled() ? 
kX86WordSize : 0; - __ movl(static_cast<Register>(ensure_scratch1.GetRegister()), Address(ESP, mem1 + stack_offset)); - __ movl(static_cast<Register>(ensure_scratch2.GetRegister()), Address(ESP, mem2 + stack_offset)); - __ movl(Address(ESP, mem2 + stack_offset), static_cast<Register>(ensure_scratch1.GetRegister())); - __ movl(Address(ESP, mem1 + stack_offset), static_cast<Register>(ensure_scratch2.GetRegister())); + ScratchRegisterScope possible_scratch1( + this, kNoRegister, codegen_->GetNumberOfCoreRegisters()); + int temp_reg1 = possible_scratch1.GetRegister(); + if (temp_reg1 == kNoRegister) { + // No free registers. Use the stack. + __ pushl(Address(ESP, mem1)); + __ pushl(Address(ESP, mem2 + kX86WordSize)); + // Pop with ESP address uses the 'after increment' value of ESP. + __ popl(Address(ESP, mem1 + kX86WordSize)); + __ popl(Address(ESP, mem2)); + } else { + // Got the first one. Try for a second. + ScratchRegisterScope possible_scratch2( + this, temp_reg1, codegen_->GetNumberOfCoreRegisters()); + int temp_reg2 = possible_scratch2.GetRegister(); + if (temp_reg2 == kNoRegister) { + Register temp = static_cast<Register>(temp_reg1); + // Bummer. Only have one free register to use. + // Save mem1 on the stack. + __ pushl(Address(ESP, mem1)); + + // Copy mem2 into mem1. + __ movl(temp, Address(ESP, mem2 + kX86WordSize)); + __ movl(Address(ESP, mem1 + kX86WordSize), temp); + + // Now pop mem1 into mem2. + // Pop with ESP address uses the 'after increment' value of ESP. + __ popl(Address(ESP, mem2)); + } else { + // Great. We have 2 registers to play with. + Register temp1 = static_cast<Register>(temp_reg1); + Register temp2 = static_cast<Register>(temp_reg2); + DCHECK_NE(temp1, temp2); + __ movl(temp1, Address(ESP, mem1)); + __ movl(temp2, Address(ESP, mem2)); + __ movl(Address(ESP, mem2), temp1); + __ movl(Address(ESP, mem1), temp2); + } + } } void ParallelMoveResolverX86::EmitSwap(size_t index) { @@ -3943,7 +4216,7 @@ void ParallelMoveResolverX86::EmitSwap(size_t index) { Location destination = move->GetDestination(); if (source.IsRegister() && destination.IsRegister()) { - __ xchgl(destination.AsRegister<Register>(), source.AsRegister<Register>()); + Exchange(destination.AsRegister<Register>(), source.AsRegister<Register>()); } else if (source.IsRegister() && destination.IsStackSlot()) { Exchange(source.AsRegister<Register>(), destination.GetStackIndex()); } else if (source.IsStackSlot() && destination.IsRegister()) { diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index a5489d2844..8c56e35329 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -106,6 +106,7 @@ class ParallelMoveResolverX86 : public ParallelMoveResolver { X86Assembler* GetAssembler() const; private: + void Exchange(Register reg1, Register Reg2); void Exchange(Register reg, int mem); void Exchange(int mem1, int mem2); void Exchange32(XmmRegister reg, int mem); @@ -171,6 +172,9 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { void GenerateShlLong(const Location& loc, Register shifter); void GenerateShrLong(const Location& loc, Register shifter); void GenerateUShrLong(const Location& loc, Register shifter); + void GenerateShlLong(const Location& loc, int shift); + void GenerateShrLong(const Location& loc, int shift); + void GenerateUShrLong(const Location& loc, int shift); void GenerateMemoryBarrier(MemBarrierKind kind); void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); void 
HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); @@ -179,6 +183,10 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { void GenerateImplicitNullCheck(HNullCheck* instruction); void GenerateExplicitNullCheck(HNullCheck* instruction); + void GenerateTestAndBranch(HInstruction* instruction, + Label* true_target, + Label* false_target, + Label* always_true_target); X86Assembler* const assembler_; CodeGeneratorX86* const codegen_; @@ -188,7 +196,9 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { class CodeGeneratorX86 : public CodeGenerator { public: - CodeGeneratorX86(HGraph* graph, const CompilerOptions& compiler_options); + CodeGeneratorX86(HGraph* graph, + const X86InstructionSetFeatures& isa_features, + const CompilerOptions& compiler_options); virtual ~CodeGeneratorX86() {} void GenerateFrameEntry() OVERRIDE; @@ -274,6 +284,10 @@ class CodeGeneratorX86 : public CodeGenerator { Label* GetFrameEntryLabel() { return &frame_entry_label_; } + const X86InstructionSetFeatures& GetInstructionSetFeatures() const { + return isa_features_; + } + private: // Labels for each block that will be compiled. GrowableArray<Label> block_labels_; @@ -282,6 +296,7 @@ class CodeGeneratorX86 : public CodeGenerator { InstructionCodeGeneratorX86 instruction_visitor_; ParallelMoveResolverX86 move_resolver_; X86Assembler assembler_; + const X86InstructionSetFeatures& isa_features_; DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86); }; diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 2bb0349932..01b24ea33f 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -315,6 +315,27 @@ class TypeCheckSlowPathX86_64 : public SlowPathCodeX86_64 { DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86_64); }; +class DeoptimizationSlowPathX86_64 : public SlowPathCodeX86_64 { + public: + explicit DeoptimizationSlowPathX86_64(HInstruction* instruction) + : instruction_(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, instruction_->GetLocations()); + __ gs()->call( + Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pDeoptimize), true)); + DCHECK(instruction_->IsDeoptimize()); + HDeoptimize* deoptimize = instruction_->AsDeoptimize(); + uint32_t dex_pc = deoptimize->GetDexPc(); + codegen->RecordPcInfo(instruction_, dex_pc, this); + } + + private: + HInstruction* const instruction_; + DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64); +}; + #undef __ #define __ reinterpret_cast<X86_64Assembler*>(GetAssembler())-> @@ -391,7 +412,9 @@ size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uin static constexpr int kNumberOfCpuRegisterPairs = 0; // Use a fake return address register to mimic Quick. 
static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1); -CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, const CompilerOptions& compiler_options) +CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, + const X86_64InstructionSetFeatures& isa_features, + const CompilerOptions& compiler_options) : CodeGenerator(graph, kNumberOfCpuRegisters, kNumberOfFloatRegisters, @@ -405,7 +428,9 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, const CompilerOptions& c block_labels_(graph->GetArena(), 0), location_builder_(graph, this), instruction_visitor_(graph, this), - move_resolver_(graph->GetArena(), this) { + move_resolver_(graph->GetArena(), this), + isa_features_(isa_features), + constant_area_start_(0) { AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister)); } @@ -458,7 +483,15 @@ void CodeGeneratorX86_64::SetupBlockedRegisters(bool is_baseline) const { } } +static dwarf::Reg DWARFReg(Register reg) { + return dwarf::Reg::X86_64Core(static_cast<int>(reg)); +} +static dwarf::Reg DWARFReg(FloatRegister reg) { + return dwarf::Reg::X86_64Fp(static_cast<int>(reg)); +} + void CodeGeneratorX86_64::GenerateFrameEntry() { + __ cfi().SetCurrentCFAOffset(kX86_64WordSize); // return address __ Bind(&frame_entry_label_); bool skip_overflow_check = IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64); @@ -478,17 +511,22 @@ void CodeGeneratorX86_64::GenerateFrameEntry() { Register reg = kCoreCalleeSaves[i]; if (allocated_registers_.ContainsCoreRegister(reg)) { __ pushq(CpuRegister(reg)); + __ cfi().AdjustCFAOffset(kX86_64WordSize); + __ cfi().RelOffset(DWARFReg(reg), 0); } } - __ subq(CpuRegister(RSP), Immediate(GetFrameSize() - GetCoreSpillSize())); + int adjust = GetFrameSize() - GetCoreSpillSize(); + __ subq(CpuRegister(RSP), Immediate(adjust)); + __ cfi().AdjustCFAOffset(adjust); uint32_t xmm_spill_location = GetFpuSpillStart(); size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize(); for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) { if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) { - __ movsd(Address(CpuRegister(RSP), xmm_spill_location + (xmm_spill_slot_size * i)), - XmmRegister(kFpuCalleeSaves[i])); + int offset = xmm_spill_location + (xmm_spill_slot_size * i); + __ movsd(Address(CpuRegister(RSP), offset), XmmRegister(kFpuCalleeSaves[i])); + __ cfi().RelOffset(DWARFReg(kFpuCalleeSaves[i]), offset); } } @@ -503,17 +541,22 @@ void CodeGeneratorX86_64::GenerateFrameExit() { size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize(); for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) { if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) { - __ movsd(XmmRegister(kFpuCalleeSaves[i]), - Address(CpuRegister(RSP), xmm_spill_location + (xmm_spill_slot_size * i))); + int offset = xmm_spill_location + (xmm_spill_slot_size * i); + __ movsd(XmmRegister(kFpuCalleeSaves[i]), Address(CpuRegister(RSP), offset)); + __ cfi().Restore(DWARFReg(kFpuCalleeSaves[i])); } } - __ addq(CpuRegister(RSP), Immediate(GetFrameSize() - GetCoreSpillSize())); + int adjust = GetFrameSize() - GetCoreSpillSize(); + __ addq(CpuRegister(RSP), Immediate(adjust)); + __ cfi().AdjustCFAOffset(-adjust); for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) { Register reg = kCoreCalleeSaves[i]; if (allocated_registers_.ContainsCoreRegister(reg)) { __ popq(CpuRegister(reg)); + __ cfi().AdjustCFAOffset(-static_cast<int>(kX86_64WordSize)); + __ cfi().Restore(DWARFReg(reg)); } } } @@ -532,7 
+575,6 @@ Location CodeGeneratorX86_64::GetStackLocation(HLoadLocal* load) const { case Primitive::kPrimLong: case Primitive::kPrimDouble: return Location::DoubleStackSlot(GetStackSlot(load->GetLocal())); - break; case Primitive::kPrimInt: case Primitive::kPrimNot: @@ -545,10 +587,11 @@ Location CodeGeneratorX86_64::GetStackLocation(HLoadLocal* load) const { case Primitive::kPrimShort: case Primitive::kPrimVoid: LOG(FATAL) << "Unexpected type " << load->GetType(); + UNREACHABLE(); } LOG(FATAL) << "Unreachable"; - return Location(); + UNREACHABLE(); } void CodeGeneratorX86_64::Move(Location destination, Location source) { @@ -607,7 +650,7 @@ void CodeGeneratorX86_64::Move(Location destination, Location source) { source.AsFpuRegister<XmmRegister>()); } else if (source.IsConstant()) { HConstant* constant = source.GetConstant(); - int64_t value = constant->AsLongConstant()->GetValue(); + int64_t value; if (constant->IsDoubleConstant()) { value = bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue()); } else { @@ -735,24 +778,17 @@ void InstructionCodeGeneratorX86_64::VisitExit(HExit* exit) { UNUSED(exit); } -void LocationsBuilderX86_64::VisitIf(HIf* if_instr) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall); - HInstruction* cond = if_instr->InputAt(0); - if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { - locations->SetInAt(0, Location::Any()); - } -} - -void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) { - HInstruction* cond = if_instr->InputAt(0); +void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruction, + Label* true_target, + Label* false_target, + Label* always_true_target) { + HInstruction* cond = instruction->InputAt(0); if (cond->IsIntConstant()) { // Constant condition, statically compared against 1. int32_t cond_value = cond->AsIntConstant()->GetValue(); if (cond_value == 1) { - if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), - if_instr->IfTrueSuccessor())) { - __ jmp(codegen_->GetLabelOf(if_instr->IfTrueSuccessor())); + if (always_true_target != nullptr) { + __ jmp(always_true_target); } return; } else { @@ -765,21 +801,20 @@ void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) { // evaluated just before the if, we don't need to evaluate it // again. bool eflags_set = cond->IsCondition() - && cond->AsCondition()->IsBeforeWhenDisregardMoves(if_instr); + && cond->AsCondition()->IsBeforeWhenDisregardMoves(instruction); if (materialized) { if (!eflags_set) { // Materialized condition, compare against 0. 
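  // Illustrative note, not from the patch: GenerateTestAndBranch is shared by
  // HIf and HDeoptimize. A null 'always_true_target' or 'false_target' means the
  // corresponding successor is the fall-through block, so no jump is emitted;
  // HDeoptimize passes the slow-path entry label for both true targets and null
  // for the false target, so execution falls through when no deopt is needed.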
- Location lhs = if_instr->GetLocations()->InAt(0); + Location lhs = instruction->GetLocations()->InAt(0); if (lhs.IsRegister()) { __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>()); } else { __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0)); } - __ j(kNotEqual, codegen_->GetLabelOf(if_instr->IfTrueSuccessor())); + __ j(kNotEqual, true_target); } else { - __ j(X86_64Condition(cond->AsCondition()->GetCondition()), - codegen_->GetLabelOf(if_instr->IfTrueSuccessor())); + __ j(X86_64Condition(cond->AsCondition()->GetCondition()), true_target); } } else { Location lhs = cond->GetLocations()->InAt(0); @@ -797,16 +832,56 @@ void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) { __ cmpl(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex())); } - __ j(X86_64Condition(cond->AsCondition()->GetCondition()), - codegen_->GetLabelOf(if_instr->IfTrueSuccessor())); + __ j(X86_64Condition(cond->AsCondition()->GetCondition()), true_target); } } - if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), - if_instr->IfFalseSuccessor())) { - __ jmp(codegen_->GetLabelOf(if_instr->IfFalseSuccessor())); + if (false_target != nullptr) { + __ jmp(false_target); + } +} + +void LocationsBuilderX86_64::VisitIf(HIf* if_instr) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall); + HInstruction* cond = if_instr->InputAt(0); + if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { + locations->SetInAt(0, Location::Any()); } } +void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) { + Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor()); + Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor()); + Label* always_true_target = true_target; + if (codegen_->GoesToNextBlock(if_instr->GetBlock(), + if_instr->IfTrueSuccessor())) { + always_true_target = nullptr; + } + if (codegen_->GoesToNextBlock(if_instr->GetBlock(), + if_instr->IfFalseSuccessor())) { + false_target = nullptr; + } + GenerateTestAndBranch(if_instr, true_target, false_target, always_true_target); +} + +void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) { + LocationSummary* locations = new (GetGraph()->GetArena()) + LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); + HInstruction* cond = deoptimize->InputAt(0); + DCHECK(cond->IsCondition()); + if (cond->AsCondition()->NeedsMaterialization()) { + locations->SetInAt(0, Location::Any()); + } +} + +void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) { + SlowPathCodeX86_64* slow_path = new (GetGraph()->GetArena()) + DeoptimizationSlowPathX86_64(deoptimize); + codegen_->AddSlowPath(slow_path); + Label* slow_path_entry = slow_path->GetEntryLabel(); + GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry); +} + void LocationsBuilderX86_64::VisitLocal(HLocal* local) { local->SetLocations(nullptr); } @@ -1068,8 +1143,11 @@ void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) { void InstructionCodeGeneratorX86_64::VisitReturnVoid(HReturnVoid* ret) { UNUSED(ret); + __ cfi().RememberState(); codegen_->GenerateFrameExit(); __ ret(); + __ cfi().RestoreState(); + __ cfi().DefCFAOffset(codegen_->GetFrameSize()); } void LocationsBuilderX86_64::VisitReturn(HReturn* ret) { @@ -1120,8 +1198,11 @@ void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) { LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType(); } } + __ 
cfi().RememberState(); codegen_->GenerateFrameExit(); __ ret(); + __ cfi().RestoreState(); + __ cfi().DefCFAOffset(codegen_->GetFrameSize()); } Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) { @@ -1181,7 +1262,7 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type } void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { - IntrinsicLocationsBuilderX86_64 intrinsic(GetGraph()->GetArena()); + IntrinsicLocationsBuilderX86_64 intrinsic(codegen_); if (intrinsic.TryDispatch(invoke)) { return; } @@ -1242,7 +1323,7 @@ void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) { } void LocationsBuilderX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) { - IntrinsicLocationsBuilderX86_64 intrinsic(GetGraph()->GetArena()); + IntrinsicLocationsBuilderX86_64 intrinsic(codegen_); if (intrinsic.TryDispatch(invoke)) { return; } @@ -1896,7 +1977,7 @@ void LocationsBuilderX86_64::VisitAdd(HAdd* add) { case Primitive::kPrimDouble: case Primitive::kPrimFloat: { locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::Any()); locations->SetOut(Location::SameAsFirstInput()); break; } @@ -1960,12 +2041,30 @@ void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) { } case Primitive::kPrimFloat: { - __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + if (second.IsFpuRegister()) { + __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + } else if (second.IsConstant()) { + __ addss(first.AsFpuRegister<XmmRegister>(), + codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue())); + } else { + DCHECK(second.IsStackSlot()); + __ addss(first.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), second.GetStackIndex())); + } break; } case Primitive::kPrimDouble: { - __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + if (second.IsFpuRegister()) { + __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + } else if (second.IsConstant()) { + __ addsd(first.AsFpuRegister<XmmRegister>(), + codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue())); + } else { + DCHECK(second.IsDoubleStackSlot()); + __ addsd(first.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), second.GetStackIndex())); + } break; } @@ -1993,7 +2092,7 @@ void LocationsBuilderX86_64::VisitSub(HSub* sub) { case Primitive::kPrimFloat: case Primitive::kPrimDouble: { locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::Any()); locations->SetOut(Location::SameAsFirstInput()); break; } @@ -2031,12 +2130,30 @@ void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) { } case Primitive::kPrimFloat: { - __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + if (second.IsFpuRegister()) { + __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + } else if (second.IsConstant()) { + __ subss(first.AsFpuRegister<XmmRegister>(), + codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue())); + } else { + DCHECK(second.IsStackSlot()); + __ subss(first.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), second.GetStackIndex())); + } break; } case Primitive::kPrimDouble: { - __ subsd(first.AsFpuRegister<XmmRegister>(), 
second.AsFpuRegister<XmmRegister>()); + if (second.IsFpuRegister()) { + __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + } else if (second.IsConstant()) { + __ subsd(first.AsFpuRegister<XmmRegister>(), + codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue())); + } else { + DCHECK(second.IsDoubleStackSlot()); + __ subsd(first.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), second.GetStackIndex())); + } break; } @@ -2069,7 +2186,7 @@ void LocationsBuilderX86_64::VisitMul(HMul* mul) { case Primitive::kPrimFloat: case Primitive::kPrimDouble: { locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::Any()); locations->SetOut(Location::SameAsFirstInput()); break; } @@ -2114,13 +2231,31 @@ void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) { case Primitive::kPrimFloat: { DCHECK(first.Equals(locations->Out())); - __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + if (second.IsFpuRegister()) { + __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + } else if (second.IsConstant()) { + __ mulss(first.AsFpuRegister<XmmRegister>(), + codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue())); + } else { + DCHECK(second.IsStackSlot()); + __ mulss(first.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), second.GetStackIndex())); + } break; } case Primitive::kPrimDouble: { DCHECK(first.Equals(locations->Out())); - __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + if (second.IsFpuRegister()) { + __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + } else if (second.IsConstant()) { + __ mulsd(first.AsFpuRegister<XmmRegister>(), + codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue())); + } else { + DCHECK(second.IsDoubleStackSlot()); + __ mulsd(first.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), second.GetStackIndex())); + } break; } @@ -2493,7 +2628,7 @@ void LocationsBuilderX86_64::VisitDiv(HDiv* div) { case Primitive::kPrimFloat: case Primitive::kPrimDouble: { locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::Any()); locations->SetOut(Location::SameAsFirstInput()); break; } @@ -2518,12 +2653,30 @@ void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) { } case Primitive::kPrimFloat: { - __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + if (second.IsFpuRegister()) { + __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + } else if (second.IsConstant()) { + __ divss(first.AsFpuRegister<XmmRegister>(), + codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue())); + } else { + DCHECK(second.IsStackSlot()); + __ divss(first.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), second.GetStackIndex())); + } break; } case Primitive::kPrimDouble: { - __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + if (second.IsFpuRegister()) { + __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + } else if (second.IsConstant()) { + __ divsd(first.AsFpuRegister<XmmRegister>(), + codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue())); + } else { + 
DCHECK(second.IsDoubleStackSlot()); + __ divsd(first.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), second.GetStackIndex())); + } break; } @@ -3668,15 +3821,27 @@ void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg, int mem) { void ParallelMoveResolverX86_64::Exchange64(int mem1, int mem2) { ScratchRegisterScope ensure_scratch( - this, TMP, RAX, codegen_->GetNumberOfCoreRegisters()); + this, TMP, codegen_->GetNumberOfCoreRegisters()); - int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0; - __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset)); - __ movq(CpuRegister(ensure_scratch.GetRegister()), - Address(CpuRegister(RSP), mem2 + stack_offset)); - __ movq(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP)); - __ movq(Address(CpuRegister(RSP), mem1 + stack_offset), - CpuRegister(ensure_scratch.GetRegister())); + int temp_reg = ensure_scratch.GetRegister(); + if (temp_reg == kNoRegister) { + // Use the stack as a temporary. + // Save mem1 on the stack. + __ pushq(Address(CpuRegister(RSP), mem1)); + + // Copy mem2 into mem1. + __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem2 + kX86_64WordSize)); + __ movq(Address(CpuRegister(RSP), mem1 + kX86_64WordSize), CpuRegister(TMP)); + + // Now pop mem1 into mem2. + __ popq(Address(CpuRegister(RSP), mem2)); + } else { + CpuRegister temp = CpuRegister(temp_reg); + __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem1)); + __ movq(temp, Address(CpuRegister(RSP), mem2)); + __ movq(Address(CpuRegister(RSP), mem2), CpuRegister(TMP)); + __ movq(Address(CpuRegister(RSP), mem1), temp); + } } void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) { @@ -3685,6 +3850,13 @@ void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) { __ movd(reg, CpuRegister(TMP)); } +void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg1, CpuRegister reg2) { + // Prefer to avoid xchg as it isn't speedy on smaller processors. + __ movq(CpuRegister(TMP), reg1); + __ movq(reg1, reg2); + __ movq(reg2, CpuRegister(TMP)); +} + void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) { __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem)); __ movsd(Address(CpuRegister(RSP), mem), reg); @@ -3697,7 +3869,7 @@ void ParallelMoveResolverX86_64::EmitSwap(size_t index) { Location destination = move->GetDestination(); if (source.IsRegister() && destination.IsRegister()) { - __ xchgq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>()); + Exchange64(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>()); } else if (source.IsRegister() && destination.IsStackSlot()) { Exchange32(source.AsRegister<CpuRegister>(), destination.GetStackIndex()); } else if (source.IsStackSlot() && destination.IsRegister()) { @@ -4062,5 +4234,66 @@ void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction) { LOG(FATAL) << "Unreachable"; } +void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) { + // Generate the constant area if needed. + X86_64Assembler* assembler = GetAssembler(); + if (!assembler->IsConstantAreaEmpty()) { + // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8 + // byte values. If used for vectors at a later time, this will need to be + // updated to 16 bytes with the appropriate offset. + assembler->Align(4, 0); + constant_area_start_ = assembler->CodeSize(); + assembler->AddConstantArea(); + } + + // And finish up. 
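  // Illustrative note, not from the patch: constant_area_start_ recorded above is
  // what the RIPFixup below adds to each literal's offset. RIP-relative operands
  // encode a 32-bit displacement from the end of the instruction, which is why
  // Process() patches the four bytes just before 'pos' (the offset of the next
  // instruction). Worked example: constant area at code offset 0x200, literal
  // 0x10 bytes into it, displacement field ending at pos = 0x54, so the stored
  // value is (0x200 + 0x10) - 0x54 = 0x1bc.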
+ CodeGenerator::Finalize(allocator); +} + +/** + * Class to handle late fixup of offsets into constant area. + */ +class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocMisc> { + public: + RIPFixup(const CodeGeneratorX86_64& codegen, int offset) + : codegen_(codegen), offset_into_constant_area_(offset) {} + + private: + void Process(const MemoryRegion& region, int pos) OVERRIDE { + // Patch the correct offset for the instruction. We use the address of the + // 'next' instruction, which is 'pos' (patch the 4 bytes before). + int constant_offset = codegen_.ConstantAreaStart() + offset_into_constant_area_; + int relative_position = constant_offset - pos; + + // Patch in the right value. + region.StoreUnaligned<int32_t>(pos - 4, relative_position); + } + + const CodeGeneratorX86_64& codegen_; + + // Location in constant area that the fixup refers to. + int offset_into_constant_area_; +}; + +Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) { + AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddDouble(v)); + return Address::RIP(fixup); +} + +Address CodeGeneratorX86_64::LiteralFloatAddress(float v) { + AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddFloat(v)); + return Address::RIP(fixup); +} + +Address CodeGeneratorX86_64::LiteralInt32Address(int32_t v) { + AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddInt32(v)); + return Address::RIP(fixup); +} + +Address CodeGeneratorX86_64::LiteralInt64Address(int64_t v) { + AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddInt64(v)); + return Address::RIP(fixup); +} + } // namespace x86_64 } // namespace art diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index f6fbc2e6bc..61bf6ac71d 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -118,6 +118,7 @@ class ParallelMoveResolverX86_64 : public ParallelMoveResolver { void Exchange32(CpuRegister reg, int mem); void Exchange32(XmmRegister reg, int mem); void Exchange32(int mem1, int mem2); + void Exchange64(CpuRegister reg1, CpuRegister reg2); void Exchange64(CpuRegister reg, int mem); void Exchange64(XmmRegister reg, int mem); void Exchange64(int mem1, int mem2); @@ -185,6 +186,10 @@ class InstructionCodeGeneratorX86_64 : public HGraphVisitor { void GenerateExplicitNullCheck(HNullCheck* instruction); void PushOntoFPStack(Location source, uint32_t temp_offset, uint32_t stack_adjustment, bool is_float); + void GenerateTestAndBranch(HInstruction* instruction, + Label* true_target, + Label* false_target, + Label* always_true_target); X86_64Assembler* const assembler_; CodeGeneratorX86_64* const codegen_; @@ -194,7 +199,9 @@ class InstructionCodeGeneratorX86_64 : public HGraphVisitor { class CodeGeneratorX86_64 : public CodeGenerator { public: - CodeGeneratorX86_64(HGraph* graph, const CompilerOptions& compiler_options); + CodeGeneratorX86_64(HGraph* graph, + const X86_64InstructionSetFeatures& isa_features, + const CompilerOptions& compiler_options); virtual ~CodeGeneratorX86_64() {} void GenerateFrameEntry() OVERRIDE; @@ -240,6 +247,7 @@ class CodeGeneratorX86_64 : public CodeGenerator { Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE; void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE; void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE; + void Finalize(CodeAllocator* allocator) OVERRIDE; InstructionSet 
GetInstructionSet() const OVERRIDE { return InstructionSet::kX86_64; @@ -267,6 +275,19 @@ class CodeGeneratorX86_64 : public CodeGenerator { void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, CpuRegister temp); + const X86_64InstructionSetFeatures& GetInstructionSetFeatures() const { + return isa_features_; + } + + int ConstantAreaStart() const { + return constant_area_start_; + } + + Address LiteralDoubleAddress(double v); + Address LiteralFloatAddress(float v); + Address LiteralInt32Address(int32_t v); + Address LiteralInt64Address(int64_t v); + private: // Labels for each block that will be compiled. GrowableArray<Label> block_labels_; @@ -275,6 +296,11 @@ class CodeGeneratorX86_64 : public CodeGenerator { InstructionCodeGeneratorX86_64 instruction_visitor_; ParallelMoveResolverX86_64 move_resolver_; X86_64Assembler assembler_; + const X86_64InstructionSetFeatures& isa_features_; + + // Offset to the start of the constant area in the assembled code. + // Used for fixups to the constant area. + int constant_area_start_; DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86_64); }; diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc index 6053ad51f4..2be117bf38 100644 --- a/compiler/optimizing/codegen_test.cc +++ b/compiler/optimizing/codegen_test.cc @@ -19,6 +19,8 @@ #include "arch/instruction_set.h" #include "arch/arm/instruction_set_features_arm.h" #include "arch/arm64/instruction_set_features_arm64.h" +#include "arch/x86/instruction_set_features_x86.h" +#include "arch/x86_64/instruction_set_features_x86_64.h" #include "base/macros.h" #include "builder.h" #include "code_generator_arm.h" @@ -108,7 +110,9 @@ static void RunCodeBaseline(HGraph* graph, bool has_result, Expected expected) { InternalCodeAllocator allocator; CompilerOptions compiler_options; - x86::CodeGeneratorX86 codegenX86(graph, compiler_options); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), compiler_options); // We avoid doing a stack overflow check that requires the runtime being setup, // by making sure the compiler knows the methods we are running are leaf methods. 
codegenX86.CompileBaseline(&allocator, true); @@ -124,7 +128,9 @@ static void RunCodeBaseline(HGraph* graph, bool has_result, Expected expected) { Run(allocator, codegenARM, has_result, expected); } - x86_64::CodeGeneratorX86_64 codegenX86_64(graph, compiler_options); + std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64( + X86_64InstructionSetFeatures::FromCppDefines()); + x86_64::CodeGeneratorX86_64 codegenX86_64(graph, *features_x86_64.get(), compiler_options); codegenX86_64.CompileBaseline(&allocator, true); if (kRuntimeISA == kX86_64) { Run(allocator, codegenX86_64, has_result, expected); @@ -175,10 +181,14 @@ static void RunCodeOptimized(HGraph* graph, compiler_options); RunCodeOptimized(&codegenARM64, graph, hook_before_codegen, has_result, expected); } else if (kRuntimeISA == kX86) { - x86::CodeGeneratorX86 codegenX86(graph, compiler_options); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), compiler_options); RunCodeOptimized(&codegenX86, graph, hook_before_codegen, has_result, expected); } else if (kRuntimeISA == kX86_64) { - x86_64::CodeGeneratorX86_64 codegenX86_64(graph, compiler_options); + std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64( + X86_64InstructionSetFeatures::FromCppDefines()); + x86_64::CodeGeneratorX86_64 codegenX86_64(graph, *features_x86_64.get(), compiler_options); RunCodeOptimized(&codegenX86_64, graph, hook_before_codegen, has_result, expected); } } diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h index fd8c0c6242..966165bf4c 100644 --- a/compiler/optimizing/common_arm64.h +++ b/compiler/optimizing/common_arm64.h @@ -20,8 +20,8 @@ #include "locations.h" #include "nodes.h" #include "utils/arm64/assembler_arm64.h" -#include "a64/disasm-a64.h" -#include "a64/macro-assembler-a64.h" +#include "vixl/a64/disasm-a64.h" +#include "vixl/a64/macro-assembler-a64.h" namespace art { namespace arm64 { diff --git a/compiler/optimizing/constant_folding_test.cc b/compiler/optimizing/constant_folding_test.cc index 6853d54c48..02ad675dc3 100644 --- a/compiler/optimizing/constant_folding_test.cc +++ b/compiler/optimizing/constant_folding_test.cc @@ -16,6 +16,7 @@ #include <functional> +#include "arch/x86/instruction_set_features_x86.h" #include "code_generator_x86.h" #include "constant_folding.h" #include "dead_code_elimination.h" @@ -46,7 +47,9 @@ static void TestCode(const uint16_t* data, std::string actual_before = printer_before.str(); ASSERT_EQ(expected_before, actual_before); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), CompilerOptions()); HConstantFolding(graph).Run(); SSAChecker ssa_checker_cf(&allocator, graph); ssa_checker_cf.Run(); diff --git a/compiler/optimizing/dead_code_elimination_test.cc b/compiler/optimizing/dead_code_elimination_test.cc index a644719622..98ae1ec5d3 100644 --- a/compiler/optimizing/dead_code_elimination_test.cc +++ b/compiler/optimizing/dead_code_elimination_test.cc @@ -14,6 +14,7 @@ * limitations under the License. 
*/ +#include "arch/x86/instruction_set_features_x86.h" #include "code_generator_x86.h" #include "dead_code_elimination.h" #include "driver/compiler_options.h" @@ -40,7 +41,9 @@ static void TestCode(const uint16_t* data, std::string actual_before = printer_before.str(); ASSERT_EQ(actual_before, expected_before); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), CompilerOptions()); HDeadCodeElimination(graph).Run(); SSAChecker ssa_checker(&allocator, graph); ssa_checker.Run(); diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index 49c0d3884f..4c283788b5 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -337,13 +337,11 @@ class HGraphVisualizerPrinter : public HGraphVisitor { HGraphVisualizer::HGraphVisualizer(std::ostream* output, HGraph* graph, - const CodeGenerator& codegen, - const char* method_name) - : output_(output), graph_(graph), codegen_(codegen) { - if (output == nullptr) { - return; - } + const CodeGenerator& codegen) + : output_(output), graph_(graph), codegen_(codegen) {} +void HGraphVisualizer::PrintHeader(const char* method_name) const { + DCHECK(output_ != nullptr); HGraphVisualizerPrinter printer(graph_, *output_, "", true, codegen_); printer.StartTag("compilation"); printer.PrintProperty("name", method_name); diff --git a/compiler/optimizing/graph_visualizer.h b/compiler/optimizing/graph_visualizer.h index bc553aed74..513bceb369 100644 --- a/compiler/optimizing/graph_visualizer.h +++ b/compiler/optimizing/graph_visualizer.h @@ -35,9 +35,9 @@ class HGraphVisualizer : public ValueObject { public: HGraphVisualizer(std::ostream* output, HGraph* graph, - const CodeGenerator& codegen, - const char* method_name); + const CodeGenerator& codegen); + void PrintHeader(const char* method_name) const; void DumpGraph(const char* pass_name, bool is_after_pass = true) const; private: diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc index 628a844cc7..20aa45f197 100644 --- a/compiler/optimizing/intrinsics.cc +++ b/compiler/optimizing/intrinsics.cc @@ -90,7 +90,6 @@ static Intrinsics GetIntrinsic(InlineMethod method) { LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; UNREACHABLE(); } - break; case kIntrinsicReverseBytes: switch (GetType(method.d.data, true)) { case Primitive::kPrimShort: @@ -103,7 +102,6 @@ static Intrinsics GetIntrinsic(InlineMethod method) { LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; UNREACHABLE(); } - break; // Abs. case kIntrinsicAbsDouble: @@ -166,7 +164,6 @@ static Intrinsics GetIntrinsic(InlineMethod method) { LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; UNREACHABLE(); } - break; // Memory.poke. case kIntrinsicPoke: @@ -183,7 +180,6 @@ static Intrinsics GetIntrinsic(InlineMethod method) { LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; UNREACHABLE(); } - break; // String. 
case kIntrinsicCharAt: @@ -211,7 +207,6 @@ static Intrinsics GetIntrinsic(InlineMethod method) { LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; UNREACHABLE(); } - break; case kIntrinsicUnsafeGet: { const bool is_volatile = (method.d.data & kIntrinsicFlagIsVolatile); switch (GetType(method.d.data, false)) { @@ -225,7 +220,6 @@ static Intrinsics GetIntrinsic(InlineMethod method) { LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; UNREACHABLE(); } - break; } case kIntrinsicUnsafePut: { enum Sync { kNoSync, kVolatile, kOrdered }; diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index 33176f009c..94e27e912e 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -776,10 +776,10 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat __ mov(out, ShifterOperand(0), CC); } -void IntrinsicLocationsBuilderARM::VisitUnsafeCASInt(HInvoke* invoke ATTRIBUTE_UNUSED) { +void IntrinsicLocationsBuilderARM::VisitUnsafeCASInt(HInvoke* invoke) { CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke); } -void IntrinsicLocationsBuilderARM::VisitUnsafeCASObject(HInvoke* invoke ATTRIBUTE_UNUSED) { +void IntrinsicLocationsBuilderARM::VisitUnsafeCASObject(HInvoke* invoke) { CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke); } void IntrinsicCodeGeneratorARM::VisitUnsafeCASInt(HInvoke* invoke) { diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 72d303c870..d1176c460f 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -28,8 +28,8 @@ #include "utils/arm64/assembler_arm64.h" #include "utils/arm64/constants_arm64.h" -#include "a64/disasm-a64.h" -#include "a64/macro-assembler-a64.h" +#include "vixl/a64/disasm-a64.h" +#include "vixl/a64/macro-assembler-a64.h" using namespace vixl; // NOLINT(build/namespaces) diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index 384737f55a..aec2d19b1d 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -16,6 +16,7 @@ #include "intrinsics_x86.h" +#include "arch/x86/instruction_set_features_x86.h" #include "code_generator_x86.h" #include "entrypoints/quick/quick_entrypoints.h" #include "intrinsics.h" @@ -34,6 +35,11 @@ static constexpr int kDoubleNaNHigh = 0x7FF80000; static constexpr int kDoubleNaNLow = 0x00000000; static constexpr int kFloatNaN = 0x7FC00000; +IntrinsicLocationsBuilderX86::IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen) + : arena_(codegen->GetGraph()->GetArena()), codegen_(codegen) { +} + + X86Assembler* IntrinsicCodeGeneratorX86::GetAssembler() { return reinterpret_cast<X86Assembler*>(codegen_->GetAssembler()); } @@ -152,6 +158,7 @@ class IntrinsicSlowPathX86 : public SlowPathCodeX86 { if (invoke_->IsInvokeStaticOrDirect()) { codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), EAX); + RecordPcInfo(codegen, invoke_, invoke_->GetDexPc()); } else { UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented"; UNREACHABLE(); @@ -313,6 +320,27 @@ void IntrinsicCodeGeneratorX86::VisitIntegerReverseBytes(HInvoke* invoke) { GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); } +void IntrinsicLocationsBuilderX86::VisitLongReverseBytes(HInvoke* invoke) { + CreateLongToLongLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitLongReverseBytes(HInvoke* invoke) { + 
LocationSummary* locations = invoke->GetLocations(); + Location input = locations->InAt(0); + Register input_lo = input.AsRegisterPairLow<Register>(); + Register input_hi = input.AsRegisterPairHigh<Register>(); + Location output = locations->Out(); + Register output_lo = output.AsRegisterPairLow<Register>(); + Register output_hi = output.AsRegisterPairHigh<Register>(); + + X86Assembler* assembler = GetAssembler(); + // Assign the inputs to the outputs, mixing low/high. + __ movl(output_lo, input_hi); + __ movl(output_hi, input_lo); + __ bswapl(output_lo); + __ bswapl(output_hi); +} + void IntrinsicLocationsBuilderX86::VisitShortReverseBytes(HInvoke* invoke) { CreateIntToIntLocations(arena_, invoke); } @@ -719,6 +747,149 @@ void IntrinsicCodeGeneratorX86::VisitMathSqrt(HInvoke* invoke) { GetAssembler()->sqrtsd(out, in); } +static void InvokeOutOfLineIntrinsic(CodeGeneratorX86* codegen, HInvoke* invoke) { + MoveArguments(invoke, codegen->GetGraph()->GetArena(), codegen); + + DCHECK(invoke->IsInvokeStaticOrDirect()); + codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), EAX); + codegen->RecordPcInfo(invoke, invoke->GetDexPc()); + + // Copy the result back to the expected output. + Location out = invoke->GetLocations()->Out(); + if (out.IsValid()) { + DCHECK(out.IsRegister()); + MoveFromReturnRegister(out, invoke->GetType(), codegen); + } +} + +static void CreateSSE41FPToFPLocations(ArenaAllocator* arena, + HInvoke* invoke, + CodeGeneratorX86* codegen) { + // Do we have instruction support? + if (codegen->GetInstructionSetFeatures().HasSSE4_1()) { + CreateFPToFPLocations(arena, invoke); + return; + } + + // We have to fall back to a call to the intrinsic. + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kCall); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0))); + locations->SetOut(Location::FpuRegisterLocation(XMM0)); + // Needs to be EAX for the invoke. + locations->AddTemp(Location::RegisterLocation(EAX)); +} + +static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86* codegen, + HInvoke* invoke, + X86Assembler* assembler, + int round_mode) { + LocationSummary* locations = invoke->GetLocations(); + if (locations->WillCall()) { + InvokeOutOfLineIntrinsic(codegen, invoke); + } else { + XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>(); + XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); + __ roundsd(out, in, Immediate(round_mode)); + } +} + +void IntrinsicLocationsBuilderX86::VisitMathCeil(HInvoke* invoke) { + CreateSSE41FPToFPLocations(arena_, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86::VisitMathCeil(HInvoke* invoke) { + GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2); +} + +void IntrinsicLocationsBuilderX86::VisitMathFloor(HInvoke* invoke) { + CreateSSE41FPToFPLocations(arena_, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86::VisitMathFloor(HInvoke* invoke) { + GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1); +} + +void IntrinsicLocationsBuilderX86::VisitMathRint(HInvoke* invoke) { + CreateSSE41FPToFPLocations(arena_, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86::VisitMathRint(HInvoke* invoke) { + GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0); +} + +// Note that 32 bit x86 doesn't have the capability to inline MathRoundDouble, +// as it needs 64 bit instructions. 
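// Illustrative sketch, not from the patch: the float-rounding contract the
// SSE4.1 sequence below implements, in portable C++ (assumes <cmath>, <cstdint>
// and <limits>). Negative overflow is left out here; the generated code gets it
// from cvttss2si producing the minimum int for out-of-range inputs.
static int32_t RoundFloatReference(float in) {
  if (std::isnan(in)) {
    return 0;                                         // NaN rounds to 0
  }
  const int32_t max_int = std::numeric_limits<int32_t>::max();
  float floored = std::floor(in + 0.5f);              // bias by 0.5, then floor
  if (floored >= static_cast<float>(max_int)) {
    return max_int;                                   // clamp large inputs
  }
  return static_cast<int32_t>(floored);               // in range: plain conversion
}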
+void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) { + // Do we have instruction support? + if (codegen_->GetInstructionSetFeatures().HasSSE4_1()) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); + return; + } + + // We have to fall back to a call to the intrinsic. + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCall); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0))); + locations->SetOut(Location::RegisterLocation(EAX)); + // Needs to be EAX for the invoke. + locations->AddTemp(Location::RegisterLocation(EAX)); +} + +void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + if (locations->WillCall()) { + InvokeOutOfLineIntrinsic(codegen_, invoke); + return; + } + + // Implement RoundFloat as t1 = floor(input + 0.5f); convert to int. + XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>(); + Register out = locations->Out().AsRegister<Register>(); + XmmRegister maxInt = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + XmmRegister inPlusPointFive = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); + Label done, nan; + X86Assembler* assembler = GetAssembler(); + + // Generate 0.5 into inPlusPointFive. + __ movl(out, Immediate(bit_cast<int32_t, float>(0.5f))); + __ movd(inPlusPointFive, out); + + // Add in the input. + __ addss(inPlusPointFive, in); + + // And truncate to an integer. + __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1)); + + __ movl(out, Immediate(kPrimIntMax)); + // maxInt = int-to-float(out) + __ cvtsi2ss(maxInt, out); + + // if inPlusPointFive >= maxInt goto done + __ comiss(inPlusPointFive, maxInt); + __ j(kAboveEqual, &done); + + // if input == NaN goto nan + __ j(kUnordered, &nan); + + // output = float-to-int-truncate(input) + __ cvttss2si(out, inPlusPointFive); + __ jmp(&done); + __ Bind(&nan); + + // output = 0 + __ xorl(out, out); + __ Bind(&done); +} + void IntrinsicLocationsBuilderX86::VisitStringCharAt(HInvoke* invoke) { // The inputs plus one temp. LocationSummary* locations = new (arena_) LocationSummary(invoke, @@ -1180,6 +1351,181 @@ void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_); } +static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type, + HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::NoLocation()); // Unused receiver. + locations->SetInAt(1, Location::RequiresRegister()); + // Offset is a long, but in 32 bit mode, we only need the low word. + // Can we update the invoke here to remove a TypeConvert to Long? + locations->SetInAt(2, Location::RequiresRegister()); + // Expected value must be in EAX or EDX:EAX. + // For long, new value must be in ECX:EBX. 
+ if (type == Primitive::kPrimLong) { + locations->SetInAt(3, Location::RegisterPairLocation(EAX, EDX)); + locations->SetInAt(4, Location::RegisterPairLocation(EBX, ECX)); + } else { + locations->SetInAt(3, Location::RegisterLocation(EAX)); + locations->SetInAt(4, Location::RequiresRegister()); + } + + // Force a byte register for the output. + locations->SetOut(Location::RegisterLocation(EAX)); + if (type == Primitive::kPrimNot) { + // Need temp registers for card-marking. + locations->AddTemp(Location::RequiresRegister()); + // Need a byte register for marking. + locations->AddTemp(Location::RegisterLocation(ECX)); + } +} + +void IntrinsicLocationsBuilderX86::VisitUnsafeCASInt(HInvoke* invoke) { + CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke); +} + +void IntrinsicLocationsBuilderX86::VisitUnsafeCASLong(HInvoke* invoke) { + CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke); +} + +void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) { + CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke); +} + +static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* codegen) { + X86Assembler* assembler = + reinterpret_cast<X86Assembler*>(codegen->GetAssembler()); + LocationSummary* locations = invoke->GetLocations(); + + Register base = locations->InAt(1).AsRegister<Register>(); + Register offset = locations->InAt(2).AsRegisterPairLow<Register>(); + Location out = locations->Out(); + DCHECK_EQ(out.AsRegister<Register>(), EAX); + + if (type == Primitive::kPrimLong) { + DCHECK_EQ(locations->InAt(3).AsRegisterPairLow<Register>(), EAX); + DCHECK_EQ(locations->InAt(3).AsRegisterPairHigh<Register>(), EDX); + DCHECK_EQ(locations->InAt(4).AsRegisterPairLow<Register>(), EBX); + DCHECK_EQ(locations->InAt(4).AsRegisterPairHigh<Register>(), ECX); + __ LockCmpxchg8b(Address(base, offset, TIMES_1, 0)); + } else { + // Integer or object. + DCHECK_EQ(locations->InAt(3).AsRegister<Register>(), EAX); + Register value = locations->InAt(4).AsRegister<Register>(); + if (type == Primitive::kPrimNot) { + // Mark card for object assuming new value is stored. + codegen->MarkGCCard(locations->GetTemp(0).AsRegister<Register>(), + locations->GetTemp(1).AsRegister<Register>(), + base, + value); + } + + __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value); + } + + // locked cmpxchg has full barrier semantics, and we don't need scheduling + // barriers at this time. + + // Convert ZF into the boolean result. 
+ __ setb(kZero, out.AsRegister<Register>()); + __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>()); +} + +void IntrinsicCodeGeneratorX86::VisitUnsafeCASInt(HInvoke* invoke) { + GenCAS(Primitive::kPrimInt, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86::VisitUnsafeCASLong(HInvoke* invoke) { + GenCAS(Primitive::kPrimLong, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) { + GenCAS(Primitive::kPrimNot, invoke, codegen_); +} + +void IntrinsicLocationsBuilderX86::VisitIntegerReverse(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresRegister()); +} + +static void SwapBits(Register reg, Register temp, int32_t shift, int32_t mask, + X86Assembler* assembler) { + Immediate imm_shift(shift); + Immediate imm_mask(mask); + __ movl(temp, reg); + __ shrl(reg, imm_shift); + __ andl(temp, imm_mask); + __ andl(reg, imm_mask); + __ shll(temp, imm_shift); + __ orl(reg, temp); +} + +void IntrinsicCodeGeneratorX86::VisitIntegerReverse(HInvoke* invoke) { + X86Assembler* assembler = + reinterpret_cast<X86Assembler*>(codegen_->GetAssembler()); + LocationSummary* locations = invoke->GetLocations(); + + Register reg = locations->InAt(0).AsRegister<Register>(); + Register temp = locations->GetTemp(0).AsRegister<Register>(); + + /* + * Use one bswap instruction to reverse byte order first and then use 3 rounds of + * swapping bits to reverse bits in a number x. Using bswap to save instructions + * compared to generic luni implementation which has 5 rounds of swapping bits. + * x = bswap x + * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555; + * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333; + * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F; + */ + __ bswapl(reg); + SwapBits(reg, temp, 1, 0x55555555, assembler); + SwapBits(reg, temp, 2, 0x33333333, assembler); + SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler); +} + +void IntrinsicLocationsBuilderX86::VisitLongReverse(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorX86::VisitLongReverse(HInvoke* invoke) { + X86Assembler* assembler = + reinterpret_cast<X86Assembler*>(codegen_->GetAssembler()); + LocationSummary* locations = invoke->GetLocations(); + + Register reg_low = locations->InAt(0).AsRegisterPairLow<Register>(); + Register reg_high = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register temp = locations->GetTemp(0).AsRegister<Register>(); + + // We want to swap high/low, then bswap each one, and then do the same + // as a 32 bit reverse. + // Exchange high and low. 
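+ // reverse64(x) = (reverse32(lo(x)) << 32) | reverse32(hi(x)), so swap the halves
+ // first and then run each half through the same bswap + SwapBits sequence.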
+ __ movl(temp, reg_low); + __ movl(reg_low, reg_high); + __ movl(reg_high, temp); + + // bit-reverse low + __ bswapl(reg_low); + SwapBits(reg_low, temp, 1, 0x55555555, assembler); + SwapBits(reg_low, temp, 2, 0x33333333, assembler); + SwapBits(reg_low, temp, 4, 0x0f0f0f0f, assembler); + + // bit-reverse high + __ bswapl(reg_high); + SwapBits(reg_high, temp, 1, 0x55555555, assembler); + SwapBits(reg_high, temp, 2, 0x33333333, assembler); + SwapBits(reg_high, temp, 4, 0x0f0f0f0f, assembler); +} + // Unimplemented intrinsics. #define UNIMPLEMENTED_INTRINSIC(Name) \ @@ -1188,20 +1534,10 @@ void IntrinsicLocationsBuilderX86::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSE void IntrinsicCodeGeneratorX86::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ } -UNIMPLEMENTED_INTRINSIC(IntegerReverse) -UNIMPLEMENTED_INTRINSIC(LongReverse) -UNIMPLEMENTED_INTRINSIC(LongReverseBytes) -UNIMPLEMENTED_INTRINSIC(MathFloor) -UNIMPLEMENTED_INTRINSIC(MathCeil) -UNIMPLEMENTED_INTRINSIC(MathRint) UNIMPLEMENTED_INTRINSIC(MathRoundDouble) -UNIMPLEMENTED_INTRINSIC(MathRoundFloat) UNIMPLEMENTED_INTRINSIC(StringIndexOf) UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter) UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) -UNIMPLEMENTED_INTRINSIC(UnsafeCASInt) -UNIMPLEMENTED_INTRINSIC(UnsafeCASLong) -UNIMPLEMENTED_INTRINSIC(UnsafeCASObject) UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) } // namespace x86 diff --git a/compiler/optimizing/intrinsics_x86.h b/compiler/optimizing/intrinsics_x86.h index e1e8260a5f..4292ec7b99 100644 --- a/compiler/optimizing/intrinsics_x86.h +++ b/compiler/optimizing/intrinsics_x86.h @@ -32,7 +32,7 @@ class X86Assembler; class IntrinsicLocationsBuilderX86 FINAL : public IntrinsicVisitor { public: - explicit IntrinsicLocationsBuilderX86(ArenaAllocator* arena) : arena_(arena) {} + explicit IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen); // Define visitor methods. @@ -50,6 +50,7 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) private: ArenaAllocator* arena_; + CodeGeneratorX86* codegen_; DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderX86); }; diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 736cea88cb..cbf94f0f81 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -16,6 +16,7 @@ #include "intrinsics_x86_64.h" +#include "arch/x86_64/instruction_set_features_x86_64.h" #include "code_generator_x86_64.h" #include "entrypoints/quick/quick_entrypoints.h" #include "intrinsics.h" @@ -30,6 +31,11 @@ namespace art { namespace x86_64 { +IntrinsicLocationsBuilderX86_64::IntrinsicLocationsBuilderX86_64(CodeGeneratorX86_64* codegen) + : arena_(codegen->GetGraph()->GetArena()), codegen_(codegen) { +} + + X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() { return reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler()); } @@ -292,25 +298,27 @@ static void CreateFloatToFloatPlusTemps(ArenaAllocator* arena, HInvoke* invoke) // TODO: Allow x86 to work with memory. This requires assembler support, see below. // locations->SetInAt(0, Location::Any()); // X86 can work on memory directly. locations->SetOut(Location::SameAsFirstInput()); - locations->AddTemp(Location::RequiresRegister()); // Immediate constant. - locations->AddTemp(Location::RequiresFpuRegister()); // FP version of above. + locations->AddTemp(Location::RequiresFpuRegister()); // FP reg to hold mask. 
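+ // Note: the GP temp that used to hold the 64-bit immediate is gone; MathAbsFP now
+ // loads the sign-clearing mask from the constant area via LiteralInt64Address() /
+ // LiteralInt32Address().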
} -static void MathAbsFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) { +static void MathAbsFP(LocationSummary* locations, + bool is64bit, + X86_64Assembler* assembler, + CodeGeneratorX86_64* codegen) { Location output = locations->Out(); - CpuRegister cpu_temp = locations->GetTemp(0).AsRegister<CpuRegister>(); if (output.IsFpuRegister()) { // In-register - XmmRegister xmm_temp = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); + XmmRegister xmm_temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + // TODO: Can mask directly with constant area using pand if we can guarantee + // that the literal is aligned on a 16 byte boundary. This will avoid a + // temporary. if (is64bit) { - __ movq(cpu_temp, Immediate(INT64_C(0x7FFFFFFFFFFFFFFF))); - __ movd(xmm_temp, cpu_temp); + __ movsd(xmm_temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF))); __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp); } else { - __ movl(cpu_temp, Immediate(INT64_C(0x7FFFFFFF))); - __ movd(xmm_temp, cpu_temp); + __ movss(xmm_temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF))); __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp); } } else { @@ -335,7 +343,7 @@ void IntrinsicLocationsBuilderX86_64::VisitMathAbsDouble(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), true, GetAssembler()); + MathAbsFP(invoke->GetLocations(), true, GetAssembler(), codegen_); } void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) { @@ -343,7 +351,7 @@ void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), false, GetAssembler()); + MathAbsFP(invoke->GetLocations(), false, GetAssembler(), codegen_); } static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) { @@ -393,8 +401,11 @@ void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) { GenAbsInteger(invoke->GetLocations(), true, GetAssembler()); } -static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double, - X86_64Assembler* assembler) { +static void GenMinMaxFP(LocationSummary* locations, + bool is_min, + bool is_double, + X86_64Assembler* assembler, + CodeGeneratorX86_64* codegen) { Location op1_loc = locations->InAt(0); Location op2_loc = locations->InAt(1); Location out_loc = locations->Out(); @@ -421,7 +432,7 @@ static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double, // // This removes one jmp, but needs to copy one input (op1) to out. // - // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath? + // TODO: This is straight from Quick. Make NaN an out-of-line slowpath? XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>(); @@ -455,14 +466,11 @@ static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double, // NaN handling. __ Bind(&nan); - CpuRegister cpu_temp = locations->GetTemp(0).AsRegister<CpuRegister>(); - // TODO: Literal pool. Trades 64b immediate in CPU reg for direct memory access. 
if (is_double) { - __ movq(cpu_temp, Immediate(INT64_C(0x7FF8000000000000))); + __ movsd(out, codegen->LiteralInt64Address(INT64_C(0x7FF8000000000000))); } else { - __ movl(cpu_temp, Immediate(INT64_C(0x7FC00000))); + __ movss(out, codegen->LiteralInt32Address(INT32_C(0x7FC00000))); } - __ movd(out, cpu_temp, is_double); __ jmp(&done); // out := op2; @@ -477,7 +485,7 @@ static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double, __ Bind(&done); } -static void CreateFPFPToFPPlusTempLocations(ArenaAllocator* arena, HInvoke* invoke) { +static void CreateFPFPToFP(ArenaAllocator* arena, HInvoke* invoke) { LocationSummary* locations = new (arena) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); @@ -486,39 +494,38 @@ static void CreateFPFPToFPPlusTempLocations(ArenaAllocator* arena, HInvoke* invo // The following is sub-optimal, but all we can do for now. It would be fine to also accept // the second input to be the output (we can simply swap inputs). locations->SetOut(Location::SameAsFirstInput()); - locations->AddTemp(Location::RequiresRegister()); // Immediate constant. } void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPPlusTempLocations(arena_, invoke); + CreateFPFPToFP(arena_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler()); + GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler(), codegen_); } void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) { - CreateFPFPToFPPlusTempLocations(arena_, invoke); + CreateFPFPToFP(arena_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler()); + GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler(), codegen_); } void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPPlusTempLocations(arena_, invoke); + CreateFPFPToFP(arena_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler()); + GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler(), codegen_); } void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) { - CreateFPFPToFPPlusTempLocations(arena_, invoke); + CreateFPFPToFP(arena_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler()); + GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler(), codegen_); } static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long, @@ -614,6 +621,203 @@ void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) { GetAssembler()->sqrtsd(out, in); } +static void InvokeOutOfLineIntrinsic(CodeGeneratorX86_64* codegen, HInvoke* invoke) { + MoveArguments(invoke, codegen->GetGraph()->GetArena(), codegen); + + DCHECK(invoke->IsInvokeStaticOrDirect()); + codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), CpuRegister(RDI)); + codegen->RecordPcInfo(invoke, invoke->GetDexPc()); + + // Copy the result back to the expected output. 
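+ // The call leaves its result in the ABI return register (RAX for integral values,
+ // XMM0 for floating point); move it to wherever the allocator placed the output.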
+ Location out = invoke->GetLocations()->Out(); + if (out.IsValid()) { + DCHECK(out.IsRegister()); + MoveFromReturnRegister(out, invoke->GetType(), codegen); + } +} + +static void CreateSSE41FPToFPLocations(ArenaAllocator* arena, + HInvoke* invoke, + CodeGeneratorX86_64* codegen) { + // Do we have instruction support? + if (codegen->GetInstructionSetFeatures().HasSSE4_1()) { + CreateFPToFPLocations(arena, invoke); + return; + } + + // We have to fall back to a call to the intrinsic. + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kCall); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0))); + locations->SetOut(Location::FpuRegisterLocation(XMM0)); + // Needs to be RDI for the invoke. + locations->AddTemp(Location::RegisterLocation(RDI)); +} + +static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86_64* codegen, + HInvoke* invoke, + X86_64Assembler* assembler, + int round_mode) { + LocationSummary* locations = invoke->GetLocations(); + if (locations->WillCall()) { + InvokeOutOfLineIntrinsic(codegen, invoke); + } else { + XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>(); + XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); + __ roundsd(out, in, Immediate(round_mode)); + } +} + +void IntrinsicLocationsBuilderX86_64::VisitMathCeil(HInvoke* invoke) { + CreateSSE41FPToFPLocations(arena_, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathCeil(HInvoke* invoke) { + GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathFloor(HInvoke* invoke) { + CreateSSE41FPToFPLocations(arena_, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathFloor(HInvoke* invoke) { + GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathRint(HInvoke* invoke) { + CreateSSE41FPToFPLocations(arena_, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathRint(HInvoke* invoke) { + GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0); +} + +static void CreateSSE41FPToIntLocations(ArenaAllocator* arena, + HInvoke* invoke, + CodeGeneratorX86_64* codegen) { + // Do we have instruction support? + if (codegen->GetInstructionSetFeatures().HasSSE4_1()) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); + return; + } + + // We have to fall back to a call to the intrinsic. + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kCall); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0))); + locations->SetOut(Location::RegisterLocation(RAX)); + // Needs to be RDI for the invoke. 
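+ // (RDI is the register InvokeOutOfLineIntrinsic hands to GenerateStaticOrDirectCall
+ // for the method pointer.)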
+ locations->AddTemp(Location::RegisterLocation(RDI)); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathRoundFloat(HInvoke* invoke) { + CreateSSE41FPToIntLocations(arena_, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + if (locations->WillCall()) { + InvokeOutOfLineIntrinsic(codegen_, invoke); + return; + } + + // Implement RoundFloat as t1 = floor(input + 0.5f); convert to int. + XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>(); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + XmmRegister maxInt = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + XmmRegister inPlusPointFive = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); + Label done, nan; + X86_64Assembler* assembler = GetAssembler(); + + // Generate 0.5 into inPlusPointFive. + __ movl(out, Immediate(bit_cast<int32_t, float>(0.5f))); + __ movd(inPlusPointFive, out, false); + + // Add in the input. + __ addss(inPlusPointFive, in); + + // And truncate to an integer. + __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1)); + + __ movl(out, Immediate(kPrimIntMax)); + // maxInt = int-to-float(out) + __ cvtsi2ss(maxInt, out); + + // if inPlusPointFive >= maxInt goto done + __ comiss(inPlusPointFive, maxInt); + __ j(kAboveEqual, &done); + + // if input == NaN goto nan + __ j(kUnordered, &nan); + + // output = float-to-int-truncate(input) + __ cvttss2si(out, inPlusPointFive); + __ jmp(&done); + __ Bind(&nan); + + // output = 0 + __ xorl(out, out); + __ Bind(&done); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathRoundDouble(HInvoke* invoke) { + CreateSSE41FPToIntLocations(arena_, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + if (locations->WillCall()) { + InvokeOutOfLineIntrinsic(codegen_, invoke); + return; + } + + // Implement RoundDouble as t1 = floor(input + 0.5); convert to long. + XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>(); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + XmmRegister maxLong = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + XmmRegister inPlusPointFive = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); + Label done, nan; + X86_64Assembler* assembler = GetAssembler(); + + // Generate 0.5 into inPlusPointFive. + __ movq(out, Immediate(bit_cast<int64_t, double>(0.5))); + __ movd(inPlusPointFive, out, true); + + // Add in the input. + __ addsd(inPlusPointFive, in); + + // And truncate to an integer. + __ roundsd(inPlusPointFive, inPlusPointFive, Immediate(1)); + + __ movq(out, Immediate(kPrimLongMax)); + // maxLong = long-to-double(out) + __ cvtsi2sd(maxLong, out, true); + + // if inPlusPointFive >= maxLong goto done + __ comisd(inPlusPointFive, maxLong); + __ j(kAboveEqual, &done); + + // if input == NaN goto nan + __ j(kUnordered, &nan); + + // output = double-to-long-truncate(input) + __ cvttsd2si(out, inPlusPointFive, true); + __ jmp(&done); + __ Bind(&nan); + + // output = 0 + __ xorq(out, out); + __ Bind(&done); +} + void IntrinsicLocationsBuilderX86_64::VisitStringCharAt(HInvoke* invoke) { // The inputs plus one temp. 
LocationSummary* locations = new (arena_) LocationSummary(invoke, @@ -999,6 +1203,175 @@ void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_); } +static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type, + HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::NoLocation()); // Unused receiver. + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(2, Location::RequiresRegister()); + // expected value must be in EAX/RAX. + locations->SetInAt(3, Location::RegisterLocation(RAX)); + locations->SetInAt(4, Location::RequiresRegister()); + + locations->SetOut(Location::RequiresRegister()); + if (type == Primitive::kPrimNot) { + // Need temp registers for card-marking. + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + } +} + +void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASInt(HInvoke* invoke) { + CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke); +} + +void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) { + CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke); +} + +void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) { + CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke); +} + +static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* codegen) { + X86_64Assembler* assembler = + reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler()); + LocationSummary* locations = invoke->GetLocations(); + + CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>(); + CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>(); + CpuRegister expected = locations->InAt(3).AsRegister<CpuRegister>(); + DCHECK_EQ(expected.AsRegister(), RAX); + CpuRegister value = locations->InAt(4).AsRegister<CpuRegister>(); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + + if (type == Primitive::kPrimLong) { + __ LockCmpxchgq(Address(base, offset, TIMES_1, 0), value); + } else { + // Integer or object. + if (type == Primitive::kPrimNot) { + // Mark card for object assuming new value is stored. + codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(), + locations->GetTemp(1).AsRegister<CpuRegister>(), + base, + value); + } + + __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value); + } + + // locked cmpxchg has full barrier semantics, and we don't need scheduling + // barriers at this time. + + // Convert ZF into the boolean result. 
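+ // As on x86, ZF from the locked cmpxchg becomes the boolean result. Any output
+ // register is fine here: with a REX prefix setcc can address the low byte of
+ // every GP register, so no byte-register pinning is needed.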
+ __ setcc(kZero, out); + __ movzxb(out, out); +} + +void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASInt(HInvoke* invoke) { + GenCAS(Primitive::kPrimInt, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASLong(HInvoke* invoke) { + GenCAS(Primitive::kPrimLong, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) { + GenCAS(Primitive::kPrimNot, invoke, codegen_); +} + +void IntrinsicLocationsBuilderX86_64::VisitIntegerReverse(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresRegister()); +} + +static void SwapBits(CpuRegister reg, CpuRegister temp, int32_t shift, int32_t mask, + X86_64Assembler* assembler) { + Immediate imm_shift(shift); + Immediate imm_mask(mask); + __ movl(temp, reg); + __ shrl(reg, imm_shift); + __ andl(temp, imm_mask); + __ andl(reg, imm_mask); + __ shll(temp, imm_shift); + __ orl(reg, temp); +} + +void IntrinsicCodeGeneratorX86_64::VisitIntegerReverse(HInvoke* invoke) { + X86_64Assembler* assembler = + reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler()); + LocationSummary* locations = invoke->GetLocations(); + + CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>(); + CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); + + /* + * Use one bswap instruction to reverse byte order first and then use 3 rounds of + * swapping bits to reverse bits in a number x. Using bswap to save instructions + * compared to generic luni implementation which has 5 rounds of swapping bits. + * x = bswap x + * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555; + * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333; + * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F; + */ + __ bswapl(reg); + SwapBits(reg, temp, 1, 0x55555555, assembler); + SwapBits(reg, temp, 2, 0x33333333, assembler); + SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler); +} + +void IntrinsicLocationsBuilderX86_64::VisitLongReverse(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); +} + +static void SwapBits64(CpuRegister reg, CpuRegister temp, CpuRegister temp_mask, + int32_t shift, int64_t mask, X86_64Assembler* assembler) { + Immediate imm_shift(shift); + __ movq(temp_mask, Immediate(mask)); + __ movq(temp, reg); + __ shrq(reg, imm_shift); + __ andq(temp, temp_mask); + __ andq(reg, temp_mask); + __ shlq(temp, imm_shift); + __ orq(reg, temp); +} + +void IntrinsicCodeGeneratorX86_64::VisitLongReverse(HInvoke* invoke) { + X86_64Assembler* assembler = + reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler()); + LocationSummary* locations = invoke->GetLocations(); + + CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>(); + CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>(); + CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>(); + + /* + * Use one bswap instruction to reverse byte order first and then use 3 rounds of + * swapping bits to reverse bits in a long number x. 
Using bswap to save instructions + * compared to generic luni implementation which has 5 rounds of swapping bits. + * x = bswap x + * x = (x & 0x5555555555555555) << 1 | (x >> 1) & 0x5555555555555555; + * x = (x & 0x3333333333333333) << 2 | (x >> 2) & 0x3333333333333333; + * x = (x & 0x0F0F0F0F0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F0F0F0F0F; + */ + __ bswapq(reg); + SwapBits64(reg, temp1, temp2, 1, INT64_C(0x5555555555555555), assembler); + SwapBits64(reg, temp1, temp2, 2, INT64_C(0x3333333333333333), assembler); + SwapBits64(reg, temp1, temp2, 4, INT64_C(0x0f0f0f0f0f0f0f0f), assembler); +} + // Unimplemented intrinsics. #define UNIMPLEMENTED_INTRINSIC(Name) \ @@ -1007,19 +1380,9 @@ void IntrinsicLocationsBuilderX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UN void IntrinsicCodeGeneratorX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ } -UNIMPLEMENTED_INTRINSIC(IntegerReverse) -UNIMPLEMENTED_INTRINSIC(LongReverse) -UNIMPLEMENTED_INTRINSIC(MathFloor) -UNIMPLEMENTED_INTRINSIC(MathCeil) -UNIMPLEMENTED_INTRINSIC(MathRint) -UNIMPLEMENTED_INTRINSIC(MathRoundDouble) -UNIMPLEMENTED_INTRINSIC(MathRoundFloat) UNIMPLEMENTED_INTRINSIC(StringIndexOf) UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter) UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) -UNIMPLEMENTED_INTRINSIC(UnsafeCASInt) -UNIMPLEMENTED_INTRINSIC(UnsafeCASLong) -UNIMPLEMENTED_INTRINSIC(UnsafeCASObject) UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) } // namespace x86_64 diff --git a/compiler/optimizing/intrinsics_x86_64.h b/compiler/optimizing/intrinsics_x86_64.h index dfae7fa90e..0e0e72c1fc 100644 --- a/compiler/optimizing/intrinsics_x86_64.h +++ b/compiler/optimizing/intrinsics_x86_64.h @@ -32,7 +32,7 @@ class X86_64Assembler; class IntrinsicLocationsBuilderX86_64 FINAL : public IntrinsicVisitor { public: - explicit IntrinsicLocationsBuilderX86_64(ArenaAllocator* arena) : arena_(arena) {} + explicit IntrinsicLocationsBuilderX86_64(CodeGeneratorX86_64* codegen); // Define visitor methods. @@ -50,6 +50,7 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) private: ArenaAllocator* arena_; + CodeGeneratorX86_64* codegen_; DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderX86_64); }; diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc index f22b7a7e82..28c5555d57 100644 --- a/compiler/optimizing/linearize_test.cc +++ b/compiler/optimizing/linearize_test.cc @@ -16,6 +16,7 @@ #include <fstream> +#include "arch/x86/instruction_set_features_x86.h" #include "base/arena_allocator.h" #include "base/stringprintf.h" #include "builder.h" @@ -46,7 +47,9 @@ static void TestCode(const uint16_t* data, const int* expected_order, size_t num graph->TryBuildingSsa(); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc index c102c4f02f..61d6593f2b 100644 --- a/compiler/optimizing/live_ranges_test.cc +++ b/compiler/optimizing/live_ranges_test.cc @@ -14,6 +14,7 @@ * limitations under the License. 
*/ +#include "arch/x86/instruction_set_features_x86.h" #include "base/arena_allocator.h" #include "builder.h" #include "code_generator.h" @@ -65,7 +66,9 @@ TEST(LiveRangesTest, CFG1) { ArenaAllocator allocator(&pool); HGraph* graph = BuildGraph(data, &allocator); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -111,7 +114,9 @@ TEST(LiveRangesTest, CFG2) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = BuildGraph(data, &allocator); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -160,7 +165,9 @@ TEST(LiveRangesTest, CFG3) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = BuildGraph(data, &allocator); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -237,7 +244,9 @@ TEST(LiveRangesTest, Loop1) { ArenaAllocator allocator(&pool); HGraph* graph = BuildGraph(data, &allocator); RemoveSuspendChecks(graph); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -315,7 +324,9 @@ TEST(LiveRangesTest, Loop2) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = BuildGraph(data, &allocator); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -391,7 +402,9 @@ TEST(LiveRangesTest, CFG4) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = BuildGraph(data, &allocator); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc index 0b0cfde0cf..81250ca133 100644 --- a/compiler/optimizing/liveness_test.cc +++ b/compiler/optimizing/liveness_test.cc @@ -14,6 +14,7 @@ * limitations under the License. */ +#include "arch/x86/instruction_set_features_x86.h" #include "base/arena_allocator.h" #include "builder.h" #include "code_generator.h" @@ -53,7 +54,9 @@ static void TestCode(const uint16_t* data, const char* expected) { graph->TryBuildingSsa(); // `Inline` conditions into ifs. 
PrepareForRegisterAllocation(graph).Run(); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index dca612e6b7..d8a8554610 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -752,8 +752,8 @@ HInstruction* HBinaryOperation::GetLeastConstantLeft() const { } } -bool HCondition::IsBeforeWhenDisregardMoves(HIf* if_) const { - return this == if_->GetPreviousDisregardingMoves(); +bool HCondition::IsBeforeWhenDisregardMoves(HInstruction* instruction) const { + return this == instruction->GetPreviousDisregardingMoves(); } bool HInstruction::Equals(HInstruction* other) const { diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 21ed3504f1..f764eb421f 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -682,6 +682,7 @@ class HLoopInformationOutwardIterator : public ValueObject { M(ClinitCheck, Instruction) \ M(Compare, BinaryOperation) \ M(Condition, BinaryOperation) \ + M(Deoptimize, Instruction) \ M(Div, BinaryOperation) \ M(DivZeroCheck, Instruction) \ M(DoubleConstant, Constant) \ @@ -1191,7 +1192,17 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> { bool HasEnvironment() const { return environment_ != nullptr; } HEnvironment* GetEnvironment() const { return environment_; } - void SetEnvironment(HEnvironment* environment) { environment_ = environment; } + // Set the `environment_` field. Raw because this method does not + // update the uses lists. + void SetRawEnvironment(HEnvironment* environment) { environment_ = environment; } + + // Set the environment of this instruction, copying it from `environment`. While + // copying, the uses lists are being updated. + void CopyEnvironmentFrom(HEnvironment* environment) { + ArenaAllocator* allocator = GetBlock()->GetGraph()->GetArena(); + environment_ = new (allocator) HEnvironment(allocator, environment->Size()); + environment_->CopyFrom(environment); + } // Returns the number of entries in the environment. Typically, that is the // number of dex registers in a method. It could be more in case of inlining. @@ -1544,12 +1555,31 @@ class HIf : public HTemplateInstruction<1> { DECLARE_INSTRUCTION(If); - virtual bool IsIfInstruction() const { return true; } - private: DISALLOW_COPY_AND_ASSIGN(HIf); }; +// Deoptimize to interpreter, upon checking a condition. +class HDeoptimize : public HTemplateInstruction<1> { + public: + HDeoptimize(HInstruction* cond, uint32_t dex_pc) + : HTemplateInstruction(SideEffects::None()), + dex_pc_(dex_pc) { + SetRawInputAt(0, cond); + } + + bool NeedsEnvironment() const OVERRIDE { return true; } + bool CanThrow() const OVERRIDE { return true; } + uint32_t GetDexPc() const { return dex_pc_; } + + DECLARE_INSTRUCTION(Deoptimize); + + private: + uint32_t dex_pc_; + + DISALLOW_COPY_AND_ASSIGN(HDeoptimize); +}; + class HUnaryOperation : public HExpression<1> { public: HUnaryOperation(Primitive::Type result_type, HInstruction* input) @@ -1667,8 +1697,8 @@ class HCondition : public HBinaryOperation { void ClearNeedsMaterialization() { needs_materialization_ = false; } // For code generation purposes, returns whether this instruction is just before - // `if_`, and disregard moves in between. 
- bool IsBeforeWhenDisregardMoves(HIf* if_) const; + // `instruction`, and disregard moves in between. + bool IsBeforeWhenDisregardMoves(HInstruction* instruction) const; DECLARE_INSTRUCTION(Condition); @@ -2307,6 +2337,9 @@ class HNewArray : public HExpression<1> { // Calls runtime so needs an environment. bool NeedsEnvironment() const OVERRIDE { return true; } + // May throw NegativeArraySizeException, OutOfMemoryError, etc. + bool CanThrow() const OVERRIDE { return true; } + bool CanBeNull() const OVERRIDE { return false; } QuickEntrypointEnum GetEntrypoint() const { return entrypoint_; } diff --git a/compiler/optimizing/nodes_test.cc b/compiler/optimizing/nodes_test.cc index 4cf22d3b2e..4e83ce576c 100644 --- a/compiler/optimizing/nodes_test.cc +++ b/compiler/optimizing/nodes_test.cc @@ -50,7 +50,7 @@ TEST(Node, RemoveInstruction) { exit_block->AddInstruction(new (&allocator) HExit()); HEnvironment* environment = new (&allocator) HEnvironment(&allocator, 1); - null_check->SetEnvironment(environment); + null_check->SetRawEnvironment(environment); environment->SetRawEnvAt(0, parameter); parameter->AddEnvUseAt(null_check->GetEnvironment(), 0); diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc new file mode 100644 index 0000000000..6d986ba7d3 --- /dev/null +++ b/compiler/optimizing/optimizing_cfi_test.cc @@ -0,0 +1,127 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <memory> +#include <vector> + +#include "arch/instruction_set.h" +#include "cfi_test.h" +#include "gtest/gtest.h" +#include "optimizing/code_generator.h" +#include "utils/assembler.h" + +#include "optimizing/optimizing_cfi_test_expected.inc" + +namespace art { + +// Run the tests only on host. +#ifndef HAVE_ANDROID_OS + +class OptimizingCFITest : public CFITest { + public: + // Enable this flag to generate the expected outputs. + static constexpr bool kGenerateExpected = false; + + void TestImpl(InstructionSet isa, const char* isa_str, + const std::vector<uint8_t>& expected_asm, + const std::vector<uint8_t>& expected_cfi) { + // Setup simple context. + ArenaPool pool; + ArenaAllocator allocator(&pool); + CompilerOptions opts; + std::unique_ptr<const InstructionSetFeatures> isa_features; + std::string error; + isa_features.reset(InstructionSetFeatures::FromVariant(isa, "default", &error)); + HGraph graph(&allocator); + // Generate simple frame with some spills. + std::unique_ptr<CodeGenerator> code_gen( + CodeGenerator::Create(&graph, isa, *isa_features.get(), opts)); + const int frame_size = 64; + int core_reg = 0; + int fp_reg = 0; + for (int i = 0; i < 2; i++) { // Two registers of each kind. 
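+ // Grab the first two callee-save core registers and the first two callee-save
+ // FP registers, so that GenerateFrameEntry() below has registers to spill and
+ // therefore emits the CFI this test checks.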
+ for (; core_reg < 32; core_reg++) { + if (code_gen->IsCoreCalleeSaveRegister(core_reg)) { + auto location = Location::RegisterLocation(core_reg); + code_gen->AddAllocatedRegister(location); + core_reg++; + break; + } + } + for (; fp_reg < 32; fp_reg++) { + if (code_gen->IsFloatingPointCalleeSaveRegister(fp_reg)) { + auto location = Location::FpuRegisterLocation(fp_reg); + code_gen->AddAllocatedRegister(location); + fp_reg++; + break; + } + } + } + code_gen->ComputeSpillMask(); + code_gen->SetFrameSize(frame_size); + code_gen->GenerateFrameEntry(); + code_gen->GetInstructionVisitor()->VisitReturnVoid(new (&allocator) HReturnVoid()); + // Get the outputs. + InternalCodeAllocator code_allocator; + code_gen->Finalize(&code_allocator); + const std::vector<uint8_t>& actual_asm = code_allocator.GetMemory(); + Assembler* opt_asm = code_gen->GetAssembler(); + const std::vector<uint8_t>& actual_cfi = *(opt_asm->cfi().data()); + + if (kGenerateExpected) { + GenerateExpected(stdout, isa, isa_str, actual_asm, actual_cfi); + } else { + EXPECT_EQ(expected_asm, actual_asm); + EXPECT_EQ(expected_cfi, actual_cfi); + } + } + + private: + class InternalCodeAllocator : public CodeAllocator { + public: + InternalCodeAllocator() {} + + virtual uint8_t* Allocate(size_t size) { + memory_.resize(size); + return memory_.data(); + } + + const std::vector<uint8_t>& GetMemory() { return memory_; } + + private: + std::vector<uint8_t> memory_; + + DISALLOW_COPY_AND_ASSIGN(InternalCodeAllocator); + }; +}; + +#define TEST_ISA(isa) \ + TEST_F(OptimizingCFITest, isa) { \ + std::vector<uint8_t> expected_asm(expected_asm_##isa, \ + expected_asm_##isa + arraysize(expected_asm_##isa)); \ + std::vector<uint8_t> expected_cfi(expected_cfi_##isa, \ + expected_cfi_##isa + arraysize(expected_cfi_##isa)); \ + TestImpl(isa, #isa, expected_asm, expected_cfi); \ + } + +TEST_ISA(kThumb2) +TEST_ISA(kArm64) +TEST_ISA(kX86) +TEST_ISA(kX86_64) + +#endif // HAVE_ANDROID_OS + +} // namespace art diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc new file mode 100644 index 0000000000..2125f6eb01 --- /dev/null +++ b/compiler/optimizing/optimizing_cfi_test_expected.inc @@ -0,0 +1,141 @@ +static constexpr uint8_t expected_asm_kThumb2[] = { + 0x60, 0xB5, 0x2D, 0xED, 0x02, 0x8A, 0x8B, 0xB0, 0x00, 0x90, 0x0B, 0xB0, + 0xBD, 0xEC, 0x02, 0x8A, 0x60, 0xBD, +}; +static constexpr uint8_t expected_cfi_kThumb2[] = { + 0x42, 0x0E, 0x0C, 0x85, 0x03, 0x86, 0x02, 0x8E, 0x01, 0x44, 0x0E, 0x14, + 0x05, 0x50, 0x05, 0x05, 0x51, 0x04, 0x42, 0x0E, 0x40, 0x42, 0x0A, 0x42, + 0x0E, 0x14, 0x44, 0x0E, 0x0C, 0x06, 0x50, 0x06, 0x51, 0x42, 0x0B, 0x0E, + 0x40, +}; +// 0x00000000: push {r5, r6, lr} +// 0x00000002: .cfi_def_cfa_offset: 12 +// 0x00000002: .cfi_offset: r5 at cfa-12 +// 0x00000002: .cfi_offset: r6 at cfa-8 +// 0x00000002: .cfi_offset: r14 at cfa-4 +// 0x00000002: vpush.f32 {s16-s17} +// 0x00000006: .cfi_def_cfa_offset: 20 +// 0x00000006: .cfi_offset_extended: r80 at cfa-20 +// 0x00000006: .cfi_offset_extended: r81 at cfa-16 +// 0x00000006: sub sp, sp, #44 +// 0x00000008: .cfi_def_cfa_offset: 64 +// 0x00000008: str r0, [sp, #0] +// 0x0000000a: .cfi_remember_state +// 0x0000000a: add sp, sp, #44 +// 0x0000000c: .cfi_def_cfa_offset: 20 +// 0x0000000c: vpop.f32 {s16-s17} +// 0x00000010: .cfi_def_cfa_offset: 12 +// 0x00000010: .cfi_restore_extended: r80 +// 0x00000010: .cfi_restore_extended: r81 +// 0x00000010: pop {r5, r6, pc} +// 0x00000012: .cfi_restore_state +// 0x00000012: .cfi_def_cfa_offset: 64 
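+// The remaining blocks follow the same layout: raw assembly bytes, raw CFI bytes,
+// then the interleaved disassembly/.cfi_ listing produced by GenerateExpected()
+// when kGenerateExpected is enabled.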
+ +static constexpr uint8_t expected_asm_kArm64[] = { + 0xE0, 0x0F, 0x1C, 0xB8, 0xF3, 0xD3, 0x02, 0xA9, 0xFE, 0x1F, 0x00, 0xF9, + 0xE8, 0xA7, 0x01, 0x6D, 0xE8, 0xA7, 0x41, 0x6D, 0xF3, 0xD3, 0x42, 0xA9, + 0xFE, 0x1F, 0x40, 0xF9, 0xFF, 0x03, 0x01, 0x91, 0xC0, 0x03, 0x5F, 0xD6, +}; +static constexpr uint8_t expected_cfi_kArm64[] = { + 0x44, 0x0E, 0x40, 0x44, 0x93, 0x06, 0x94, 0x04, 0x44, 0x9E, 0x02, 0x44, + 0x05, 0x48, 0x0A, 0x05, 0x49, 0x08, 0x0A, 0x44, 0x06, 0x48, 0x06, 0x49, + 0x44, 0xD3, 0xD4, 0x44, 0xDE, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0x40, +}; +// 0x00000000: str w0, [sp, #-64]! +// 0x00000004: .cfi_def_cfa_offset: 64 +// 0x00000004: stp x19, x20, [sp, #40] +// 0x00000008: .cfi_offset: r19 at cfa-24 +// 0x00000008: .cfi_offset: r20 at cfa-16 +// 0x00000008: str lr, [sp, #56] +// 0x0000000c: .cfi_offset: r30 at cfa-8 +// 0x0000000c: stp d8, d9, [sp, #24] +// 0x00000010: .cfi_offset_extended: r72 at cfa-40 +// 0x00000010: .cfi_offset_extended: r73 at cfa-32 +// 0x00000010: .cfi_remember_state +// 0x00000010: ldp d8, d9, [sp, #24] +// 0x00000014: .cfi_restore_extended: r72 +// 0x00000014: .cfi_restore_extended: r73 +// 0x00000014: ldp x19, x20, [sp, #40] +// 0x00000018: .cfi_restore: r19 +// 0x00000018: .cfi_restore: r20 +// 0x00000018: ldr lr, [sp, #56] +// 0x0000001c: .cfi_restore: r30 +// 0x0000001c: add sp, sp, #0x40 (64) +// 0x00000020: .cfi_def_cfa_offset: 0 +// 0x00000020: ret +// 0x00000024: .cfi_restore_state +// 0x00000024: .cfi_def_cfa_offset: 64 + +static constexpr uint8_t expected_asm_kX86[] = { + 0x56, 0x55, 0x83, 0xEC, 0x34, 0x89, 0x04, 0x24, 0x83, 0xC4, 0x34, 0x5D, + 0x5E, 0xC3, +}; +static constexpr uint8_t expected_cfi_kX86[] = { + 0x41, 0x0E, 0x08, 0x86, 0x02, 0x41, 0x0E, 0x0C, 0x85, 0x03, 0x43, 0x0E, + 0x40, 0x43, 0x0A, 0x43, 0x0E, 0x0C, 0x41, 0x0E, 0x08, 0xC5, 0x41, 0x0E, + 0x04, 0xC6, 0x41, 0x0B, 0x0E, 0x40, +}; +// 0x00000000: push esi +// 0x00000001: .cfi_def_cfa_offset: 8 +// 0x00000001: .cfi_offset: r6 at cfa-8 +// 0x00000001: push ebp +// 0x00000002: .cfi_def_cfa_offset: 12 +// 0x00000002: .cfi_offset: r5 at cfa-12 +// 0x00000002: sub esp, 52 +// 0x00000005: .cfi_def_cfa_offset: 64 +// 0x00000005: mov [esp], eax +// 0x00000008: .cfi_remember_state +// 0x00000008: add esp, 52 +// 0x0000000b: .cfi_def_cfa_offset: 12 +// 0x0000000b: pop ebp +// 0x0000000c: .cfi_def_cfa_offset: 8 +// 0x0000000c: .cfi_restore: r5 +// 0x0000000c: pop esi +// 0x0000000d: .cfi_def_cfa_offset: 4 +// 0x0000000d: .cfi_restore: r6 +// 0x0000000d: ret +// 0x0000000e: .cfi_restore_state +// 0x0000000e: .cfi_def_cfa_offset: 64 + +static constexpr uint8_t expected_asm_kX86_64[] = { + 0x55, 0x53, 0x48, 0x83, 0xEC, 0x28, 0xF2, 0x44, 0x0F, 0x11, 0x6C, 0x24, + 0x20, 0xF2, 0x44, 0x0F, 0x11, 0x64, 0x24, 0x18, 0x89, 0x3C, 0x24, 0xF2, + 0x44, 0x0F, 0x10, 0x64, 0x24, 0x18, 0xF2, 0x44, 0x0F, 0x10, 0x6C, 0x24, + 0x20, 0x48, 0x83, 0xC4, 0x28, 0x5B, 0x5D, 0xC3, +}; +static constexpr uint8_t expected_cfi_kX86_64[] = { + 0x41, 0x0E, 0x10, 0x86, 0x04, 0x41, 0x0E, 0x18, 0x83, 0x06, 0x44, 0x0E, + 0x40, 0x47, 0x9E, 0x08, 0x47, 0x9D, 0x0A, 0x43, 0x0A, 0x47, 0xDD, 0x47, + 0xDE, 0x44, 0x0E, 0x18, 0x41, 0x0E, 0x10, 0xC3, 0x41, 0x0E, 0x08, 0xC6, + 0x41, 0x0B, 0x0E, 0x40, +}; +// 0x00000000: push rbp +// 0x00000001: .cfi_def_cfa_offset: 16 +// 0x00000001: .cfi_offset: r6 at cfa-16 +// 0x00000001: push rbx +// 0x00000002: .cfi_def_cfa_offset: 24 +// 0x00000002: .cfi_offset: r3 at cfa-24 +// 0x00000002: subq rsp, 40 +// 0x00000006: .cfi_def_cfa_offset: 64 +// 0x00000006: movsd [rsp + 32], xmm13 +// 0x0000000d: 
.cfi_offset: r30 at cfa-32 +// 0x0000000d: movsd [rsp + 24], xmm12 +// 0x00000014: .cfi_offset: r29 at cfa-40 +// 0x00000014: mov [rsp], edi +// 0x00000017: .cfi_remember_state +// 0x00000017: movsd xmm12, [rsp + 24] +// 0x0000001e: .cfi_restore: r29 +// 0x0000001e: movsd xmm13, [rsp + 32] +// 0x00000025: .cfi_restore: r30 +// 0x00000025: addq rsp, 40 +// 0x00000029: .cfi_def_cfa_offset: 24 +// 0x00000029: pop rbx +// 0x0000002a: .cfi_def_cfa_offset: 16 +// 0x0000002a: .cfi_restore: r3 +// 0x0000002a: pop rbp +// 0x0000002b: .cfi_def_cfa_offset: 8 +// 0x0000002b: .cfi_restore: r6 +// 0x0000002b: ret +// 0x0000002c: .cfi_restore_state +// 0x0000002c: .cfi_def_cfa_offset: 64 + diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index b2f9c65153..0e02212867 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -26,11 +26,13 @@ #include "bounds_check_elimination.h" #include "builder.h" #include "code_generator.h" +#include "compiled_method.h" #include "compiler.h" #include "constant_folding.h" #include "dead_code_elimination.h" #include "dex/quick/dex_file_to_method_inliner_map.h" #include "driver/compiler_driver.h" +#include "driver/compiler_options.h" #include "driver/dex_compilation_unit.h" #include "elf_writer_quick.h" #include "graph_visualizer.h" @@ -48,6 +50,7 @@ #include "ssa_builder.h" #include "ssa_phi_elimination.h" #include "ssa_liveness_analysis.h" +#include "utils/assembler.h" #include "reference_type_propagation.h" namespace art { @@ -94,10 +97,13 @@ class PassInfoPrinter : public ValueObject { timing_logger_enabled_(compiler_driver->GetDumpPasses()), timing_logger_(method_name, true, true), visualizer_enabled_(!compiler_driver->GetDumpCfgFileName().empty()), - visualizer_(visualizer_output, graph, codegen, method_name_) { + visualizer_(visualizer_output, graph, codegen) { if (strstr(method_name, kStringFilter) == nullptr) { timing_logger_enabled_ = visualizer_enabled_ = false; } + if (visualizer_enabled_) { + visualizer_.PrintHeader(method_name_); + } } ~PassInfoPrinter() { @@ -199,8 +205,13 @@ class OptimizingCompiler FINAL : public Compiler { const std::vector<const art::DexFile*>& dex_files, const std::string& android_root, bool is_host) const OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - return art::ElfWriterQuick32::Create(file, oat_writer, dex_files, android_root, is_host, - *GetCompilerDriver()); + if (kProduce64BitELFFiles && Is64BitInstructionSet(GetCompilerDriver()->GetInstructionSet())) { + return art::ElfWriterQuick64::Create(file, oat_writer, dex_files, android_root, is_host, + *GetCompilerDriver()); + } else { + return art::ElfWriterQuick32::Create(file, oat_writer, dex_files, android_root, is_host, + *GetCompilerDriver()); + } } void InitCompilationUnit(CompilationUnit& cu) const OVERRIDE; @@ -360,6 +371,9 @@ static ArrayRef<const uint8_t> AlignVectorSize(std::vector<uint8_t>& vector) { return ArrayRef<const uint8_t>(vector); } +// TODO: The function below uses too much stack space. 
+#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wframe-larger-than=" CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph, CodeGenerator* codegen, @@ -385,12 +399,17 @@ CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph, CodeVectorAllocator allocator; codegen->CompileOptimized(&allocator); + DefaultSrcMap src_mapping_table; + if (compiler_driver->GetCompilerOptions().GetIncludeDebugSymbols()) { + codegen->BuildSourceMap(&src_mapping_table); + } + std::vector<uint8_t> stack_map; codegen->BuildStackMaps(&stack_map); compilation_stats_.RecordStat(MethodCompilationStat::kCompiledOptimized); - return CompiledMethod::SwapAllocCompiledMethodStackMap( + return CompiledMethod::SwapAllocCompiledMethod( compiler_driver, codegen->GetInstructionSet(), ArrayRef<const uint8_t>(allocator.GetMemory()), @@ -400,9 +419,15 @@ CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph, codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(), codegen->GetCoreSpillMask(), codegen->GetFpuSpillMask(), - ArrayRef<const uint8_t>(stack_map)); + &src_mapping_table, + ArrayRef<const uint8_t>(), // mapping_table. + ArrayRef<const uint8_t>(stack_map), + ArrayRef<const uint8_t>(), // native_gc_map. + ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()), + ArrayRef<const LinkerPatch>()); } +#pragma GCC diagnostic pop CompiledMethod* OptimizingCompiler::CompileBaseline( CodeGenerator* codegen, @@ -412,9 +437,11 @@ CompiledMethod* OptimizingCompiler::CompileBaseline( codegen->CompileBaseline(&allocator); std::vector<uint8_t> mapping_table; + codegen->BuildMappingTable(&mapping_table); DefaultSrcMap src_mapping_table; - bool include_debug_symbol = compiler_driver->GetCompilerOptions().GetIncludeDebugSymbols(); - codegen->BuildMappingTable(&mapping_table, include_debug_symbol ? &src_mapping_table : nullptr); + if (compiler_driver->GetCompilerOptions().GetIncludeDebugSymbols()) { + codegen->BuildSourceMap(&src_mapping_table); + } std::vector<uint8_t> vmap_table; codegen->BuildVMapTable(&vmap_table); std::vector<uint8_t> gc_map; @@ -435,7 +462,8 @@ CompiledMethod* OptimizingCompiler::CompileBaseline( AlignVectorSize(mapping_table), AlignVectorSize(vmap_table), AlignVectorSize(gc_map), - ArrayRef<const uint8_t>()); + ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()), + ArrayRef<const LinkerPatch>()); } CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_item, @@ -501,6 +529,8 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite compilation_stats_.RecordStat(MethodCompilationStat::kNotCompiledNoCodegen); return nullptr; } + codegen->GetAssembler()->cfi().SetEnabled( + compiler_driver->GetCompilerOptions().GetIncludeDebugSymbols()); PassInfoPrinter pass_info_printer(graph, method_name.c_str(), diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc index 7d0641ec13..4936685367 100644 --- a/compiler/optimizing/parallel_move_resolver.cc +++ b/compiler/optimizing/parallel_move_resolver.cc @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ +#include <iostream> #include "parallel_move_resolver.h" #include "nodes.h" @@ -63,39 +64,42 @@ void ParallelMoveResolver::BuildInitialMoveList(HParallelMove* parallel_move) { } } +Location LowOf(Location location) { + if (location.IsRegisterPair()) { + return Location::RegisterLocation(location.low()); + } else if (location.IsFpuRegisterPair()) { + return Location::FpuRegisterLocation(location.low()); + } else if (location.IsDoubleStackSlot()) { + return Location::StackSlot(location.GetStackIndex()); + } else { + return Location::NoLocation(); + } +} + +Location HighOf(Location location) { + if (location.IsRegisterPair()) { + return Location::RegisterLocation(location.high()); + } else if (location.IsFpuRegisterPair()) { + return Location::FpuRegisterLocation(location.high()); + } else if (location.IsDoubleStackSlot()) { + return Location::StackSlot(location.GetHighStackIndex(4)); + } else { + return Location::NoLocation(); + } +} + // Update the source of `move`, knowing that `updated_location` has been swapped // with `new_source`. Note that `updated_location` can be a pair, therefore if // `move` is non-pair, we need to extract which register to use. static void UpdateSourceOf(MoveOperands* move, Location updated_location, Location new_source) { Location source = move->GetSource(); - if (new_source.GetKind() == source.GetKind()) { - DCHECK(updated_location.Equals(source)); - move->SetSource(new_source); - } else if (new_source.IsStackSlot() - || new_source.IsDoubleStackSlot() - || source.IsStackSlot() - || source.IsDoubleStackSlot()) { - // Stack slots never take part of a pair/non-pair swap. - DCHECK(updated_location.Equals(source)); + if (LowOf(updated_location).Equals(source)) { + move->SetSource(LowOf(new_source)); + } else if (HighOf(updated_location).Equals(source)) { + move->SetSource(HighOf(new_source)); + } else { + DCHECK(updated_location.Equals(source)) << updated_location << " " << source; move->SetSource(new_source); - } else if (source.IsRegister()) { - DCHECK(new_source.IsRegisterPair()) << new_source; - DCHECK(updated_location.IsRegisterPair()) << updated_location; - if (updated_location.low() == source.reg()) { - move->SetSource(Location::RegisterLocation(new_source.low())); - } else { - DCHECK_EQ(updated_location.high(), source.reg()); - move->SetSource(Location::RegisterLocation(new_source.high())); - } - } else if (source.IsFpuRegister()) { - DCHECK(new_source.IsFpuRegisterPair()) << new_source; - DCHECK(updated_location.IsFpuRegisterPair()) << updated_location; - if (updated_location.low() == source.reg()) { - move->SetSource(Location::FpuRegisterLocation(new_source.low())); - } else { - DCHECK_EQ(updated_location.high(), source.reg()); - move->SetSource(Location::FpuRegisterLocation(new_source.high())); - } } } @@ -265,6 +269,20 @@ int ParallelMoveResolver::AllocateScratchRegister(int blocked, } +int ParallelMoveResolver::AllocateScratchRegister(int blocked, + int register_count) { + int scratch = -1; + for (int reg = 0; reg < register_count; ++reg) { + if ((blocked != reg) && IsScratchLocation(Location::RegisterLocation(reg))) { + scratch = reg; + break; + } + } + + return scratch; +} + + ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope( ParallelMoveResolver* resolver, int blocked, int if_scratch, int number_of_registers) : resolver_(resolver), @@ -278,6 +296,16 @@ ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope( } +ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope( + ParallelMoveResolver* resolver, int blocked, 
int number_of_registers) + : resolver_(resolver), + reg_(kNoRegister), + spilled_(false) { + // We don't want to spill a register if none are free. + reg_ = resolver_->AllocateScratchRegister(blocked, number_of_registers); +} + + ParallelMoveResolver::ScratchRegisterScope::~ScratchRegisterScope() { if (spilled_) { resolver_->RestoreScratch(reg_); diff --git a/compiler/optimizing/parallel_move_resolver.h b/compiler/optimizing/parallel_move_resolver.h index 3fa1b37afd..173cffc71e 100644 --- a/compiler/optimizing/parallel_move_resolver.h +++ b/compiler/optimizing/parallel_move_resolver.h @@ -42,10 +42,15 @@ class ParallelMoveResolver : public ValueObject { protected: class ScratchRegisterScope : public ValueObject { public: + // Spill a scratch register if no regs are free. ScratchRegisterScope(ParallelMoveResolver* resolver, int blocked, int if_scratch, int number_of_registers); + // Grab a scratch register only if available. + ScratchRegisterScope(ParallelMoveResolver* resolver, + int blocked, + int number_of_registers); ~ScratchRegisterScope(); int GetRegister() const { return reg_; } @@ -62,6 +67,8 @@ class ParallelMoveResolver : public ValueObject { // Allocate a scratch register for performing a move. The method will try to use // a register that is the destination of a move, but that move has not been emitted yet. int AllocateScratchRegister(int blocked, int if_scratch, int register_count, bool* spilled); + // As above, but return -1 if no free register. + int AllocateScratchRegister(int blocked, int register_count); // Emit a move. virtual void EmitMove(size_t index) = 0; diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc index 817a44b184..5c502f7ef4 100644 --- a/compiler/optimizing/parallel_move_test.cc +++ b/compiler/optimizing/parallel_move_test.cc @@ -31,8 +31,13 @@ class TestParallelMoveResolver : public ParallelMoveResolver { message_ << "C"; } else if (location.IsPair()) { message_ << location.low() << "," << location.high(); - } else { + } else if (location.IsRegister()) { message_ << location.reg(); + } else if (location.IsStackSlot()) { + message_ << location.GetStackIndex() << "(sp)"; + } else { + message_ << "2x" << location.GetStackIndex() << "(sp)"; + DCHECK(location.IsDoubleStackSlot()) << location; } } @@ -279,6 +284,26 @@ TEST(ParallelMoveTest, Pairs) { resolver.EmitNativeCode(moves); ASSERT_STREQ("(0,1 <-> 2,3)", resolver.GetMessage().c_str()); } + + { + // Test involving registers used in single context and pair context. 
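+ // The single move 10 -> 5 has its source rewritten by both swaps: register 10 is
+ // the low half of the pair (10, 11), and after that swap the source becomes the low
+ // word of the double stack slot, exercising the new LowOf()/HighOf() handling in
+ // UpdateSourceOf().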
+ TestParallelMoveResolver resolver(&allocator); + HParallelMove* moves = new (&allocator) HParallelMove(&allocator); + moves->AddMove( + Location::RegisterLocation(10), + Location::RegisterLocation(5), + nullptr); + moves->AddMove( + Location::RegisterPairLocation(4, 5), + Location::DoubleStackSlot(32), + nullptr); + moves->AddMove( + Location::DoubleStackSlot(32), + Location::RegisterPairLocation(10, 11), + nullptr); + resolver.EmitNativeCode(moves); + ASSERT_STREQ("(2x32(sp) <-> 10,11) (4,5 <-> 2x32(sp)) (4 -> 5)", resolver.GetMessage().c_str()); + } } } // namespace art diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index 2d9a2bf330..f5d8d82571 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -60,11 +60,11 @@ void PrepareForRegisterAllocation::VisitClinitCheck(HClinitCheck* check) { void PrepareForRegisterAllocation::VisitCondition(HCondition* condition) { bool needs_materialization = false; - if (!condition->GetUses().HasOnlyOneUse()) { + if (!condition->GetUses().HasOnlyOneUse() || !condition->GetEnvUses().IsEmpty()) { needs_materialization = true; } else { HInstruction* user = condition->GetUses().GetFirst()->GetUser(); - if (!user->IsIf()) { + if (!user->IsIf() && !user->IsDeoptimize()) { needs_materialization = true; } else { // TODO: if there is no intervening instructions with side-effect between this condition diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index cf38bd3f8c..4bca43499f 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -1408,26 +1408,36 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { // Walk over all uses covered by this interval, and update the location // information. - while (use != nullptr && use->GetPosition() <= current->GetEnd()) { - LocationSummary* locations = use->GetUser()->GetLocations(); - if (use->GetIsEnvironment()) { - locations->SetEnvironmentAt(use->GetInputIndex(), source); - } else { - Location expected_location = locations->InAt(use->GetInputIndex()); - // The expected (actual) location may be invalid in case the input is unused. Currently - // this only happens for intrinsics. - if (expected_location.IsValid()) { - if (expected_location.IsUnallocated()) { - locations->SetInAt(use->GetInputIndex(), source); - } else if (!expected_location.IsConstant()) { - AddInputMoveFor(interval->GetDefinedBy(), use->GetUser(), source, expected_location); - } + + LiveRange* range = current->GetFirstRange(); + while (range != nullptr) { + while (use != nullptr && use->GetPosition() < range->GetStart()) { + DCHECK(use->GetIsEnvironment()); + use = use->GetNext(); + } + while (use != nullptr && use->GetPosition() <= range->GetEnd()) { + DCHECK(current->Covers(use->GetPosition()) || (use->GetPosition() == range->GetEnd())); + LocationSummary* locations = use->GetUser()->GetLocations(); + if (use->GetIsEnvironment()) { + locations->SetEnvironmentAt(use->GetInputIndex(), source); } else { - DCHECK(use->GetUser()->IsInvoke()); - DCHECK(use->GetUser()->AsInvoke()->GetIntrinsic() != Intrinsics::kNone); + Location expected_location = locations->InAt(use->GetInputIndex()); + // The expected (actual) location may be invalid in case the input is unused. Currently + // this only happens for intrinsics. 
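// A minimal standalone sketch of the LowOf()/HighOf() decomposition introduced in
// parallel_move_resolver.cc above: UpdateSourceOf() can now match a pending move
// against either half of a swapped pair and rewrite it with the corresponding half
// of the new source.  The Loc/Reg/Pair names below are hypothetical stand-ins for
// ART's Location class, not the real API.
#include <cassert>

struct Loc {
  enum Kind { kNone, kRegister, kRegisterPair };
  Kind kind;
  int low;
  int high;
};

bool operator==(const Loc& a, const Loc& b) {
  return a.kind == b.kind && a.low == b.low && a.high == b.high;
}

Loc Reg(int r)           { return Loc{Loc::kRegister, r, -1}; }
Loc Pair(int lo, int hi) { return Loc{Loc::kRegisterPair, lo, hi}; }
Loc None()               { return Loc{Loc::kNone, -1, -1}; }
Loc LowOf(const Loc& l)  { return l.kind == Loc::kRegisterPair ? Reg(l.low)  : None(); }
Loc HighOf(const Loc& l) { return l.kind == Loc::kRegisterPair ? Reg(l.high) : None(); }

int main() {
  // Pair (4,5) was swapped with pair (10,11); a pending single-register move
  // whose source was register 5 must now read register 11, the high half.
  Loc updated_location = Pair(4, 5);
  Loc new_source = Pair(10, 11);
  Loc source = Reg(5);
  assert(!(LowOf(updated_location) == source));
  assert(HighOf(updated_location) == source);
  assert(HighOf(new_source) == Reg(11));
  return 0;
}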
+ if (expected_location.IsValid()) { + if (expected_location.IsUnallocated()) { + locations->SetInAt(use->GetInputIndex(), source); + } else if (!expected_location.IsConstant()) { + AddInputMoveFor(interval->GetDefinedBy(), use->GetUser(), source, expected_location); + } + } else { + DCHECK(use->GetUser()->IsInvoke()); + DCHECK(use->GetUser()->AsInvoke()->GetIntrinsic() != Intrinsics::kNone); + } } + use = use->GetNext(); } - use = use->GetNext(); + range = range->GetNext(); } // If the next interval starts just after this one, and has a register, @@ -1503,7 +1513,15 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { } current = next_sibling; } while (current != nullptr); - DCHECK(use == nullptr); + + if (kIsDebugBuild) { + // Following uses can only be environment uses. The location for + // these environments will be none. + while (use != nullptr) { + DCHECK(use->GetIsEnvironment()); + use = use->GetNext(); + } + } } void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval, diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc index 7c3a0357d6..3951439881 100644 --- a/compiler/optimizing/register_allocator_test.cc +++ b/compiler/optimizing/register_allocator_test.cc @@ -14,6 +14,7 @@ * limitations under the License. */ +#include "arch/x86/instruction_set_features_x86.h" #include "base/arena_allocator.h" #include "builder.h" #include "code_generator.h" @@ -42,7 +43,9 @@ static bool Check(const uint16_t* data) { const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); builder.BuildGraph(*item); graph->TryBuildingSsa(); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); RegisterAllocator register_allocator(&allocator, &codegen, liveness); @@ -58,7 +61,9 @@ TEST(RegisterAllocatorTest, ValidateIntervals) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = new (&allocator) HGraph(&allocator); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); GrowableArray<LiveInterval*> intervals(&allocator, 0); // Test with two intervals of the same range. 
@@ -298,7 +303,9 @@ TEST(RegisterAllocatorTest, Loop3) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = BuildSSAGraph(data, &allocator); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); RegisterAllocator register_allocator(&allocator, &codegen, liveness); @@ -330,7 +337,9 @@ TEST(RegisterAllocatorTest, FirstRegisterUse) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = BuildSSAGraph(data, &allocator); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -383,7 +392,9 @@ TEST(RegisterAllocatorTest, DeadPhi) { ArenaAllocator allocator(&pool); HGraph* graph = BuildSSAGraph(data, &allocator); SsaDeadPhiElimination(graph).Run(); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); RegisterAllocator register_allocator(&allocator, &codegen, liveness); @@ -405,7 +416,9 @@ TEST(RegisterAllocatorTest, FreeUntil) { ArenaAllocator allocator(&pool); HGraph* graph = BuildSSAGraph(data, &allocator); SsaDeadPhiElimination(graph).Run(); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); RegisterAllocator register_allocator(&allocator, &codegen, liveness); @@ -507,7 +520,9 @@ TEST(RegisterAllocatorTest, PhiHint) { { HGraph* graph = BuildIfElseWithPhi(&allocator, &phi, &input1, &input2); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -522,7 +537,9 @@ TEST(RegisterAllocatorTest, PhiHint) { { HGraph* graph = BuildIfElseWithPhi(&allocator, &phi, &input1, &input2); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -539,7 +556,9 @@ TEST(RegisterAllocatorTest, PhiHint) { { HGraph* graph = BuildIfElseWithPhi(&allocator, &phi, &input1, &input2); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -556,7 +575,9 @@ 
TEST(RegisterAllocatorTest, PhiHint) { { HGraph* graph = BuildIfElseWithPhi(&allocator, &phi, &input1, &input2); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -608,7 +629,9 @@ TEST(RegisterAllocatorTest, ExpectedInRegisterHint) { { HGraph* graph = BuildFieldReturn(&allocator, &field, &ret); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -621,7 +644,9 @@ TEST(RegisterAllocatorTest, ExpectedInRegisterHint) { { HGraph* graph = BuildFieldReturn(&allocator, &field, &ret); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -671,7 +696,9 @@ TEST(RegisterAllocatorTest, SameAsFirstInputHint) { { HGraph* graph = BuildTwoSubs(&allocator, &first_sub, &second_sub); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -685,7 +712,9 @@ TEST(RegisterAllocatorTest, SameAsFirstInputHint) { { HGraph* graph = BuildTwoSubs(&allocator, &first_sub, &second_sub); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -734,7 +763,9 @@ TEST(RegisterAllocatorTest, ExpectedExactInRegisterAndSameOutputHint) { { HGraph* graph = BuildDiv(&allocator, &div); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -822,7 +853,9 @@ TEST(RegisterAllocatorTest, SpillInactive) { locations = new (&allocator) LocationSummary(fourth->GetDefinedBy(), LocationSummary::kNoCall); locations->SetOut(Location::RequiresRegister()); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); RegisterAllocator register_allocator(&allocator, &codegen, liveness); diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index fcc4e69b37..e154ea4ee6 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -487,7 +487,7 @@ void 
SsaBuilder::VisitInstruction(HInstruction* instruction) { HEnvironment* environment = new (GetGraph()->GetArena()) HEnvironment( GetGraph()->GetArena(), current_locals_->Size()); environment->CopyFrom(current_locals_); - instruction->SetEnvironment(environment); + instruction->SetRawEnvironment(environment); } void SsaBuilder::VisitTemporary(HTemporary* temp) { diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc index 0f3973e5fb..95da6ef551 100644 --- a/compiler/optimizing/ssa_liveness_analysis.cc +++ b/compiler/optimizing/ssa_liveness_analysis.cc @@ -218,28 +218,34 @@ void SsaLivenessAnalysis::ComputeLiveRanges() { current->GetLiveInterval()->SetFrom(current->GetLifetimePosition()); } - // All inputs of an instruction must be live. - for (size_t i = 0, e = current->InputCount(); i < e; ++i) { - HInstruction* input = current->InputAt(i); - // Some instructions 'inline' their inputs, that is they do not need - // to be materialized. - if (input->HasSsaIndex()) { - live_in->SetBit(input->GetSsaIndex()); - input->GetLiveInterval()->AddUse(current, i, false); - } - } - + // Process the environment first, because we know their uses come after + // or at the same liveness position of inputs. if (current->HasEnvironment()) { // Handle environment uses. See statements (b) and (c) of the // SsaLivenessAnalysis. HEnvironment* environment = current->GetEnvironment(); for (size_t i = 0, e = environment->Size(); i < e; ++i) { HInstruction* instruction = environment->GetInstructionAt(i); - if (ShouldBeLiveForEnvironment(instruction)) { + bool should_be_live = ShouldBeLiveForEnvironment(instruction); + if (should_be_live) { DCHECK(instruction->HasSsaIndex()); live_in->SetBit(instruction->GetSsaIndex()); - instruction->GetLiveInterval()->AddUse(current, i, true); } + if (instruction != nullptr) { + instruction->GetLiveInterval()->AddUse( + current, i, /* is_environment */ true, should_be_live); + } + } + } + + // All inputs of an instruction must be live. + for (size_t i = 0, e = current->InputCount(); i < e; ++i) { + HInstruction* input = current->InputAt(i); + // Some instructions 'inline' their inputs, that is they do not need + // to be materialized. + if (input->HasSsaIndex()) { + live_in->SetBit(input->GetSsaIndex()); + input->GetLiveInterval()->AddUse(current, i, /* is_environment */ false); } } } diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h index bc78dc2e76..d2da84c0c0 100644 --- a/compiler/optimizing/ssa_liveness_analysis.h +++ b/compiler/optimizing/ssa_liveness_analysis.h @@ -189,7 +189,10 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { AddRange(position, position + 1); } - void AddUse(HInstruction* instruction, size_t input_index, bool is_environment) { + void AddUse(HInstruction* instruction, + size_t input_index, + bool is_environment, + bool keep_alive = false) { // Set the use within the instruction. size_t position = instruction->GetLifetimePosition() + 1; LocationSummary* locations = instruction->GetLocations(); @@ -211,6 +214,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { && (first_use_->GetPosition() < position)) { // The user uses the instruction multiple times, and one use dies before the other. // We update the use list so that the latter is first. 
+ DCHECK(!is_environment); UsePosition* cursor = first_use_; while ((cursor->GetNext() != nullptr) && (cursor->GetNext()->GetPosition() < position)) { cursor = cursor->GetNext(); @@ -225,6 +229,15 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { return; } + first_use_ = new (allocator_) UsePosition( + instruction, input_index, is_environment, position, first_use_); + + if (is_environment && !keep_alive) { + // If this environment use does not keep the instruction live, it does not + // affect the live range of that instruction. + return; + } + size_t start_block_position = instruction->GetBlock()->GetLifetimeStart(); if (first_range_ == nullptr) { // First time we see a use of that interval. @@ -246,8 +259,6 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { // and the check line 205 would succeed. first_range_ = new (allocator_) LiveRange(start_block_position, position, first_range_); } - first_use_ = new (allocator_) UsePosition( - instruction, input_index, is_environment, position, first_use_); } void AddPhiUse(HInstruction* instruction, size_t input_index, HBasicBlock* block) { @@ -425,9 +436,11 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { UsePosition* use = first_use_; size_t end = GetEnd(); while (use != nullptr && use->GetPosition() <= end) { - size_t use_position = use->GetPosition(); - if (use_position > position) { - return use_position; + if (!use->GetIsEnvironment()) { + size_t use_position = use->GetPosition(); + if (use_position > position) { + return use_position; + } } use = use->GetNext(); } diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h index 5818a37a46..a73c8d77f3 100644 --- a/compiler/optimizing/stack_map_stream.h +++ b/compiler/optimizing/stack_map_stream.h @@ -27,6 +27,32 @@ namespace art { +// Helper to build art::StackMapStream::LocationCatalogEntriesIndices. +class LocationCatalogEntriesIndicesEmptyFn { + public: + void MakeEmpty(std::pair<DexRegisterLocation, size_t>& item) const { + item.first = DexRegisterLocation::None(); + } + bool IsEmpty(const std::pair<DexRegisterLocation, size_t>& item) const { + return item.first == DexRegisterLocation::None(); + } +}; + +// Hash function for art::StackMapStream::LocationCatalogEntriesIndices. +// This hash function does not create collisions. +class DexRegisterLocationHashFn { + public: + size_t operator()(DexRegisterLocation key) const { + // Concatenate `key`s fields to create a 64-bit value to be hashed. + int64_t kind_and_value = + (static_cast<int64_t>(key.kind_) << 32) | static_cast<int64_t>(key.value_); + return inner_hash_fn_(kind_and_value); + } + private: + std::hash<int64_t> inner_hash_fn_; +}; + + /** * Collects and builds stack maps for a method. All the stack maps * for a method are placed in a CodeInfo object. 
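// A standalone sketch of the key packing used by DexRegisterLocationHashFn above:
// the location kind goes in the high 32 bits and the value in the low 32 bits, so
// distinct (kind, value) pairs pack to distinct 64-bit keys.  The Kind enum below
// is an illustrative subset, and the explicit zero-extension of the low half is
// this sketch's choice to keep negative values out of the kind bits; neither is
// meant to mirror the real DexRegisterLocation exactly.
#include <cassert>
#include <cstdint>
#include <functional>

enum class Kind : int32_t { kNone, kInStack, kInRegister, kConstant };

int64_t PackKey(Kind kind, int32_t value) {
  return (static_cast<int64_t>(kind) << 32) |
         static_cast<int64_t>(static_cast<uint32_t>(value));
}

int main() {
  std::hash<int64_t> inner_hash_fn;
  // Different kinds with the same value still produce different keys...
  assert(PackKey(Kind::kConstant, -2) != PackKey(Kind::kInStack, -2));
  // ...and equal pairs produce equal keys, hence equal hashes.
  assert(inner_hash_fn(PackKey(Kind::kInRegister, 3)) ==
         inner_hash_fn(PackKey(Kind::kInRegister, 3)));
  return 0;
}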
@@ -36,11 +62,13 @@ class StackMapStream : public ValueObject { explicit StackMapStream(ArenaAllocator* allocator) : allocator_(allocator), stack_maps_(allocator, 10), + location_catalog_entries_(allocator, 4), dex_register_locations_(allocator, 10 * 4), inline_infos_(allocator, 2), stack_mask_max_(-1), dex_pc_max_(0), native_pc_offset_max_(0), + register_mask_max_(0), number_of_stack_maps_with_inline_info_(0), dex_map_hash_to_stack_map_indices_(std::less<uint32_t>(), allocator->Adapter()) {} @@ -101,6 +129,7 @@ class StackMapStream : public ValueObject { dex_pc_max_ = std::max(dex_pc_max_, dex_pc); native_pc_offset_max_ = std::max(native_pc_offset_max_, native_pc_offset); + register_mask_max_ = std::max(register_mask_max_, register_mask); } void AddInlineInfoEntry(uint32_t method_index) { @@ -111,6 +140,7 @@ class StackMapStream : public ValueObject { size_t ComputeNeededSize() { size_t size = CodeInfo::kFixedSize + + ComputeDexRegisterLocationCatalogSize() + ComputeStackMapsSize() + ComputeDexRegisterMapsSize() + ComputeInlineInfoSize(); @@ -128,24 +158,43 @@ class StackMapStream : public ValueObject { ComputeInlineInfoSize(), ComputeDexRegisterMapsSize(), dex_pc_max_, - native_pc_offset_max_); + native_pc_offset_max_, + register_mask_max_); + } + + // Compute the size of the Dex register location catalog of `entry`. + size_t ComputeDexRegisterLocationCatalogSize() const { + size_t size = DexRegisterLocationCatalog::kFixedSize; + for (size_t location_catalog_entry_index = 0; + location_catalog_entry_index < location_catalog_entries_.Size(); + ++location_catalog_entry_index) { + DexRegisterLocation dex_register_location = + location_catalog_entries_.Get(location_catalog_entry_index); + size += DexRegisterLocationCatalog::EntrySize(dex_register_location); + } + return size; } - // Compute the size of the Dex register map of `entry`. size_t ComputeDexRegisterMapSize(const StackMapEntry& entry) const { + // Size of the map in bytes. size_t size = DexRegisterMap::kFixedSize; - // Add the bit mask for the dex register liveness. - size += DexRegisterMap::LiveBitMaskSize(entry.num_dex_registers); - for (size_t dex_register_number = 0, index_in_dex_register_locations = 0; + // Add the live bit mask for the Dex register liveness. + size += DexRegisterMap::GetLiveBitMaskSize(entry.num_dex_registers); + // Compute the size of the set of live Dex register entries. 
+ size_t number_of_live_dex_registers = 0; + for (size_t dex_register_number = 0; dex_register_number < entry.num_dex_registers; ++dex_register_number) { if (entry.live_dex_registers_mask->IsBitSet(dex_register_number)) { - DexRegisterLocation dex_register_location = dex_register_locations_.Get( - entry.dex_register_locations_start_index + index_in_dex_register_locations); - size += DexRegisterMap::EntrySize(dex_register_location); - index_in_dex_register_locations++; + ++number_of_live_dex_registers; } } + size_t map_entries_size_in_bits = + DexRegisterMap::SingleEntrySizeInBits(location_catalog_entries_.Size()) + * number_of_live_dex_registers; + size_t map_entries_size_in_bytes = + RoundUp(map_entries_size_in_bits, kBitsPerByte) / kBitsPerByte; + size += map_entries_size_in_bytes; return size; } @@ -168,8 +217,16 @@ class StackMapStream : public ValueObject { + (number_of_stack_maps_with_inline_info_ * InlineInfo::kFixedSize); } + size_t ComputeDexRegisterLocationCatalogStart() const { + return CodeInfo::kFixedSize; + } + + size_t ComputeStackMapsStart() const { + return ComputeDexRegisterLocationCatalogStart() + ComputeDexRegisterLocationCatalogSize(); + } + size_t ComputeDexRegisterMapsStart() { - return CodeInfo::kFixedSize + ComputeStackMapsSize(); + return ComputeStackMapsStart() + ComputeStackMapsSize(); } size_t ComputeInlineInfoStart() { @@ -194,11 +251,32 @@ class StackMapStream : public ValueObject { ComputeInlineInfoStart(), inline_info_size); - code_info.SetEncoding( - inline_info_size, dex_register_map_size, dex_pc_max_, native_pc_offset_max_); + code_info.SetEncoding(inline_info_size, + dex_register_map_size, + dex_pc_max_, + native_pc_offset_max_, + register_mask_max_); code_info.SetNumberOfStackMaps(stack_maps_.Size()); code_info.SetStackMaskSize(stack_mask_size); - DCHECK_EQ(code_info.StackMapsSize(), ComputeStackMapsSize()); + DCHECK_EQ(code_info.GetStackMapsSize(), ComputeStackMapsSize()); + + // Set the Dex register location catalog. + code_info.SetNumberOfDexRegisterLocationCatalogEntries( + location_catalog_entries_.Size()); + MemoryRegion dex_register_location_catalog_region = region.Subregion( + ComputeDexRegisterLocationCatalogStart(), + ComputeDexRegisterLocationCatalogSize()); + DexRegisterLocationCatalog dex_register_location_catalog(dex_register_location_catalog_region); + // Offset in `dex_register_location_catalog` where to store the next + // register location. + size_t location_catalog_offset = DexRegisterLocationCatalog::kFixedSize; + for (size_t i = 0, e = location_catalog_entries_.Size(); i < e; ++i) { + DexRegisterLocation dex_register_location = location_catalog_entries_.Get(i); + dex_register_location_catalog.SetRegisterInfo(location_catalog_offset, dex_register_location); + location_catalog_offset += DexRegisterLocationCatalog::EntrySize(dex_register_location); + } + // Ensure we reached the end of the Dex registers location_catalog. + DCHECK_EQ(location_catalog_offset, dex_register_location_catalog_region.size()); uintptr_t next_dex_register_map_offset = 0; uintptr_t next_inline_info_offset = 0; @@ -234,25 +312,25 @@ class StackMapStream : public ValueObject { stack_map.SetDexRegisterMapOffset( code_info, register_region.start() - dex_register_locations_region.start()); - // Offset in `dex_register_map` where to store the next register entry. 
- size_t offset = DexRegisterMap::kFixedSize; - dex_register_map.SetLiveBitMask(offset, - entry.num_dex_registers, - *entry.live_dex_registers_mask); - offset += DexRegisterMap::LiveBitMaskSize(entry.num_dex_registers); + // Set the live bit mask. + dex_register_map.SetLiveBitMask(entry.num_dex_registers, *entry.live_dex_registers_mask); + + // Set the dex register location mapping data. for (size_t dex_register_number = 0, index_in_dex_register_locations = 0; dex_register_number < entry.num_dex_registers; ++dex_register_number) { if (entry.live_dex_registers_mask->IsBitSet(dex_register_number)) { - DexRegisterLocation dex_register_location = dex_register_locations_.Get( - entry.dex_register_locations_start_index + index_in_dex_register_locations); - dex_register_map.SetRegisterInfo(offset, dex_register_location); - offset += DexRegisterMap::EntrySize(dex_register_location); + size_t location_catalog_entry_index = + dex_register_locations_.Get(entry.dex_register_locations_start_index + + index_in_dex_register_locations); + dex_register_map.SetLocationCatalogEntryIndex( + index_in_dex_register_locations, + location_catalog_entry_index, + entry.num_dex_registers, + location_catalog_entries_.Size()); ++index_in_dex_register_locations; } } - // Ensure we reached the end of the Dex registers region. - DCHECK_EQ(offset, register_region.size()); } } @@ -282,12 +360,31 @@ class StackMapStream : public ValueObject { } void AddDexRegisterEntry(uint16_t dex_register, DexRegisterLocation::Kind kind, int32_t value) { + StackMapEntry entry = stack_maps_.Get(stack_maps_.Size() - 1); + DCHECK_LT(dex_register, entry.num_dex_registers); + if (kind != DexRegisterLocation::Kind::kNone) { // Ensure we only use non-compressed location kind at this stage. DCHECK(DexRegisterLocation::IsShortLocationKind(kind)) << DexRegisterLocation::PrettyDescriptor(kind); - dex_register_locations_.Add(DexRegisterLocation(kind, value)); - StackMapEntry entry = stack_maps_.Get(stack_maps_.Size() - 1); + DexRegisterLocation location(kind, value); + + // Look for Dex register `location` in the location catalog (using the + // companion hash map of locations to indices). Use its index if it + // is already in the location catalog. If not, insert it (in the + // location catalog and the hash map) and use the newly created index. + auto it = location_catalog_entries_indices_.Find(location); + if (it != location_catalog_entries_indices_.end()) { + // Retrieve the index from the hash map. + dex_register_locations_.Add(it->second); + } else { + // Create a new entry in the location catalog and the hash map. 
+ size_t index = location_catalog_entries_.Size(); + location_catalog_entries_.Add(location); + dex_register_locations_.Add(index); + location_catalog_entries_indices_.Insert(std::make_pair(location, index)); + } + entry.live_dex_registers_mask->SetBit(dex_register); entry.dex_register_map_hash += (1 << dex_register); entry.dex_register_map_hash += static_cast<uint32_t>(value); @@ -354,9 +451,9 @@ class StackMapStream : public ValueObject { return false; } if (a.live_dex_registers_mask->IsBitSet(i)) { - DexRegisterLocation a_loc = dex_register_locations_.Get( + size_t a_loc = dex_register_locations_.Get( a.dex_register_locations_start_index + index_in_dex_register_locations); - DexRegisterLocation b_loc = dex_register_locations_.Get( + size_t b_loc = dex_register_locations_.Get( b.dex_register_locations_start_index + index_in_dex_register_locations); if (a_loc != b_loc) { return false; @@ -369,21 +466,29 @@ class StackMapStream : public ValueObject { ArenaAllocator* allocator_; GrowableArray<StackMapEntry> stack_maps_; - GrowableArray<DexRegisterLocation> dex_register_locations_; + + // A catalog of unique [location_kind, register_value] pairs (per method). + GrowableArray<DexRegisterLocation> location_catalog_entries_; + // Map from Dex register location catalog entries to their indices in the + // location catalog. + typedef HashMap<DexRegisterLocation, size_t, LocationCatalogEntriesIndicesEmptyFn, + DexRegisterLocationHashFn> LocationCatalogEntriesIndices; + LocationCatalogEntriesIndices location_catalog_entries_indices_; + + // A set of concatenated maps of Dex register locations indices to + // `location_catalog_entries_`. + GrowableArray<size_t> dex_register_locations_; GrowableArray<InlineInfoEntry> inline_infos_; int stack_mask_max_; uint32_t dex_pc_max_; uint32_t native_pc_offset_max_; + uint32_t register_mask_max_; size_t number_of_stack_maps_with_inline_info_; ArenaSafeMap<uint32_t, GrowableArray<uint32_t>> dex_map_hash_to_stack_map_indices_; static constexpr uint32_t kNoSameDexMapFound = -1; - ART_FRIEND_TEST(StackMapTest, Test1); - ART_FRIEND_TEST(StackMapTest, Test2); - ART_FRIEND_TEST(StackMapTest, TestNonLiveDexRegisters); - DISALLOW_COPY_AND_ASSIGN(StackMapStream); }; diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc index e5a9790254..8d160bc81e 100644 --- a/compiler/optimizing/stack_map_test.cc +++ b/compiler/optimizing/stack_map_test.cc @@ -31,6 +31,8 @@ static bool SameBits(MemoryRegion region, const BitVector& bit_vector) { return true; } +using Kind = DexRegisterLocation::Kind; + TEST(StackMapTest, Test1) { ArenaPool pool; ArenaAllocator arena(&pool); @@ -39,8 +41,8 @@ TEST(StackMapTest, Test1) { ArenaBitVector sp_mask(&arena, 0, false); size_t number_of_dex_registers = 2; stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); - stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kInStack, 0); - stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kConstant, -2); + stream.AddDexRegisterEntry(0, Kind::kInStack, 0); // Short location. + stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Short location. 
size_t size = stream.ComputeNeededSize(); void* memory = arena.Alloc(size, kArenaAllocMisc); @@ -51,6 +53,16 @@ TEST(StackMapTest, Test1) { ASSERT_EQ(0u, code_info.GetStackMaskSize()); ASSERT_EQ(1u, code_info.GetNumberOfStackMaps()); + uint32_t number_of_location_catalog_entries = + code_info.GetNumberOfDexRegisterLocationCatalogEntries(); + ASSERT_EQ(2u, number_of_location_catalog_entries); + DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog(); + // The Dex register location catalog contains: + // - one 1-byte short Dex register location, and + // - one 5-byte large Dex register location. + size_t expected_location_catalog_size = 1u + 5u; + ASSERT_EQ(expected_location_catalog_size, location_catalog.Size()); + StackMap stack_map = code_info.GetStackMapAt(0); ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0))); ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64))); @@ -62,14 +74,40 @@ TEST(StackMapTest, Test1) { ASSERT_TRUE(SameBits(stack_mask, sp_mask)); ASSERT_TRUE(stack_map.HasDexRegisterMap(code_info)); - DexRegisterMap dex_registers = code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers); - ASSERT_EQ(7u, dex_registers.Size()); - DexRegisterLocation location0 = dex_registers.GetLocationKindAndValue(0, number_of_dex_registers); - DexRegisterLocation location1 = dex_registers.GetLocationKindAndValue(1, number_of_dex_registers); - ASSERT_EQ(DexRegisterLocation::Kind::kInStack, location0.GetKind()); - ASSERT_EQ(DexRegisterLocation::Kind::kConstant, location1.GetKind()); - ASSERT_EQ(DexRegisterLocation::Kind::kInStack, location0.GetInternalKind()); - ASSERT_EQ(DexRegisterLocation::Kind::kConstantLargeValue, location1.GetInternalKind()); + DexRegisterMap dex_register_map = + code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers); + ASSERT_TRUE(dex_register_map.IsDexRegisterLive(0)); + ASSERT_TRUE(dex_register_map.IsDexRegisterLive(1)); + ASSERT_EQ(2u, dex_register_map.GetNumberOfLiveDexRegisters(number_of_dex_registers)); + // The Dex register map contains: + // - one 1-byte live bit mask, and + // - one 1-byte set of location catalog entry indices composed of two 2-bit values. 
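// A worked sketch of the size arithmetic behind the expected_* constants in these
// stack map tests: a byte-aligned live bit mask followed by a per-live-register
// catalog index of roughly ceil(log2(catalog size)) bits (one bit for a two-entry
// catalog, per the SingleEntrySizeInBits assertion in DexRegisterMapOffsetOverflow
// further down), rounded up to whole bytes.  It assumes, as the tests suggest,
// that DexRegisterMap::kFixedSize adds nothing in this layout; the helper names
// below are hypothetical, not the real API.
#include <cassert>
#include <cstddef>

size_t BitsPerCatalogIndex(size_t catalog_entries) {
  if (catalog_entries <= 1) return 0;  // An empty or single-entry catalog needs no index data.
  size_t bits = 0;
  for (size_t n = catalog_entries - 1; n != 0; n >>= 1) ++bits;
  return bits;
}

size_t DexRegisterMapSizeInBytes(size_t num_regs, size_t num_live, size_t catalog_entries) {
  size_t live_mask_bytes = (num_regs + 7) / 8;
  size_t mapping_bits = num_live * BitsPerCatalogIndex(catalog_entries);
  return live_mask_bytes + (mapping_bits + 7) / 8;
}

int main() {
  // (registers, live registers, catalog entries) -> map size, matching the values
  // asserted in Test1, Test2, TestNonLiveDexRegisters and DexRegisterMapOffsetOverflow.
  assert(DexRegisterMapSizeInBytes(2, 2, 2) == 1u + 1u);
  assert(DexRegisterMapSizeInBytes(2, 2, 4) == 1u + 1u);
  assert(DexRegisterMapSizeInBytes(2, 1, 1) == 1u + 0u);
  assert(DexRegisterMapSizeInBytes(1024, 1016, 2) == 128u + 127u);
  return 0;
}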
+ size_t expected_dex_register_map_size = 1u + 1u; + ASSERT_EQ(expected_dex_register_map_size, dex_register_map.Size()); + + ASSERT_EQ(Kind::kInStack, + dex_register_map.GetLocationKind(0, number_of_dex_registers, code_info)); + ASSERT_EQ(Kind::kConstant, + dex_register_map.GetLocationKind(1, number_of_dex_registers, code_info)); + ASSERT_EQ(Kind::kInStack, + dex_register_map.GetLocationInternalKind(0, number_of_dex_registers, code_info)); + ASSERT_EQ(Kind::kConstantLargeValue, + dex_register_map.GetLocationInternalKind(1, number_of_dex_registers, code_info)); + ASSERT_EQ(0, dex_register_map.GetStackOffsetInBytes(0, number_of_dex_registers, code_info)); + ASSERT_EQ(-2, dex_register_map.GetConstant(1, number_of_dex_registers, code_info)); + + size_t index0 = dex_register_map.GetLocationCatalogEntryIndex( + 0, number_of_dex_registers, number_of_location_catalog_entries); + size_t index1 = dex_register_map.GetLocationCatalogEntryIndex( + 1, number_of_dex_registers, number_of_location_catalog_entries); + ASSERT_EQ(0u, index0); + ASSERT_EQ(1u, index1); + DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0); + DexRegisterLocation location1 = location_catalog.GetDexRegisterLocation(index1); + ASSERT_EQ(Kind::kInStack, location0.GetKind()); + ASSERT_EQ(Kind::kConstant, location1.GetKind()); + ASSERT_EQ(Kind::kInStack, location0.GetInternalKind()); + ASSERT_EQ(Kind::kConstantLargeValue, location1.GetInternalKind()); ASSERT_EQ(0, location0.GetValue()); ASSERT_EQ(-2, location1.GetValue()); @@ -86,8 +124,8 @@ TEST(StackMapTest, Test2) { sp_mask1.SetBit(4); size_t number_of_dex_registers = 2; stream.AddStackMapEntry(0, 64, 0x3, &sp_mask1, number_of_dex_registers, 2); - stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kInStack, 0); - stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kConstant, -2); + stream.AddDexRegisterEntry(0, Kind::kInStack, 0); // Short location. + stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Large location. stream.AddInlineInfoEntry(42); stream.AddInlineInfoEntry(82); @@ -95,8 +133,8 @@ TEST(StackMapTest, Test2) { sp_mask2.SetBit(3); sp_mask1.SetBit(8); stream.AddStackMapEntry(1, 128, 0xFF, &sp_mask2, number_of_dex_registers, 0); - stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kInRegister, 18); - stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kInFpuRegister, 3); + stream.AddDexRegisterEntry(0, Kind::kInRegister, 18); // Short location. + stream.AddDexRegisterEntry(1, Kind::kInFpuRegister, 3); // Short location. size_t size = stream.ComputeNeededSize(); void* memory = arena.Alloc(size, kArenaAllocMisc); @@ -107,6 +145,16 @@ TEST(StackMapTest, Test2) { ASSERT_EQ(1u, code_info.GetStackMaskSize()); ASSERT_EQ(2u, code_info.GetNumberOfStackMaps()); + uint32_t number_of_location_catalog_entries = + code_info.GetNumberOfDexRegisterLocationCatalogEntries(); + ASSERT_EQ(4u, number_of_location_catalog_entries); + DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog(); + // The Dex register location catalog contains: + // - three 1-byte short Dex register locations, and + // - one 5-byte large Dex register location. + size_t expected_location_catalog_size = 3u * 1u + 5u; + ASSERT_EQ(expected_location_catalog_size, location_catalog.Size()); + // First stack map. 
{ StackMap stack_map = code_info.GetStackMapAt(0); @@ -120,17 +168,40 @@ TEST(StackMapTest, Test2) { ASSERT_TRUE(SameBits(stack_mask, sp_mask1)); ASSERT_TRUE(stack_map.HasDexRegisterMap(code_info)); - DexRegisterMap dex_registers = + DexRegisterMap dex_register_map = code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers); - ASSERT_EQ(7u, dex_registers.Size()); - DexRegisterLocation location0 = - dex_registers.GetLocationKindAndValue(0, number_of_dex_registers); - DexRegisterLocation location1 = - dex_registers.GetLocationKindAndValue(1, number_of_dex_registers); - ASSERT_EQ(DexRegisterLocation::Kind::kInStack, location0.GetKind()); - ASSERT_EQ(DexRegisterLocation::Kind::kConstant, location1.GetKind()); - ASSERT_EQ(DexRegisterLocation::Kind::kInStack, location0.GetInternalKind()); - ASSERT_EQ(DexRegisterLocation::Kind::kConstantLargeValue, location1.GetInternalKind()); + ASSERT_TRUE(dex_register_map.IsDexRegisterLive(0)); + ASSERT_TRUE(dex_register_map.IsDexRegisterLive(1)); + ASSERT_EQ(2u, dex_register_map.GetNumberOfLiveDexRegisters(number_of_dex_registers)); + // The Dex register map contains: + // - one 1-byte live bit mask, and + // - one 1-byte set of location catalog entry indices composed of two 2-bit values. + size_t expected_dex_register_map_size = 1u + 1u; + ASSERT_EQ(expected_dex_register_map_size, dex_register_map.Size()); + + ASSERT_EQ(Kind::kInStack, + dex_register_map.GetLocationKind(0, number_of_dex_registers, code_info)); + ASSERT_EQ(Kind::kConstant, + dex_register_map.GetLocationKind(1, number_of_dex_registers, code_info)); + ASSERT_EQ(Kind::kInStack, + dex_register_map.GetLocationInternalKind(0, number_of_dex_registers, code_info)); + ASSERT_EQ(Kind::kConstantLargeValue, + dex_register_map.GetLocationInternalKind(1, number_of_dex_registers, code_info)); + ASSERT_EQ(0, dex_register_map.GetStackOffsetInBytes(0, number_of_dex_registers, code_info)); + ASSERT_EQ(-2, dex_register_map.GetConstant(1, number_of_dex_registers, code_info)); + + size_t index0 = dex_register_map.GetLocationCatalogEntryIndex( + 0, number_of_dex_registers, number_of_location_catalog_entries); + size_t index1 = dex_register_map.GetLocationCatalogEntryIndex( + 1, number_of_dex_registers, number_of_location_catalog_entries); + ASSERT_EQ(0u, index0); + ASSERT_EQ(1u, index1); + DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0); + DexRegisterLocation location1 = location_catalog.GetDexRegisterLocation(index1); + ASSERT_EQ(Kind::kInStack, location0.GetKind()); + ASSERT_EQ(Kind::kConstant, location1.GetKind()); + ASSERT_EQ(Kind::kInStack, location0.GetInternalKind()); + ASSERT_EQ(Kind::kConstantLargeValue, location1.GetInternalKind()); ASSERT_EQ(0, location0.GetValue()); ASSERT_EQ(-2, location1.GetValue()); @@ -154,17 +225,40 @@ TEST(StackMapTest, Test2) { ASSERT_TRUE(SameBits(stack_mask, sp_mask2)); ASSERT_TRUE(stack_map.HasDexRegisterMap(code_info)); - DexRegisterMap dex_registers = + DexRegisterMap dex_register_map = code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers); - ASSERT_EQ(3u, dex_registers.Size()); - DexRegisterLocation location0 = - dex_registers.GetLocationKindAndValue(0, number_of_dex_registers); - DexRegisterLocation location1 = - dex_registers.GetLocationKindAndValue(1, number_of_dex_registers); - ASSERT_EQ(DexRegisterLocation::Kind::kInRegister, location0.GetKind()); - ASSERT_EQ(DexRegisterLocation::Kind::kInFpuRegister, location1.GetKind()); - ASSERT_EQ(DexRegisterLocation::Kind::kInRegister, location0.GetInternalKind()); - 
ASSERT_EQ(DexRegisterLocation::Kind::kInFpuRegister, location1.GetInternalKind()); + ASSERT_TRUE(dex_register_map.IsDexRegisterLive(0)); + ASSERT_TRUE(dex_register_map.IsDexRegisterLive(1)); + ASSERT_EQ(2u, dex_register_map.GetNumberOfLiveDexRegisters(number_of_dex_registers)); + // The Dex register map contains: + // - one 1-byte live bit mask, and + // - one 1-byte set of location catalog entry indices composed of two 2-bit values. + size_t expected_dex_register_map_size = 1u + 1u; + ASSERT_EQ(expected_dex_register_map_size, dex_register_map.Size()); + + ASSERT_EQ(Kind::kInRegister, + dex_register_map.GetLocationKind(0, number_of_dex_registers, code_info)); + ASSERT_EQ(Kind::kInFpuRegister, + dex_register_map.GetLocationKind(1, number_of_dex_registers, code_info)); + ASSERT_EQ(Kind::kInRegister, + dex_register_map.GetLocationInternalKind(0, number_of_dex_registers, code_info)); + ASSERT_EQ(Kind::kInFpuRegister, + dex_register_map.GetLocationInternalKind(1, number_of_dex_registers, code_info)); + ASSERT_EQ(18, dex_register_map.GetMachineRegister(0, number_of_dex_registers, code_info)); + ASSERT_EQ(3, dex_register_map.GetMachineRegister(1, number_of_dex_registers, code_info)); + + size_t index0 = dex_register_map.GetLocationCatalogEntryIndex( + 0, number_of_dex_registers, number_of_location_catalog_entries); + size_t index1 = dex_register_map.GetLocationCatalogEntryIndex( + 1, number_of_dex_registers, number_of_location_catalog_entries); + ASSERT_EQ(2u, index0); + ASSERT_EQ(3u, index1); + DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0); + DexRegisterLocation location1 = location_catalog.GetDexRegisterLocation(index1); + ASSERT_EQ(Kind::kInRegister, location0.GetKind()); + ASSERT_EQ(Kind::kInFpuRegister, location1.GetKind()); + ASSERT_EQ(Kind::kInRegister, location0.GetInternalKind()); + ASSERT_EQ(Kind::kInFpuRegister, location1.GetInternalKind()); ASSERT_EQ(18, location0.GetValue()); ASSERT_EQ(3, location1.GetValue()); @@ -180,8 +274,8 @@ TEST(StackMapTest, TestNonLiveDexRegisters) { ArenaBitVector sp_mask(&arena, 0, false); uint32_t number_of_dex_registers = 2; stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); - stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kNone, 0); - stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kConstant, -2); + stream.AddDexRegisterEntry(0, Kind::kNone, 0); // No location. + stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Large location. size_t size = stream.ComputeNeededSize(); void* memory = arena.Alloc(size, kArenaAllocMisc); @@ -189,14 +283,62 @@ TEST(StackMapTest, TestNonLiveDexRegisters) { stream.FillIn(region); CodeInfo code_info(region); + ASSERT_EQ(0u, code_info.GetStackMaskSize()); + ASSERT_EQ(1u, code_info.GetNumberOfStackMaps()); + + uint32_t number_of_location_catalog_entries = + code_info.GetNumberOfDexRegisterLocationCatalogEntries(); + ASSERT_EQ(1u, number_of_location_catalog_entries); + DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog(); + // The Dex register location catalog contains: + // - one 5-byte large Dex register location. 
+ size_t expected_location_catalog_size = 5u; + ASSERT_EQ(expected_location_catalog_size, location_catalog.Size()); + StackMap stack_map = code_info.GetStackMapAt(0); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0))); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64))); + ASSERT_EQ(0u, stack_map.GetDexPc(code_info)); + ASSERT_EQ(64u, stack_map.GetNativePcOffset(code_info)); + ASSERT_EQ(0x3u, stack_map.GetRegisterMask(code_info)); + ASSERT_TRUE(stack_map.HasDexRegisterMap(code_info)); - DexRegisterMap dex_registers = code_info.GetDexRegisterMapOf(stack_map, 2); - ASSERT_EQ(DexRegisterLocation::Kind::kNone, - dex_registers.GetLocationKind(0, number_of_dex_registers)); - ASSERT_EQ(DexRegisterLocation::Kind::kConstant, - dex_registers.GetLocationKind(1, number_of_dex_registers)); - ASSERT_EQ(-2, dex_registers.GetConstant(1, number_of_dex_registers)); + DexRegisterMap dex_register_map = + code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers); + ASSERT_FALSE(dex_register_map.IsDexRegisterLive(0)); + ASSERT_TRUE(dex_register_map.IsDexRegisterLive(1)); + ASSERT_EQ(1u, dex_register_map.GetNumberOfLiveDexRegisters(number_of_dex_registers)); + // The Dex register map contains: + // - one 1-byte live bit mask. + // No space is allocated for the sole location catalog entry index, as it is useless. + size_t expected_dex_register_map_size = 1u + 0u; + ASSERT_EQ(expected_dex_register_map_size, dex_register_map.Size()); + + ASSERT_EQ(Kind::kNone, + dex_register_map.GetLocationKind(0, number_of_dex_registers, code_info)); + ASSERT_EQ(Kind::kConstant, + dex_register_map.GetLocationKind(1, number_of_dex_registers, code_info)); + ASSERT_EQ(Kind::kNone, + dex_register_map.GetLocationInternalKind(0, number_of_dex_registers, code_info)); + ASSERT_EQ(Kind::kConstantLargeValue, + dex_register_map.GetLocationInternalKind(1, number_of_dex_registers, code_info)); + ASSERT_EQ(-2, dex_register_map.GetConstant(1, number_of_dex_registers, code_info)); + + size_t index0 = dex_register_map.GetLocationCatalogEntryIndex( + 0, number_of_dex_registers, number_of_location_catalog_entries); + size_t index1 = dex_register_map.GetLocationCatalogEntryIndex( + 1, number_of_dex_registers, number_of_location_catalog_entries); + ASSERT_EQ(DexRegisterLocationCatalog::kNoLocationEntryIndex, index0); + ASSERT_EQ(0u, index1); + DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0); + DexRegisterLocation location1 = location_catalog.GetDexRegisterLocation(index1); + ASSERT_EQ(Kind::kNone, location0.GetKind()); + ASSERT_EQ(Kind::kConstant, location1.GetKind()); + ASSERT_EQ(Kind::kNone, location0.GetInternalKind()); + ASSERT_EQ(Kind::kConstantLargeValue, location1.GetInternalKind()); + ASSERT_EQ(0, location0.GetValue()); + ASSERT_EQ(-2, location1.GetValue()); + ASSERT_FALSE(stack_map.HasInlineInfo(code_info)); } @@ -209,14 +351,21 @@ TEST(StackMapTest, DexRegisterMapOffsetOverflow) { StackMapStream stream(&arena); ArenaBitVector sp_mask(&arena, 0, false); - uint32_t number_of_dex_registers = 0xEA; + uint32_t number_of_dex_registers = 1024; + // Create the first stack map (and its Dex register map). 
stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); - for (uint32_t i = 0; i < number_of_dex_registers - 9; ++i) { - stream.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kConstant, 0); + uint32_t number_of_dex_live_registers_in_dex_register_map_0 = number_of_dex_registers - 8; + for (uint32_t i = 0; i < number_of_dex_live_registers_in_dex_register_map_0; ++i) { + // Use two different Dex register locations to populate this map, + // as using a single value (in the whole CodeInfo object) would + // make this Dex register mapping data empty (see + // art::DexRegisterMap::SingleEntrySizeInBits). + stream.AddDexRegisterEntry(i, Kind::kConstant, i % 2); // Short location. } + // Create the second stack map (and its Dex register map). stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); for (uint32_t i = 0; i < number_of_dex_registers; ++i) { - stream.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kConstant, 0); + stream.AddDexRegisterEntry(i, Kind::kConstant, 0); // Short location. } size_t size = stream.ComputeNeededSize(); @@ -225,10 +374,35 @@ TEST(StackMapTest, DexRegisterMapOffsetOverflow) { stream.FillIn(region); CodeInfo code_info(region); - StackMap stack_map = code_info.GetStackMapAt(1); - ASSERT_TRUE(stack_map.HasDexRegisterMap(code_info)); - ASSERT_NE(stack_map.GetDexRegisterMapOffset(code_info), StackMap::kNoDexRegisterMap); - ASSERT_EQ(stack_map.GetDexRegisterMapOffset(code_info), StackMap::kNoDexRegisterMapSmallEncoding); + // The location catalog contains two entries (DexRegisterLocation(kConstant, 0) + // and DexRegisterLocation(kConstant, 1)), therefore the location catalog index + // has a size of 1 bit. + uint32_t number_of_location_catalog_entries = + code_info.GetNumberOfDexRegisterLocationCatalogEntries(); + ASSERT_EQ(2u, number_of_location_catalog_entries); + ASSERT_EQ(1u, DexRegisterMap::SingleEntrySizeInBits(number_of_location_catalog_entries)); + + // The first Dex register map contains: + // - a live register bit mask for 1024 registers (that is, 128 bytes of + // data); and + // - Dex register mapping information for 1016 1-bit Dex (live) register + // locations (that is, 127 bytes of data). + // Hence it has a size of 255 bytes, and therefore... + ASSERT_EQ(128u, DexRegisterMap::GetLiveBitMaskSize(number_of_dex_registers)); + StackMap stack_map0 = code_info.GetStackMapAt(0); + DexRegisterMap dex_register_map0 = + code_info.GetDexRegisterMapOf(stack_map0, number_of_dex_registers); + ASSERT_EQ(127u, dex_register_map0.GetLocationMappingDataSize(number_of_dex_registers, + number_of_location_catalog_entries)); + ASSERT_EQ(255u, dex_register_map0.Size()); + + StackMap stack_map1 = code_info.GetStackMapAt(1); + ASSERT_TRUE(stack_map1.HasDexRegisterMap(code_info)); + // ...the offset of the second Dex register map (relative to the + // beginning of the Dex register maps region) is 255 (i.e., + // kNoDexRegisterMapSmallEncoding). + ASSERT_NE(stack_map1.GetDexRegisterMapOffset(code_info), StackMap::kNoDexRegisterMap); + ASSERT_EQ(stack_map1.GetDexRegisterMapOffset(code_info), 0xFFu); } TEST(StackMapTest, TestShareDexRegisterMap) { @@ -240,16 +414,16 @@ TEST(StackMapTest, TestShareDexRegisterMap) { uint32_t number_of_dex_registers = 2; // First stack map. 
stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); - stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kInRegister, 0); - stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kConstant, -2); + stream.AddDexRegisterEntry(0, Kind::kInRegister, 0); // Short location. + stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Large location. // Second stack map, which should share the same dex register map. stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); - stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kInRegister, 0); - stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kConstant, -2); + stream.AddDexRegisterEntry(0, Kind::kInRegister, 0); // Short location. + stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Large location. // Third stack map (doesn't share the dex register map). stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); - stream.AddDexRegisterEntry(0, DexRegisterLocation::Kind::kInRegister, 2); - stream.AddDexRegisterEntry(1, DexRegisterLocation::Kind::kConstant, -2); + stream.AddDexRegisterEntry(0, Kind::kInRegister, 2); // Short location. + stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Large location. size_t size = stream.ComputeNeededSize(); void* memory = arena.Alloc(size, kArenaAllocMisc); @@ -260,20 +434,20 @@ TEST(StackMapTest, TestShareDexRegisterMap) { // Verify first stack map. StackMap sm0 = ci.GetStackMapAt(0); DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm0, number_of_dex_registers); - ASSERT_EQ(0, dex_registers0.GetMachineRegister(0, number_of_dex_registers)); - ASSERT_EQ(-2, dex_registers0.GetConstant(1, number_of_dex_registers)); + ASSERT_EQ(0, dex_registers0.GetMachineRegister(0, number_of_dex_registers, ci)); + ASSERT_EQ(-2, dex_registers0.GetConstant(1, number_of_dex_registers, ci)); // Verify second stack map. StackMap sm1 = ci.GetStackMapAt(1); DexRegisterMap dex_registers1 = ci.GetDexRegisterMapOf(sm1, number_of_dex_registers); - ASSERT_EQ(0, dex_registers1.GetMachineRegister(0, number_of_dex_registers)); - ASSERT_EQ(-2, dex_registers1.GetConstant(1, number_of_dex_registers)); + ASSERT_EQ(0, dex_registers1.GetMachineRegister(0, number_of_dex_registers, ci)); + ASSERT_EQ(-2, dex_registers1.GetConstant(1, number_of_dex_registers, ci)); // Verify third stack map. StackMap sm2 = ci.GetStackMapAt(2); DexRegisterMap dex_registers2 = ci.GetDexRegisterMapOf(sm2, number_of_dex_registers); - ASSERT_EQ(2, dex_registers2.GetMachineRegister(0, number_of_dex_registers)); - ASSERT_EQ(-2, dex_registers2.GetConstant(1, number_of_dex_registers)); + ASSERT_EQ(2, dex_registers2.GetMachineRegister(0, number_of_dex_registers, ci)); + ASSERT_EQ(-2, dex_registers2.GetConstant(1, number_of_dex_registers, ci)); // Verify dex register map offsets. 
ASSERT_EQ(sm0.GetDexRegisterMapOffset(ci), sm1.GetDexRegisterMapOffset(ci)); @@ -281,4 +455,39 @@ TEST(StackMapTest, TestShareDexRegisterMap) { ASSERT_NE(sm1.GetDexRegisterMapOffset(ci), sm2.GetDexRegisterMapOffset(ci)); } +TEST(StackMapTest, TestNoDexRegisterMap) { + ArenaPool pool; + ArenaAllocator arena(&pool); + StackMapStream stream(&arena); + + ArenaBitVector sp_mask(&arena, 0, false); + uint32_t number_of_dex_registers = 0; + stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); + + size_t size = stream.ComputeNeededSize(); + void* memory = arena.Alloc(size, kArenaAllocMisc); + MemoryRegion region(memory, size); + stream.FillIn(region); + + CodeInfo code_info(region); + ASSERT_EQ(0u, code_info.GetStackMaskSize()); + ASSERT_EQ(1u, code_info.GetNumberOfStackMaps()); + + uint32_t number_of_location_catalog_entries = + code_info.GetNumberOfDexRegisterLocationCatalogEntries(); + ASSERT_EQ(0u, number_of_location_catalog_entries); + DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog(); + ASSERT_EQ(0u, location_catalog.Size()); + + StackMap stack_map = code_info.GetStackMapAt(0); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0))); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64))); + ASSERT_EQ(0u, stack_map.GetDexPc(code_info)); + ASSERT_EQ(64u, stack_map.GetNativePcOffset(code_info)); + ASSERT_EQ(0x3u, stack_map.GetRegisterMask(code_info)); + + ASSERT_FALSE(stack_map.HasDexRegisterMap(code_info)); + ASSERT_FALSE(stack_map.HasInlineInfo(code_info)); +} + } // namespace art diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc index a02191bc13..c41066027d 100644 --- a/compiler/utils/arm/assembler_arm.cc +++ b/compiler/utils/arm/assembler_arm.cc @@ -89,7 +89,6 @@ uint32_t ShifterOperand::encodingArm() const { } else { return immed_; } - break; case kRegister: if (is_shift_) { uint32_t shift_type; @@ -121,7 +120,6 @@ uint32_t ShifterOperand::encodingArm() const { // Simple register return static_cast<uint32_t>(rm_); } - break; default: // Can't get here. LOG(FATAL) << "Invalid shifter operand for ARM"; @@ -156,13 +154,11 @@ uint32_t ShifterOperand::encodingThumb() const { // Simple register return static_cast<uint32_t>(rm_); } - break; default: // Can't get here. LOG(FATAL) << "Invalid shifter operand for thumb"; - return 0; + UNREACHABLE(); } - return 0; } uint32_t Address::encodingArm() const { @@ -374,40 +370,46 @@ void ArmAssembler::Pad(uint32_t bytes) { } } +static dwarf::Reg DWARFReg(Register reg) { + return dwarf::Reg::ArmCore(static_cast<int>(reg)); +} + +static dwarf::Reg DWARFReg(SRegister reg) { + return dwarf::Reg::ArmFp(static_cast<int>(reg)); +} + constexpr size_t kFramePointerSize = 4; void ArmAssembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, const std::vector<ManagedRegister>& callee_save_regs, const ManagedRegisterEntrySpills& entry_spills) { + CHECK_EQ(buffer_.Size(), 0U); // Nothing emitted yet CHECK_ALIGNED(frame_size, kStackAlignment); CHECK_EQ(R0, method_reg.AsArm().AsCoreRegister()); // Push callee saves and link register. 
- RegList push_list = 1 << LR; - size_t pushed_values = 1; - int32_t min_s = kNumberOfSRegisters; - int32_t max_s = -1; - for (size_t i = 0; i < callee_save_regs.size(); i++) { - if (callee_save_regs.at(i).AsArm().IsCoreRegister()) { - Register reg = callee_save_regs.at(i).AsArm().AsCoreRegister(); - push_list |= 1 << reg; - pushed_values++; + RegList core_spill_mask = 1 << LR; + uint32_t fp_spill_mask = 0; + for (const ManagedRegister& reg : callee_save_regs) { + if (reg.AsArm().IsCoreRegister()) { + core_spill_mask |= 1 << reg.AsArm().AsCoreRegister(); } else { - CHECK(callee_save_regs.at(i).AsArm().IsSRegister()); - min_s = std::min(static_cast<int>(callee_save_regs.at(i).AsArm().AsSRegister()), min_s); - max_s = std::max(static_cast<int>(callee_save_regs.at(i).AsArm().AsSRegister()), max_s); + fp_spill_mask |= 1 << reg.AsArm().AsSRegister(); } } - PushList(push_list); - if (max_s != -1) { - pushed_values += 1 + max_s - min_s; - vpushs(static_cast<SRegister>(min_s), 1 + max_s - min_s); + PushList(core_spill_mask); + cfi_.AdjustCFAOffset(POPCOUNT(core_spill_mask) * kFramePointerSize); + cfi_.RelOffsetForMany(DWARFReg(Register(0)), 0, core_spill_mask, kFramePointerSize); + if (fp_spill_mask != 0) { + vpushs(SRegister(CTZ(fp_spill_mask)), POPCOUNT(fp_spill_mask)); + cfi_.AdjustCFAOffset(POPCOUNT(fp_spill_mask) * kFramePointerSize); + cfi_.RelOffsetForMany(DWARFReg(SRegister(0)), 0, fp_spill_mask, kFramePointerSize); } // Increase frame to required size. + int pushed_values = POPCOUNT(core_spill_mask) + POPCOUNT(fp_spill_mask); CHECK_GT(frame_size, pushed_values * kFramePointerSize); // Must at least have space for Method*. - size_t adjust = frame_size - (pushed_values * kFramePointerSize); - IncreaseFrameSize(adjust); + IncreaseFrameSize(frame_size - pushed_values * kFramePointerSize); // handles CFI as well. // Write out Method*. StoreToOffset(kStoreWord, R0, SP, 0); @@ -436,46 +438,46 @@ void ArmAssembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, void ArmAssembler::RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs) { CHECK_ALIGNED(frame_size, kStackAlignment); + cfi_.RememberState(); + // Compute callee saves to pop and PC. - RegList pop_list = 1 << PC; - size_t pop_values = 1; - int32_t min_s = kNumberOfSRegisters; - int32_t max_s = -1; - for (size_t i = 0; i < callee_save_regs.size(); i++) { - if (callee_save_regs.at(i).AsArm().IsCoreRegister()) { - Register reg = callee_save_regs.at(i).AsArm().AsCoreRegister(); - pop_list |= 1 << reg; - pop_values++; + RegList core_spill_mask = 1 << PC; + uint32_t fp_spill_mask = 0; + for (const ManagedRegister& reg : callee_save_regs) { + if (reg.AsArm().IsCoreRegister()) { + core_spill_mask |= 1 << reg.AsArm().AsCoreRegister(); } else { - CHECK(callee_save_regs.at(i).AsArm().IsSRegister()); - min_s = std::min(static_cast<int>(callee_save_regs.at(i).AsArm().AsSRegister()), min_s); - max_s = std::max(static_cast<int>(callee_save_regs.at(i).AsArm().AsSRegister()), max_s); + fp_spill_mask |= 1 << reg.AsArm().AsSRegister(); } } - if (max_s != -1) { - pop_values += 1 + max_s - min_s; - } - // Decrease frame to start of callee saves. + int pop_values = POPCOUNT(core_spill_mask) + POPCOUNT(fp_spill_mask); CHECK_GT(frame_size, pop_values * kFramePointerSize); - size_t adjust = frame_size - (pop_values * kFramePointerSize); - DecreaseFrameSize(adjust); + DecreaseFrameSize(frame_size - (pop_values * kFramePointerSize)); // handles CFI as well. 
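// A small worked example of the CFA bookkeeping added around PushList() in
// BuildFrame() above: pushing the callee saves moves SP down by
// POPCOUNT(mask) * 4 bytes (hence the AdjustCFAOffset call), and each saved core
// register lands in a fixed slot relative to the new SP.  The mask below is just
// an example spill set, not the runtime's actual callee-save set, and the slot
// layout assumes the usual ARM PUSH ordering (lowest-numbered register at the
// lowest address).
#include <cassert>
#include <cstdint>

// Count set bits, standing in for ART's POPCOUNT.
int PopCount(uint32_t mask) {
  int n = 0;
  for (; mask != 0; mask &= mask - 1) ++n;
  return n;
}

int main() {
  constexpr int kFramePointerSize = 4;
  // Example spill set: R5, R6 and LR (register 14).
  uint32_t core_spill_mask = (1u << 5) | (1u << 6) | (1u << 14);
  // After the push, the CFA sits POPCOUNT(mask) * 4 bytes above the new SP.
  int cfa_offset = PopCount(core_spill_mask) * kFramePointerSize;
  assert(cfa_offset == 12);
  // The k-th set bit (counting from the least significant) is saved at SP + k * 4,
  // i.e. at CFA - (POPCOUNT(mask) - k) * 4.
  int k = 0;
  for (int reg = 0; reg < 16; ++reg) {
    if ((core_spill_mask & (1u << reg)) != 0) {
      int offset_from_sp = k * kFramePointerSize;
      assert(offset_from_sp == (reg == 5 ? 0 : reg == 6 ? 4 : 8));
      ++k;
    }
  }
  return 0;
}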
- if (max_s != -1) { - vpops(static_cast<SRegister>(min_s), 1 + max_s - min_s); + if (fp_spill_mask != 0) { + vpops(SRegister(CTZ(fp_spill_mask)), POPCOUNT(fp_spill_mask)); + cfi_.AdjustCFAOffset(-kFramePointerSize * POPCOUNT(fp_spill_mask)); + cfi_.RestoreMany(DWARFReg(SRegister(0)), fp_spill_mask); } // Pop callee saves and PC. - PopList(pop_list); + PopList(core_spill_mask); + + // The CFI should be restored for any code that follows the exit block. + cfi_.RestoreState(); + cfi_.DefCFAOffset(frame_size); } void ArmAssembler::IncreaseFrameSize(size_t adjust) { AddConstant(SP, -adjust); + cfi_.AdjustCFAOffset(adjust); } void ArmAssembler::DecreaseFrameSize(size_t adjust) { AddConstant(SP, adjust); + cfi_.AdjustCFAOffset(-adjust); } void ArmAssembler::Store(FrameOffset dest, ManagedRegister msrc, size_t size) { diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc index a894319c99..3b42f63509 100644 --- a/compiler/utils/arm/assembler_thumb2.cc +++ b/compiler/utils/arm/assembler_thumb2.cc @@ -373,24 +373,34 @@ void Thumb2Assembler::ldrsh(Register rd, const Address& ad, Condition cond) { void Thumb2Assembler::ldrd(Register rd, const Address& ad, Condition cond) { + ldrd(rd, Register(rd + 1), ad, cond); +} + + +void Thumb2Assembler::ldrd(Register rd, Register rd2, const Address& ad, Condition cond) { CheckCondition(cond); - CHECK_EQ(rd % 2, 0); + // Encoding T1. // This is different from other loads. The encoding is like ARM. int32_t encoding = B31 | B30 | B29 | B27 | B22 | B20 | static_cast<int32_t>(rd) << 12 | - (static_cast<int32_t>(rd) + 1) << 8 | + static_cast<int32_t>(rd2) << 8 | ad.encodingThumbLdrdStrd(); Emit32(encoding); } void Thumb2Assembler::strd(Register rd, const Address& ad, Condition cond) { + strd(rd, Register(rd + 1), ad, cond); +} + + +void Thumb2Assembler::strd(Register rd, Register rd2, const Address& ad, Condition cond) { CheckCondition(cond); - CHECK_EQ(rd % 2, 0); + // Encoding T1. // This is different from other loads. The encoding is like ARM. int32_t encoding = B31 | B30 | B29 | B27 | B22 | static_cast<int32_t>(rd) << 12 | - (static_cast<int32_t>(rd) + 1) << 8 | + static_cast<int32_t>(rd2) << 8 | ad.encodingThumbLdrdStrd(); Emit32(encoding); } @@ -683,7 +693,7 @@ void Thumb2Assembler::Emit16(int16_t value) { bool Thumb2Assembler::Is32BitDataProcessing(Condition cond ATTRIBUTE_UNUSED, Opcode opcode, - bool set_cc ATTRIBUTE_UNUSED, + bool set_cc, Register rn, Register rd, const ShifterOperand& so) { @@ -749,7 +759,6 @@ bool Thumb2Assembler::Is32BitDataProcessing(Condition cond ATTRIBUTE_UNUSED, break; case TEQ: return true; - break; case ADD: case SUB: break; @@ -2614,14 +2623,16 @@ void Thumb2Assembler::StoreToOffset(StoreOperandType type, Register tmp_reg = kNoRegister; if (!Address::CanHoldStoreOffsetThumb(type, offset)) { CHECK_NE(base, IP); - if (reg != IP) { + if (reg != IP && + (type != kStoreWordPair || reg + 1 != IP)) { tmp_reg = IP; } else { - // Be careful not to use IP twice (for `reg` and to build the - // Address object used by the store instruction(s) below). - // Instead, save R5 on the stack (or R6 if R5 is not available), - // use it as secondary temporary register, and restore it after - // the store instruction has been emitted. + // Be careful not to use IP twice (for `reg` (or `reg` + 1 in + // the case of a word-pair store)) and to build the Address + // object used by the store instruction(s) below). 
Instead, + // save R5 on the stack (or R6 if R5 is not available), use it + // as secondary temporary register, and restore it after the + // store instruction has been emitted. tmp_reg = base != R5 ? R5 : R6; Push(tmp_reg); if (base == SP) { diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h index 81dd13894f..e33c240dbf 100644 --- a/compiler/utils/arm/assembler_thumb2.h +++ b/compiler/utils/arm/assembler_thumb2.h @@ -135,9 +135,17 @@ class Thumb2Assembler FINAL : public ArmAssembler { void ldrsb(Register rd, const Address& ad, Condition cond = AL) OVERRIDE; void ldrsh(Register rd, const Address& ad, Condition cond = AL) OVERRIDE; + // Load/store register dual instructions using registers `rd` and `rd` + 1. void ldrd(Register rd, const Address& ad, Condition cond = AL) OVERRIDE; void strd(Register rd, const Address& ad, Condition cond = AL) OVERRIDE; + // Load/store register dual instructions using registers `rd` and `rd2`. + // Note that contrary to the ARM A1 encoding, the Thumb-2 T1 encoding + // does not require `rd` to be even, nor `rd2' to be equal to `rd` + 1. + void ldrd(Register rd, Register rd2, const Address& ad, Condition cond); + void strd(Register rd, Register rd2, const Address& ad, Condition cond); + + void ldm(BlockAddressMode am, Register base, RegList regs, Condition cond = AL) OVERRIDE; void stm(BlockAddressMode am, Register base, diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc index 813996b0db..5f5561a499 100644 --- a/compiler/utils/arm/assembler_thumb2_test.cc +++ b/compiler/utils/arm/assembler_thumb2_test.cc @@ -247,4 +247,103 @@ TEST_F(AssemblerThumb2Test, add) { DriverStr(expected, "add"); } +TEST_F(AssemblerThumb2Test, StoreWordToThumbOffset) { + arm::StoreOperandType type = arm::kStoreWord; + int32_t offset = 4092; + ASSERT_TRUE(arm::Address::CanHoldStoreOffsetThumb(type, offset)); + + __ StoreToOffset(type, arm::R0, arm::SP, offset); + __ StoreToOffset(type, arm::IP, arm::SP, offset); + __ StoreToOffset(type, arm::IP, arm::R5, offset); + + const char* expected = + "str r0, [sp, #4092]\n" + "str ip, [sp, #4092]\n" + "str ip, [r5, #4092]\n"; + DriverStr(expected, "StoreWordToThumbOffset"); +} + +TEST_F(AssemblerThumb2Test, StoreWordToNonThumbOffset) { + arm::StoreOperandType type = arm::kStoreWord; + int32_t offset = 4096; + ASSERT_FALSE(arm::Address::CanHoldStoreOffsetThumb(type, offset)); + + __ StoreToOffset(type, arm::R0, arm::SP, offset); + __ StoreToOffset(type, arm::IP, arm::SP, offset); + __ StoreToOffset(type, arm::IP, arm::R5, offset); + + const char* expected = + "mov ip, #4096\n" // LoadImmediate(ip, 4096) + "add ip, ip, sp\n" + "str r0, [ip, #0]\n" + + "str r5, [sp, #-4]!\n" // Push(r5) + "movw r5, #4100\n" // LoadImmediate(r5, 4096 + kRegisterSize) + "add r5, r5, sp\n" + "str ip, [r5, #0]\n" + "ldr r5, [sp], #4\n" // Pop(r5) + + "str r6, [sp, #-4]!\n" // Push(r6) + "mov r6, #4096\n" // LoadImmediate(r6, 4096) + "add r6, r6, r5\n" + "str ip, [r6, #0]\n" + "ldr r6, [sp], #4\n"; // Pop(r6) + DriverStr(expected, "StoreWordToNonThumbOffset"); +} + +TEST_F(AssemblerThumb2Test, StoreWordPairToThumbOffset) { + arm::StoreOperandType type = arm::kStoreWordPair; + int32_t offset = 1020; + ASSERT_TRUE(arm::Address::CanHoldStoreOffsetThumb(type, offset)); + + __ StoreToOffset(type, arm::R0, arm::SP, offset); + // We cannot use IP (i.e. R12) as first source register, as it would + // force us to use SP (i.e. 
R13) as second source register, which + would have an "unpredictable" effect according to the ARMv7 + specification (the T1 encoding describes the result as + UNPREDICTABLE when one of the source registers is R13). + // + // So we use (R11, IP) (e.g. (R11, R12)) as source registers in the + following instructions. + __ StoreToOffset(type, arm::R11, arm::SP, offset); + __ StoreToOffset(type, arm::R11, arm::R5, offset); + + const char* expected = + "strd r0, r1, [sp, #1020]\n" + "strd r11, ip, [sp, #1020]\n" + "strd r11, ip, [r5, #1020]\n"; + DriverStr(expected, "StoreWordPairToThumbOffset"); +} + +TEST_F(AssemblerThumb2Test, StoreWordPairToNonThumbOffset) { + arm::StoreOperandType type = arm::kStoreWordPair; + int32_t offset = 1024; + ASSERT_FALSE(arm::Address::CanHoldStoreOffsetThumb(type, offset)); + + __ StoreToOffset(type, arm::R0, arm::SP, offset); + // Same comment as in AssemblerThumb2Test.StoreWordPairToThumbOffset + // regarding the use of (R11, IP) (e.g. (R11, R12)) as source + // registers in the following instructions. + __ StoreToOffset(type, arm::R11, arm::SP, offset); + __ StoreToOffset(type, arm::R11, arm::R5, offset); + + const char* expected = + "mov ip, #1024\n" // LoadImmediate(ip, 1024) + "add ip, ip, sp\n" + "strd r0, r1, [ip, #0]\n" + + "str r5, [sp, #-4]!\n" // Push(r5) + "movw r5, #1028\n" // LoadImmediate(r5, 1024 + kRegisterSize) + "add r5, r5, sp\n" + "strd r11, ip, [r5, #0]\n" + "ldr r5, [sp], #4\n" // Pop(r5) + + "str r6, [sp, #-4]!\n" // Push(r6) + "mov r6, #1024\n" // LoadImmediate(r6, 1024) + "add r6, r6, r5\n" + "strd r11, ip, [r6, #0]\n" + "ldr r6, [sp], #4\n"; // Pop(r6) + DriverStr(expected, "StoreWordPairToNonThumbOffset"); +} + } // namespace art diff --git a/compiler/utils/arm/managed_register_arm.h b/compiler/utils/arm/managed_register_arm.h index a496c87150..5fde9e8856 100644 --- a/compiler/utils/arm/managed_register_arm.h +++ b/compiler/utils/arm/managed_register_arm.h @@ -19,6 +19,7 @@ #include "base/logging.h" #include "constants_arm.h" +#include "dwarf/register.h" #include "utils/managed_register.h" namespace art { diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc index 58c73674da..fbd04114e4 100644 --- a/compiler/utils/arm64/assembler_arm64.cc +++ b/compiler/utils/arm64/assembler_arm64.cc @@ -63,12 +63,14 @@ void Arm64Assembler::GetCurrentThread(FrameOffset offset, ManagedRegister /* scr void Arm64Assembler::IncreaseFrameSize(size_t adjust) { CHECK_ALIGNED(adjust, kStackAlignment); AddConstant(SP, -adjust); + cfi().AdjustCFAOffset(adjust); } // See Arm64 PCS Section 5.2.2.1. void Arm64Assembler::DecreaseFrameSize(size_t adjust) { CHECK_ALIGNED(adjust, kStackAlignment); AddConstant(SP, adjust); + cfi().AdjustCFAOffset(-adjust); } void Arm64Assembler::AddConstant(XRegister rd, int32_t value, Condition cond) { @@ -638,6 +640,14 @@ void Arm64Assembler::EmitExceptionPoll(Arm64Exception *exception) { ___ Brk(); } +static dwarf::Reg DWARFReg(XRegister reg) { + return dwarf::Reg::Arm64Core(static_cast<int>(reg)); +} + +static dwarf::Reg DWARFReg(DRegister reg) { + return dwarf::Reg::Arm64Fp(static_cast<int>(reg)); +} + constexpr size_t kFramePointerSize = 8; constexpr unsigned int kJniRefSpillRegsSize = 11 + 8; @@ -660,45 +670,20 @@ void Arm64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, // TUNING: Use stp. // Note: Must match Arm64JniCallingConvention::CoreSpillMask().
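For reference on the two-register strd/ldrd overloads exercised above, here is my reading of the Thumb-2 T1 STRD (immediate, offset addressing) field layout as a standalone encoder. The bit positions follow the ARMv7 manual as I recall them, so treat the expected value in the comment as a cross-check rather than ground truth.

#include <cstdint>
#include <cstdio>

uint32_t StrdT1Offset(uint32_t rn, uint32_t rt, uint32_t rt2, uint32_t byte_offset) {
  uint32_t imm8 = byte_offset / 4;  // T1 scales the 8-bit immediate by 4
  return 0xE8400000u                // base pattern: bits 31-29, 27 and 22 set (store, dual)
       | (1u << 24)                 // P = 1: offset addressing
       | (1u << 23)                 // U = 1: add the immediate
       | (rn << 16) | (rt << 12) | (rt2 << 8) | imm8;
}

int main() {
  // strd r0, r1, [sp, #1020] -- should come out as 0xe9cd01ff if the layout above is right.
  std::printf("0x%08x\n", StrdT1Offset(/*rn=*/13, /*rt=*/0, /*rt2=*/1, /*byte_offset=*/1020));
  return 0;
}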
size_t reg_offset = frame_size; - reg_offset -= 8; - StoreToOffset(LR, SP, reg_offset); - reg_offset -= 8; - StoreToOffset(X29, SP, reg_offset); - reg_offset -= 8; - StoreToOffset(X28, SP, reg_offset); - reg_offset -= 8; - StoreToOffset(X27, SP, reg_offset); - reg_offset -= 8; - StoreToOffset(X26, SP, reg_offset); - reg_offset -= 8; - StoreToOffset(X25, SP, reg_offset); - reg_offset -= 8; - StoreToOffset(X24, SP, reg_offset); - reg_offset -= 8; - StoreToOffset(X23, SP, reg_offset); - reg_offset -= 8; - StoreToOffset(X22, SP, reg_offset); - reg_offset -= 8; - StoreToOffset(X21, SP, reg_offset); - reg_offset -= 8; - StoreToOffset(X20, SP, reg_offset); - - reg_offset -= 8; - StoreDToOffset(D15, SP, reg_offset); - reg_offset -= 8; - StoreDToOffset(D14, SP, reg_offset); - reg_offset -= 8; - StoreDToOffset(D13, SP, reg_offset); - reg_offset -= 8; - StoreDToOffset(D12, SP, reg_offset); - reg_offset -= 8; - StoreDToOffset(D11, SP, reg_offset); - reg_offset -= 8; - StoreDToOffset(D10, SP, reg_offset); - reg_offset -= 8; - StoreDToOffset(D9, SP, reg_offset); - reg_offset -= 8; - StoreDToOffset(D8, SP, reg_offset); + static constexpr XRegister x_spills[] = { + LR, X29, X28, X27, X26, X25, X24, X23, X22, X21, X20 }; + for (size_t i = 0; i < arraysize(x_spills); i++) { + XRegister reg = x_spills[i]; + reg_offset -= 8; + StoreToOffset(reg, SP, reg_offset); + cfi_.RelOffset(DWARFReg(reg), reg_offset); + } + for (int d = 15; d >= 8; d--) { + DRegister reg = static_cast<DRegister>(d); + reg_offset -= 8; + StoreDToOffset(reg, SP, reg_offset); + cfi_.RelOffset(DWARFReg(reg), reg_offset); + } // Move TR(Caller saved) to ETR(Callee saved). The original (ETR)X21 has been saved on stack. // This way we make sure that TR is not trashed by native code. @@ -734,6 +719,7 @@ void Arm64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, void Arm64Assembler::RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs) { CHECK_ALIGNED(frame_size, kStackAlignment); + cfi_.RememberState(); // For now we only check that the size of the frame is greater than the spill size. CHECK_EQ(callee_save_regs.size(), kJniRefSpillRegsSize); @@ -748,51 +734,30 @@ void Arm64Assembler::RemoveFrame(size_t frame_size, const std::vector<ManagedReg // TUNING: Use ldp. // Note: Must match Arm64JniCallingConvention::CoreSpillMask(). 
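The unrolled stores above become a loop over a register table. The sketch below (plain C++, register names as strings, an example frame size) just prints the resulting save-slot layout and the CFA-relative positions that the cfi_.RelOffset() calls describe; it is an illustration, not ART's XRegister/DRegister code.

#include <cstddef>
#include <cstdio>

int main() {
  const char* x_spills[] = { "LR", "X29", "X28", "X27", "X26", "X25",
                             "X24", "X23", "X22", "X21", "X20" };
  const size_t frame_size = 176;  // example value, 16-byte aligned
  size_t reg_offset = frame_size;
  for (const char* reg : x_spills) {
    reg_offset -= 8;
    std::printf("%-3s saved at [sp, #%zu]  (CFA - %zu)\n",
                reg, reg_offset, frame_size - reg_offset);
  }
  for (int d = 15; d >= 8; d--) {
    reg_offset -= 8;
    std::printf("D%-2d saved at [sp, #%zu]  (CFA - %zu)\n",
                d, reg_offset, frame_size - reg_offset);
  }
  return 0;
}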
size_t reg_offset = frame_size; - reg_offset -= 8; - LoadFromOffset(LR, SP, reg_offset); - reg_offset -= 8; - LoadFromOffset(X29, SP, reg_offset); - reg_offset -= 8; - LoadFromOffset(X28, SP, reg_offset); - reg_offset -= 8; - LoadFromOffset(X27, SP, reg_offset); - reg_offset -= 8; - LoadFromOffset(X26, SP, reg_offset); - reg_offset -= 8; - LoadFromOffset(X25, SP, reg_offset); - reg_offset -= 8; - LoadFromOffset(X24, SP, reg_offset); - reg_offset -= 8; - LoadFromOffset(X23, SP, reg_offset); - reg_offset -= 8; - LoadFromOffset(X22, SP, reg_offset); - reg_offset -= 8; - LoadFromOffset(X21, SP, reg_offset); - reg_offset -= 8; - LoadFromOffset(X20, SP, reg_offset); - - reg_offset -= 8; - LoadDFromOffset(D15, SP, reg_offset); - reg_offset -= 8; - LoadDFromOffset(D14, SP, reg_offset); - reg_offset -= 8; - LoadDFromOffset(D13, SP, reg_offset); - reg_offset -= 8; - LoadDFromOffset(D12, SP, reg_offset); - reg_offset -= 8; - LoadDFromOffset(D11, SP, reg_offset); - reg_offset -= 8; - LoadDFromOffset(D10, SP, reg_offset); - reg_offset -= 8; - LoadDFromOffset(D9, SP, reg_offset); - reg_offset -= 8; - LoadDFromOffset(D8, SP, reg_offset); + static constexpr XRegister x_spills[] = { + LR, X29, X28, X27, X26, X25, X24, X23, X22, X21, X20 }; + for (size_t i = 0; i < arraysize(x_spills); i++) { + XRegister reg = x_spills[i]; + reg_offset -= 8; + LoadFromOffset(reg, SP, reg_offset); + cfi_.Restore(DWARFReg(reg)); + } + for (int d = 15; d >= 8; d--) { + DRegister reg = static_cast<DRegister>(d); + reg_offset -= 8; + LoadDFromOffset(reg, SP, reg_offset); + cfi_.Restore(DWARFReg(reg)); + } // Decrease frame size to start of callee saved regs. DecreaseFrameSize(frame_size); // Pop callee saved and return to LR. ___ Ret(); + + // The CFI should be restored for any code that follows the exit block. + cfi_.RestoreState(); + cfi_.DefCFAOffset(frame_size); } } // namespace arm64 diff --git a/compiler/utils/arm64/assembler_arm64.h b/compiler/utils/arm64/assembler_arm64.h index a69be2599e..8973b9ca8a 100644 --- a/compiler/utils/arm64/assembler_arm64.h +++ b/compiler/utils/arm64/assembler_arm64.h @@ -30,9 +30,11 @@ // TODO: make vixl clean wrt -Wshadow. #pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunknown-pragmas" #pragma GCC diagnostic ignored "-Wshadow" -#include "a64/macro-assembler-a64.h" -#include "a64/disasm-a64.h" +#pragma GCC diagnostic ignored "-Wmissing-noreturn" +#include "vixl/a64/macro-assembler-a64.h" +#include "vixl/a64/disasm-a64.h" #pragma GCC diagnostic pop namespace art { diff --git a/compiler/utils/arm64/managed_register_arm64.h b/compiler/utils/arm64/managed_register_arm64.h index e1d6f3179d..62c1d4dbee 100644 --- a/compiler/utils/arm64/managed_register_arm64.h +++ b/compiler/utils/arm64/managed_register_arm64.h @@ -19,6 +19,7 @@ #include "base/logging.h" #include "constants_arm64.h" +#include "dwarf/register.h" #include "utils/managed_register.h" namespace art { diff --git a/compiler/utils/array_ref.h b/compiler/utils/array_ref.h index b1b0ee5e53..ff5a77c97a 100644 --- a/compiler/utils/array_ref.h +++ b/compiler/utils/array_ref.h @@ -89,6 +89,8 @@ class ArrayRef { : array_(v.data()), size_(v.size()) { } + ArrayRef(const ArrayRef&) = default; + // Assignment operators. 
ArrayRef& operator=(const ArrayRef& other) { diff --git a/compiler/utils/assembler.cc b/compiler/utils/assembler.cc index 5340dd3a25..36342c61c5 100644 --- a/compiler/utils/assembler.cc +++ b/compiler/utils/assembler.cc @@ -105,6 +105,9 @@ void AssemblerBuffer::ExtendCapacity() { CHECK_EQ(Size(), old_size); } +void DebugFrameOpCodeWriterForAssembler::ImplicitlyAdvancePC() { + this->AdvancePC(assembler_->CodeSize()); +} Assembler* Assembler::Create(InstructionSet instruction_set) { switch (instruction_set) { diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h index 923ecdbd9d..ebafd3dd1e 100644 --- a/compiler/utils/assembler.h +++ b/compiler/utils/assembler.h @@ -29,6 +29,7 @@ #include "offsets.h" #include "x86/constants_x86.h" #include "x86_64/constants_x86_64.h" +#include "dwarf/debug_frame_opcode_writer.h" namespace art { @@ -354,6 +355,23 @@ class AssemblerBuffer { friend class AssemblerFixup; }; +// The purpose of this class is to ensure that we do not have to explicitly +// call the AdvancePC method (which is good for convenience and correctness). +class DebugFrameOpCodeWriterForAssembler FINAL + : public dwarf::DebugFrameOpCodeWriter<> { + public: + // This method is called by the opcode writers. + virtual void ImplicitlyAdvancePC() FINAL; + + explicit DebugFrameOpCodeWriterForAssembler(Assembler* buffer) + : dwarf::DebugFrameOpCodeWriter<>(), + assembler_(buffer) { + } + + private: + Assembler* assembler_; +}; + class Assembler { public: static Assembler* Create(InstructionSet instruction_set); @@ -504,18 +522,20 @@ class Assembler { // and branch to a ExceptionSlowPath if it is. virtual void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) = 0; - virtual void InitializeFrameDescriptionEntry() {} - virtual void FinalizeFrameDescriptionEntry() {} - // Give a vector containing FDE data, or null if not used. Note: the assembler must take care - // of handling the lifecycle. - virtual std::vector<uint8_t>* GetFrameDescriptionEntry() { return nullptr; } - virtual ~Assembler() {} + /** + * @brief Buffer of DWARF's Call Frame Information opcodes. + * @details It is used by debuggers and other tools to unwind the call stack.
+ */ + DebugFrameOpCodeWriterForAssembler& cfi() { return cfi_; } + protected: - Assembler() : buffer_() {} + Assembler() : buffer_(), cfi_(this) {} AssemblerBuffer buffer_; + + DebugFrameOpCodeWriterForAssembler cfi_; }; } // namespace art diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h index 6f8b3012a4..3fe1a31d70 100644 --- a/compiler/utils/assembler_test.h +++ b/compiler/utils/assembler_test.h @@ -44,7 +44,9 @@ static std::string tmpnam_; enum class RegisterView { // private kUsePrimaryName, - kUseSecondaryName + kUseSecondaryName, + kUseTertiaryName, + kUseQuaternaryName, }; template<typename Ass, typename Reg, typename FPReg, typename Imm> @@ -97,6 +99,15 @@ class AssemblerTest : public testing::Test { fmt); } + std::string Repeatrb(void (Ass::*f)(Reg, Reg), std::string fmt) { + return RepeatTemplatedRegisters<Reg, Reg>(f, + GetRegisters(), + GetRegisters(), + &AssemblerTest::GetRegName<RegisterView::kUseSecondaryName>, + &AssemblerTest::GetRegName<RegisterView::kUseQuaternaryName>, + fmt); + } + std::string RepeatRr(void (Ass::*f)(Reg, Reg), std::string fmt) { return RepeatTemplatedRegisters<Reg, Reg>(f, GetRegisters(), @@ -123,6 +134,16 @@ class AssemblerTest : public testing::Test { fmt); } + std::string RepeatFFI(void (Ass::*f)(FPReg, FPReg, const Imm&), size_t imm_bytes, std::string fmt) { + return RepeatTemplatedRegistersImm<FPReg, FPReg>(f, + GetFPRegisters(), + GetFPRegisters(), + &AssemblerTest::GetFPRegName, + &AssemblerTest::GetFPRegName, + imm_bytes, + fmt); + } + std::string RepeatFR(void (Ass::*f)(FPReg, Reg), std::string fmt) { return RepeatTemplatedRegisters<FPReg, Reg>(f, GetFPRegisters(), @@ -230,6 +251,18 @@ class AssemblerTest : public testing::Test { UNREACHABLE(); } + // Tertiary register names are the tertiary view on registers, e.g., 16b on 64b systems. + virtual std::string GetTertiaryRegisterName(const Reg& reg ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL) << "Architecture does not support tertiary registers"; + UNREACHABLE(); + } + + // Quaternary register names are the quaternary view on registers, e.g., 8b on 64b systems. 
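To make the new tertiary/quaternary register views concrete: on x86-64 they correspond to the 16-bit and 8-bit names of a register. A small table-driven illustration (the four registers listed are just examples, not the harness's register set):

#include <array>
#include <cstdio>

struct RegViews { const char* r64; const char* r32; const char* r16; const char* r8; };

int main() {
  const std::array<RegViews, 4> regs = {{
      {"rax", "eax", "ax", "al"},
      {"rbx", "ebx", "bx", "bl"},
      {"rcx", "ecx", "cx", "cl"},
      {"rdx", "edx", "dx", "dl"},
  }};
  for (const RegViews& r : regs) {
    std::printf("%s -> %s -> %s -> %s\n", r.r64, r.r32, r.r16, r.r8);  // primary..quaternary
  }
  return 0;
}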
+ virtual std::string GetQuaternaryRegisterName(const Reg& reg ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL) << "Architecture does not support quaternary registers"; + UNREACHABLE(); + } + std::string GetRegisterName(const Reg& reg) { return GetRegName<RegisterView::kUsePrimaryName>(reg); } @@ -448,6 +481,57 @@ class AssemblerTest : public testing::Test { return str; } + template <typename Reg1, typename Reg2> + std::string RepeatTemplatedRegistersImm(void (Ass::*f)(Reg1, Reg2, const Imm&), + const std::vector<Reg1*> reg1_registers, + const std::vector<Reg2*> reg2_registers, + std::string (AssemblerTest::*GetName1)(const Reg1&), + std::string (AssemblerTest::*GetName2)(const Reg2&), + size_t imm_bytes, + std::string fmt) { + std::vector<int64_t> imms = CreateImmediateValues(imm_bytes); + WarnOnCombinations(reg1_registers.size() * reg2_registers.size() * imms.size()); + + std::string str; + for (auto reg1 : reg1_registers) { + for (auto reg2 : reg2_registers) { + for (int64_t imm : imms) { + Imm new_imm = CreateImmediate(imm); + (assembler_.get()->*f)(*reg1, *reg2, new_imm); + std::string base = fmt; + + std::string reg1_string = (this->*GetName1)(*reg1); + size_t reg1_index; + while ((reg1_index = base.find(REG1_TOKEN)) != std::string::npos) { + base.replace(reg1_index, ConstexprStrLen(REG1_TOKEN), reg1_string); + } + + std::string reg2_string = (this->*GetName2)(*reg2); + size_t reg2_index; + while ((reg2_index = base.find(REG2_TOKEN)) != std::string::npos) { + base.replace(reg2_index, ConstexprStrLen(REG2_TOKEN), reg2_string); + } + + size_t imm_index = base.find(IMM_TOKEN); + if (imm_index != std::string::npos) { + std::ostringstream sreg; + sreg << imm; + std::string imm_string = sreg.str(); + base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string); + } + + if (str.size() > 0) { + str += "\n"; + } + str += base; + } + } + } + // Add a newline at the end. + str += "\n"; + return str; + } + template <RegisterView kRegView> std::string GetRegName(const Reg& reg) { std::ostringstream sreg; @@ -459,6 +543,14 @@ class AssemblerTest : public testing::Test { case RegisterView::kUseSecondaryName: sreg << GetSecondaryRegisterName(reg); break; + + case RegisterView::kUseTertiaryName: + sreg << GetTertiaryRegisterName(reg); + break; + + case RegisterView::kUseQuaternaryName: + sreg << GetQuaternaryRegisterName(reg); + break; } return sreg.str(); } diff --git a/compiler/utils/dex_cache_arrays_layout-inl.h b/compiler/utils/dex_cache_arrays_layout-inl.h new file mode 100644 index 0000000000..7d02ce35d8 --- /dev/null +++ b/compiler/utils/dex_cache_arrays_layout-inl.h @@ -0,0 +1,77 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_UTILS_DEX_CACHE_ARRAYS_LAYOUT_INL_H_ +#define ART_COMPILER_UTILS_DEX_CACHE_ARRAYS_LAYOUT_INL_H_ + +#include "dex_cache_arrays_layout.h" + +#include "base/logging.h" +#include "globals.h" +#include "mirror/array-inl.h" +#include "primitive.h" +#include "utils.h" + +namespace mirror { +class ArtField; +class ArtMethod; +class Class; +class String; +} // namespace mirror + +namespace art { + +inline DexCacheArraysLayout::DexCacheArraysLayout(const DexFile* dex_file) + : /* types_offset_ is always 0u */ + methods_offset_(types_offset_ + ArraySize<mirror::Class>(dex_file->NumTypeIds())), + strings_offset_(methods_offset_ + ArraySize<mirror::ArtMethod>(dex_file->NumMethodIds())), + fields_offset_(strings_offset_ + ArraySize<mirror::String>(dex_file->NumStringIds())), + size_(fields_offset_ + ArraySize<mirror::ArtField>(dex_file->NumFieldIds())) { +} + +inline size_t DexCacheArraysLayout::TypeOffset(uint32_t type_idx) const { + return types_offset_ + ElementOffset<mirror::Class>(type_idx); +} + +inline size_t DexCacheArraysLayout::MethodOffset(uint32_t method_idx) const { + return methods_offset_ + ElementOffset<mirror::ArtMethod>(method_idx); +} + +inline size_t DexCacheArraysLayout::StringOffset(uint32_t string_idx) const { + return strings_offset_ + ElementOffset<mirror::String>(string_idx); +} + +inline size_t DexCacheArraysLayout::FieldOffset(uint32_t field_idx) const { + return fields_offset_ + ElementOffset<mirror::ArtField>(field_idx); +} + +template <typename MirrorType> +inline size_t DexCacheArraysLayout::ElementOffset(uint32_t idx) { + return mirror::Array::DataOffset(sizeof(mirror::HeapReference<MirrorType>)).Uint32Value() + + sizeof(mirror::HeapReference<MirrorType>) * idx; +} + +template <typename MirrorType> +inline size_t DexCacheArraysLayout::ArraySize(uint32_t num_elements) { + size_t array_size = mirror::ComputeArraySize( + num_elements, ComponentSizeShiftWidth<sizeof(mirror::HeapReference<MirrorType>)>()); + DCHECK_NE(array_size, 0u); // No overflow expected for dex cache arrays. + return RoundUp(array_size, kObjectAlignment); +} + +} // namespace art + +#endif // ART_COMPILER_UTILS_DEX_CACHE_ARRAYS_LAYOUT_INL_H_ diff --git a/compiler/utils/dex_cache_arrays_layout.h b/compiler/utils/dex_cache_arrays_layout.h new file mode 100644 index 0000000000..b461256f63 --- /dev/null +++ b/compiler/utils/dex_cache_arrays_layout.h @@ -0,0 +1,89 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_UTILS_DEX_CACHE_ARRAYS_LAYOUT_H_ +#define ART_COMPILER_UTILS_DEX_CACHE_ARRAYS_LAYOUT_H_ + +namespace art { + +/** + * @class DexCacheArraysLayout + * @details This class provides the layout information for the type, method, field and + * string arrays for a DexCache with a fixed arrays' layout (such as in the boot image), + */ +class DexCacheArraysLayout { + public: + // Construct an invalid layout. 
+ DexCacheArraysLayout() + : /* types_offset_ is always 0u */ + methods_offset_(0u), + strings_offset_(0u), + fields_offset_(0u), + size_(0u) { + } + + // Construct a layout for a particular dex file. + explicit DexCacheArraysLayout(const DexFile* dex_file); + + bool Valid() const { + return Size() != 0u; + } + + size_t Size() const { + return size_; + } + + size_t TypesOffset() const { + return types_offset_; + } + + size_t TypeOffset(uint32_t type_idx) const; + + size_t MethodsOffset() const { + return methods_offset_; + } + + size_t MethodOffset(uint32_t method_idx) const; + + size_t StringsOffset() const { + return strings_offset_; + } + + size_t StringOffset(uint32_t string_idx) const; + + size_t FieldsOffset() const { + return fields_offset_; + } + + size_t FieldOffset(uint32_t field_idx) const; + + private: + static constexpr size_t types_offset_ = 0u; + const size_t methods_offset_; + const size_t strings_offset_; + const size_t fields_offset_; + const size_t size_; + + template <typename MirrorType> + static size_t ElementOffset(uint32_t idx); + + template <typename MirrorType> + static size_t ArraySize(uint32_t num_elements); +}; + +} // namespace art + +#endif // ART_COMPILER_UTILS_DEX_CACHE_ARRAYS_LAYOUT_H_ diff --git a/compiler/utils/dwarf_cfi.cc b/compiler/utils/dwarf_cfi.cc deleted file mode 100644 index a7e09c6517..0000000000 --- a/compiler/utils/dwarf_cfi.cc +++ /dev/null @@ -1,156 +0,0 @@ -/* - * Copyright (C) 2014 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "leb128.h" -#include "utils.h" - -#include "dwarf_cfi.h" - -namespace art { - -void DW_CFA_advance_loc(std::vector<uint8_t>* buf, uint32_t increment) { - if (increment < 64) { - // Encoding in opcode. - buf->push_back(0x1 << 6 | increment); - } else if (increment < 256) { - // Single byte delta. - buf->push_back(0x02); - buf->push_back(increment); - } else if (increment < 256 * 256) { - // Two byte delta. - buf->push_back(0x03); - buf->push_back(increment & 0xff); - buf->push_back((increment >> 8) & 0xff); - } else { - // Four byte delta. - buf->push_back(0x04); - Push32(buf, increment); - } -} - -void DW_CFA_offset_extended_sf(std::vector<uint8_t>* buf, int reg, int32_t offset) { - buf->push_back(0x11); - EncodeUnsignedLeb128(reg, buf); - EncodeSignedLeb128(offset, buf); -} - -void DW_CFA_offset(std::vector<uint8_t>* buf, int reg, uint32_t offset) { - buf->push_back((0x2 << 6) | reg); - EncodeUnsignedLeb128(offset, buf); -} - -void DW_CFA_def_cfa_offset(std::vector<uint8_t>* buf, int32_t offset) { - buf->push_back(0x0e); - EncodeUnsignedLeb128(offset, buf); -} - -void DW_CFA_remember_state(std::vector<uint8_t>* buf) { - buf->push_back(0x0a); -} - -void DW_CFA_restore_state(std::vector<uint8_t>* buf) { - buf->push_back(0x0b); -} - -void WriteFDEHeader(std::vector<uint8_t>* buf, bool is_64bit) { - // 'length' (filled in by other functions). 
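Back to the DexCacheArraysLayout header added above: the offsets are just running sums of rounded-up array sizes. A back-of-the-envelope model follows; the reference size, array header size and object alignment below are placeholder assumptions for the example, not the real mirror::Array / kObjectAlignment values.

#include <cstdint>
#include <cstdio>

constexpr size_t kHeapRefSize = 4;       // assumed sizeof(mirror::HeapReference<T>)
constexpr size_t kArrayDataOffset = 12;  // assumed array header size before the data
constexpr size_t kObjectAlign = 8;       // assumed kObjectAlignment

constexpr size_t RoundUp(size_t x, size_t n) { return (x + n - 1) / n * n; }
constexpr size_t ArraySize(size_t num_elements) {
  return RoundUp(kArrayDataOffset + kHeapRefSize * num_elements, kObjectAlign);
}

int main() {
  const size_t num_types = 100, num_methods = 2000, num_strings = 3000, num_fields = 1500;
  const size_t types_offset = 0;
  const size_t methods_offset = types_offset + ArraySize(num_types);
  const size_t strings_offset = methods_offset + ArraySize(num_methods);
  const size_t fields_offset = strings_offset + ArraySize(num_strings);
  const size_t total = fields_offset + ArraySize(num_fields);
  std::printf("types@%zu methods@%zu strings@%zu fields@%zu size=%zu\n",
              types_offset, methods_offset, strings_offset, fields_offset, total);
  // Element offset within one array, mirroring ElementOffset<MirrorType>():
  const size_t method_idx = 42;
  std::printf("method %zu at %zu\n", method_idx,
              methods_offset + kArrayDataOffset + kHeapRefSize * method_idx);
  return 0;
}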
- if (is_64bit) { - Push32(buf, 0xffffffff); // Indicates 64bit - Push32(buf, 0); - Push32(buf, 0); - } else { - Push32(buf, 0); - } - - // 'CIE_pointer' (filled in by linker). - if (is_64bit) { - Push32(buf, 0); - Push32(buf, 0); - } else { - Push32(buf, 0); - } - - // 'initial_location' (filled in by linker). - if (is_64bit) { - Push32(buf, 0); - Push32(buf, 0); - } else { - Push32(buf, 0); - } - - // 'address_range' (filled in by other functions). - if (is_64bit) { - Push32(buf, 0); - Push32(buf, 0); - } else { - Push32(buf, 0); - } - - // Augmentation length: 0 - buf->push_back(0); -} - -void WriteFDEAddressRange(std::vector<uint8_t>* buf, uint64_t data, bool is_64bit) { - const size_t kOffsetOfAddressRange = is_64bit? 28 : 12; - CHECK(buf->size() >= kOffsetOfAddressRange + (is_64bit? 8 : 4)); - - uint8_t *p = buf->data() + kOffsetOfAddressRange; - if (is_64bit) { - p[0] = data; - p[1] = data >> 8; - p[2] = data >> 16; - p[3] = data >> 24; - p[4] = data >> 32; - p[5] = data >> 40; - p[6] = data >> 48; - p[7] = data >> 56; - } else { - p[0] = data; - p[1] = data >> 8; - p[2] = data >> 16; - p[3] = data >> 24; - } -} - -void WriteCFILength(std::vector<uint8_t>* buf, bool is_64bit) { - uint64_t length = is_64bit ? buf->size() - 12 : buf->size() - 4; - DCHECK_EQ((length & 0x3), 0U); - - uint8_t *p = is_64bit? buf->data() + 4 : buf->data(); - if (is_64bit) { - p[0] = length; - p[1] = length >> 8; - p[2] = length >> 16; - p[3] = length >> 24; - p[4] = length >> 32; - p[5] = length >> 40; - p[6] = length >> 48; - p[7] = length >> 56; - } else { - p[0] = length; - p[1] = length >> 8; - p[2] = length >> 16; - p[3] = length >> 24; - } -} - -void PadCFI(std::vector<uint8_t>* buf) { - while (buf->size() & 0x3) { - buf->push_back(0); - } -} - -} // namespace art diff --git a/compiler/utils/dwarf_cfi.h b/compiler/utils/dwarf_cfi.h deleted file mode 100644 index 0c8b1516dd..0000000000 --- a/compiler/utils/dwarf_cfi.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (C) 2014 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_UTILS_DWARF_CFI_H_ -#define ART_COMPILER_UTILS_DWARF_CFI_H_ - -#include <vector> - -namespace art { - -/** - * @brief Enter a 'DW_CFA_advance_loc' into an FDE buffer - * @param buf FDE buffer. - * @param increment Amount by which to increase the current location. - */ -void DW_CFA_advance_loc(std::vector<uint8_t>* buf, uint32_t increment); - -/** - * @brief Enter a 'DW_CFA_offset_extended_sf' into an FDE buffer - * @param buf FDE buffer. - * @param reg Register number. - * @param offset Offset of register address from CFA. - */ -void DW_CFA_offset_extended_sf(std::vector<uint8_t>* buf, int reg, int32_t offset); - -/** - * @brief Enter a 'DW_CFA_offset' into an FDE buffer - * @param buf FDE buffer. - * @param reg Register number. - * @param offset Offset of register address from CFA. 
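The deleted DW_CFA_advance_loc helper above picks the shortest of four encodings depending on the PC delta; the replacement dwarf::DebugFrameOpCodeWriter produces equivalent bytes. A standalone re-statement of that size-dependent encoding, assuming little-endian byte order as the old Push32 helper did:

#include <cstdint>
#include <cstdio>
#include <vector>

void AdvanceLoc(std::vector<uint8_t>* buf, uint32_t delta) {
  if (delta < 64) {
    buf->push_back(static_cast<uint8_t>((0x1 << 6) | delta));  // delta packed into the opcode
  } else if (delta < 256) {
    buf->push_back(0x02);                                      // DW_CFA_advance_loc1
    buf->push_back(static_cast<uint8_t>(delta));
  } else if (delta < 256 * 256) {
    buf->push_back(0x03);                                      // DW_CFA_advance_loc2
    buf->push_back(static_cast<uint8_t>(delta));
    buf->push_back(static_cast<uint8_t>(delta >> 8));
  } else {
    buf->push_back(0x04);                                      // DW_CFA_advance_loc4
    for (int i = 0; i < 4; i++) buf->push_back(static_cast<uint8_t>(delta >> (8 * i)));
  }
}

int main() {
  std::vector<uint8_t> buf;
  AdvanceLoc(&buf, 12);
  AdvanceLoc(&buf, 300);
  for (uint8_t b : buf) std::printf("%02x ", b);
  std::printf("\n");
  return 0;
}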
- */ -void DW_CFA_offset(std::vector<uint8_t>* buf, int reg, uint32_t offset); - -/** - * @brief Enter a 'DW_CFA_def_cfa_offset' into an FDE buffer - * @param buf FDE buffer. - * @param offset New offset of CFA. - */ -void DW_CFA_def_cfa_offset(std::vector<uint8_t>* buf, int32_t offset); - -/** - * @brief Enter a 'DW_CFA_remember_state' into an FDE buffer - * @param buf FDE buffer. - */ -void DW_CFA_remember_state(std::vector<uint8_t>* buf); - -/** - * @brief Enter a 'DW_CFA_restore_state' into an FDE buffer - * @param buf FDE buffer. - */ -void DW_CFA_restore_state(std::vector<uint8_t>* buf); - -/** - * @brief Write FDE header into an FDE buffer - * @param buf FDE buffer. - * @param is_64bit If FDE is for 64bit application. - */ -void WriteFDEHeader(std::vector<uint8_t>* buf, bool is_64bit); - -/** - * @brief Set 'address_range' field of an FDE buffer - * @param buf FDE buffer. - * @param data Data value. - * @param is_64bit If FDE is for 64bit application. - */ -void WriteFDEAddressRange(std::vector<uint8_t>* buf, uint64_t data, bool is_64bit); - -/** - * @brief Set 'length' field of an FDE buffer - * @param buf FDE buffer. - * @param is_64bit If FDE is for 64bit application. - */ -void WriteCFILength(std::vector<uint8_t>* buf, bool is_64bit); - -/** - * @brief Pad an FDE buffer with 0 until its size is a multiple of 4 - * @param buf FDE buffer. - */ -void PadCFI(std::vector<uint8_t>* buf); -} // namespace art - -#endif // ART_COMPILER_UTILS_DWARF_CFI_H_ diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc index b5437b0eda..709a911f6a 100644 --- a/compiler/utils/mips/assembler_mips.cc +++ b/compiler/utils/mips/assembler_mips.cc @@ -536,6 +536,10 @@ void MipsAssembler::StoreDToOffset(DRegister reg, Register base, int32_t offset) Sdc1(reg, base, offset); } +static dwarf::Reg DWARFReg(Register reg) { + return dwarf::Reg::MipsCore(static_cast<int>(reg)); +} + constexpr size_t kFramePointerSize = 4; void MipsAssembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, @@ -549,10 +553,12 @@ void MipsAssembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, // Push callee saves and return address int stack_offset = frame_size - kFramePointerSize; StoreToOffset(kStoreWord, RA, SP, stack_offset); + cfi_.RelOffset(DWARFReg(RA), stack_offset); for (int i = callee_save_regs.size() - 1; i >= 0; --i) { stack_offset -= kFramePointerSize; Register reg = callee_save_regs.at(i).AsMips().AsCoreRegister(); StoreToOffset(kStoreWord, reg, SP, stack_offset); + cfi_.RelOffset(DWARFReg(reg), stack_offset); } // Write out Method*. @@ -568,31 +574,40 @@ void MipsAssembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, void MipsAssembler::RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs) { CHECK_ALIGNED(frame_size, kStackAlignment); + cfi_.RememberState(); // Pop callee saves and return address int stack_offset = frame_size - (callee_save_regs.size() * kFramePointerSize) - kFramePointerSize; for (size_t i = 0; i < callee_save_regs.size(); ++i) { Register reg = callee_save_regs.at(i).AsMips().AsCoreRegister(); LoadFromOffset(kLoadWord, reg, SP, stack_offset); + cfi_.Restore(DWARFReg(reg)); stack_offset += kFramePointerSize; } LoadFromOffset(kLoadWord, RA, SP, stack_offset); + cfi_.Restore(DWARFReg(RA)); // Decrease frame to required size. DecreaseFrameSize(frame_size); // Then jump to the return address. Jr(RA); + + // The CFI should be restored for any code that follows the exit block. 
+ cfi_.RestoreState(); + cfi_.DefCFAOffset(frame_size); } void MipsAssembler::IncreaseFrameSize(size_t adjust) { CHECK_ALIGNED(adjust, kStackAlignment); AddConstant(SP, SP, -adjust); + cfi_.AdjustCFAOffset(adjust); } void MipsAssembler::DecreaseFrameSize(size_t adjust) { CHECK_ALIGNED(adjust, kStackAlignment); AddConstant(SP, SP, adjust); + cfi_.AdjustCFAOffset(-adjust); } void MipsAssembler::Store(FrameOffset dest, ManagedRegister msrc, size_t size) { diff --git a/compiler/utils/mips/managed_register_mips.h b/compiler/utils/mips/managed_register_mips.h index dd55cc4e6a..40d39e3386 100644 --- a/compiler/utils/mips/managed_register_mips.h +++ b/compiler/utils/mips/managed_register_mips.h @@ -18,6 +18,7 @@ #define ART_COMPILER_UTILS_MIPS_MANAGED_REGISTER_MIPS_H_ #include "constants_mips.h" +#include "dwarf/register.h" #include "utils/managed_register.h" namespace art { diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc index 233ae7db3c..282ab96ce4 100644 --- a/compiler/utils/mips64/assembler_mips64.cc +++ b/compiler/utils/mips64/assembler_mips64.cc @@ -568,6 +568,10 @@ void Mips64Assembler::StoreFpuToOffset(StoreOperandType type, FpuRegister reg, G } } +static dwarf::Reg DWARFReg(GpuRegister reg) { + return dwarf::Reg::Mips64Core(static_cast<int>(reg)); +} + constexpr size_t kFramePointerSize = 8; void Mips64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, @@ -581,10 +585,12 @@ void Mips64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, // Push callee saves and return address int stack_offset = frame_size - kFramePointerSize; StoreToOffset(kStoreDoubleword, RA, SP, stack_offset); + cfi_.RelOffset(DWARFReg(RA), stack_offset); for (int i = callee_save_regs.size() - 1; i >= 0; --i) { stack_offset -= kFramePointerSize; GpuRegister reg = callee_save_regs.at(i).AsMips64().AsGpuRegister(); StoreToOffset(kStoreDoubleword, reg, SP, stack_offset); + cfi_.RelOffset(DWARFReg(reg), stack_offset); } // Write out Method*. @@ -612,31 +618,40 @@ void Mips64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, void Mips64Assembler::RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs) { CHECK_ALIGNED(frame_size, kStackAlignment); + cfi_.RememberState(); // Pop callee saves and return address int stack_offset = frame_size - (callee_save_regs.size() * kFramePointerSize) - kFramePointerSize; for (size_t i = 0; i < callee_save_regs.size(); ++i) { GpuRegister reg = callee_save_regs.at(i).AsMips64().AsGpuRegister(); LoadFromOffset(kLoadDoubleword, reg, SP, stack_offset); + cfi_.Restore(DWARFReg(reg)); stack_offset += kFramePointerSize; } LoadFromOffset(kLoadDoubleword, RA, SP, stack_offset); + cfi_.Restore(DWARFReg(RA)); // Decrease frame to required size. DecreaseFrameSize(frame_size); // Then jump to the return address. Jr(RA); + + // The CFI should be restored for any code that follows the exit block. 
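The RememberState/RestoreState pairing that now appears in every RemoveFrame is easiest to see with a toy CFA tracker. This models only the offset bookkeeping, not ART's DebugFrameOpCodeWriter: the epilogue unwinds the frame, but any code emitted after the return instruction still executes with the full frame, so the remembered state is reinstated and the CFA offset re-asserted.

#include <cassert>
#include <cstdio>
#include <vector>

class CfaTracker {
 public:
  void AdjustCFAOffset(int delta) { offset_ += delta; }
  void DefCFAOffset(int offset) { offset_ = offset; }
  void RememberState() { saved_.push_back(offset_); }
  void RestoreState() { assert(!saved_.empty()); offset_ = saved_.back(); saved_.pop_back(); }
  int offset() const { return offset_; }
 private:
  int offset_ = 0;
  std::vector<int> saved_;
};

int main() {
  const int frame_size = 64;
  CfaTracker cfi;
  cfi.AdjustCFAOffset(frame_size);   // prologue: sp -= frame_size
  cfi.RememberState();               // about to emit the epilogue
  cfi.AdjustCFAOffset(-frame_size);  // epilogue: sp += frame_size, then return
  std::printf("after epilogue: CFA = sp + %d\n", cfi.offset());
  cfi.RestoreState();                // code after the exit block still lives in the frame
  cfi.DefCFAOffset(frame_size);      // mirrors the explicit re-assert in the code above
  std::printf("after restore:  CFA = sp + %d\n", cfi.offset());
  return 0;
}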
+ cfi_.RestoreState(); + cfi_.DefCFAOffset(frame_size); } void Mips64Assembler::IncreaseFrameSize(size_t adjust) { CHECK_ALIGNED(adjust, kStackAlignment); AddConstant64(SP, SP, -adjust); + cfi_.AdjustCFAOffset(adjust); } void Mips64Assembler::DecreaseFrameSize(size_t adjust) { CHECK_ALIGNED(adjust, kStackAlignment); AddConstant64(SP, SP, adjust); + cfi_.AdjustCFAOffset(-adjust); } void Mips64Assembler::Store(FrameOffset dest, ManagedRegister msrc, size_t size) { @@ -1025,7 +1040,7 @@ void Mips64ExceptionSlowPath::Emit(Assembler* sasm) { __ Move(A0, scratch_.AsGpuRegister()); // Set up call to Thread::Current()->pDeliverException __ LoadFromOffset(kLoadDoubleword, T9, S1, - QUICK_ENTRYPOINT_OFFSET(4, pDeliverException).Int32Value()); + QUICK_ENTRYPOINT_OFFSET(8, pDeliverException).Int32Value()); __ Jr(T9); // Call never returns __ Break(); diff --git a/compiler/utils/mips64/managed_register_mips64.h b/compiler/utils/mips64/managed_register_mips64.h index 924a928389..4c4705bbfb 100644 --- a/compiler/utils/mips64/managed_register_mips64.h +++ b/compiler/utils/mips64/managed_register_mips64.h @@ -18,6 +18,7 @@ #define ART_COMPILER_UTILS_MIPS64_MANAGED_REGISTER_MIPS64_H_ #include "constants_mips64.h" +#include "dwarf/register.h" #include "utils/managed_register.h" namespace art { diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index 5773459ff5..f8bba07f84 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -20,7 +20,6 @@ #include "entrypoints/quick/quick_entrypoints.h" #include "memory_region.h" #include "thread.h" -#include "utils/dwarf_cfi.h" namespace art { namespace x86 { @@ -695,6 +694,28 @@ void X86Assembler::ucomisd(XmmRegister a, XmmRegister b) { } +void X86Assembler::roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x3A); + EmitUint8(0x0B); + EmitXmmRegisterOperand(dst, src); + EmitUint8(imm.value()); +} + + +void X86Assembler::roundss(XmmRegister dst, XmmRegister src, const Immediate& imm) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x3A); + EmitUint8(0x0A); + EmitXmmRegisterOperand(dst, src); + EmitUint8(imm.value()); +} + + void X86Assembler::sqrtsd(XmmRegister dst, XmmRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xF2); @@ -1264,32 +1285,62 @@ void X86Assembler::decl(const Address& address) { void X86Assembler::shll(Register reg, const Immediate& imm) { - EmitGenericShift(4, reg, imm); + EmitGenericShift(4, Operand(reg), imm); } void X86Assembler::shll(Register operand, Register shifter) { - EmitGenericShift(4, operand, shifter); + EmitGenericShift(4, Operand(operand), shifter); +} + + +void X86Assembler::shll(const Address& address, const Immediate& imm) { + EmitGenericShift(4, address, imm); +} + + +void X86Assembler::shll(const Address& address, Register shifter) { + EmitGenericShift(4, address, shifter); } void X86Assembler::shrl(Register reg, const Immediate& imm) { - EmitGenericShift(5, reg, imm); + EmitGenericShift(5, Operand(reg), imm); } void X86Assembler::shrl(Register operand, Register shifter) { - EmitGenericShift(5, operand, shifter); + EmitGenericShift(5, Operand(operand), shifter); +} + + +void X86Assembler::shrl(const Address& address, const Immediate& imm) { + EmitGenericShift(5, address, imm); +} + + +void X86Assembler::shrl(const Address& address, Register shifter) { + 
EmitGenericShift(5, address, shifter); } void X86Assembler::sarl(Register reg, const Immediate& imm) { - EmitGenericShift(7, reg, imm); + EmitGenericShift(7, Operand(reg), imm); } void X86Assembler::sarl(Register operand, Register shifter) { - EmitGenericShift(7, operand, shifter); + EmitGenericShift(7, Operand(operand), shifter); +} + + +void X86Assembler::sarl(const Address& address, const Immediate& imm) { + EmitGenericShift(7, address, imm); +} + + +void X86Assembler::sarl(const Address& address, Register shifter) { + EmitGenericShift(7, address, shifter); } @@ -1302,6 +1353,15 @@ void X86Assembler::shld(Register dst, Register src, Register shifter) { } +void X86Assembler::shld(Register dst, Register src, const Immediate& imm) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x0F); + EmitUint8(0xA4); + EmitRegisterOperand(src, dst); + EmitUint8(imm.value() & 0xFF); +} + + void X86Assembler::shrd(Register dst, Register src, Register shifter) { DCHECK_EQ(ECX, shifter); AssemblerBuffer::EnsureCapacity ensured(&buffer_); @@ -1311,6 +1371,15 @@ void X86Assembler::shrd(Register dst, Register src, Register shifter) { } +void X86Assembler::shrd(Register dst, Register src, const Immediate& imm) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x0F); + EmitUint8(0xAC); + EmitRegisterOperand(src, dst); + EmitUint8(imm.value() & 0xFF); +} + + void X86Assembler::negl(Register reg) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xF7); @@ -1445,6 +1514,15 @@ void X86Assembler::cmpxchgl(const Address& address, Register reg) { EmitOperand(reg, address); } + +void X86Assembler::cmpxchg8b(const Address& address) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x0F); + EmitUint8(0xC7); + EmitOperand(1, address); +} + + void X86Assembler::mfence() { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x0F); @@ -1585,38 +1663,32 @@ void X86Assembler::EmitLabelLink(Label* label) { void X86Assembler::EmitGenericShift(int reg_or_opcode, - Register reg, + const Operand& operand, const Immediate& imm) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); CHECK(imm.is_int8()); if (imm.value() == 1) { EmitUint8(0xD1); - EmitOperand(reg_or_opcode, Operand(reg)); + EmitOperand(reg_or_opcode, operand); } else { EmitUint8(0xC1); - EmitOperand(reg_or_opcode, Operand(reg)); + EmitOperand(reg_or_opcode, operand); EmitUint8(imm.value() & 0xFF); } } void X86Assembler::EmitGenericShift(int reg_or_opcode, - Register operand, + const Operand& operand, Register shifter) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); CHECK_EQ(shifter, ECX); EmitUint8(0xD3); - EmitOperand(reg_or_opcode, Operand(operand)); -} - -void X86Assembler::InitializeFrameDescriptionEntry() { - WriteFDEHeader(&cfi_info_, false /* is_64bit */); + EmitOperand(reg_or_opcode, operand); } -void X86Assembler::FinalizeFrameDescriptionEntry() { - WriteFDEAddressRange(&cfi_info_, buffer_.Size(), false /* is_64bit */); - PadCFI(&cfi_info_); - WriteCFILength(&cfi_info_, false /* is_64bit */); +static dwarf::Reg DWARFReg(Register reg) { + return dwarf::Reg::X86Core(static_cast<int>(reg)); } constexpr size_t kFramePointerSize = 4; @@ -1624,54 +1696,33 @@ constexpr size_t kFramePointerSize = 4; void X86Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, const std::vector<ManagedRegister>& spill_regs, const ManagedRegisterEntrySpills& entry_spills) { - cfi_cfa_offset_ = kFramePointerSize; // Only return address on stack - cfi_pc_ = buffer_.Size(); // Nothing emitted yet 
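The EmitGenericShift overloads above share one encoding path for shl/shr/sar: the operation is selected by the ModRM reg field (4, 5 and 7 respectively), with opcode 0xD1 for shift-by-one and 0xC1 plus an immediate byte otherwise. A register-form, immediate-only sketch; the expected byte sequences in the comments are my own cross-check:

#include <cstdint>
#include <cstdio>
#include <vector>

void EmitShiftByImm(std::vector<uint8_t>* out, int opcode_ext, int reg, uint8_t imm) {
  uint8_t modrm = static_cast<uint8_t>(0xC0 | (opcode_ext << 3) | reg);  // mod=11: register operand
  if (imm == 1) {
    out->push_back(0xD1);  // shift by 1 has its own opcode, no immediate byte
    out->push_back(modrm);
  } else {
    out->push_back(0xC1);  // shift by imm8
    out->push_back(modrm);
    out->push_back(imm);
  }
}

int main() {
  std::vector<uint8_t> buf;
  EmitShiftByImm(&buf, /*shl*/ 4, /*eax*/ 0, 3);  // expect c1 e0 03
  EmitShiftByImm(&buf, /*sar*/ 7, /*edx*/ 2, 1);  // expect d1 fa
  for (uint8_t b : buf) std::printf("%02x ", b);
  std::printf("\n");
  return 0;
}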
- DCHECK_EQ(cfi_pc_, 0U); - - uint32_t reg_offset = 1; + DCHECK_EQ(buffer_.Size(), 0U); // Nothing emitted yet. + cfi_.SetCurrentCFAOffset(4); // Return address on stack. CHECK_ALIGNED(frame_size, kStackAlignment); int gpr_count = 0; for (int i = spill_regs.size() - 1; i >= 0; --i) { - x86::X86ManagedRegister spill = spill_regs.at(i).AsX86(); - DCHECK(spill.IsCpuRegister()); - pushl(spill.AsCpuRegister()); + Register spill = spill_regs.at(i).AsX86().AsCpuRegister(); + pushl(spill); gpr_count++; - - // DW_CFA_advance_loc - DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_); - cfi_pc_ = buffer_.Size(); - // DW_CFA_def_cfa_offset - cfi_cfa_offset_ += kFramePointerSize; - DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_); - // DW_CFA_offset reg offset - reg_offset++; - DW_CFA_offset(&cfi_info_, spill_regs.at(i).AsX86().DWARFRegId(), reg_offset); + cfi_.AdjustCFAOffset(kFramePointerSize); + cfi_.RelOffset(DWARFReg(spill), 0); } - // return address then method on stack + // return address then method on stack. int32_t adjust = frame_size - (gpr_count * kFramePointerSize) - sizeof(StackReference<mirror::ArtMethod>) /*method*/ - kFramePointerSize /*return address*/; addl(ESP, Immediate(-adjust)); - // DW_CFA_advance_loc - DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_); - cfi_pc_ = buffer_.Size(); - // DW_CFA_def_cfa_offset - cfi_cfa_offset_ += adjust; - DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_); - + cfi_.AdjustCFAOffset(adjust); pushl(method_reg.AsX86().AsCpuRegister()); - // DW_CFA_advance_loc - DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_); - cfi_pc_ = buffer_.Size(); - // DW_CFA_def_cfa_offset - cfi_cfa_offset_ += kFramePointerSize; - DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_); + cfi_.AdjustCFAOffset(kFramePointerSize); + DCHECK_EQ(static_cast<size_t>(cfi_.GetCurrentCFAOffset()), frame_size); for (size_t i = 0; i < entry_spills.size(); ++i) { ManagedRegisterSpill spill = entry_spills.at(i); if (spill.AsX86().IsCpuRegister()) { - movl(Address(ESP, frame_size + spill.getSpillOffset()), spill.AsX86().AsCpuRegister()); + int offset = frame_size + spill.getSpillOffset(); + movl(Address(ESP, offset), spill.AsX86().AsCpuRegister()); } else { DCHECK(spill.AsX86().IsXmmRegister()); if (spill.getSize() == 8) { @@ -1687,30 +1738,33 @@ void X86Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, void X86Assembler::RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& spill_regs) { CHECK_ALIGNED(frame_size, kStackAlignment); - addl(ESP, Immediate(frame_size - (spill_regs.size() * kFramePointerSize) - - sizeof(StackReference<mirror::ArtMethod>))); + cfi_.RememberState(); + int adjust = frame_size - (spill_regs.size() * kFramePointerSize) - + sizeof(StackReference<mirror::ArtMethod>); + addl(ESP, Immediate(adjust)); + cfi_.AdjustCFAOffset(-adjust); for (size_t i = 0; i < spill_regs.size(); ++i) { - x86::X86ManagedRegister spill = spill_regs.at(i).AsX86(); - DCHECK(spill.IsCpuRegister()); - popl(spill.AsCpuRegister()); + Register spill = spill_regs.at(i).AsX86().AsCpuRegister(); + popl(spill); + cfi_.AdjustCFAOffset(-static_cast<int>(kFramePointerSize)); + cfi_.Restore(DWARFReg(spill)); } ret(); + // The CFI should be restored for any code that follows the exit block. 
+ cfi_.RestoreState(); + cfi_.DefCFAOffset(frame_size); } void X86Assembler::IncreaseFrameSize(size_t adjust) { CHECK_ALIGNED(adjust, kStackAlignment); addl(ESP, Immediate(-adjust)); - // DW_CFA_advance_loc - DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_); - cfi_pc_ = buffer_.Size(); - // DW_CFA_def_cfa_offset - cfi_cfa_offset_ += adjust; - DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_); + cfi_.AdjustCFAOffset(adjust); } void X86Assembler::DecreaseFrameSize(size_t adjust) { CHECK_ALIGNED(adjust, kStackAlignment); addl(ESP, Immediate(adjust)); + cfi_.AdjustCFAOffset(-adjust); } void X86Assembler::Store(FrameOffset offs, ManagedRegister msrc, size_t size) { diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h index 6ccf2e365d..37acb6ef16 100644 --- a/compiler/utils/x86/assembler_x86.h +++ b/compiler/utils/x86/assembler_x86.h @@ -205,7 +205,7 @@ class Address : public Operand { class X86Assembler FINAL : public Assembler { public: - explicit X86Assembler() : cfi_cfa_offset_(0), cfi_pc_(0) {} + explicit X86Assembler() {} virtual ~X86Assembler() {} /* @@ -312,6 +312,9 @@ class X86Assembler FINAL : public Assembler { void ucomiss(XmmRegister a, XmmRegister b); void ucomisd(XmmRegister a, XmmRegister b); + void roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm); + void roundss(XmmRegister dst, XmmRegister src, const Immediate& imm); + void sqrtsd(XmmRegister dst, XmmRegister src); void sqrtss(XmmRegister dst, XmmRegister src); @@ -426,12 +429,20 @@ class X86Assembler FINAL : public Assembler { void shll(Register reg, const Immediate& imm); void shll(Register operand, Register shifter); + void shll(const Address& address, const Immediate& imm); + void shll(const Address& address, Register shifter); void shrl(Register reg, const Immediate& imm); void shrl(Register operand, Register shifter); + void shrl(const Address& address, const Immediate& imm); + void shrl(const Address& address, Register shifter); void sarl(Register reg, const Immediate& imm); void sarl(Register operand, Register shifter); + void sarl(const Address& address, const Immediate& imm); + void sarl(const Address& address, Register shifter); void shld(Register dst, Register src, Register shifter); + void shld(Register dst, Register src, const Immediate& imm); void shrd(Register dst, Register src, Register shifter); + void shrd(Register dst, Register src, const Immediate& imm); void negl(Register reg); void notl(Register reg); @@ -454,6 +465,7 @@ class X86Assembler FINAL : public Assembler { X86Assembler* lock(); void cmpxchgl(const Address& address, Register reg); + void cmpxchg8b(const Address& address); void mfence(); @@ -473,6 +485,10 @@ class X86Assembler FINAL : public Assembler { lock()->cmpxchgl(address, reg); } + void LockCmpxchg8b(const Address& address) { + lock()->cmpxchg8b(address); + } + // // Misc. functionality // @@ -596,12 +612,6 @@ class X86Assembler FINAL : public Assembler { // and branch to a ExceptionSlowPath if it is. 
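For readers unfamiliar with cmpxchg8b: the new LockCmpxchg8b utility above lets 32-bit x86 code compare-and-swap a 64-bit value atomically. As an analogy only (not a claim about how ART invokes it), the same operation expressed with portable C++ atomics, which a 32-bit x86 compiler typically lowers to lock cmpxchg8b:

#include <atomic>
#include <cstdint>
#include <cstdio>

int main() {
  std::atomic<uint64_t> field{0x0123456789abcdefULL};
  uint64_t expected = 0x0123456789abcdefULL;
  const uint64_t desired = 0xfedcba9876543210ULL;
  // Succeeds only if `field` still holds `expected`; otherwise `expected` is updated.
  bool ok = field.compare_exchange_strong(expected, desired);
  std::printf("swap %s, value is now 0x%016llx\n", ok ? "succeeded" : "failed",
              static_cast<unsigned long long>(field.load()));
  return 0;
}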
void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE; - void InitializeFrameDescriptionEntry() OVERRIDE; - void FinalizeFrameDescriptionEntry() OVERRIDE; - std::vector<uint8_t>* GetFrameDescriptionEntry() OVERRIDE { - return &cfi_info_; - } - private: inline void EmitUint8(uint8_t value); inline void EmitInt32(int32_t value); @@ -617,11 +627,8 @@ class X86Assembler FINAL : public Assembler { void EmitLabelLink(Label* label); void EmitNearLabelLink(Label* label); - void EmitGenericShift(int rm, Register reg, const Immediate& imm); - void EmitGenericShift(int rm, Register operand, Register shifter); - - std::vector<uint8_t> cfi_info_; - uint32_t cfi_cfa_offset_, cfi_pc_; + void EmitGenericShift(int rm, const Operand& operand, const Immediate& imm); + void EmitGenericShift(int rm, const Operand& operand, Register shifter); DISALLOW_COPY_AND_ASSIGN(X86Assembler); }; diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc index fccb510afb..dba3b6ba67 100644 --- a/compiler/utils/x86/assembler_x86_test.cc +++ b/compiler/utils/x86/assembler_x86_test.cc @@ -127,4 +127,49 @@ TEST_F(AssemblerX86Test, LoadLongConstant) { DriverStr(expected, "LoadLongConstant"); } +TEST_F(AssemblerX86Test, LockCmpxchgl) { + GetAssembler()->LockCmpxchgl(x86::Address( + x86::Register(x86::EDI), x86::Register(x86::EBX), x86::TIMES_4, 12), + x86::Register(x86::ESI)); + GetAssembler()->LockCmpxchgl(x86::Address( + x86::Register(x86::EDI), x86::Register(x86::ESI), x86::TIMES_4, 12), + x86::Register(x86::ESI)); + GetAssembler()->LockCmpxchgl(x86::Address( + x86::Register(x86::EDI), x86::Register(x86::ESI), x86::TIMES_4, 12), + x86::Register(x86::EDI)); + GetAssembler()->LockCmpxchgl(x86::Address( + x86::Register(x86::EBP), 0), x86::Register(x86::ESI)); + GetAssembler()->LockCmpxchgl(x86::Address( + x86::Register(x86::EBP), x86::Register(x86::ESI), x86::TIMES_1, 0), + x86::Register(x86::ESI)); + const char* expected = + "lock cmpxchgl %ESI, 0xc(%EDI,%EBX,4)\n" + "lock cmpxchgl %ESI, 0xc(%EDI,%ESI,4)\n" + "lock cmpxchgl %EDI, 0xc(%EDI,%ESI,4)\n" + "lock cmpxchgl %ESI, (%EBP)\n" + "lock cmpxchgl %ESI, (%EBP,%ESI,1)\n"; + + DriverStr(expected, "lock_cmpxchgl"); +} + +TEST_F(AssemblerX86Test, LockCmpxchg8b) { + GetAssembler()->LockCmpxchg8b(x86::Address( + x86::Register(x86::EDI), x86::Register(x86::EBX), x86::TIMES_4, 12)); + GetAssembler()->LockCmpxchg8b(x86::Address( + x86::Register(x86::EDI), x86::Register(x86::ESI), x86::TIMES_4, 12)); + GetAssembler()->LockCmpxchg8b(x86::Address( + x86::Register(x86::EDI), x86::Register(x86::ESI), x86::TIMES_4, 12)); + GetAssembler()->LockCmpxchg8b(x86::Address(x86::Register(x86::EBP), 0)); + GetAssembler()->LockCmpxchg8b(x86::Address( + x86::Register(x86::EBP), x86::Register(x86::ESI), x86::TIMES_1, 0)); + const char* expected = + "lock cmpxchg8b 0xc(%EDI,%EBX,4)\n" + "lock cmpxchg8b 0xc(%EDI,%ESI,4)\n" + "lock cmpxchg8b 0xc(%EDI,%ESI,4)\n" + "lock cmpxchg8b (%EBP)\n" + "lock cmpxchg8b (%EBP,%ESI,1)\n"; + + DriverStr(expected, "lock_cmpxchg8b"); +} + } // namespace art diff --git a/compiler/utils/x86/managed_register_x86.h b/compiler/utils/x86/managed_register_x86.h index 5d46ee25cd..4e8c41e217 100644 --- a/compiler/utils/x86/managed_register_x86.h +++ b/compiler/utils/x86/managed_register_x86.h @@ -18,6 +18,7 @@ #define ART_COMPILER_UTILS_X86_MANAGED_REGISTER_X86_H_ #include "constants_x86.h" +#include "dwarf/register.h" #include "utils/managed_register.h" namespace art { @@ -88,14 +89,6 @@ const int kNumberOfAllocIds = 
kNumberOfCpuAllocIds + kNumberOfXmmAllocIds + // There is a one-to-one mapping between ManagedRegister and register id. class X86ManagedRegister : public ManagedRegister { public: - int DWARFRegId() const { - CHECK(IsCpuRegister()); - // For all the X86 registers we care about: - // EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI, - // DWARF register id is the same as id_. - return static_cast<int>(id_); - } - ByteRegister AsByteRegister() const { CHECK(IsCpuRegister()); CHECK_LT(AsCpuRegister(), ESP); // ESP, EBP, ESI and EDI cannot be encoded as byte registers. diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index b8c757c05d..638659d635 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -20,7 +20,6 @@ #include "entrypoints/quick/quick_entrypoints.h" #include "memory_region.h" #include "thread.h" -#include "utils/dwarf_cfi.h" namespace art { namespace x86_64 { @@ -210,7 +209,9 @@ void X86_64Assembler::movzxb(CpuRegister dst, CpuRegister src) { void X86_64Assembler::movzxb(CpuRegister dst, const Address& src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); - EmitOptionalByteRegNormalizingRex32(dst, src); + // Byte register is only in the source register form, so we don't use + // EmitOptionalByteRegNormalizingRex32(dst, src); + EmitOptionalRex32(dst, src); EmitUint8(0x0F); EmitUint8(0xB6); EmitOperand(dst.LowBits(), src); @@ -228,7 +229,9 @@ void X86_64Assembler::movsxb(CpuRegister dst, CpuRegister src) { void X86_64Assembler::movsxb(CpuRegister dst, const Address& src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); - EmitOptionalByteRegNormalizingRex32(dst, src); + // Byte register is only in the source register form, so we don't use + // EmitOptionalByteRegNormalizingRex32(dst, src); + EmitOptionalRex32(dst, src); EmitUint8(0x0F); EmitUint8(0xBE); EmitOperand(dst.LowBits(), src); @@ -796,6 +799,30 @@ void X86_64Assembler::ucomisd(XmmRegister a, XmmRegister b) { } +void X86_64Assembler::roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x3A); + EmitUint8(0x0B); + EmitXmmRegisterOperand(dst.LowBits(), src); + EmitUint8(imm.value()); +} + + +void X86_64Assembler::roundss(XmmRegister dst, XmmRegister src, const Immediate& imm) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x3A); + EmitUint8(0x0A); + EmitXmmRegisterOperand(dst.LowBits(), src); + EmitUint8(imm.value()); +} + + void X86_64Assembler::sqrtsd(XmmRegister dst, XmmRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xF2); @@ -1838,11 +1865,22 @@ X86_64Assembler* X86_64Assembler::lock() { void X86_64Assembler::cmpxchgl(const Address& address, CpuRegister reg) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitOptionalRex32(reg, address); + EmitUint8(0x0F); + EmitUint8(0xB1); + EmitOperand(reg.LowBits(), address); +} + + +void X86_64Assembler::cmpxchgq(const Address& address, CpuRegister reg) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitRex64(reg, address); EmitUint8(0x0F); EmitUint8(0xB1); EmitOperand(reg.LowBits(), address); } + void X86_64Assembler::mfence() { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x0F); @@ -1941,6 +1979,10 @@ void X86_64Assembler::EmitOperand(uint8_t reg_or_opcode, const Operand& operand) for (int i = 
1; i < length; i++) { EmitUint8(operand.encoding_[i]); } + AssemblerFixup* fixup = operand.GetFixup(); + if (fixup != nullptr) { + EmitFixup(fixup); + } } @@ -2139,11 +2181,18 @@ void X86_64Assembler::EmitRex64(CpuRegister dst, const Operand& operand) { } void X86_64Assembler::EmitOptionalByteRegNormalizingRex32(CpuRegister dst, CpuRegister src) { - EmitOptionalRex(true, false, dst.NeedsRex(), false, src.NeedsRex()); + // For src, SPL, BPL, SIL, DIL need the rex prefix. + bool force = src.AsRegister() > 3; + EmitOptionalRex(force, false, dst.NeedsRex(), false, src.NeedsRex()); } void X86_64Assembler::EmitOptionalByteRegNormalizingRex32(CpuRegister dst, const Operand& operand) { - uint8_t rex = 0x40 | operand.rex(); // REX.0000 + uint8_t rex = operand.rex(); + // For dst, SPL, BPL, SIL, DIL need the rex prefix. + bool force = dst.AsRegister() > 3; + if (force) { + rex |= 0x40; // REX.0000 + } if (dst.NeedsRex()) { rex |= 0x44; // REX.0R00 } @@ -2152,14 +2201,11 @@ void X86_64Assembler::EmitOptionalByteRegNormalizingRex32(CpuRegister dst, const } } -void X86_64Assembler::InitializeFrameDescriptionEntry() { - WriteFDEHeader(&cfi_info_, true /* is_64bit */); +static dwarf::Reg DWARFReg(Register reg) { + return dwarf::Reg::X86_64Core(static_cast<int>(reg)); } - -void X86_64Assembler::FinalizeFrameDescriptionEntry() { - WriteFDEAddressRange(&cfi_info_, buffer_.Size(), true /* is_64bit */); - PadCFI(&cfi_info_); - WriteCFILength(&cfi_info_, true /* is_64bit */); +static dwarf::Reg DWARFReg(FloatRegister reg) { + return dwarf::Reg::X86_64Fp(static_cast<int>(reg)); } constexpr size_t kFramePointerSize = 8; @@ -2167,11 +2213,8 @@ constexpr size_t kFramePointerSize = 8; void X86_64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, const std::vector<ManagedRegister>& spill_regs, const ManagedRegisterEntrySpills& entry_spills) { - cfi_cfa_offset_ = kFramePointerSize; // Only return address on stack - cfi_pc_ = buffer_.Size(); // Nothing emitted yet - DCHECK_EQ(cfi_pc_, 0U); - - uint32_t reg_offset = 1; + DCHECK_EQ(buffer_.Size(), 0U); // Nothing emitted yet. + cfi_.SetCurrentCFAOffset(8); // Return address on stack. CHECK_ALIGNED(frame_size, kStackAlignment); int gpr_count = 0; for (int i = spill_regs.size() - 1; i >= 0; --i) { @@ -2179,29 +2222,16 @@ void X86_64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, if (spill.IsCpuRegister()) { pushq(spill.AsCpuRegister()); gpr_count++; - - // DW_CFA_advance_loc - DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_); - cfi_pc_ = buffer_.Size(); - // DW_CFA_def_cfa_offset - cfi_cfa_offset_ += kFramePointerSize; - DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_); - // DW_CFA_offset reg offset - reg_offset++; - DW_CFA_offset(&cfi_info_, spill.DWARFRegId(), reg_offset); + cfi_.AdjustCFAOffset(kFramePointerSize); + cfi_.RelOffset(DWARFReg(spill.AsCpuRegister().AsRegister()), 0); } } - // return address then method on stack + // return address then method on stack. 
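For reference, the cfi_ calls that now accompany each register spill correspond to the DWARF opcodes the deleted code emitted by hand (DW_CFA_advance_loc, DW_CFA_def_cfa_offset, DW_CFA_offset). A sketch for one spilled register, written with GNU-style directive names purely for illustration:

  pushq %rbx                  # the stack pointer moves down by 8
  .cfi_adjust_cfa_offset 8    # <- cfi_.AdjustCFAOffset(kFramePointerSize)
  .cfi_rel_offset %rbx, 0     # <- cfi_.RelOffset(DWARFReg(RBX), 0): RBX saved at SP + 0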
int64_t rest_of_frame = static_cast<int64_t>(frame_size) - (gpr_count * kFramePointerSize) - kFramePointerSize /*return address*/; subq(CpuRegister(RSP), Immediate(rest_of_frame)); - // DW_CFA_advance_loc - DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_); - cfi_pc_ = buffer_.Size(); - // DW_CFA_def_cfa_offset - cfi_cfa_offset_ += rest_of_frame; - DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_); + cfi_.AdjustCFAOffset(rest_of_frame); // spill xmms int64_t offset = rest_of_frame; @@ -2210,6 +2240,7 @@ void X86_64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, if (spill.IsXmmRegister()) { offset -= sizeof(double); movsd(Address(CpuRegister(RSP), offset), spill.AsXmmRegister()); + cfi_.RelOffset(DWARFReg(spill.AsXmmRegister().AsFloatRegister()), offset); } } @@ -2241,6 +2272,7 @@ void X86_64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, void X86_64Assembler::RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& spill_regs) { CHECK_ALIGNED(frame_size, kStackAlignment); + cfi_.RememberState(); int gpr_count = 0; // unspill xmms int64_t offset = static_cast<int64_t>(frame_size) - (spill_regs.size() * kFramePointerSize) - 2 * kFramePointerSize; @@ -2249,34 +2281,38 @@ void X86_64Assembler::RemoveFrame(size_t frame_size, if (spill.IsXmmRegister()) { offset += sizeof(double); movsd(spill.AsXmmRegister(), Address(CpuRegister(RSP), offset)); + cfi_.Restore(DWARFReg(spill.AsXmmRegister().AsFloatRegister())); } else { gpr_count++; } } - addq(CpuRegister(RSP), Immediate(static_cast<int64_t>(frame_size) - (gpr_count * kFramePointerSize) - kFramePointerSize)); + int adjust = static_cast<int>(frame_size) - (gpr_count * kFramePointerSize) - kFramePointerSize; + addq(CpuRegister(RSP), Immediate(adjust)); + cfi_.AdjustCFAOffset(-adjust); for (size_t i = 0; i < spill_regs.size(); ++i) { x86_64::X86_64ManagedRegister spill = spill_regs.at(i).AsX86_64(); if (spill.IsCpuRegister()) { popq(spill.AsCpuRegister()); + cfi_.AdjustCFAOffset(-static_cast<int>(kFramePointerSize)); + cfi_.Restore(DWARFReg(spill.AsCpuRegister().AsRegister())); } } ret(); + // The CFI should be restored for any code that follows the exit block. + cfi_.RestoreState(); + cfi_.DefCFAOffset(frame_size); } void X86_64Assembler::IncreaseFrameSize(size_t adjust) { CHECK_ALIGNED(adjust, kStackAlignment); addq(CpuRegister(RSP), Immediate(-static_cast<int64_t>(adjust))); - // DW_CFA_advance_loc - DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_); - cfi_pc_ = buffer_.Size(); - // DW_CFA_def_cfa_offset - cfi_cfa_offset_ += adjust; - DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_); + cfi_.AdjustCFAOffset(adjust); } void X86_64Assembler::DecreaseFrameSize(size_t adjust) { CHECK_ALIGNED(adjust, kStackAlignment); addq(CpuRegister(RSP), Immediate(adjust)); + cfi_.AdjustCFAOffset(-adjust); } void X86_64Assembler::Store(FrameOffset offs, ManagedRegister msrc, size_t size) { @@ -2704,5 +2740,55 @@ void X86_64ExceptionSlowPath::Emit(Assembler *sasm) { #undef __ } +void X86_64Assembler::AddConstantArea() { + const std::vector<int32_t>& area = constant_area_.GetBuffer(); + for (size_t i = 0, e = area.size(); i < e; i++) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitInt32(area[i]); + } +} + +int ConstantArea::AddInt32(int32_t v) { + for (size_t i = 0, e = buffer_.size(); i < e; i++) { + if (v == buffer_[i]) { + return i * elem_size_; + } + } + + // Didn't match anything. 
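RememberState/RestoreState bracket the epilogue so the unwind information stays valid for code that follows the exit block, where the frame is conceptually still live. In DWARF terms this is the remember/restore-state pair; roughly:

  .cfi_remember_state               # <- cfi_.RememberState(): snapshot the current rules
  ...pops with .cfi_restore / .cfi_adjust_cfa_offset...
  ret
  .cfi_restore_state                # <- cfi_.RestoreState(): back to the in-frame rules
  .cfi_def_cfa_offset <frame_size>  # <- cfi_.DefCFAOffset(frame_size)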
+ int result = buffer_.size() * elem_size_; + buffer_.push_back(v); + return result; +} + +int ConstantArea::AddInt64(int64_t v) { + int32_t v_low = v; + int32_t v_high = v >> 32; + if (buffer_.size() > 1) { + // Ensure we don't pass the end of the buffer. + for (size_t i = 0, e = buffer_.size() - 1; i < e; i++) { + if (v_low == buffer_[i] && v_high == buffer_[i + 1]) { + return i * elem_size_; + } + } + } + + // Didn't match anything. + int result = buffer_.size() * elem_size_; + buffer_.push_back(v_low); + buffer_.push_back(v_high); + return result; +} + +int ConstantArea::AddDouble(double v) { + // Treat the value as a 64-bit integer value. + return AddInt64(bit_cast<int64_t, double>(v)); +} + +int ConstantArea::AddFloat(float v) { + // Treat the value as a 32-bit integer value. + return AddInt32(bit_cast<int32_t, float>(v)); +} + } // namespace x86_64 } // namespace art diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index e2fd5fbb80..15b8b15c74 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -97,9 +97,13 @@ class Operand : public ValueObject { && (reg.NeedsRex() == ((rex_ & 1) != 0)); // REX.000B bits match. } + AssemblerFixup* GetFixup() const { + return fixup_; + } + protected: // Operand can be sub classed (e.g: Address). - Operand() : rex_(0), length_(0) { } + Operand() : rex_(0), length_(0), fixup_(nullptr) { } void SetModRM(uint8_t mod_in, CpuRegister rm_in) { CHECK_EQ(mod_in & ~3, 0); @@ -136,12 +140,17 @@ class Operand : public ValueObject { length_ += disp_size; } + void SetFixup(AssemblerFixup* fixup) { + fixup_ = fixup; + } + private: uint8_t rex_; uint8_t length_; uint8_t encoding_[6]; + AssemblerFixup* fixup_; - explicit Operand(CpuRegister reg) : rex_(0), length_(0) { SetModRM(3, reg); } + explicit Operand(CpuRegister reg) : rex_(0), length_(0), fixup_(nullptr) { SetModRM(3, reg); } // Get the operand encoding byte at the given index. uint8_t encoding_at(int index_in) const { @@ -226,12 +235,25 @@ class Address : public Operand { result.SetSIB(TIMES_1, CpuRegister(RSP), CpuRegister(RBP)); result.SetDisp32(addr); } else { + // RIP addressing is done using RBP as the base register. + // The value in RBP isn't used. Instead the offset is added to RIP. result.SetModRM(0, CpuRegister(RBP)); result.SetDisp32(addr); } return result; } + // An RIP relative address that will be fixed up later. + static Address RIP(AssemblerFixup* fixup) { + Address result; + // RIP addressing is done using RBP as the base register. + // The value in RBP isn't used. Instead the offset is added to RIP. + result.SetModRM(0, CpuRegister(RBP)); + result.SetDisp32(0); + result.SetFixup(fixup); + return result; + } + // If no_rip is true then the Absolute address isn't RIP relative. static Address Absolute(ThreadOffset<8> addr, bool no_rip = false) { return Absolute(addr.Int32Value(), no_rip); @@ -242,9 +264,46 @@ class Address : public Operand { }; +/** + * Class to handle constant area values. + */ +class ConstantArea { + public: + ConstantArea() {} + + // Add a double to the constant area, returning the offset into + // the constant area where the literal resides. + int AddDouble(double v); + + // Add a float to the constant area, returning the offset into + // the constant area where the literal resides. + int AddFloat(float v); + + // Add an int32_t to the constant area, returning the offset into + // the constant area where the literal resides. 
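A small worked example of how the deduplicating constant area hands out byte offsets (the values are made up; the arithmetic follows the AddInt32/AddInt64 implementations above):

  x86_64::ConstantArea area;
  int a = area.AddInt32(42);                  // offset 0
  int b = area.AddInt64(0x0102030405060708);  // offset 4: low word at byte 4, high word at byte 8
  int c = area.AddInt32(42);                  // offset 0 again - the existing slot is reused
  int d = area.AddDouble(2.0);                // offset 12: stored as the raw 64-bit bit pattern of 2.0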
+ int AddInt32(int32_t v); + + // Add an int64_t to the constant area, returning the offset into + // the constant area where the literal resides. + int AddInt64(int64_t v); + + int GetSize() const { + return buffer_.size() * elem_size_; + } + + const std::vector<int32_t>& GetBuffer() const { + return buffer_; + } + + private: + static constexpr size_t elem_size_ = sizeof(int32_t); + std::vector<int32_t> buffer_; +}; + + class X86_64Assembler FINAL : public Assembler { public: - X86_64Assembler() : cfi_cfa_offset_(0), cfi_pc_(0) {} + X86_64Assembler() {} virtual ~X86_64Assembler() {} /* @@ -353,6 +412,9 @@ class X86_64Assembler FINAL : public Assembler { void ucomiss(XmmRegister a, XmmRegister b); void ucomisd(XmmRegister a, XmmRegister b); + void roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm); + void roundss(XmmRegister dst, XmmRegister src, const Immediate& imm); + void sqrtsd(XmmRegister dst, XmmRegister src); void sqrtss(XmmRegister dst, XmmRegister src); @@ -515,6 +577,7 @@ class X86_64Assembler FINAL : public Assembler { X86_64Assembler* lock(); void cmpxchgl(const Address& address, CpuRegister reg); + void cmpxchgq(const Address& address, CpuRegister reg); void mfence(); @@ -537,6 +600,10 @@ class X86_64Assembler FINAL : public Assembler { lock()->cmpxchgl(address, reg); } + void LockCmpxchgq(const Address& address, CpuRegister reg) { + lock()->cmpxchgq(address, reg); + } + // // Misc. functionality // @@ -661,11 +728,27 @@ class X86_64Assembler FINAL : public Assembler { // and branch to a ExceptionSlowPath if it is. void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE; - void InitializeFrameDescriptionEntry() OVERRIDE; - void FinalizeFrameDescriptionEntry() OVERRIDE; - std::vector<uint8_t>* GetFrameDescriptionEntry() OVERRIDE { - return &cfi_info_; - } + // Add a double to the constant area, returning the offset into + // the constant area where the literal resides. + int AddDouble(double v) { return constant_area_.AddDouble(v); } + + // Add a float to the constant area, returning the offset into + // the constant area where the literal resides. + int AddFloat(float v) { return constant_area_.AddFloat(v); } + + // Add an int32_t to the constant area, returning the offset into + // the constant area where the literal resides. + int AddInt32(int32_t v) { return constant_area_.AddInt32(v); } + + // Add an int64_t to the constant area, returning the offset into + // the constant area where the literal resides. + int AddInt64(int64_t v) { return constant_area_.AddInt64(v); } + + // Add the contents of the constant area to the assembler buffer. + void AddConstantArea(); + + // Is the constant area empty? Return true if there are no literals in the constant area. 
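How the code generator consumes this API is outside this hunk; a plausible pattern (hypothetical, not taken from the patch) is to pool literals while emitting instructions and append the area once after the method body:

  // While generating code: remember the offset, to be referenced later
  // through a RIP-relative Address fixup.
  int pi_offset = assembler->AddDouble(3.141592653589793);
  // After the last instruction of the method:
  if (!assembler->IsConstantAreaEmpty()) {
    assembler->AddConstantArea();  // appends the pooled 32-bit words to the code buffer
  }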
+ bool IsConstantAreaEmpty() const { return constant_area_.GetSize() == 0; } private: void EmitUint8(uint8_t value); @@ -712,8 +795,7 @@ class X86_64Assembler FINAL : public Assembler { void EmitOptionalByteRegNormalizingRex32(CpuRegister dst, CpuRegister src); void EmitOptionalByteRegNormalizingRex32(CpuRegister dst, const Operand& operand); - std::vector<uint8_t> cfi_info_; - uint32_t cfi_cfa_offset_, cfi_pc_; + ConstantArea constant_area_; DISALLOW_COPY_AND_ASSIGN(X86_64Assembler); }; diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc index c2052c7732..116190a832 100644 --- a/compiler/utils/x86_64/assembler_x86_64_test.cc +++ b/compiler/utils/x86_64/assembler_x86_64_test.cc @@ -174,6 +174,40 @@ class AssemblerX86_64Test : public AssemblerTest<x86_64::X86_64Assembler, x86_64 secondary_register_names_.emplace(x86_64::CpuRegister(x86_64::R14), "r14d"); secondary_register_names_.emplace(x86_64::CpuRegister(x86_64::R15), "r15d"); + tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::RAX), "ax"); + tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::RBX), "bx"); + tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::RCX), "cx"); + tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::RDX), "dx"); + tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::RBP), "bp"); + tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::RSP), "sp"); + tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::RSI), "si"); + tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::RDI), "di"); + tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::R8), "r8w"); + tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::R9), "r9w"); + tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::R10), "r10w"); + tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::R11), "r11w"); + tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::R12), "r12w"); + tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::R13), "r13w"); + tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::R14), "r14w"); + tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::R15), "r15w"); + + quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::RAX), "al"); + quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::RBX), "bl"); + quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::RCX), "cl"); + quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::RDX), "dl"); + quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::RBP), "bpl"); + quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::RSP), "spl"); + quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::RSI), "sil"); + quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::RDI), "dil"); + quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R8), "r8b"); + quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R9), "r9b"); + quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R10), "r10b"); + quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R11), "r11b"); + quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R12), "r12b"); + quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R13), "r13b"); + quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R14), "r14b"); + quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R15), "r15b"); + fp_registers_.push_back(new 
x86_64::XmmRegister(x86_64::XMM0)); fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM1)); fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM2)); @@ -216,9 +250,21 @@ class AssemblerX86_64Test : public AssemblerTest<x86_64::X86_64Assembler, x86_64 return secondary_register_names_[reg]; } + std::string GetTertiaryRegisterName(const x86_64::CpuRegister& reg) OVERRIDE { + CHECK(tertiary_register_names_.find(reg) != tertiary_register_names_.end()); + return tertiary_register_names_[reg]; + } + + std::string GetQuaternaryRegisterName(const x86_64::CpuRegister& reg) OVERRIDE { + CHECK(quaternary_register_names_.find(reg) != quaternary_register_names_.end()); + return quaternary_register_names_[reg]; + } + private: std::vector<x86_64::CpuRegister*> registers_; std::map<x86_64::CpuRegister, std::string, X86_64CpuRegisterCompare> secondary_register_names_; + std::map<x86_64::CpuRegister, std::string, X86_64CpuRegisterCompare> tertiary_register_names_; + std::map<x86_64::CpuRegister, std::string, X86_64CpuRegisterCompare> quaternary_register_names_; std::vector<x86_64::XmmRegister*> fp_registers_; }; @@ -543,6 +589,56 @@ TEST_F(AssemblerX86_64Test, Xchgl) { // DriverStr(Repeatrr(&x86_64::X86_64Assembler::xchgl, "xchgl %{reg2}, %{reg1}"), "xchgl"); } +TEST_F(AssemblerX86_64Test, LockCmpxchgl) { + GetAssembler()->LockCmpxchgl(x86_64::Address( + x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12), + x86_64::CpuRegister(x86_64::RSI)); + GetAssembler()->LockCmpxchgl(x86_64::Address( + x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12), + x86_64::CpuRegister(x86_64::RSI)); + GetAssembler()->LockCmpxchgl(x86_64::Address( + x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12), + x86_64::CpuRegister(x86_64::R8)); + GetAssembler()->LockCmpxchgl(x86_64::Address( + x86_64::CpuRegister(x86_64::R13), 0), x86_64::CpuRegister(x86_64::RSI)); + GetAssembler()->LockCmpxchgl(x86_64::Address( + x86_64::CpuRegister(x86_64::R13), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_1, 0), + x86_64::CpuRegister(x86_64::RSI)); + const char* expected = + "lock cmpxchgl %ESI, 0xc(%RDI,%RBX,4)\n" + "lock cmpxchgl %ESI, 0xc(%RDI,%R9,4)\n" + "lock cmpxchgl %R8d, 0xc(%RDI,%R9,4)\n" + "lock cmpxchgl %ESI, (%R13)\n" + "lock cmpxchgl %ESI, (%R13,%R9,1)\n"; + + DriverStr(expected, "lock_cmpxchgl"); +} + +TEST_F(AssemblerX86_64Test, LockCmpxchgq) { + GetAssembler()->LockCmpxchgq(x86_64::Address( + x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12), + x86_64::CpuRegister(x86_64::RSI)); + GetAssembler()->LockCmpxchgq(x86_64::Address( + x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12), + x86_64::CpuRegister(x86_64::RSI)); + GetAssembler()->LockCmpxchgq(x86_64::Address( + x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12), + x86_64::CpuRegister(x86_64::R8)); + GetAssembler()->LockCmpxchgq(x86_64::Address( + x86_64::CpuRegister(x86_64::R13), 0), x86_64::CpuRegister(x86_64::RSI)); + GetAssembler()->LockCmpxchgq(x86_64::Address( + x86_64::CpuRegister(x86_64::R13), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_1, 0), + x86_64::CpuRegister(x86_64::RSI)); + const char* expected = + "lock cmpxchg %RSI, 0xc(%RDI,%RBX,4)\n" + "lock cmpxchg %RSI, 0xc(%RDI,%R9,4)\n" + "lock cmpxchg %R8, 0xc(%RDI,%R9,4)\n" + "lock cmpxchg %RSI, (%R13)\n" + "lock cmpxchg %RSI, (%R13,%R9,1)\n"; + + DriverStr(expected, "lock_cmpxchg"); +} 
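The only difference between the two locked compare-exchange forms tested above is the REX.W bit: cmpxchgl goes through EmitOptionalRex32 while cmpxchgq uses EmitRex64. Hand-assembled for illustration (these bytes are not captured from the test output):

  lock cmpxchgl %esi, (%r13)   # f0 41 0f b1 75 00   (REX.B for R13)
  lock cmpxchgq %rsi, (%r13)   # f0 49 0f b1 75 00   (REX.W | REX.B)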
+ TEST_F(AssemblerX86_64Test, Movl) { GetAssembler()->movl(x86_64::CpuRegister(x86_64::RAX), x86_64::Address( x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12)); @@ -696,6 +792,14 @@ TEST_F(AssemblerX86_64Test, Sqrtsd) { DriverStr(RepeatFF(&x86_64::X86_64Assembler::sqrtsd, "sqrtsd %{reg2}, %{reg1}"), "sqrtsd"); } +TEST_F(AssemblerX86_64Test, Roundss) { + DriverStr(RepeatFFI(&x86_64::X86_64Assembler::roundss, 1, "roundss ${imm}, %{reg2}, %{reg1}"), "roundss"); +} + +TEST_F(AssemblerX86_64Test, Roundsd) { + DriverStr(RepeatFFI(&x86_64::X86_64Assembler::roundsd, 1, "roundsd ${imm}, %{reg2}, %{reg1}"), "roundsd"); +} + TEST_F(AssemblerX86_64Test, Xorps) { DriverStr(RepeatFF(&x86_64::X86_64Assembler::xorps, "xorps %{reg2}, %{reg1}"), "xorps"); } @@ -820,31 +924,12 @@ std::string setcc_test_fn(AssemblerX86_64Test::Base* assembler_test, "l", "ge", "le" }; std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters(); - - std::string byte_regs[16]; - byte_regs[x86_64::RAX] = "al"; - byte_regs[x86_64::RBX] = "bl"; - byte_regs[x86_64::RCX] = "cl"; - byte_regs[x86_64::RDX] = "dl"; - byte_regs[x86_64::RBP] = "bpl"; - byte_regs[x86_64::RSP] = "spl"; - byte_regs[x86_64::RSI] = "sil"; - byte_regs[x86_64::RDI] = "dil"; - byte_regs[x86_64::R8] = "r8b"; - byte_regs[x86_64::R9] = "r9b"; - byte_regs[x86_64::R10] = "r10b"; - byte_regs[x86_64::R11] = "r11b"; - byte_regs[x86_64::R12] = "r12b"; - byte_regs[x86_64::R13] = "r13b"; - byte_regs[x86_64::R14] = "r14b"; - byte_regs[x86_64::R15] = "r15b"; - std::ostringstream str; for (auto reg : registers) { for (size_t i = 0; i < 15; ++i) { assembler->setcc(static_cast<x86_64::Condition>(i), *reg); - str << "set" << suffixes[i] << " %" << byte_regs[reg->AsRegister()] << "\n"; + str << "set" << suffixes[i] << " %" << assembler_test->GetQuaternaryRegisterName(*reg) << "\n"; } } @@ -975,4 +1060,12 @@ TEST_F(AssemblerX86_64Test, DecreaseFrame) { DriverFn(&decreaseframe_test_fn, "DecreaseFrame"); } +TEST_F(AssemblerX86_64Test, MovzxbRegs) { + DriverStr(Repeatrb(&x86_64::X86_64Assembler::movzxb, "movzbl %{reg2}, %{reg1}"), "movzxb"); +} + +TEST_F(AssemblerX86_64Test, MovsxbRegs) { + DriverStr(Repeatrb(&x86_64::X86_64Assembler::movsxb, "movsbl %{reg2}, %{reg1}"), "movsxb"); +} + } // namespace art diff --git a/compiler/utils/x86_64/managed_register_x86_64.h b/compiler/utils/x86_64/managed_register_x86_64.h index 3a96ad0b51..47bbb44fc8 100644 --- a/compiler/utils/x86_64/managed_register_x86_64.h +++ b/compiler/utils/x86_64/managed_register_x86_64.h @@ -18,6 +18,7 @@ #define ART_COMPILER_UTILS_X86_64_MANAGED_REGISTER_X86_64_H_ #include "constants_x86_64.h" +#include "dwarf/register.h" #include "utils/managed_register.h" namespace art { @@ -87,21 +88,6 @@ const int kNumberOfAllocIds = kNumberOfCpuAllocIds + kNumberOfXmmAllocIds + // There is a one-to-one mapping between ManagedRegister and register id. class X86_64ManagedRegister : public ManagedRegister { public: - int DWARFRegId() const { - CHECK(IsCpuRegister()); - switch (id_) { - case RAX: return 0; - case RDX: return 1; - case RCX: return 2; - case RBX: return 3; - case RSI: return 4; - case RDI: return 5; - case RBP: return 6; - case RSP: return 7; - default: return static_cast<int>(id_); // R8 ~ R15 - } - } - CpuRegister AsCpuRegister() const { CHECK(IsCpuRegister()); return CpuRegister(static_cast<Register>(id_)); |
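The MovzxbRegs/MovsxbRegs tests added above exercise the byte-register fix in EmitOptionalByteRegNormalizingRex32: without a REX prefix, register encodings 4-7 name the legacy high-byte registers AH/CH/DH/BH, so SPL/BPL/SIL/DIL must force an otherwise empty REX byte. Hand-assembled illustration (not output from the tests):

  movzbl %bl,  %eax   # 0f b6 c3      (BL is encoding 3, no REX required)
  movzbl %sil, %eax   # 40 0f b6 c6   (bare REX 0x40 makes encoding 6 mean SIL rather than DH)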