117 files changed, 6495 insertions, 2234 deletions
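The core of this commit is that each Quick backend now reports every stack-pointer adjustment and callee-save spill to a dwarf::DebugFrameOpCodeWriter (the new cfi_ member), which encodes those events as DWARF call-frame opcodes; the expected_cfi_* byte arrays near the end of this diff are exactly such encodings. As a minimal standalone sketch (not the ART classes — only the standard DWARF constants are assumed, with a data alignment factor of -4 for a 32-bit target), the following reproduces the leading bytes of expected_cfi_kThumb2 for the Thumb2 prologue "push {r5, r6, lr}":

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <vector>

// Standard DWARF call-frame opcodes (DWARF 4, section 6.4.2).
constexpr uint8_t DW_CFA_advance_loc = 0x40;     // Delta in the low 6 bits.
constexpr uint8_t DW_CFA_offset = 0x80;          // Register in the low 6 bits.
constexpr uint8_t DW_CFA_def_cfa_offset = 0x0E;  // Followed by a ULEB128 offset.

void PushUleb128(std::vector<uint8_t>* out, uint32_t value) {
  do {
    uint8_t byte = value & 0x7F;
    value >>= 7;
    if (value != 0) byte |= 0x80;
    out->push_back(byte);
  } while (value != 0);
}

int main() {
  std::vector<uint8_t> cfi;
  // "push {r5, r6, lr}" is two bytes of code and moves the CFA to sp+12;
  // this is what cfi_.AdjustCFAOffset(12) records in the ARM prologue below.
  cfi.push_back(DW_CFA_advance_loc | 2);  // Advance the PC past the push.
  cfi.push_back(DW_CFA_def_cfa_offset);
  PushUleb128(&cfi, 12);
  // cfi_.RelOffsetForMany(...) records where r5, r6 and lr (r14) now live;
  // the ULEB operand is the offset factored by the data alignment factor (-4).
  const int saves[][2] = {{5, 12}, {6, 8}, {14, 4}};  // {reg, bytes below CFA}
  for (const auto& s : saves) {
    cfi.push_back(DW_CFA_offset | s[0]);
    PushUleb128(&cfi, s[1] / 4);
  }
  // These are the leading bytes of expected_cfi_kThumb2 in this diff.
  const std::vector<uint8_t> expected = {0x42, 0x0E, 0x0C, 0x85, 0x03,
                                         0x86, 0x02, 0x8E, 0x01};
  assert(cfi == expected);
  for (uint8_t b : cfi) std::printf("0x%02X, ", b);
  std::printf("\n");
  return 0;
}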
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk index 0e2dad9355..d9d09bcc63 100644 --- a/build/Android.gtest.mk +++ b/build/Android.gtest.mk @@ -188,10 +188,12 @@ COMPILER_GTEST_COMMON_SRC_FILES := \ compiler/dex/local_value_numbering_test.cc \ compiler/dex/mir_graph_test.cc \ compiler/dex/mir_optimization_test.cc \ + compiler/dex/quick/quick_cfi_test.cc \ compiler/dwarf/dwarf_test.cc \ compiler/driver/compiler_driver_test.cc \ compiler/elf_writer_test.cc \ compiler/image_test.cc \ + compiler/jni/jni_cfi_test.cc \ compiler/jni/jni_compiler_test.cc \ compiler/linker/arm64/relative_patcher_arm64_test.cc \ compiler/linker/arm/relative_patcher_thumb2_test.cc \ @@ -212,6 +214,7 @@ COMPILER_GTEST_COMMON_SRC_FILES := \ compiler/optimizing/live_interval_test.cc \ compiler/optimizing/live_ranges_test.cc \ compiler/optimizing/nodes_test.cc \ + compiler/optimizing/optimizing_cfi_test.cc \ compiler/optimizing/parallel_move_test.cc \ compiler/optimizing/pretty_printer_test.cc \ compiler/optimizing/register_allocator_test.cc \ @@ -405,7 +408,7 @@ define define-art-gtest LOCAL_CPP_EXTENSION := $$(ART_CPP_EXTENSION) LOCAL_SRC_FILES := $$(art_gtest_filename) LOCAL_C_INCLUDES += $$(ART_C_INCLUDES) art/runtime $$(art_gtest_extra_c_includes) - LOCAL_SHARED_LIBRARIES += libartd $$(art_gtest_extra_shared_libraries) libart-gtest + LOCAL_SHARED_LIBRARIES += libartd $$(art_gtest_extra_shared_libraries) libart-gtest libart-disassembler LOCAL_WHOLE_STATIC_LIBRARIES += libsigchain LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common_build.mk diff --git a/compiler/Android.mk b/compiler/Android.mk index 7611f50c65..ac95abdd8d 100644 --- a/compiler/Android.mk +++ b/compiler/Android.mk @@ -41,6 +41,7 @@ LIBART_COMPILER_SRC_FILES := \ dex/quick/gen_common.cc \ dex/quick/gen_invoke.cc \ dex/quick/gen_loadstore.cc \ + dex/quick/lazy_debug_frame_opcode_writer.cc \ dex/quick/local_optimizations.cc \ dex/quick/mips/assemble_mips.cc \ dex/quick/mips/call_mips.cc \ @@ -103,6 +104,7 @@ LIBART_COMPILER_SRC_FILES := \ optimizing/code_generator_arm64.cc \ optimizing/code_generator_x86.cc \ optimizing/code_generator_x86_64.cc \ + optimizing/code_generator_utils.cc \ optimizing/constant_folding.cc \ optimizing/dead_code_elimination.cc \ optimizing/graph_checker.cc \ @@ -150,6 +152,7 @@ LIBART_COMPILER_SRC_FILES := \ buffered_output_stream.cc \ compiler.cc \ elf_writer.cc \ + elf_writer_debug.cc \ elf_writer_quick.cc \ file_output_stream.cc \ image_writer.cc \ diff --git a/compiler/cfi_test.h b/compiler/cfi_test.h new file mode 100644 index 0000000000..918179290b --- /dev/null +++ b/compiler/cfi_test.h @@ -0,0 +1,139 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_CFI_TEST_H_ +#define ART_COMPILER_CFI_TEST_H_ + +#include <vector> +#include <memory> +#include <sstream> + +#include "arch/instruction_set.h" +#include "dwarf/dwarf_test.h" +#include "dwarf/headers.h" +#include "disassembler/disassembler.h" +#include "gtest/gtest.h" + +namespace art { + +class CFITest : public dwarf::DwarfTest { + public: + void GenerateExpected(FILE* f, InstructionSet isa, const char* isa_str, + const std::vector<uint8_t>& actual_asm, + const std::vector<uint8_t>& actual_cfi) { + std::vector<std::string> lines; + // Print the raw bytes. + fprintf(f, "static constexpr uint8_t expected_asm_%s[] = {", isa_str); + HexDump(f, actual_asm); + fprintf(f, "\n};\n"); + fprintf(f, "static constexpr uint8_t expected_cfi_%s[] = {", isa_str); + HexDump(f, actual_cfi); + fprintf(f, "\n};\n"); + // Pretty-print CFI opcodes. + constexpr bool is64bit = false; + dwarf::DebugFrameOpCodeWriter<> initial_opcodes; + dwarf::WriteEhFrameCIE(is64bit, dwarf::Reg(8), initial_opcodes, &eh_frame_data_); + dwarf::WriteEhFrameFDE(is64bit, 0, 0, actual_asm.size(), &actual_cfi, &eh_frame_data_); + ReformatCfi(Objdump(false, "-W"), &lines); + // Pretty-print assembly. + auto* opts = new DisassemblerOptions(false, actual_asm.data(), true); + std::unique_ptr<Disassembler> disasm(Disassembler::Create(isa, opts)); + std::stringstream stream; + const uint8_t* base = actual_asm.data() + (isa == kThumb2 ? 1 : 0); + disasm->Dump(stream, base, base + actual_asm.size()); + ReformatAsm(&stream, &lines); + // Print CFI and assembly interleaved. + std::stable_sort(lines.begin(), lines.end(), CompareByAddress); + for (const std::string& line : lines) { + fprintf(f, "// %s\n", line.c_str()); + } + fprintf(f, "\n"); + } + + private: + // Helper - get offset just past the end of given string. + static size_t FindEndOf(const std::string& str, const char* substr) { + size_t pos = str.find(substr); + CHECK_NE(std::string::npos, pos); + return pos + strlen(substr); + } + + // Split into lines and remove raw instruction bytes. + static void ReformatAsm(std::stringstream* stream, + std::vector<std::string>* output) { + std::string line; + while (std::getline(*stream, line)) { + line = line.substr(0, FindEndOf(line, ": ")) + + line.substr(FindEndOf(line, "\t")); + size_t pos; + while ((pos = line.find(" ")) != std::string::npos) { + line = line.replace(pos, 2, " "); + } + while (!line.empty() && line.back() == ' ') { + line.pop_back(); + } + output->push_back(line); + } + } + + // Find interesting parts of objdump output and prefix the lines with address. + static void ReformatCfi(const std::vector<std::string>& lines, + std::vector<std::string>* output) { + std::string address; + for (const std::string& line : lines) { + if (line.find("DW_CFA_nop") != std::string::npos) { + // Ignore. + } else if (line.find("DW_CFA_advance_loc") != std::string::npos) { + // The last 8 characters are the address. + address = "0x" + line.substr(line.size() - 8); + } else if (line.find("DW_CFA_") != std::string::npos) { + std::string new_line(line); + // "bad register" warning is caused by always using host (x86) objdump. + const char* bad_reg = "bad register: "; + size_t pos; + if ((pos = new_line.find(bad_reg)) != std::string::npos) { + new_line = new_line.replace(pos, strlen(bad_reg), ""); + } + // Remove register names in parentheses since they have x86 names. + if ((pos = new_line.find(" (")) != std::string::npos) { + new_line = new_line.replace(pos, FindEndOf(new_line, ")") - pos, ""); + } + // Use the .cfi_ prefix.
+ new_line = ".cfi_" + new_line.substr(FindEndOf(new_line, "DW_CFA_")); + output->push_back(address + ": " + new_line); + } + } + } + + // Compare strings by the address prefix. + static bool CompareByAddress(const std::string& lhs, const std::string& rhs) { + EXPECT_EQ(lhs[10], ':'); + EXPECT_EQ(rhs[10], ':'); + return strncmp(lhs.c_str(), rhs.c_str(), 10) < 0; + } + + // Pretty-print byte array. 12 bytes per line. + static void HexDump(FILE* f, const std::vector<uint8_t>& data) { + for (size_t i = 0; i < data.size(); i++) { + fprintf(f, i % 12 == 0 ? "\n " : " "); // Whitespace. + fprintf(f, "0x%02X,", data[i]); + } + } +}; + +} // namespace art + +#endif // ART_COMPILER_CFI_TEST_H_ diff --git a/compiler/compiled_method.cc b/compiler/compiled_method.cc index eeed877444..4f7a970fdd 100644 --- a/compiler/compiled_method.cc +++ b/compiler/compiled_method.cc @@ -188,39 +188,6 @@ CompiledMethod* CompiledMethod::SwapAllocCompiledMethod( return ret; } -CompiledMethod* CompiledMethod::SwapAllocCompiledMethodStackMap( - CompilerDriver* driver, - InstructionSet instruction_set, - const ArrayRef<const uint8_t>& quick_code, - const size_t frame_size_in_bytes, - const uint32_t core_spill_mask, - const uint32_t fp_spill_mask, - const ArrayRef<const uint8_t>& stack_map) { - SwapAllocator<CompiledMethod> alloc(driver->GetSwapSpaceAllocator()); - CompiledMethod* ret = alloc.allocate(1); - alloc.construct(ret, driver, instruction_set, quick_code, frame_size_in_bytes, core_spill_mask, - fp_spill_mask, nullptr, ArrayRef<const uint8_t>(), stack_map, - ArrayRef<const uint8_t>(), ArrayRef<const uint8_t>(), - ArrayRef<const LinkerPatch>()); - return ret; -} - -CompiledMethod* CompiledMethod::SwapAllocCompiledMethodCFI( - CompilerDriver* driver, - InstructionSet instruction_set, - const ArrayRef<const uint8_t>& quick_code, - const size_t frame_size_in_bytes, - const uint32_t core_spill_mask, - const uint32_t fp_spill_mask, - const ArrayRef<const uint8_t>& cfi_info) { - SwapAllocator<CompiledMethod> alloc(driver->GetSwapSpaceAllocator()); - CompiledMethod* ret = alloc.allocate(1); - alloc.construct(ret, driver, instruction_set, quick_code, frame_size_in_bytes, core_spill_mask, - fp_spill_mask, nullptr, ArrayRef<const uint8_t>(), - ArrayRef<const uint8_t>(), ArrayRef<const uint8_t>(), - cfi_info, ArrayRef<const LinkerPatch>()); - return ret; -} void CompiledMethod::ReleaseSwapAllocatedCompiledMethod(CompilerDriver* driver, CompiledMethod* m) { diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h index 506b47b68a..480d021db0 100644 --- a/compiler/compiled_method.h +++ b/compiler/compiled_method.h @@ -320,7 +320,7 @@ class CompiledMethod FINAL : public CompiledCode { const ArrayRef<const uint8_t>& vmap_table, const ArrayRef<const uint8_t>& native_gc_map, const ArrayRef<const uint8_t>& cfi_info, - const ArrayRef<const LinkerPatch>& patches = ArrayRef<const LinkerPatch>()); + const ArrayRef<const LinkerPatch>& patches); virtual ~CompiledMethod(); @@ -336,24 +336,7 @@ class CompiledMethod FINAL : public CompiledCode { const ArrayRef<const uint8_t>& vmap_table, const ArrayRef<const uint8_t>& native_gc_map, const ArrayRef<const uint8_t>& cfi_info, - const ArrayRef<const LinkerPatch>& patches = ArrayRef<const LinkerPatch>()); - - static CompiledMethod* SwapAllocCompiledMethodStackMap( - CompilerDriver* driver, - InstructionSet instruction_set, - const ArrayRef<const uint8_t>& quick_code, - const size_t frame_size_in_bytes, - const uint32_t core_spill_mask, - const uint32_t fp_spill_mask, - const 
ArrayRef<const uint8_t>& stack_map); - - static CompiledMethod* SwapAllocCompiledMethodCFI(CompilerDriver* driver, - InstructionSet instruction_set, - const ArrayRef<const uint8_t>& quick_code, - const size_t frame_size_in_bytes, - const uint32_t core_spill_mask, - const uint32_t fp_spill_mask, - const ArrayRef<const uint8_t>& cfi_info); + const ArrayRef<const LinkerPatch>& patches); static void ReleaseSwapAllocatedCompiledMethod(CompilerDriver* driver, CompiledMethod* m); diff --git a/compiler/compiler.h b/compiler/compiler.h index a04641e3fa..94b0fe33db 100644 --- a/compiler/compiler.h +++ b/compiler/compiler.h @@ -63,13 +63,6 @@ class Compiler { virtual uintptr_t GetEntryPointOf(mirror::ArtMethod* method) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) = 0; - virtual bool WriteElf(art::File* file, - OatWriter* oat_writer, - const std::vector<const art::DexFile*>& dex_files, - const std::string& android_root, - bool is_host) const - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) = 0; - uint64_t GetMaximumCompilationTimeBeforeWarning() const { return maximum_compilation_time_before_warning_; } @@ -107,9 +100,6 @@ class Compiler { return driver_; } - // Whether to produce 64-bit ELF files for 64-bit targets. Leave this off for now. - static constexpr bool kProduce64BitELFFiles = false; - private: CompilerDriver* const driver_; const uint64_t maximum_compilation_time_before_warning_; diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h index d6c4b64708..85b13448da 100644 --- a/compiler/dex/mir_graph.h +++ b/compiler/dex/mir_graph.h @@ -1458,6 +1458,7 @@ class MIRGraph { friend class GvnDeadCodeEliminationTest; friend class LocalValueNumberingTest; friend class TopologicalSortOrderTest; + friend class QuickCFITest; }; } // namespace art diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc index 518e3ea3c9..3d18af6169 100644 --- a/compiler/dex/quick/arm/call_arm.cc +++ b/compiler/dex/quick/arm/call_arm.cc @@ -355,7 +355,16 @@ void ArmMir2Lir::UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) { FreeTemp(reg_card_no); } +static dwarf::Reg DwarfCoreReg(int num) { + return dwarf::Reg::ArmCore(num); +} + +static dwarf::Reg DwarfFpReg(int num) { + return dwarf::Reg::ArmFp(num); +} + void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { + DCHECK_EQ(cfi_.GetCurrentCFAOffset(), 0); // empty stack. int spill_count = num_core_spills_ + num_fp_spills_; /* * On entry, r0, r1, r2 & r3 are live. Let the register allocation @@ -403,28 +412,32 @@ void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { } } /* Spill core callee saves */ - if (core_spill_mask_ == 0u) { - // Nothing to spill. - } else if ((core_spill_mask_ & ~(0xffu | (1u << rs_rARM_LR.GetRegNum()))) == 0u) { - // Spilling only low regs and/or LR, use 16-bit PUSH. - constexpr int lr_bit_shift = rs_rARM_LR.GetRegNum() - 8; - NewLIR1(kThumbPush, - (core_spill_mask_ & ~(1u << rs_rARM_LR.GetRegNum())) | - ((core_spill_mask_ & (1u << rs_rARM_LR.GetRegNum())) >> lr_bit_shift)); - } else if (IsPowerOfTwo(core_spill_mask_)) { - // kThumb2Push cannot be used to spill a single register. - NewLIR1(kThumb2Push1, CTZ(core_spill_mask_)); - } else { - NewLIR1(kThumb2Push, core_spill_mask_); + if (core_spill_mask_ != 0u) { + if ((core_spill_mask_ & ~(0xffu | (1u << rs_rARM_LR.GetRegNum()))) == 0u) { + // Spilling only low regs and/or LR, use 16-bit PUSH. 
+ constexpr int lr_bit_shift = rs_rARM_LR.GetRegNum() - 8; + NewLIR1(kThumbPush, + (core_spill_mask_ & ~(1u << rs_rARM_LR.GetRegNum())) | + ((core_spill_mask_ & (1u << rs_rARM_LR.GetRegNum())) >> lr_bit_shift)); + } else if (IsPowerOfTwo(core_spill_mask_)) { + // kThumb2Push cannot be used to spill a single register. + NewLIR1(kThumb2Push1, CTZ(core_spill_mask_)); + } else { + NewLIR1(kThumb2Push, core_spill_mask_); + } + cfi_.AdjustCFAOffset(num_core_spills_ * kArmPointerSize); + cfi_.RelOffsetForMany(DwarfCoreReg(0), 0, core_spill_mask_, kArmPointerSize); } /* Need to spill any FP regs? */ - if (num_fp_spills_) { + if (num_fp_spills_ != 0u) { /* * NOTE: fp spills are a little different from core spills in that * they are pushed as a contiguous block. When promoting from * the fp set, we must allocate all singles from s16..highest-promoted */ NewLIR1(kThumb2VPushCS, num_fp_spills_); + cfi_.AdjustCFAOffset(num_fp_spills_ * kArmPointerSize); + cfi_.RelOffsetForMany(DwarfFpReg(0), 0, fp_spill_mask_, kArmPointerSize); } const int spill_size = spill_count * 4; @@ -445,12 +458,14 @@ void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { m2l_->LoadWordDisp(rs_rARM_SP, sp_displace_ - 4, rs_rARM_LR); } m2l_->OpRegImm(kOpAdd, rs_rARM_SP, sp_displace_); + m2l_->cfi().AdjustCFAOffset(-sp_displace_); m2l_->ClobberCallerSave(); ThreadOffset<4> func_offset = QUICK_ENTRYPOINT_OFFSET(4, pThrowStackOverflow); // Load the entrypoint directly into the pc instead of doing a load + branch. Assumes // codegen and target are in thumb2 mode. // NOTE: native pointer. m2l_->LoadWordDisp(rs_rARM_SELF, func_offset.Int32Value(), rs_rARM_PC); + m2l_->cfi().AdjustCFAOffset(sp_displace_); } private: @@ -465,6 +480,7 @@ void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { // Need to restore LR since we used it as a temp. AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, true, spill_size)); OpRegCopy(rs_rARM_SP, rs_rARM_LR); // Establish stack + cfi_.AdjustCFAOffset(frame_size_without_spills); } else { /* * If the frame is small enough we are guaranteed to have enough space that remains to @@ -475,6 +491,7 @@ void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { MarkTemp(rs_rARM_LR); FreeTemp(rs_rARM_LR); OpRegRegImm(kOpSub, rs_rARM_SP, rs_rARM_SP, frame_size_without_spills); + cfi_.AdjustCFAOffset(frame_size_without_spills); Clobber(rs_rARM_LR); UnmarkTemp(rs_rARM_LR); LIR* branch = OpCmpBranch(kCondUlt, rs_rARM_SP, rs_r12, nullptr); @@ -484,9 +501,11 @@ void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { // Implicit stack overflow check has already been done. Just make room on the // stack for the frame now. OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills); + cfi_.AdjustCFAOffset(frame_size_without_spills); } } else { OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills); + cfi_.AdjustCFAOffset(frame_size_without_spills); } FlushIns(ArgLocs, rl_method); @@ -507,7 +526,9 @@ void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { } void ArmMir2Lir::GenExitSequence() { + cfi_.RememberState(); int spill_count = num_core_spills_ + num_fp_spills_; + /* * In the exit path, r0/r1 are live - make sure they aren't * allocated by the register utilities as temps. 
@@ -515,34 +536,47 @@ void ArmMir2Lir::GenExitSequence() { LockTemp(rs_r0); LockTemp(rs_r1); - OpRegImm(kOpAdd, rs_rARM_SP, frame_size_ - (spill_count * 4)); + int adjust = frame_size_ - (spill_count * kArmPointerSize); + OpRegImm(kOpAdd, rs_rARM_SP, adjust); + cfi_.AdjustCFAOffset(-adjust); /* Need to restore any FP callee saves? */ if (num_fp_spills_) { NewLIR1(kThumb2VPopCS, num_fp_spills_); + cfi_.AdjustCFAOffset(-num_fp_spills_ * kArmPointerSize); + cfi_.RestoreMany(DwarfFpReg(0), fp_spill_mask_); } - if ((core_spill_mask_ & (1 << rs_rARM_LR.GetRegNum())) != 0) { - /* Unspill rARM_LR to rARM_PC */ + bool unspill_LR_to_PC = (core_spill_mask_ & (1 << rs_rARM_LR.GetRegNum())) != 0; + if (unspill_LR_to_PC) { core_spill_mask_ &= ~(1 << rs_rARM_LR.GetRegNum()); core_spill_mask_ |= (1 << rs_rARM_PC.GetRegNum()); } - if (core_spill_mask_ == 0u) { - // Nothing to unspill. - } else if ((core_spill_mask_ & ~(0xffu | (1u << rs_rARM_PC.GetRegNum()))) == 0u) { - // Unspilling only low regs and/or PC, use 16-bit POP. - constexpr int pc_bit_shift = rs_rARM_PC.GetRegNum() - 8; - NewLIR1(kThumbPop, - (core_spill_mask_ & ~(1u << rs_rARM_PC.GetRegNum())) | - ((core_spill_mask_ & (1u << rs_rARM_PC.GetRegNum())) >> pc_bit_shift)); - } else if (IsPowerOfTwo(core_spill_mask_)) { - // kThumb2Pop cannot be used to unspill a single register. - NewLIR1(kThumb2Pop1, CTZ(core_spill_mask_)); - } else { - NewLIR1(kThumb2Pop, core_spill_mask_); + if (core_spill_mask_ != 0u) { + if ((core_spill_mask_ & ~(0xffu | (1u << rs_rARM_PC.GetRegNum()))) == 0u) { + // Unspilling only low regs and/or PC, use 16-bit POP. + constexpr int pc_bit_shift = rs_rARM_PC.GetRegNum() - 8; + NewLIR1(kThumbPop, + (core_spill_mask_ & ~(1u << rs_rARM_PC.GetRegNum())) | + ((core_spill_mask_ & (1u << rs_rARM_PC.GetRegNum())) >> pc_bit_shift)); + } else if (IsPowerOfTwo(core_spill_mask_)) { + // kThumb2Pop cannot be used to unspill a single register. + NewLIR1(kThumb2Pop1, CTZ(core_spill_mask_)); + } else { + NewLIR1(kThumb2Pop, core_spill_mask_); + } + // If we pop to PC, there is no further epilogue code. + if (!unspill_LR_to_PC) { + cfi_.AdjustCFAOffset(-num_core_spills_ * kArmPointerSize); + cfi_.RestoreMany(DwarfCoreReg(0), core_spill_mask_); + DCHECK_EQ(cfi_.GetCurrentCFAOffset(), 0); // empty stack. + } } - if ((core_spill_mask_ & (1 << rs_rARM_PC.GetRegNum())) == 0) { + if (!unspill_LR_to_PC) { /* We didn't pop to rARM_PC, so must do a bv rARM_LR */ NewLIR1(kThumbBx, rs_rARM_LR.GetReg()); } + // The CFI should be restored for any code that follows the exit block. + cfi_.RestoreState(); + cfi_.DefCFAOffset(frame_size_); } void ArmMir2Lir::GenSpecialExitSequence() { @@ -564,11 +598,16 @@ void ArmMir2Lir::GenSpecialEntryForSuspend() { NewLIR1(kThumbPush, (1u << rs_r0.GetRegNum()) | // ArtMethod* (core_spill_mask_ & ~(1u << rs_rARM_LR.GetRegNum())) | // Spills other than LR. (1u << 8)); // LR encoded for 16-bit push. + cfi_.AdjustCFAOffset(frame_size_); + // Do not generate CFI for scratch register r0. + cfi_.RelOffsetForMany(DwarfCoreReg(0), 4, core_spill_mask_, kArmPointerSize); } void ArmMir2Lir::GenSpecialExitForSuspend() { // Pop the frame. (ArtMethod* no longer needed but restore it anyway.) NewLIR1(kThumb2Pop, (1u << rs_r0.GetRegNum()) | core_spill_mask_); // 32-bit because of LR. 
+ cfi_.AdjustCFAOffset(-frame_size_); + cfi_.RestoreMany(DwarfCoreReg(0), core_spill_mask_); } static bool ArmUseRelativeCall(CompilationUnit* cu, const MethodReference& target_method) { diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc index c3371cf329..25ea6941c0 100644 --- a/compiler/dex/quick/arm/utility_arm.cc +++ b/compiler/dex/quick/arm/utility_arm.cc @@ -1273,13 +1273,14 @@ void ArmMir2Lir::CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t if (pc_rel_temp_ != nullptr) { // Now, if the dex cache array base temp is used only once outside any loops (weight = 1), - // avoid the promotion, otherwise boost the weight by factor 4 because the full PC-relative - // load sequence is 4 instructions long. + // avoid the promotion, otherwise boost the weight by factor 3 because the full PC-relative + // load sequence is 4 instructions long and by promoting the PC base we save up to 3 + // instructions per use. int p_map_idx = SRegToPMap(pc_rel_temp_->s_reg_low); if (core_counts[p_map_idx].count == 1) { core_counts[p_map_idx].count = 0; } else { - core_counts[p_map_idx].count *= 4; + core_counts[p_map_idx].count *= 3; } } } diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc index 6b47bba884..4abbd77d88 100644 --- a/compiler/dex/quick/arm64/call_arm64.cc +++ b/compiler/dex/quick/arm64/call_arm64.cc @@ -282,7 +282,13 @@ void Arm64Mir2Lir::UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) { FreeTemp(reg_card_no); } +static dwarf::Reg DwarfCoreReg(int num) { + return dwarf::Reg::Arm64Core(num); +} + void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { + DCHECK_EQ(cfi_.GetCurrentCFAOffset(), 0); // empty stack. + /* * On entry, x0 to x7 are live. Let the register allocation * mechanism know so it doesn't try to use any of them when @@ -345,6 +351,7 @@ void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) if (spilled_already != frame_size_) { OpRegImm(kOpSub, rs_sp, frame_size_without_spills); + cfi_.AdjustCFAOffset(frame_size_without_spills); } if (!skip_overflow_check) { @@ -361,12 +368,14 @@ void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) GenerateTargetLabel(kPseudoThrowTarget); // Unwinds stack. m2l_->OpRegImm(kOpAdd, rs_sp, sp_displace_); + m2l_->cfi().AdjustCFAOffset(-sp_displace_); m2l_->ClobberCallerSave(); ThreadOffset<8> func_offset = QUICK_ENTRYPOINT_OFFSET(8, pThrowStackOverflow); m2l_->LockTemp(rs_xIP0); m2l_->LoadWordDisp(rs_xSELF, func_offset.Int32Value(), rs_xIP0); m2l_->NewLIR1(kA64Br1x, rs_xIP0.GetReg()); m2l_->FreeTemp(rs_xIP0); + m2l_->cfi().AdjustCFAOffset(sp_displace_); } private: @@ -393,6 +402,7 @@ void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) } void Arm64Mir2Lir::GenExitSequence() { + cfi_.RememberState(); /* * In the exit path, r0/r1 are live - make sure they aren't * allocated by the register utilities as temps. @@ -403,6 +413,9 @@ void Arm64Mir2Lir::GenExitSequence() { // Finally return. NewLIR0(kA64Ret); + // The CFI should be restored for any code that follows the exit block. 
+ cfi_.RestoreState(); + cfi_.DefCFAOffset(frame_size_); } void Arm64Mir2Lir::GenSpecialExitSequence() { @@ -419,11 +432,16 @@ void Arm64Mir2Lir::GenSpecialEntryForSuspend() { core_vmap_table_.clear(); fp_vmap_table_.clear(); NewLIR4(WIDE(kA64StpPre4rrXD), rs_x0.GetReg(), rs_xLR.GetReg(), rs_sp.GetReg(), -frame_size_ / 8); + cfi_.AdjustCFAOffset(frame_size_); + // Do not generate CFI for scratch register x0. + cfi_.RelOffset(DwarfCoreReg(rxLR), 8); } void Arm64Mir2Lir::GenSpecialExitForSuspend() { // Pop the frame. (ArtMethod* no longer needed but restore it anyway.) NewLIR4(WIDE(kA64LdpPost4rrXD), rs_x0.GetReg(), rs_xLR.GetReg(), rs_sp.GetReg(), frame_size_ / 8); + cfi_.AdjustCFAOffset(-frame_size_); + cfi_.Restore(DwarfCoreReg(rxLR)); } static bool Arm64UseRelativeCall(CompilationUnit* cu, const MethodReference& target_method) { diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc index a9d9f3d463..20f61f2261 100644 --- a/compiler/dex/quick/arm64/int_arm64.cc +++ b/compiler/dex/quick/arm64/int_arm64.cc @@ -1458,6 +1458,14 @@ static uint32_t GenPairWise(uint32_t reg_mask, int* reg1, int* reg2) { return reg_mask; } +static dwarf::Reg DwarfCoreReg(int num) { + return dwarf::Reg::Arm64Core(num); +} + +static dwarf::Reg DwarfFpReg(int num) { + return dwarf::Reg::Arm64Fp(num); +} + static void SpillCoreRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32_t reg_mask) { int reg1 = -1, reg2 = -1; const int reg_log2_size = 3; @@ -1466,9 +1474,12 @@ static void SpillCoreRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32 reg_mask = GenPairWise(reg_mask, & reg1, & reg2); if (UNLIKELY(reg2 < 0)) { m2l->NewLIR3(WIDE(kA64Str3rXD), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset); + m2l->cfi().RelOffset(DwarfCoreReg(reg1), offset << reg_log2_size); } else { m2l->NewLIR4(WIDE(kA64Stp4rrXD), RegStorage::Solo64(reg2).GetReg(), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset); + m2l->cfi().RelOffset(DwarfCoreReg(reg2), offset << reg_log2_size); + m2l->cfi().RelOffset(DwarfCoreReg(reg1), (offset + 1) << reg_log2_size); } } } @@ -1483,9 +1494,12 @@ static void SpillFPRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32_t if (UNLIKELY(reg2 < 0)) { m2l->NewLIR3(WIDE(kA64Str3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset); + m2l->cfi().RelOffset(DwarfFpReg(reg1), offset << reg_log2_size); } else { m2l->NewLIR4(WIDE(kA64Stp4ffXD), RegStorage::FloatSolo64(reg2).GetReg(), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset); + m2l->cfi().RelOffset(DwarfFpReg(reg2), offset << reg_log2_size); + m2l->cfi().RelOffset(DwarfFpReg(reg1), (offset + 1) << reg_log2_size); } } } @@ -1493,6 +1507,7 @@ static void SpillFPRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32_t static int SpillRegsPreSub(Arm64Mir2Lir* m2l, uint32_t core_reg_mask, uint32_t fp_reg_mask, int frame_size) { m2l->OpRegRegImm(kOpSub, rs_sp, rs_sp, frame_size); + m2l->cfi().AdjustCFAOffset(frame_size); int core_count = POPCOUNT(core_reg_mask); @@ -1552,11 +1567,15 @@ static int SpillRegsPreIndexed(Arm64Mir2Lir* m2l, RegStorage base, uint32_t core RegStorage::FloatSolo64(reg1).GetReg(), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), -all_offset); + m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize); + m2l->cfi().RelOffset(DwarfFpReg(reg1), kArm64PointerSize); } else { m2l->NewLIR4(WIDE(kA64StpPre4ffXD), RegStorage::FloatSolo64(reg1).GetReg(), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), -all_offset); + 
m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize); + m2l->cfi().RelOffset(DwarfFpReg(reg1), 0); cur_offset = 0; // That core reg needs to go into the upper half. } } else { @@ -1564,10 +1583,15 @@ static int SpillRegsPreIndexed(Arm64Mir2Lir* m2l, RegStorage base, uint32_t core fp_reg_mask = GenPairWise(fp_reg_mask, ®1, ®2); m2l->NewLIR4(WIDE(kA64StpPre4ffXD), RegStorage::FloatSolo64(reg2).GetReg(), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), -all_offset); + m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize); + m2l->cfi().RelOffset(DwarfFpReg(reg2), 0); + m2l->cfi().RelOffset(DwarfFpReg(reg1), kArm64PointerSize); } else { fp_reg_mask = ExtractReg(fp_reg_mask, ®1); m2l->NewLIR4(WIDE(kA64StpPre4ffXD), rs_d0.GetReg(), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), -all_offset); + m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize); + m2l->cfi().RelOffset(DwarfFpReg(reg1), kArm64PointerSize); } } } else { @@ -1580,12 +1604,19 @@ static int SpillRegsPreIndexed(Arm64Mir2Lir* m2l, RegStorage base, uint32_t core core_reg_mask = ExtractReg(core_reg_mask, ®1); m2l->NewLIR4(WIDE(kA64StpPre4rrXD), rs_xzr.GetReg(), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), -all_offset); + m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize); + m2l->cfi().RelOffset(DwarfCoreReg(reg1), kArm64PointerSize); } else { core_reg_mask = GenPairWise(core_reg_mask, ®1, ®2); m2l->NewLIR4(WIDE(kA64StpPre4rrXD), RegStorage::Solo64(reg2).GetReg(), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), -all_offset); + m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize); + m2l->cfi().RelOffset(DwarfCoreReg(reg2), 0); + m2l->cfi().RelOffset(DwarfCoreReg(reg1), kArm64PointerSize); } } + DCHECK_EQ(m2l->cfi().GetCurrentCFAOffset(), + static_cast<int>(all_offset * kArm64PointerSize)); if (fp_count != 0) { for (; fp_reg_mask != 0;) { @@ -1594,10 +1625,13 @@ static int SpillRegsPreIndexed(Arm64Mir2Lir* m2l, RegStorage base, uint32_t core if (UNLIKELY(reg2 < 0)) { m2l->NewLIR3(WIDE(kA64Str3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), cur_offset); + m2l->cfi().RelOffset(DwarfFpReg(reg1), cur_offset * kArm64PointerSize); // Do not increment offset here, as the second half will be filled by a core reg. } else { m2l->NewLIR4(WIDE(kA64Stp4ffXD), RegStorage::FloatSolo64(reg2).GetReg(), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), cur_offset); + m2l->cfi().RelOffset(DwarfFpReg(reg2), cur_offset * kArm64PointerSize); + m2l->cfi().RelOffset(DwarfFpReg(reg1), (cur_offset + 1) * kArm64PointerSize); cur_offset += 2; } } @@ -1610,6 +1644,7 @@ static int SpillRegsPreIndexed(Arm64Mir2Lir* m2l, RegStorage base, uint32_t core core_reg_mask = ExtractReg(core_reg_mask, ®1); m2l->NewLIR3(WIDE(kA64Str3rXD), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), cur_offset + 1); + m2l->cfi().RelOffset(DwarfCoreReg(reg1), (cur_offset + 1) * kArm64PointerSize); cur_offset += 2; // Half-slot filled now. 
} } @@ -1620,6 +1655,8 @@ static int SpillRegsPreIndexed(Arm64Mir2Lir* m2l, RegStorage base, uint32_t core core_reg_mask = GenPairWise(core_reg_mask, ®1, ®2); m2l->NewLIR4(WIDE(kA64Stp4rrXD), RegStorage::Solo64(reg2).GetReg(), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), cur_offset); + m2l->cfi().RelOffset(DwarfCoreReg(reg2), cur_offset * kArm64PointerSize); + m2l->cfi().RelOffset(DwarfCoreReg(reg1), (cur_offset + 1) * kArm64PointerSize); } DCHECK_EQ(cur_offset, all_offset); @@ -1650,10 +1687,13 @@ static void UnSpillCoreRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint reg_mask = GenPairWise(reg_mask, & reg1, & reg2); if (UNLIKELY(reg2 < 0)) { m2l->NewLIR3(WIDE(kA64Ldr3rXD), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset); + m2l->cfi().Restore(DwarfCoreReg(reg1)); } else { DCHECK_LE(offset, 63); m2l->NewLIR4(WIDE(kA64Ldp4rrXD), RegStorage::Solo64(reg2).GetReg(), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset); + m2l->cfi().Restore(DwarfCoreReg(reg2)); + m2l->cfi().Restore(DwarfCoreReg(reg1)); } } } @@ -1667,9 +1707,12 @@ static void UnSpillFPRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32 if (UNLIKELY(reg2 < 0)) { m2l->NewLIR3(WIDE(kA64Ldr3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset); + m2l->cfi().Restore(DwarfFpReg(reg1)); } else { m2l->NewLIR4(WIDE(kA64Ldp4ffXD), RegStorage::FloatSolo64(reg2).GetReg(), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset); + m2l->cfi().Restore(DwarfFpReg(reg2)); + m2l->cfi().Restore(DwarfFpReg(reg1)); } } } @@ -1711,6 +1754,7 @@ void Arm64Mir2Lir::UnspillRegs(RegStorage base, uint32_t core_reg_mask, uint32_t early_drop = RoundDown(early_drop, 16); OpRegImm64(kOpAdd, rs_sp, early_drop); + cfi_.AdjustCFAOffset(-early_drop); } // Unspill. @@ -1724,7 +1768,9 @@ void Arm64Mir2Lir::UnspillRegs(RegStorage base, uint32_t core_reg_mask, uint32_t } // Drop the (rest of) the frame. - OpRegImm64(kOpAdd, rs_sp, frame_size - early_drop); + int adjust = frame_size - early_drop; + OpRegImm64(kOpAdd, rs_sp, adjust); + cfi_.AdjustCFAOffset(-adjust); } bool Arm64Mir2Lir::GenInlinedReverseBits(CallInfo* info, OpSize size) { diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc index 232a2286e2..5ea36c2769 100644 --- a/compiler/dex/quick/codegen_util.cc +++ b/compiler/dex/quick/codegen_util.cc @@ -541,13 +541,11 @@ void Mir2Lir::InstallSwitchTables() { DCHECK(tab_rec->anchor->flags.fixup != kFixupNone); bx_offset = tab_rec->anchor->offset + 4; break; - case kX86: - bx_offset = 0; - break; case kX86_64: // RIP relative to switch table. 
bx_offset = tab_rec->offset; break; + case kX86: case kArm64: case kMips: case kMips64: @@ -1072,6 +1070,9 @@ Mir2Lir::Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena dex_cache_arrays_layout_(cu->compiler_driver->GetDexCacheArraysLayout(cu->dex_file)), pc_rel_temp_(nullptr), dex_cache_arrays_min_offset_(std::numeric_limits<uint32_t>::max()), + cfi_(&last_lir_insn_, + cu->compiler_driver->GetCompilerOptions().GetIncludeDebugSymbols(), + arena), in_to_reg_storage_mapping_(arena) { switch_tables_.reserve(4); fill_array_data_.reserve(4); @@ -1164,7 +1165,7 @@ CompiledMethod* Mir2Lir::GetCompiledMethod() { ArrayRef<const uint8_t>(encoded_mapping_table_), ArrayRef<const uint8_t>(vmap_encoder.GetData()), ArrayRef<const uint8_t>(native_gc_map_), - ArrayRef<const uint8_t>(), + ArrayRef<const uint8_t>(*cfi_.Patch(code_buffer_.size())), ArrayRef<const LinkerPatch>(patches_)); } diff --git a/compiler/dex/quick/lazy_debug_frame_opcode_writer.cc b/compiler/dex/quick/lazy_debug_frame_opcode_writer.cc new file mode 100644 index 0000000000..5cfb0ff557 --- /dev/null +++ b/compiler/dex/quick/lazy_debug_frame_opcode_writer.cc @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "lazy_debug_frame_opcode_writer.h" +#include "mir_to_lir.h" + +namespace art { +namespace dwarf { + +const ArenaVector<uint8_t>* LazyDebugFrameOpCodeWriter::Patch(size_t code_size) { + if (!this->enabled_) { + DCHECK(this->data()->empty()); + return this->data(); + } + if (!patched_) { + patched_ = true; + // Move our data buffer to temporary variable. + ArenaVector<uint8_t> old_opcodes(this->opcodes_.get_allocator()); + old_opcodes.swap(this->opcodes_); + // Refill our data buffer with patched opcodes. + this->opcodes_.reserve(old_opcodes.size() + advances_.size() + 4); + size_t pos = 0; + for (auto advance : advances_) { + DCHECK_GE(advance.pos, pos); + // Copy old data up to the point when advance was issued. + this->opcodes_.insert(this->opcodes_.end(), + old_opcodes.begin() + pos, + old_opcodes.begin() + advance.pos); + pos = advance.pos; + // This may be null if there is no slow-path code after return. + LIR* next_lir = NEXT_LIR(advance.last_lir_insn); + // Insert the advance command with its final offset. + Base::AdvancePC(next_lir != nullptr ? next_lir->offset : code_size); + } + // Copy the final segment. 
+ this->opcodes_.insert(this->opcodes_.end(), + old_opcodes.begin() + pos, + old_opcodes.end()); + Base::AdvancePC(code_size); + } + return this->data(); +} + +} // namespace dwarf +} // namespace art diff --git a/compiler/dex/quick/lazy_debug_frame_opcode_writer.h b/compiler/dex/quick/lazy_debug_frame_opcode_writer.h new file mode 100644 index 0000000000..94ffd7f957 --- /dev/null +++ b/compiler/dex/quick/lazy_debug_frame_opcode_writer.h @@ -0,0 +1,69 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_DEX_QUICK_LAZY_DEBUG_FRAME_OPCODE_WRITER_H_ +#define ART_COMPILER_DEX_QUICK_LAZY_DEBUG_FRAME_OPCODE_WRITER_H_ + +#include "base/arena_allocator.h" +#include "base/arena_containers.h" +#include "dwarf/debug_frame_opcode_writer.h" + +namespace art { +struct LIR; +namespace dwarf { + +// When we are generating the CFI code, we do not know the instruction offsets, +// so this class stores the LIR references and patches the instruction stream later. +class LazyDebugFrameOpCodeWriter FINAL + : public DebugFrameOpCodeWriter<ArenaAllocatorAdapter<uint8_t>> { + typedef DebugFrameOpCodeWriter<ArenaAllocatorAdapter<uint8_t>> Base; + public: + // This method is implicitly called by the opcode writers. + virtual void ImplicitlyAdvancePC() OVERRIDE { + DCHECK_EQ(patched_, false); + DCHECK_EQ(this->current_pc_, 0); + advances_.push_back({this->data()->size(), *last_lir_insn_}); + } + + const ArenaVector<uint8_t>* Patch(size_t code_size); + + explicit LazyDebugFrameOpCodeWriter(LIR** last_lir_insn, bool enable_writes, + ArenaAllocator* allocator) + : Base(enable_writes, allocator->Adapter()), + last_lir_insn_(last_lir_insn), + advances_(allocator->Adapter()), + patched_(false) { + } + + private: + typedef struct { + size_t pos; + LIR* last_lir_insn; + } Advance; + + using Base::data; // Hidden. Use Patch method instead. + + LIR** last_lir_insn_; + ArenaVector<Advance> advances_; + bool patched_; + + DISALLOW_COPY_AND_ASSIGN(LazyDebugFrameOpCodeWriter); +}; + +} // namespace dwarf +} // namespace art + +#endif // ART_COMPILER_DEX_QUICK_LAZY_DEBUG_FRAME_OPCODE_WRITER_H_ diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc index c932df6dc9..05570e4bde 100644 --- a/compiler/dex/quick/mips/call_mips.cc +++ b/compiler/dex/quick/mips/call_mips.cc @@ -238,7 +238,12 @@ void MipsMir2Lir::UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) { FreeTemp(reg_card_no); } +static dwarf::Reg DwarfCoreReg(int num) { + return dwarf::Reg::MipsCore(num); +} + void MipsMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { + DCHECK_EQ(cfi_.GetCurrentCFAOffset(), 0); int spill_count = num_core_spills_ + num_fp_spills_; /* * On entry, A0, A1, A2 & A3 are live. On Mips64, A4, A5, A6 & A7 are also live. @@ -304,10 +309,12 @@ void MipsMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) // RA is offset 0 since we push in reverse order.
m2l_->LoadWordDisp(m2l_->TargetPtrReg(kSp), 0, m2l_->TargetPtrReg(kLr)); m2l_->OpRegImm(kOpAdd, m2l_->TargetPtrReg(kSp), sp_displace_); + m2l_->cfi().AdjustCFAOffset(-sp_displace_); m2l_->ClobberCallerSave(); RegStorage r_tgt = m2l_->CallHelperSetup(kQuickThrowStackOverflow); // Doesn't clobber LR. m2l_->CallHelper(r_tgt, kQuickThrowStackOverflow, false /* MarkSafepointPC */, false /* UseLink */); + m2l_->cfi().AdjustCFAOffset(sp_displace_); } private: @@ -318,8 +325,10 @@ void MipsMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, spill_count * ptr_size)); // TODO: avoid copy for small frame sizes. OpRegCopy(rs_sp, new_sp); // Establish stack. + cfi_.AdjustCFAOffset(frame_sub); } else { OpRegImm(kOpSub, rs_sp, frame_sub); + cfi_.AdjustCFAOffset(frame_sub); } FlushIns(ArgLocs, rl_method); @@ -337,6 +346,7 @@ void MipsMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) } void MipsMir2Lir::GenExitSequence() { + cfi_.RememberState(); /* * In the exit path, rMIPS_RET0/rMIPS_RET1 are live - make sure they aren't * allocated by the register utilities as temps. @@ -346,6 +356,9 @@ void MipsMir2Lir::GenExitSequence() { UnSpillCoreRegs(); OpReg(kOpBx, TargetPtrReg(kLr)); + // The CFI should be restored for any code that follows the exit block. + cfi_.RestoreState(); + cfi_.DefCFAOffset(frame_size_); } void MipsMir2Lir::GenSpecialExitSequence() { @@ -364,15 +377,20 @@ void MipsMir2Lir::GenSpecialEntryForSuspend() { fp_vmap_table_.clear(); const RegStorage rs_sp = TargetPtrReg(kSp); OpRegImm(kOpSub, rs_sp, frame_size_); + cfi_.AdjustCFAOffset(frame_size_); StoreWordDisp(rs_sp, frame_size_ - (cu_->target64 ? 8 : 4), TargetPtrReg(kLr)); + cfi_.RelOffset(DwarfCoreReg(rRA), frame_size_ - (cu_->target64 ? 8 : 4)); StoreWordDisp(rs_sp, 0, TargetPtrReg(kArg0)); + // Do not generate CFI for scratch register A0. } void MipsMir2Lir::GenSpecialExitForSuspend() { // Pop the frame. Don't pop ArtMethod*, it's no longer needed. const RegStorage rs_sp = TargetPtrReg(kSp); LoadWordDisp(rs_sp, frame_size_ - (cu_->target64 ? 
8 : 4), TargetPtrReg(kLr)); + cfi_.Restore(DwarfCoreReg(rRA)); OpRegImm(kOpAdd, rs_sp, frame_size_); + cfi_.AdjustCFAOffset(-frame_size_); } /* @@ -385,73 +403,73 @@ static int NextSDCallInsn(CompilationUnit* cu, CallInfo* info ATTRIBUTE_UNUSED, Mir2Lir* cg = static_cast<Mir2Lir*>(cu->cg.get()); if (direct_code != 0 && direct_method != 0) { switch (state) { - case 0: // Get the current Method* [sets kArg0] - if (direct_code != static_cast<uintptr_t>(-1)) { - if (cu->target64) { - cg->LoadConstantWide(cg->TargetPtrReg(kInvokeTgt), direct_code); + case 0: // Get the current Method* [sets kArg0] + if (direct_code != static_cast<uintptr_t>(-1)) { + if (cu->target64) { + cg->LoadConstantWide(cg->TargetPtrReg(kInvokeTgt), direct_code); + } else { + cg->LoadConstant(cg->TargetPtrReg(kInvokeTgt), direct_code); + } } else { - cg->LoadConstant(cg->TargetPtrReg(kInvokeTgt), direct_code); + cg->LoadCodeAddress(target_method, type, kInvokeTgt); } - } else { - cg->LoadCodeAddress(target_method, type, kInvokeTgt); - } - if (direct_method != static_cast<uintptr_t>(-1)) { - if (cu->target64) { - cg->LoadConstantWide(cg->TargetReg(kArg0, kRef), direct_method); + if (direct_method != static_cast<uintptr_t>(-1)) { + if (cu->target64) { + cg->LoadConstantWide(cg->TargetReg(kArg0, kRef), direct_method); + } else { + cg->LoadConstant(cg->TargetReg(kArg0, kRef), direct_method); + } } else { - cg->LoadConstant(cg->TargetReg(kArg0, kRef), direct_method); + cg->LoadMethodAddress(target_method, type, kArg0); } - } else { - cg->LoadMethodAddress(target_method, type, kArg0); - } - break; - default: - return -1; + break; + default: + return -1; } } else { RegStorage arg0_ref = cg->TargetReg(kArg0, kRef); switch (state) { - case 0: // Get the current Method* [sets kArg0] - // TUNING: we can save a reg copy if Method* has been promoted. - cg->LoadCurrMethodDirect(arg0_ref); - break; - case 1: // Get method->dex_cache_resolved_methods_ - cg->LoadRefDisp(arg0_ref, - mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value(), - arg0_ref, - kNotVolatile); - // Set up direct code if known. - if (direct_code != 0) { - if (direct_code != static_cast<uintptr_t>(-1)) { - if (cu->target64) { - cg->LoadConstantWide(cg->TargetPtrReg(kInvokeTgt), direct_code); + case 0: // Get the current Method* [sets kArg0] + // TUNING: we can save a reg copy if Method* has been promoted. + cg->LoadCurrMethodDirect(arg0_ref); + break; + case 1: // Get method->dex_cache_resolved_methods_ + cg->LoadRefDisp(arg0_ref, + mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value(), + arg0_ref, + kNotVolatile); + // Set up direct code if known. 
+ if (direct_code != 0) { + if (direct_code != static_cast<uintptr_t>(-1)) { + if (cu->target64) { + cg->LoadConstantWide(cg->TargetPtrReg(kInvokeTgt), direct_code); + } else { + cg->LoadConstant(cg->TargetPtrReg(kInvokeTgt), direct_code); + } } else { - cg->LoadConstant(cg->TargetPtrReg(kInvokeTgt), direct_code); + CHECK_LT(target_method.dex_method_index, target_method.dex_file->NumMethodIds()); + cg->LoadCodeAddress(target_method, type, kInvokeTgt); } - } else { - CHECK_LT(target_method.dex_method_index, target_method.dex_file->NumMethodIds()); - cg->LoadCodeAddress(target_method, type, kInvokeTgt); } - } - break; - case 2: // Grab target method* - CHECK_EQ(cu->dex_file, target_method.dex_file); - cg->LoadRefDisp(arg0_ref, - mirror::ObjectArray<mirror::Object>:: - OffsetOfElement(target_method.dex_method_index).Int32Value(), - arg0_ref, - kNotVolatile); - break; - case 3: // Grab the code from the method* - if (direct_code == 0) { - int32_t offset = mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset( - InstructionSetPointerSize(cu->instruction_set)).Int32Value(); - // Get the compiled code address [use *alt_from or kArg0, set kInvokeTgt] - cg->LoadWordDisp(arg0_ref, offset, cg->TargetPtrReg(kInvokeTgt)); - } - break; - default: - return -1; + break; + case 2: // Grab target method* + CHECK_EQ(cu->dex_file, target_method.dex_file); + cg->LoadRefDisp(arg0_ref, + mirror::ObjectArray<mirror::Object>:: + OffsetOfElement(target_method.dex_method_index).Int32Value(), + arg0_ref, + kNotVolatile); + break; + case 3: // Grab the code from the method* + if (direct_code == 0) { + int32_t offset = mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset( + InstructionSetPointerSize(cu->instruction_set)).Int32Value(); + // Get the compiled code address [use *alt_from or kArg0, set kInvokeTgt] + cg->LoadWordDisp(arg0_ref, offset, cg->TargetPtrReg(kInvokeTgt)); + } + break; + default: + return -1; } } return state + 1; diff --git a/compiler/dex/quick/mips/int_mips.cc b/compiler/dex/quick/mips/int_mips.cc index 626b36ea28..1ca8bb618b 100644 --- a/compiler/dex/quick/mips/int_mips.cc +++ b/compiler/dex/quick/mips/int_mips.cc @@ -237,12 +237,12 @@ void MipsMir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) { // note the operands are swapped for the mtc1 and mthc1 instr. // Here if dest is fp reg and src is core reg. if (fpuIs32Bit_) { - NewLIR2(kMipsMtc1, r_src.GetLowReg(), r_dest.GetLowReg()); - NewLIR2(kMipsMtc1, r_src.GetHighReg(), r_dest.GetHighReg()); + NewLIR2(kMipsMtc1, r_src.GetLowReg(), r_dest.GetLowReg()); + NewLIR2(kMipsMtc1, r_src.GetHighReg(), r_dest.GetHighReg()); } else { - r_dest = Fp64ToSolo32(r_dest); - NewLIR2(kMipsMtc1, r_src.GetLowReg(), r_dest.GetReg()); - NewLIR2(kMipsMthc1, r_src.GetHighReg(), r_dest.GetReg()); + r_dest = Fp64ToSolo32(r_dest); + NewLIR2(kMipsMtc1, r_src.GetLowReg(), r_dest.GetReg()); + NewLIR2(kMipsMthc1, r_src.GetHighReg(), r_dest.GetReg()); } } } else { @@ -309,7 +309,13 @@ RegLocation MipsMir2Lir::GenDivRem(RegLocation rl_dest, RegStorage reg1, RegStor RegLocation MipsMir2Lir::GenDivRemLit(RegLocation rl_dest, RegStorage reg1, int lit, bool is_div) { RegStorage t_reg = AllocTemp(); - NewLIR3(kMipsAddiu, t_reg.GetReg(), rZERO, lit); + // lit is guaranteed to be a 16-bit constant + if (IsUint<16>(lit)) { + NewLIR3(kMipsOri, t_reg.GetReg(), rZERO, lit); + } else { + // Addiu will sign extend the entire width (32 or 64) of the register.
+ NewLIR3(kMipsAddiu, t_reg.GetReg(), rZERO, lit); + } RegLocation rl_result = GenDivRem(rl_dest, reg1, t_reg, is_div); FreeTemp(t_reg); return rl_result; @@ -815,20 +821,20 @@ void MipsMir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest, } OpKind op = kOpBkpt; switch (opcode) { - case Instruction::SHL_LONG: - case Instruction::SHL_LONG_2ADDR: - op = kOpLsl; - break; - case Instruction::SHR_LONG: - case Instruction::SHR_LONG_2ADDR: - op = kOpAsr; - break; - case Instruction::USHR_LONG: - case Instruction::USHR_LONG_2ADDR: - op = kOpLsr; - break; - default: - LOG(FATAL) << "Unexpected case: " << opcode; + case Instruction::SHL_LONG: + case Instruction::SHL_LONG_2ADDR: + op = kOpLsl; + break; + case Instruction::SHR_LONG: + case Instruction::SHR_LONG_2ADDR: + op = kOpAsr; + break; + case Instruction::USHR_LONG: + case Instruction::USHR_LONG_2ADDR: + op = kOpLsr; + break; + default: + LOG(FATAL) << "Unexpected case: " << opcode; } rl_shift = LoadValue(rl_shift, kCoreReg); rl_src1 = LoadValueWide(rl_src1, kCoreReg); diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc index a94fad7534..4c0bd8378b 100644 --- a/compiler/dex/quick/mips/target_mips.cc +++ b/compiler/dex/quick/mips/target_mips.cc @@ -830,6 +830,10 @@ LIR* MipsMir2Lir::GenAtomic64Store(RegStorage r_base, int displacement, RegStora return OpReg(kOpBlx, r_tgt); } +static dwarf::Reg DwarfCoreReg(int num) { + return dwarf::Reg::MipsCore(num); +} + void MipsMir2Lir::SpillCoreRegs() { if (num_core_spills_ == 0) { return; @@ -839,11 +843,13 @@ void MipsMir2Lir::SpillCoreRegs() { int offset = num_core_spills_ * ptr_size; const RegStorage rs_sp = TargetPtrReg(kSp); OpRegImm(kOpSub, rs_sp, offset); + cfi_.AdjustCFAOffset(offset); for (int reg = 0; mask; mask >>= 1, reg++) { if (mask & 0x1) { offset -= ptr_size; StoreWordDisp(rs_sp, offset, cu_->target64 ? RegStorage::Solo64(reg) : RegStorage::Solo32(reg)); + cfi_.RelOffset(DwarfCoreReg(reg), offset); } } } @@ -861,9 +867,11 @@ void MipsMir2Lir::UnSpillCoreRegs() { offset -= ptr_size; LoadWordDisp(rs_sp, offset, cu_->target64 ? 
RegStorage::Solo64(reg) : RegStorage::Solo32(reg)); + cfi_.Restore(DwarfCoreReg(reg)); } } OpRegImm(kOpAdd, rs_sp, frame_size_); + cfi_.AdjustCFAOffset(-frame_size_); } bool MipsMir2Lir::IsUnconditionalBranch(LIR* lir) { diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc index ed8e21e817..961cd4f06b 100644 --- a/compiler/dex/quick/mir_to_lir.cc +++ b/compiler/dex/quick/mir_to_lir.cc @@ -1253,11 +1253,14 @@ bool Mir2Lir::MethodBlockCodeGen(BasicBlock* bb) { AppendLIR(NewLIR0(kPseudoPrologueBegin)); GenEntrySequence(&mir_graph_->reg_location_[start_vreg], mir_graph_->GetMethodLoc()); AppendLIR(NewLIR0(kPseudoPrologueEnd)); + DCHECK_EQ(cfi_.GetCurrentCFAOffset(), frame_size_); } else if (bb->block_type == kExitBlock) { ResetRegPool(); + DCHECK_EQ(cfi_.GetCurrentCFAOffset(), frame_size_); AppendLIR(NewLIR0(kPseudoEpilogueBegin)); GenExitSequence(); AppendLIR(NewLIR0(kPseudoEpilogueEnd)); + DCHECK_EQ(cfi_.GetCurrentCFAOffset(), frame_size_); } for (mir = bb->first_mir_insn; mir != NULL; mir = mir->next) { diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h index 1624c84437..db59714742 100644 --- a/compiler/dex/quick/mir_to_lir.h +++ b/compiler/dex/quick/mir_to_lir.h @@ -29,6 +29,7 @@ #include "dex/quick/resource_mask.h" #include "entrypoints/quick/quick_entrypoints_enum.h" #include "invoke_type.h" +#include "lazy_debug_frame_opcode_writer.h" #include "leb128.h" #include "safe_map.h" #include "utils/array_ref.h" @@ -635,7 +636,7 @@ class Mir2Lir { RegisterClass ShortyToRegClass(char shorty_type); RegisterClass LocToRegClass(RegLocation loc); int ComputeFrameSize(); - virtual void Materialize(); + void Materialize(); virtual CompiledMethod* GetCompiledMethod(); void MarkSafepointPC(LIR* inst); void MarkSafepointPCAfter(LIR* after); @@ -776,7 +777,7 @@ class Mir2Lir { */ virtual RegLocation EvalLoc(RegLocation loc, int reg_class, bool update); - void AnalyzeMIR(RefCounts* core_counts, MIR* mir, uint32_t weight); + virtual void AnalyzeMIR(RefCounts* core_counts, MIR* mir, uint32_t weight); virtual void CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs); void DumpCounts(const RefCounts* arr, int size, const char* msg); virtual void DoPromotion(); @@ -1508,6 +1509,12 @@ class Mir2Lir { return 0; } + /** + * @brief Buffer of DWARF's Call Frame Information opcodes. + * @details It is used by debuggers and other tools to unwind the call stack. + */ + dwarf::LazyDebugFrameOpCodeWriter& cfi() { return cfi_; } + protected: Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena); @@ -1770,6 +1777,13 @@ class Mir2Lir { // Update references from prev_mir to mir. void UpdateReferenceVRegs(MIR* mir, MIR* prev_mir, BitVector* references); + /** + * Returns true if the frame spills the given core register. + */ + bool CoreSpillMaskContains(int reg) { + return (core_spill_mask_ & (1u << reg)) != 0; + } + public: // TODO: add accessors for these. LIR* literal_list_; // Constants. @@ -1858,6 +1872,8 @@ class Mir2Lir { // if pc_rel_temp_ isn't nullptr. 
uint32_t dex_cache_arrays_min_offset_; + dwarf::LazyDebugFrameOpCodeWriter cfi_; + // ABI support class ShortyArg { public: @@ -1917,6 +1933,8 @@ class Mir2Lir { private: static bool SizeMatchesTypeForEntrypoint(OpSize size, Primitive::Type type); + + friend class QuickCFITest; }; // Class Mir2Lir } // namespace art diff --git a/compiler/dex/quick/quick_cfi_test.cc b/compiler/dex/quick/quick_cfi_test.cc new file mode 100644 index 0000000000..2e62166b7b --- /dev/null +++ b/compiler/dex/quick/quick_cfi_test.cc @@ -0,0 +1,139 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vector> +#include <memory> + +#include "arch/instruction_set.h" +#include "arch/instruction_set_features.h" +#include "cfi_test.h" +#include "dex/compiler_ir.h" +#include "dex/mir_graph.h" +#include "dex/pass_manager.h" +#include "dex/quick/dex_file_to_method_inliner_map.h" +#include "dex/quick/quick_compiler.h" +#include "dex/quick/mir_to_lir.h" +#include "dex/verification_results.h" +#include "driver/compiler_driver.h" +#include "driver/compiler_options.h" +#include "gtest/gtest.h" + +#include "dex/quick/quick_cfi_test_expected.inc" + +namespace art { + +// Run the tests only on host. +#ifndef HAVE_ANDROID_OS + +class QuickCFITest : public CFITest { + public: + // Enable this flag to generate the expected outputs. + static constexpr bool kGenerateExpected = false; + + void TestImpl(InstructionSet isa, const char* isa_str, + const std::vector<uint8_t>& expected_asm, + const std::vector<uint8_t>& expected_cfi) { + // Setup simple compiler context. + ArenaPool pool; + ArenaAllocator arena(&pool); + CompilerOptions compiler_options( + CompilerOptions::kDefaultCompilerFilter, + CompilerOptions::kDefaultHugeMethodThreshold, + CompilerOptions::kDefaultLargeMethodThreshold, + CompilerOptions::kDefaultSmallMethodThreshold, + CompilerOptions::kDefaultTinyMethodThreshold, + CompilerOptions::kDefaultNumDexMethodsThreshold, + true, // generate_gdb_information. + false, + CompilerOptions::kDefaultTopKProfileThreshold, + false, + true, // include_debug_symbols. + false, + false, + false, + false, + nullptr, + new PassManagerOptions(), + nullptr, + false); + VerificationResults verification_results(&compiler_options); + DexFileToMethodInlinerMap method_inliner_map; + std::unique_ptr<const InstructionSetFeatures> isa_features; + std::string error; + isa_features.reset(InstructionSetFeatures::FromVariant(isa, "default", &error)); + CompilerDriver driver(&compiler_options, &verification_results, &method_inliner_map, + Compiler::kQuick, isa, isa_features.get(), + false, 0, 0, 0, false, false, "", 0, -1, ""); + ClassLinker* linker = nullptr; + CompilationUnit cu(&pool, isa, &driver, linker); + DexFile::CodeItem code_item { 0, 0, 0, 0, 0, 0, { 0 } }; // NOLINT + cu.mir_graph.reset(new MIRGraph(&cu, &arena)); + cu.mir_graph->current_code_item_ = &code_item; + + // Generate empty method with some spills. 
+ std::unique_ptr<Mir2Lir> m2l(QuickCompiler::GetCodeGenerator(&cu, nullptr)); + m2l->frame_size_ = 64u; + m2l->CompilerInitializeRegAlloc(); + for (const auto& info : m2l->reg_pool_->core_regs_) { + if (m2l->num_core_spills_ < 2 && !info->IsTemp() && !info->InUse()) { + m2l->core_spill_mask_ |= 1 << info->GetReg().GetReg(); + m2l->num_core_spills_++; + } + } + for (const auto& info : m2l->reg_pool_->sp_regs_) { + if (m2l->num_fp_spills_ < 2 && !info->IsTemp() && !info->InUse()) { + m2l->fp_spill_mask_ |= 1 << info->GetReg().GetReg(); + m2l->num_fp_spills_++; + } + } + m2l->AdjustSpillMask(); + m2l->GenEntrySequence(NULL, m2l->LocCReturnRef()); + m2l->GenExitSequence(); + m2l->HandleSlowPaths(); + m2l->AssembleLIR(); + std::vector<uint8_t> actual_asm(m2l->code_buffer_.begin(), m2l->code_buffer_.end()); + auto const& cfi_data = m2l->cfi().Patch(actual_asm.size()); + std::vector<uint8_t> actual_cfi(cfi_data->begin(), cfi_data->end()); + EXPECT_EQ(m2l->cfi().GetCurrentPC(), static_cast<int>(actual_asm.size())); + + if (kGenerateExpected) { + GenerateExpected(stdout, isa, isa_str, actual_asm, actual_cfi); + } else { + EXPECT_EQ(expected_asm, actual_asm); + EXPECT_EQ(expected_cfi, actual_cfi); + } + } +}; + +#define TEST_ISA(isa) \ + TEST_F(QuickCFITest, isa) { \ + std::vector<uint8_t> expected_asm(expected_asm_##isa, \ + expected_asm_##isa + arraysize(expected_asm_##isa)); \ + std::vector<uint8_t> expected_cfi(expected_cfi_##isa, \ + expected_cfi_##isa + arraysize(expected_cfi_##isa)); \ + TestImpl(isa, #isa, expected_asm, expected_cfi); \ + } + +TEST_ISA(kThumb2) +TEST_ISA(kArm64) +TEST_ISA(kX86) +TEST_ISA(kX86_64) +TEST_ISA(kMips) +TEST_ISA(kMips64) + +#endif // HAVE_ANDROID_OS + +} // namespace art diff --git a/compiler/dex/quick/quick_cfi_test_expected.inc b/compiler/dex/quick/quick_cfi_test_expected.inc new file mode 100644 index 0000000000..634fdeead0 --- /dev/null +++ b/compiler/dex/quick/quick_cfi_test_expected.inc @@ -0,0 +1,217 @@ +static constexpr uint8_t expected_asm_kThumb2[] = { + 0x60, 0xB5, 0x2D, 0xED, 0x02, 0x8A, 0x8B, 0xB0, 0x00, 0x90, 0x0B, 0xB0, + 0xBD, 0xEC, 0x02, 0x8A, 0x60, 0xBD, 0x00, 0x00, +}; +static constexpr uint8_t expected_cfi_kThumb2[] = { + 0x42, 0x0E, 0x0C, 0x85, 0x03, 0x86, 0x02, 0x8E, 0x01, 0x44, 0x0E, 0x14, + 0x05, 0x50, 0x05, 0x05, 0x51, 0x04, 0x42, 0x0E, 0x40, 0x42, 0x0A, 0x42, + 0x0E, 0x14, 0x44, 0x0E, 0x0C, 0x06, 0x50, 0x06, 0x51, 0x44, 0x0B, 0x0E, + 0x40, +}; +// 0x00000000: push {r5, r6, lr} +// 0x00000002: .cfi_def_cfa_offset: 12 +// 0x00000002: .cfi_offset: r5 at cfa-12 +// 0x00000002: .cfi_offset: r6 at cfa-8 +// 0x00000002: .cfi_offset: r14 at cfa-4 +// 0x00000002: vpush.f32 {s16-s17} +// 0x00000006: .cfi_def_cfa_offset: 20 +// 0x00000006: .cfi_offset_extended: r80 at cfa-20 +// 0x00000006: .cfi_offset_extended: r81 at cfa-16 +// 0x00000006: sub sp, sp, #44 +// 0x00000008: .cfi_def_cfa_offset: 64 +// 0x00000008: str r0, [sp, #0] +// 0x0000000a: .cfi_remember_state +// 0x0000000a: add sp, sp, #44 +// 0x0000000c: .cfi_def_cfa_offset: 20 +// 0x0000000c: vpop.f32 {s16-s17} +// 0x00000010: .cfi_def_cfa_offset: 12 +// 0x00000010: .cfi_restore_extended: r80 +// 0x00000010: .cfi_restore_extended: r81 +// 0x00000010: pop {r5, r6, pc} +// 0x00000012: lsls r0, r0, #0 +// 0x00000014: .cfi_restore_state +// 0x00000014: .cfi_def_cfa_offset: 64 + +static constexpr uint8_t expected_asm_kArm64[] = { + 0xFF, 0x03, 0x01, 0xD1, 0xE8, 0xA7, 0x01, 0x6D, 0xF4, 0xD7, 0x02, 0xA9, + 0xFE, 0x1F, 0x00, 0xF9, 0xE0, 0x03, 0x00, 0xB9, 0xE8, 0xA7, 0x41, 0x6D, + 0xF4, 0xD7, 0x42, 
0xA9, 0xFE, 0x1F, 0x40, 0xF9, 0xFF, 0x03, 0x01, 0x91, + 0xC0, 0x03, 0x5F, 0xD6, +}; +static constexpr uint8_t expected_cfi_kArm64[] = { + 0x44, 0x0E, 0x40, 0x44, 0x05, 0x48, 0x0A, 0x05, 0x49, 0x08, 0x44, 0x94, + 0x06, 0x95, 0x04, 0x44, 0x9E, 0x02, 0x44, 0x0A, 0x44, 0x06, 0x48, 0x06, + 0x49, 0x44, 0xD4, 0xD5, 0x44, 0xDE, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, + 0x40, +}; +// 0x00000000: sub sp, sp, #0x40 (64) +// 0x00000004: .cfi_def_cfa_offset: 64 +// 0x00000004: stp d8, d9, [sp, #24] +// 0x00000008: .cfi_offset_extended: r72 at cfa-40 +// 0x00000008: .cfi_offset_extended: r73 at cfa-32 +// 0x00000008: stp x20, x21, [sp, #40] +// 0x0000000c: .cfi_offset: r20 at cfa-24 +// 0x0000000c: .cfi_offset: r21 at cfa-16 +// 0x0000000c: str lr, [sp, #56] +// 0x00000010: .cfi_offset: r30 at cfa-8 +// 0x00000010: str w0, [sp] +// 0x00000014: .cfi_remember_state +// 0x00000014: ldp d8, d9, [sp, #24] +// 0x00000018: .cfi_restore_extended: r72 +// 0x00000018: .cfi_restore_extended: r73 +// 0x00000018: ldp x20, x21, [sp, #40] +// 0x0000001c: .cfi_restore: r20 +// 0x0000001c: .cfi_restore: r21 +// 0x0000001c: ldr lr, [sp, #56] +// 0x00000020: .cfi_restore: r30 +// 0x00000020: add sp, sp, #0x40 (64) +// 0x00000024: .cfi_def_cfa_offset: 0 +// 0x00000024: ret +// 0x00000028: .cfi_restore_state +// 0x00000028: .cfi_def_cfa_offset: 64 + +static constexpr uint8_t expected_asm_kX86[] = { + 0x83, 0xEC, 0x3C, 0x89, 0x6C, 0x24, 0x34, 0x89, 0x74, 0x24, 0x38, 0x89, + 0x04, 0x24, 0x8B, 0x6C, 0x24, 0x34, 0x8B, 0x74, 0x24, 0x38, 0x83, 0xC4, + 0x3C, 0xC3, 0x00, 0x00, +}; +static constexpr uint8_t expected_cfi_kX86[] = { + 0x43, 0x0E, 0x40, 0x44, 0x85, 0x03, 0x44, 0x86, 0x02, 0x43, 0x0A, 0x44, + 0xC5, 0x44, 0xC6, 0x43, 0x0E, 0x04, 0x43, 0x0B, 0x0E, 0x40, +}; +// 0x00000000: sub esp, 60 +// 0x00000003: .cfi_def_cfa_offset: 64 +// 0x00000003: mov [esp + 52], ebp +// 0x00000007: .cfi_offset: r5 at cfa-12 +// 0x00000007: mov [esp + 56], esi +// 0x0000000b: .cfi_offset: r6 at cfa-8 +// 0x0000000b: mov [esp], eax +// 0x0000000e: .cfi_remember_state +// 0x0000000e: mov ebp, [esp + 52] +// 0x00000012: .cfi_restore: r5 +// 0x00000012: mov esi, [esp + 56] +// 0x00000016: .cfi_restore: r6 +// 0x00000016: add esp, 60 +// 0x00000019: .cfi_def_cfa_offset: 4 +// 0x00000019: ret +// 0x0000001a: addb [eax], al +// 0x0000001c: .cfi_restore_state +// 0x0000001c: .cfi_def_cfa_offset: 64 + +static constexpr uint8_t expected_asm_kX86_64[] = { + 0x48, 0x83, 0xEC, 0x38, 0x48, 0x89, 0x5C, 0x24, 0x28, 0x48, 0x89, 0x6C, + 0x24, 0x30, 0xF2, 0x44, 0x0F, 0x11, 0x64, 0x24, 0x18, 0xF2, 0x44, 0x0F, + 0x11, 0x6C, 0x24, 0x20, 0x48, 0x8B, 0xC7, 0x89, 0x3C, 0x24, 0x48, 0x8B, + 0x5C, 0x24, 0x28, 0x48, 0x8B, 0x6C, 0x24, 0x30, 0xF2, 0x44, 0x0F, 0x10, + 0x64, 0x24, 0x18, 0xF2, 0x44, 0x0F, 0x10, 0x6C, 0x24, 0x20, 0x48, 0x83, + 0xC4, 0x38, 0xC3, 0x00, +}; +static constexpr uint8_t expected_cfi_kX86_64[] = { + 0x44, 0x0E, 0x40, 0x45, 0x83, 0x06, 0x45, 0x86, 0x04, 0x47, 0x9D, 0x0A, + 0x47, 0x9E, 0x08, 0x46, 0x0A, 0x45, 0xC3, 0x45, 0xC6, 0x47, 0xDD, 0x47, + 0xDE, 0x44, 0x0E, 0x08, 0x42, 0x0B, 0x0E, 0x40, +}; +// 0x00000000: subq rsp, 56 +// 0x00000004: .cfi_def_cfa_offset: 64 +// 0x00000004: movq [rsp + 40], rbx +// 0x00000009: .cfi_offset: r3 at cfa-24 +// 0x00000009: movq [rsp + 48], rbp +// 0x0000000e: .cfi_offset: r6 at cfa-16 +// 0x0000000e: movsd [rsp + 24], xmm12 +// 0x00000015: .cfi_offset: r29 at cfa-40 +// 0x00000015: movsd [rsp + 32], xmm13 +// 0x0000001c: .cfi_offset: r30 at cfa-32 +// 0x0000001c: movq rax, rdi +// 0x0000001f: mov [rsp], edi +// 0x00000022: 
.cfi_remember_state +// 0x00000022: movq rbx, [rsp + 40] +// 0x00000027: .cfi_restore: r3 +// 0x00000027: movq rbp, [rsp + 48] +// 0x0000002c: .cfi_restore: r6 +// 0x0000002c: movsd xmm12, [rsp + 24] +// 0x00000033: .cfi_restore: r29 +// 0x00000033: movsd xmm13, [rsp + 32] +// 0x0000003a: .cfi_restore: r30 +// 0x0000003a: addq rsp, 56 +// 0x0000003e: .cfi_def_cfa_offset: 8 +// 0x0000003e: ret +// 0x0000003f: addb al, al +// 0x00000040: .cfi_restore_state +// 0x00000040: .cfi_def_cfa_offset: 64 + +static constexpr uint8_t expected_asm_kMips[] = { + 0xF4, 0xFF, 0xBD, 0x27, 0x08, 0x00, 0xB2, 0xAF, 0x04, 0x00, 0xB3, 0xAF, + 0x00, 0x00, 0xBF, 0xAF, 0xCC, 0xFF, 0xBD, 0x27, 0x25, 0x10, 0x80, 0x00, + 0x00, 0x00, 0xA4, 0xAF, 0x3C, 0x00, 0xB2, 0x8F, 0x38, 0x00, 0xB3, 0x8F, + 0x34, 0x00, 0xBF, 0x8F, 0x40, 0x00, 0xBD, 0x27, 0x09, 0x00, 0xE0, 0x03, + 0x00, 0x00, 0x00, 0x00, +}; +static constexpr uint8_t expected_cfi_kMips[] = { + 0x44, 0x0E, 0x0C, 0x44, 0x92, 0x01, 0x44, 0x93, 0x02, 0x44, 0x9F, 0x03, + 0x44, 0x0E, 0x40, 0x48, 0x0A, 0x44, 0xD2, 0x44, 0xD3, 0x44, 0xDF, 0x44, + 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40, +}; +// 0x00000000: addiu r29, r29, -12 +// 0x00000004: .cfi_def_cfa_offset: 12 +// 0x00000004: sw r18, +8(r29) +// 0x00000008: .cfi_offset: r18 at cfa-4 +// 0x00000008: sw r19, +4(r29) +// 0x0000000c: .cfi_offset: r19 at cfa-8 +// 0x0000000c: sw r31, +0(r29) +// 0x00000010: .cfi_offset: r31 at cfa-12 +// 0x00000010: addiu r29, r29, -52 +// 0x00000014: .cfi_def_cfa_offset: 64 +// 0x00000014: or r2, r4, r0 +// 0x00000018: sw r4, +0(r29) +// 0x0000001c: .cfi_remember_state +// 0x0000001c: lw r18, +60(r29) +// 0x00000020: .cfi_restore: r18 +// 0x00000020: lw r19, +56(r29) +// 0x00000024: .cfi_restore: r19 +// 0x00000024: lw r31, +52(r29) +// 0x00000028: .cfi_restore: r31 +// 0x00000028: addiu r29, r29, 64 +// 0x0000002c: .cfi_def_cfa_offset: 0 +// 0x0000002c: jalr r0, r31 +// 0x00000030: nop +// 0x00000034: .cfi_restore_state +// 0x00000034: .cfi_def_cfa_offset: 64 + +static constexpr uint8_t expected_asm_kMips64[] = { + 0xE8, 0xFF, 0xBD, 0x67, 0x10, 0x00, 0xB2, 0xFF, 0x08, 0x00, 0xB3, 0xFF, + 0x00, 0x00, 0xBF, 0xFF, 0xD8, 0xFF, 0xBD, 0x67, 0x25, 0x10, 0x80, 0x00, + 0x00, 0x00, 0xA4, 0xAF, 0x38, 0x00, 0xB2, 0xDF, 0x30, 0x00, 0xB3, 0xDF, + 0x28, 0x00, 0xBF, 0xDF, 0x40, 0x00, 0xBD, 0x67, 0x09, 0x00, 0xE0, 0x03, + 0x00, 0x00, 0x00, 0x00, +}; +static constexpr uint8_t expected_cfi_kMips64[] = { + 0x44, 0x0E, 0x18, 0x44, 0x92, 0x02, 0x44, 0x93, 0x04, 0x44, 0x9F, 0x06, + 0x44, 0x0E, 0x40, 0x48, 0x0A, 0x44, 0xD2, 0x44, 0xD3, 0x44, 0xDF, 0x44, + 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40, +}; +// 0x00000000: daddiu r29, r29, -24 +// 0x00000004: .cfi_def_cfa_offset: 24 +// 0x00000004: sd r18, +16(r29) +// 0x00000008: .cfi_offset: r18 at cfa-8 +// 0x00000008: sd r19, +8(r29) +// 0x0000000c: .cfi_offset: r19 at cfa-16 +// 0x0000000c: sd r31, +0(r29) +// 0x00000010: .cfi_offset: r31 at cfa-24 +// 0x00000010: daddiu r29, r29, -40 +// 0x00000014: .cfi_def_cfa_offset: 64 +// 0x00000014: or r2, r4, r0 +// 0x00000018: sw r4, +0(r29) +// 0x0000001c: .cfi_remember_state +// 0x0000001c: ld r18, +56(r29) +// 0x00000020: .cfi_restore: r18 +// 0x00000020: ld r19, +48(r29) +// 0x00000024: .cfi_restore: r19 +// 0x00000024: ld r31, +40(r29) +// 0x00000028: .cfi_restore: r31 +// 0x00000028: daddiu r29, r29, 64 +// 0x0000002c: .cfi_def_cfa_offset: 0 +// 0x0000002c: jr r31 +// 0x00000030: nop +// 0x00000034: .cfi_restore_state +// 0x00000034: .cfi_def_cfa_offset: 64 + diff --git a/compiler/dex/quick/quick_compiler.cc 
b/compiler/dex/quick/quick_compiler.cc index 01652d6560..fc3e687469 100644 --- a/compiler/dex/quick/quick_compiler.cc +++ b/compiler/dex/quick/quick_compiler.cc @@ -793,21 +793,7 @@ uintptr_t QuickCompiler::GetEntryPointOf(mirror::ArtMethod* method) const { InstructionSetPointerSize(GetCompilerDriver()->GetInstructionSet()))); } -bool QuickCompiler::WriteElf(art::File* file, - OatWriter* oat_writer, - const std::vector<const art::DexFile*>& dex_files, - const std::string& android_root, - bool is_host) const { - if (kProduce64BitELFFiles && Is64BitInstructionSet(GetCompilerDriver()->GetInstructionSet())) { - return art::ElfWriterQuick64::Create(file, oat_writer, dex_files, android_root, is_host, - *GetCompilerDriver()); - } else { - return art::ElfWriterQuick32::Create(file, oat_writer, dex_files, android_root, is_host, - *GetCompilerDriver()); - } -} - -Mir2Lir* QuickCompiler::GetCodeGenerator(CompilationUnit* cu, void* compilation_unit) const { +Mir2Lir* QuickCompiler::GetCodeGenerator(CompilationUnit* cu, void* compilation_unit) { UNUSED(compilation_unit); Mir2Lir* mir_to_lir = nullptr; switch (cu->instruction_set) { diff --git a/compiler/dex/quick/quick_compiler.h b/compiler/dex/quick/quick_compiler.h index 5153a9e82e..8d2c324a70 100644 --- a/compiler/dex/quick/quick_compiler.h +++ b/compiler/dex/quick/quick_compiler.h @@ -52,15 +52,7 @@ class QuickCompiler : public Compiler { uintptr_t GetEntryPointOf(mirror::ArtMethod* method) const OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - bool WriteElf(art::File* file, - OatWriter* oat_writer, - const std::vector<const art::DexFile*>& dex_files, - const std::string& android_root, - bool is_host) const - OVERRIDE - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - - Mir2Lir* GetCodeGenerator(CompilationUnit* cu, void* compilation_unit) const; + static Mir2Lir* GetCodeGenerator(CompilationUnit* cu, void* compilation_unit); void InitCompilationUnit(CompilationUnit& cu) const OVERRIDE; diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc index 118ab1d843..af19f5eaed 100644 --- a/compiler/dex/quick/x86/assemble_x86.cc +++ b/compiler/dex/quick/x86/assemble_x86.cc @@ -544,7 +544,6 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, { kX86CallI, kCall, IS_UNARY_OP | IS_BRANCH, { 0, 0, 0xE8, 0, 0, 0, 0, 4, false }, "CallI", "!0d" }, { kX86Ret, kNullary, NO_OPERAND | IS_BRANCH, { 0, 0, 0xC3, 0, 0, 0, 0, 0, false }, "Ret", "" }, - { kX86StartOfMethod, kMacro, IS_UNARY_OP | REG_DEF0 | SETS_CCODES, { 0, 0, 0, 0, 0, 0, 0, 0, false }, "StartOfMethod", "!0r" }, { kX86PcRelLoadRA, kPcRel, IS_LOAD | IS_QUIN_OP | REG_DEF0_USE12, { 0, 0, 0x8B, 0, 0, 0, 0, 0, false }, "PcRelLoadRA", "!0r,[!1r+!2r<<!3d+!4p]" }, { kX86PcRelAdr, kPcRel, IS_LOAD | IS_BINARY_OP | REG_DEF0, { 0, 0, 0xB8, 0, 0, 0, 0, 4, false }, "PcRelAdr", "!0r,!1p" }, { kX86RepneScasw, kNullary, NO_OPERAND | REG_USEA | REG_USEC | SETS_CCODES, { 0x66, 0xF2, 0xAF, 0, 0, 0, 0, 0, false }, "RepNE ScasW", "" }, @@ -865,13 +864,6 @@ size_t X86Mir2Lir::GetInsnSize(LIR* lir) { DCHECK_EQ(entry->opcode, kX86PcRelAdr); return 5; // opcode with reg + 4 byte immediate } - case kMacro: // lir operands - 0: reg - DCHECK_EQ(lir->opcode, static_cast<int>(kX86StartOfMethod)); - return 5 /* call opcode + 4 byte displacement */ + 1 /* pop reg */ + - ComputeSize(&X86Mir2Lir::EncodingMap[cu_->target64 ? kX86Sub64RI : kX86Sub32RI], - lir->operands[0], NO_REG, NO_REG, 0) - - // Shorter ax encoding. - (RegStorage::RegNum(lir->operands[0]) == rs_rAX.GetRegNum() ? 
1 : 0); case kUnimplemented: break; } @@ -1586,8 +1578,8 @@ void X86Mir2Lir::EmitPcRel(const X86EncodingMap* entry, int32_t raw_reg, int32_t int32_t raw_index, int scale, int32_t table_or_disp) { int disp; if (entry->opcode == kX86PcRelLoadRA) { - const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(table_or_disp); - disp = tab_rec->offset; + const SwitchTable* tab_rec = UnwrapPointer<SwitchTable>(table_or_disp); + disp = tab_rec->offset - tab_rec->anchor->offset; } else { DCHECK(entry->opcode == kX86PcRelAdr); const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(raw_base_or_table); @@ -1621,23 +1613,6 @@ void X86Mir2Lir::EmitPcRel(const X86EncodingMap* entry, int32_t raw_reg, int32_t DCHECK_EQ(0, entry->skeleton.ax_opcode); } -void X86Mir2Lir::EmitMacro(const X86EncodingMap* entry, int32_t raw_reg, int32_t offset) { - DCHECK_EQ(entry->opcode, kX86StartOfMethod) << entry->name; - DCHECK_EQ(false, entry->skeleton.r8_form); - EmitPrefix(entry, raw_reg, NO_REG, NO_REG); - code_buffer_.push_back(0xE8); // call +0 - code_buffer_.push_back(0); - code_buffer_.push_back(0); - code_buffer_.push_back(0); - code_buffer_.push_back(0); - - uint8_t low_reg = LowRegisterBits(raw_reg); - code_buffer_.push_back(0x58 + low_reg); // pop reg - - EmitRegImm(&X86Mir2Lir::EncodingMap[cu_->target64 ? kX86Sub64RI : kX86Sub32RI], - raw_reg, offset + 5 /* size of call +0 */); -} - void X86Mir2Lir::EmitUnimplemented(const X86EncodingMap* entry, LIR* lir) { UNIMPLEMENTED(WARNING) << "encoding kind for " << entry->name << " " << BuildInsnString(entry->fmt, lir, 0); @@ -1780,7 +1755,8 @@ AssemblerStatus X86Mir2Lir::AssembleInstructions(CodeOffset start_addr) { // Offset is relative to next instruction. lir->operands[2] = target - (lir->offset + lir->flags.size); } else { - lir->operands[2] = target; + const LIR* anchor = UnwrapPointer<LIR>(lir->operands[4]); + lir->operands[2] = target - anchor->offset; int newSize = GetInsnSize(lir); if (newSize != lir->flags.size) { lir->flags.size = newSize; @@ -1951,9 +1927,6 @@ AssemblerStatus X86Mir2Lir::AssembleInstructions(CodeOffset start_addr) { EmitPcRel(entry, lir->operands[0], lir->operands[1], lir->operands[2], lir->operands[3], lir->operands[4]); break; - case kMacro: // lir operands - 0: reg - EmitMacro(entry, lir->operands[0], lir->offset); - break; case kNop: // TODO: these instruction kinds are missing implementations. case kThreadReg: case kRegArrayImm: @@ -2044,9 +2017,13 @@ void X86Mir2Lir::AssembleLIR() { cu_->NewTimingSplit("Assemble"); // We will remove the method address if we never ended up using it - if (store_method_addr_ && !store_method_addr_used_) { - setup_method_address_[0]->flags.is_nop = true; - setup_method_address_[1]->flags.is_nop = true; + if (pc_rel_base_reg_.Valid() && !pc_rel_base_reg_used_) { + if (kIsDebugBuild) { + LOG(WARNING) << "PC-relative addressing base promoted but unused in " + << PrettyMethod(cu_->method_idx, *cu_->dex_file); + } + setup_pc_rel_base_reg_->flags.is_nop = true; + NEXT_LIR(setup_pc_rel_base_reg_)->flags.is_nop = true; } AssignOffsets(); diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc index 18fae17d70..d7a5eb04db 100644 --- a/compiler/dex/quick/x86/call_x86.cc +++ b/compiler/dex/quick/x86/call_x86.cc @@ -97,29 +97,23 @@ void X86Mir2Lir::GenLargePackedSwitch(MIR* mir, DexOffset table_offset, RegLocat // Add the offset from the table to the table base. OpRegReg(kOpAdd, addr_for_jump, table_base); + tab_rec->anchor = nullptr; // Unused for x86-64. 
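// Note on the fixup above: for the 32-bit path below, EmitPcRel() and
// AssembleInstructions() now patch kX86PcRelLoadRA with
// tab_rec->offset - anchor->offset, i.e. the distance from the anchor's "pop"
// to the switch table, so [r_pc + key << 2 + disp] indexes the table off the
// anchor's runtime PC and the loaded entry is added back to r_pc to form the
// branch target. The x86-64 path uses RIP-relative addressing and needs no anchor.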
} else { - // Materialize a pointer to the switch table. - RegStorage start_of_method_reg; - if (base_of_code_ != nullptr) { - // We can use the saved value. - RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low); - rl_method = LoadValue(rl_method, kCoreReg); - start_of_method_reg = rl_method.reg; - store_method_addr_used_ = true; - } else { - start_of_method_reg = AllocTempRef(); - NewLIR1(kX86StartOfMethod, start_of_method_reg.GetReg()); - } + // Get the PC to a register and get the anchor. + LIR* anchor; + RegStorage r_pc = GetPcAndAnchor(&anchor); + // Load the displacement from the switch table. addr_for_jump = AllocTemp(); - NewLIR5(kX86PcRelLoadRA, addr_for_jump.GetReg(), start_of_method_reg.GetReg(), keyReg.GetReg(), + NewLIR5(kX86PcRelLoadRA, addr_for_jump.GetReg(), r_pc.GetReg(), keyReg.GetReg(), 2, WrapPointer(tab_rec)); - // Add displacement to start of method. - OpRegReg(kOpAdd, addr_for_jump, start_of_method_reg); + // Add displacement and r_pc to get the address. + OpRegReg(kOpAdd, addr_for_jump, r_pc); + tab_rec->anchor = anchor; } // ..and go! - tab_rec->anchor = NewLIR1(kX86JmpR, addr_for_jump.GetReg()); + NewLIR1(kX86JmpR, addr_for_jump.GetReg()); /* branch_over target here */ LIR* target = NewLIR0(kPseudoTargetLabel); @@ -150,6 +144,10 @@ void X86Mir2Lir::UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) { FreeTemp(reg_card_no); } +static dwarf::Reg DwarfCoreReg(bool is_x86_64, int num) { + return is_x86_64 ? dwarf::Reg::X86_64Core(num) : dwarf::Reg::X86Core(num); +} + void X86Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { /* * On entry, rX86_ARG0, rX86_ARG1, rX86_ARG2 are live. Let the register @@ -184,7 +182,9 @@ void X86Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { } /* Build frame, return address already on stack */ + cfi_.SetCurrentCFAOffset(GetInstructionSetPointerSize(cu_->instruction_set)); OpRegImm(kOpSub, rs_rSP, frame_size_ - GetInstructionSetPointerSize(cu_->instruction_set)); + cfi_.DefCFAOffset(frame_size_); /* Spill core callee saves */ SpillCoreRegs(); @@ -201,10 +201,12 @@ void X86Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { GenerateTargetLabel(kPseudoThrowTarget); const RegStorage local_rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32; m2l_->OpRegImm(kOpAdd, local_rs_rSP, sp_displace_); + m2l_->cfi().AdjustCFAOffset(-sp_displace_); m2l_->ClobberCallerSave(); // Assumes codegen and target are in thumb2 mode. m2l_->CallHelper(RegStorage::InvalidReg(), kQuickThrowStackOverflow, false /* MarkSafepointPC */, false /* UseLink */); + m2l_->cfi().AdjustCFAOffset(sp_displace_); } private: @@ -235,14 +237,12 @@ void X86Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { FlushIns(ArgLocs, rl_method); - if (base_of_code_ != nullptr) { - RegStorage method_start = TargetPtrReg(kArg0); - // We have been asked to save the address of the method start for later use. - setup_method_address_[0] = NewLIR1(kX86StartOfMethod, method_start.GetReg()); - int displacement = SRegOffset(base_of_code_->s_reg_low); - // Native pointer - must be natural word size. - setup_method_address_[1] = StoreBaseDisp(rs_rSP, displacement, method_start, - cu_->target64 ? k64 : k32, kNotVolatile); + // We can promote the PC of an anchor for PC-relative addressing to a register + // if it is used at least twice. Rather than working out the optimal point for a + // lazy load, we simply load it right after flushing inputs (see the note below).
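// A sketch of the anchor sequence that OpLoadPc() emits for this (it is the
// first half of the removed kX86StartOfMethod macro; encodings shown for
// illustration):
//   0x00000000: E8 00 00 00 00    call +0    ; pushes the address of the pop
//   0x00000005: 58+reg            pop <reg>  ; <reg> now holds the anchor PC
// The pop is the anchor insn: NEXT_LIR(setup_pc_rel_base_reg_).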
+ if (pc_rel_base_reg_.Valid()) { + DCHECK(!cu_->target64); + setup_pc_rel_base_reg_ = OpLoadPc(pc_rel_base_reg_); } FreeTemp(arg0); @@ -251,6 +251,7 @@ void X86Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { } void X86Mir2Lir::GenExitSequence() { + cfi_.RememberState(); /* * In the exit path, rX86_RET0/rX86_RET1 are live - make sure they aren't * allocated by the register utilities as temps. @@ -264,7 +265,12 @@ void X86Mir2Lir::GenExitSequence() { const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32; int adjust = frame_size_ - GetInstructionSetPointerSize(cu_->instruction_set); OpRegImm(kOpAdd, rs_rSP, adjust); + cfi_.AdjustCFAOffset(-adjust); + // There is only the return PC on the stack now. NewLIR0(kX86Ret); + // The CFI should be restored for any code that follows the exit block. + cfi_.RestoreState(); + cfi_.DefCFAOffset(frame_size_); } void X86Mir2Lir::GenSpecialExitSequence() { @@ -275,6 +281,8 @@ void X86Mir2Lir::GenSpecialEntryForSuspend() { // Keep 16-byte stack alignment; the return address is already on the stack, so // - for 32-bit push EAX, i.e. ArtMethod*, ESI, EDI, // - for 64-bit push RAX, i.e. ArtMethod*. + const int kRegSize = cu_->target64 ? 8 : 4; + cfi_.SetCurrentCFAOffset(kRegSize); // Return address. if (!cu_->target64) { DCHECK(!IsTemp(rs_rSI)); DCHECK(!IsTemp(rs_rDI)); @@ -292,17 +300,29 @@ void X86Mir2Lir::GenSpecialEntryForSuspend() { fp_vmap_table_.clear(); if (!cu_->target64) { NewLIR1(kX86Push32R, rs_rDI.GetReg()); + cfi_.AdjustCFAOffset(kRegSize); + cfi_.RelOffset(DwarfCoreReg(cu_->target64, rs_rDI.GetRegNum()), 0); NewLIR1(kX86Push32R, rs_rSI.GetReg()); + cfi_.AdjustCFAOffset(kRegSize); + cfi_.RelOffset(DwarfCoreReg(cu_->target64, rs_rSI.GetRegNum()), 0); } NewLIR1(kX86Push32R, TargetReg(kArg0, kRef).GetReg()); // ArtMethod* + cfi_.AdjustCFAOffset(kRegSize); + // Do not generate CFI for the scratch register. } void X86Mir2Lir::GenSpecialExitForSuspend() { + const int kRegSize = cu_->target64 ? 8 : 4; // Pop the frame. (ArtMethod* no longer needed but restore it anyway.)
NewLIR1(kX86Pop32R, TargetReg(kArg0, kRef).GetReg()); // ArtMethod* + cfi_.AdjustCFAOffset(-kRegSize); if (!cu_->target64) { NewLIR1(kX86Pop32R, rs_rSI.GetReg()); + cfi_.AdjustCFAOffset(-kRegSize); + cfi_.Restore(DwarfCoreReg(cu_->target64, rs_rSI.GetRegNum())); NewLIR1(kX86Pop32R, rs_rDI.GetReg()); + cfi_.AdjustCFAOffset(-kRegSize); + cfi_.Restore(DwarfCoreReg(cu_->target64, rs_rDI.GetRegNum())); } } diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h index a98a99ec4e..72580a3e39 100644 --- a/compiler/dex/quick/x86/codegen_x86.h +++ b/compiler/dex/quick/x86/codegen_x86.h @@ -28,7 +28,7 @@ namespace art { -class X86Mir2Lir : public Mir2Lir { +class X86Mir2Lir FINAL : public Mir2Lir { protected: class InToRegStorageX86_64Mapper : public InToRegStorageMapper { public: @@ -375,6 +375,10 @@ class X86Mir2Lir : public Mir2Lir { */ LIR* GenCallInsn(const MirMethodLoweringInfo& method_info) OVERRIDE; + void AnalyzeMIR(RefCounts* core_counts, MIR* mir, uint32_t weight) OVERRIDE; + void CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs) OVERRIDE; + void DoPromotion() OVERRIDE; + /* * @brief Handle x86 specific literals */ @@ -488,7 +492,6 @@ class X86Mir2Lir : public Mir2Lir { void EmitCallThread(const X86EncodingMap* entry, int32_t disp); void EmitPcRel(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_base_or_table, int32_t raw_index, int scale, int32_t table_or_disp); - void EmitMacro(const X86EncodingMap* entry, int32_t raw_reg, int32_t offset); void EmitUnimplemented(const X86EncodingMap* entry, LIR* lir); void GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1, int64_t val, ConditionCode ccode); @@ -859,12 +862,6 @@ class X86Mir2Lir : public Mir2Lir { void SpillFPRegs(); /* - * @brief Perform MIR analysis before compiling method. - * @note Invokes Mir2LiR::Materialize after analysis. - */ - void Materialize(); - - /* * Mir2Lir's UpdateLoc() looks to see if the Dalvik value is currently live in any temp register * without regard to data type. In practice, this can result in UpdateLoc returning a * location record for a Dalvik float value in a core register, and vice versa. For targets @@ -878,67 +875,39 @@ class X86Mir2Lir : public Mir2Lir { RegLocation UpdateLocWideTyped(RegLocation loc); /* - * @brief Analyze MIR before generating code, to prepare for the code generation. - */ - void AnalyzeMIR(); - - /* - * @brief Analyze one basic block. - * @param bb Basic block to analyze. - */ - void AnalyzeBB(BasicBlock* bb); - - /* - * @brief Analyze one extended MIR instruction - * @param opcode MIR instruction opcode. - * @param bb Basic block containing instruction. - * @param mir Extended instruction to analyze. - */ - void AnalyzeExtendedMIR(int opcode, BasicBlock* bb, MIR* mir); - - /* - * @brief Analyze one MIR instruction - * @param opcode MIR instruction opcode. - * @param bb Basic block containing instruction. - * @param mir Instruction to analyze. - */ - virtual void AnalyzeMIR(int opcode, BasicBlock* bb, MIR* mir); - - /* * @brief Analyze one MIR float/double instruction * @param opcode MIR instruction opcode. - * @param bb Basic block containing instruction. * @param mir Instruction to analyze. + * @return true iff the instruction needs to load a literal using PC-relative addressing. */ - virtual void AnalyzeFPInstruction(int opcode, BasicBlock* bb, MIR* mir); + bool AnalyzeFPInstruction(int opcode, MIR* mir); /* * @brief Analyze one use of a double operand. * @param rl_use Double RegLocation for the operand.
+ * @return true iff the instruction needs to load a literal using PC-relative addressing. */ - void AnalyzeDoubleUse(RegLocation rl_use); + bool AnalyzeDoubleUse(RegLocation rl_use); /* * @brief Analyze one invoke-static MIR instruction - * @param opcode MIR instruction opcode. - * @param bb Basic block containing instruction. * @param mir Instruction to analyze. + * @return true iff the instruction needs to load a literal using PC-relative addressing. */ - void AnalyzeInvokeStatic(int opcode, BasicBlock* bb, MIR* mir); + bool AnalyzeInvokeStaticIntrinsic(MIR* mir); // Information derived from analysis of MIR - // The compiler temporary for the code address of the method. - CompilerTemp *base_of_code_; - - // Have we decided to compute a ptr to code and store in temporary VR? - bool store_method_addr_; + // The base register for PC-relative addressing if promoted (32-bit only). + RegStorage pc_rel_base_reg_; - // Have we used the stored method address? - bool store_method_addr_used_; + // Have we actually used the pc_rel_base_reg_? + bool pc_rel_base_reg_used_; - // Instructions to remove if we didn't use the stored method address. - LIR* setup_method_address_[2]; + // Pointer to the "call +0" insn that sets up the promoted register for PC-relative addressing. + // The anchor "pop" insn is NEXT_LIR(setup_pc_rel_base_reg_). The whole "call +0; pop <reg>" + // sequence will be removed in AssembleLIR() if we do not actually use PC-relative addressing. + LIR* setup_pc_rel_base_reg_; // There are 2 chained insns (no reordering allowed). // Instructions needing patching with Method* values. ArenaVector<LIR*> method_address_insns_; @@ -992,6 +961,14 @@ class X86Mir2Lir : public Mir2Lir { uintptr_t direct_code, uintptr_t direct_method, InvokeType type); + LIR* OpLoadPc(RegStorage r_dest); + RegStorage GetPcAndAnchor(LIR** anchor, RegStorage r_tmp = RegStorage::InvalidReg()); + + // When we don't know the proper offset for the value, pick one that will force + // a 4-byte offset. We will fix this up in the assembler or linker later to have + // the right value. + static constexpr int kDummy32BitOffset = 256; + static const X86EncodingMap EncodingMap[kX86Last]; friend std::ostream& operator<<(std::ostream& os, const X86OpCode& rhs); diff --git a/compiler/dex/quick/x86/fp_x86.cc b/compiler/dex/quick/x86/fp_x86.cc index d8616a7bf3..cfe0480c54 100755 --- a/compiler/dex/quick/x86/fp_x86.cc +++ b/compiler/dex/quick/x86/fp_x86.cc @@ -756,24 +756,6 @@ bool X86Mir2Lir::GenInlinedMinMaxFP(CallInfo* info, bool is_min, bool is_double) branch_nan->target = NewLIR0(kPseudoTargetLabel); LoadConstantWide(rl_result.reg, INT64_C(0x7ff8000000000000)); - // The base_of_code_ compiler temp is non-null when it is reserved - // for being able to do data accesses relative to method start. - if (base_of_code_ != nullptr) { - // Loading from the constant pool may have used base of code register. - // However, the code here generates logic in diamond shape and not all - // paths load base of code register. Therefore, we ensure it is clobbered so - // that the temp caching system does not believe it is live at merge point. - RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low); - if (rl_method.wide) { - rl_method = UpdateLocWide(rl_method); - } else { - rl_method = UpdateLoc(rl_method); - } - if (rl_method.location == kLocPhysReg) { - Clobber(rl_method.reg); - } - } - LIR* branch_exit_nan = NewLIR1(kX86Jmp8, 0); // Handle Min/Max. Copy greater/lesser value from src2.
branch_cond1->target = NewLIR0(kPseudoTargetLabel); diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc index 5def5c8bb0..1043815e10 100755 --- a/compiler/dex/quick/x86/int_x86.cc +++ b/compiler/dex/quick/x86/int_x86.cc @@ -830,6 +830,10 @@ RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1, return rl_result; } +static dwarf::Reg DwarfCoreReg(bool is_x86_64, int num) { + return is_x86_64 ? dwarf::Reg::X86_64Core(num) : dwarf::Reg::X86Core(num); +} + bool X86Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) { DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64); @@ -928,6 +932,7 @@ bool X86Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) { // Do we have a free register for intermediate calculations? RegStorage tmp = AllocTemp(false); + const int kRegSize = cu_->target64 ? 8 : 4; if (tmp == RegStorage::InvalidReg()) { /* * No, will use 'edi'. @@ -946,6 +951,11 @@ bool X86Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) { IsTemp(rl_result.reg.GetHigh())); tmp = rs_rDI; NewLIR1(kX86Push32R, tmp.GetReg()); + cfi_.AdjustCFAOffset(kRegSize); + // Record cfi only if it is not already spilled. + if (!CoreSpillMaskContains(tmp.GetReg())) { + cfi_.RelOffset(DwarfCoreReg(cu_->target64, tmp.GetReg()), 0); + } } // Now we are ready to do calculations. @@ -957,6 +967,10 @@ bool X86Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) { // Let's put pop 'edi' here to break a bit the dependency chain. if (tmp == rs_rDI) { NewLIR1(kX86Pop32R, tmp.GetReg()); + cfi_.AdjustCFAOffset(-kRegSize); + if (!CoreSpillMaskContains(tmp.GetReg())) { + cfi_.Restore(DwarfCoreReg(cu_->target64, tmp.GetReg())); + } } else { FreeTemp(tmp); } @@ -1104,6 +1118,7 @@ bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { // If is_long, high half is in info->args[5] RegLocation rl_src_new_value = info->args[is_long ? 6 : 5]; // int, long or Object // If is_long, high half is in info->args[7] + const int kRegSize = cu_->target64 ? 8 : 4; if (is_long && cu_->target64) { // RAX must hold expected for CMPXCHG. Neither rl_new_value, nor r_ptr may be in RAX. @@ -1125,7 +1140,6 @@ bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { FreeTemp(rs_r0q); } else if (is_long) { // TODO: avoid unnecessary loads of SI and DI when the values are in registers. - // TODO: CFI support. FlushAllRegs(); LockCallTemps(); RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_rAX, rs_rDX); @@ -1148,11 +1162,21 @@ bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { NewLIR1(kX86Push32R, rs_rDI.GetReg()); MarkTemp(rs_rDI); LockTemp(rs_rDI); + cfi_.AdjustCFAOffset(kRegSize); + // Record cfi only if it is not already spilled. + if (!CoreSpillMaskContains(rs_rDI.GetReg())) { + cfi_.RelOffset(DwarfCoreReg(cu_->target64, rs_rDI.GetReg()), 0); + } } if (push_si) { NewLIR1(kX86Push32R, rs_rSI.GetReg()); MarkTemp(rs_rSI); LockTemp(rs_rSI); + cfi_.AdjustCFAOffset(kRegSize); + // Record cfi only if it is not already spilled. + if (!CoreSpillMaskContains(rs_rSI.GetReg())) { + cfi_.RelOffset(DwarfCoreReg(cu_->target64, rs_rSI.GetReg()), 0); + } } ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); const size_t push_offset = (push_si ? 4u : 0u) + (push_di ? 
4u : 0u); @@ -1183,11 +1207,19 @@ bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { FreeTemp(rs_rSI); UnmarkTemp(rs_rSI); NewLIR1(kX86Pop32R, rs_rSI.GetReg()); + cfi_.AdjustCFAOffset(-kRegSize); + if (!CoreSpillMaskContains(rs_rSI.GetReg())) { + cfi_.Restore(DwarfCoreReg(cu_->target64, rs_rSI.GetRegNum())); + } } if (push_di) { FreeTemp(rs_rDI); UnmarkTemp(rs_rDI); NewLIR1(kX86Pop32R, rs_rDI.GetReg()); + cfi_.AdjustCFAOffset(-kRegSize); + if (!CoreSpillMaskContains(rs_rDI.GetReg())) { + cfi_.Restore(DwarfCoreReg(cu_->target64, rs_rDI.GetRegNum())); + } } FreeCallTemps(); } else { @@ -1324,11 +1356,6 @@ bool X86Mir2Lir::GenInlinedReverseBits(CallInfo* info, OpSize size) { return true; } -// When we don't know the proper offset for the value, pick one that will force -// 4 byte offset. We will fix this up in the assembler or linker later to have -// the right value. -static constexpr int kDummy32BitOffset = 256; - void X86Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) { if (cu_->target64) { // We can do this directly using RIP addressing. @@ -1339,27 +1366,48 @@ void X86Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) { return; } - CHECK(base_of_code_ != nullptr); - - // Address the start of the method - RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low); - if (rl_method.wide) { - LoadValueDirectWideFixed(rl_method, reg); - } else { - LoadValueDirectFixed(rl_method, reg); - } - store_method_addr_used_ = true; + // Get the PC to a register and get the anchor. + LIR* anchor; + RegStorage r_pc = GetPcAndAnchor(&anchor); // Load the proper value from the literal area. ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral); - LIR* res = NewLIR3(kX86Mov32RM, reg.GetReg(), reg.GetReg(), kDummy32BitOffset); + LIR* res = NewLIR3(kX86Mov32RM, reg.GetReg(), r_pc.GetReg(), kDummy32BitOffset); + res->operands[4] = WrapPointer(anchor); res->target = target; res->flags.fixup = kFixupLoad; } bool X86Mir2Lir::CanUseOpPcRelDexCacheArrayLoad() const { - // TODO: Implement for 32-bit. - return cu_->target64 && dex_cache_arrays_layout_.Valid(); + return dex_cache_arrays_layout_.Valid(); +} + +LIR* X86Mir2Lir::OpLoadPc(RegStorage r_dest) { + DCHECK(!cu_->target64); + LIR* call = NewLIR1(kX86CallI, 0); + call->flags.fixup = kFixupLabel; + LIR* pop = NewLIR1(kX86Pop32R, r_dest.GetReg()); + pop->flags.fixup = kFixupLabel; + DCHECK(NEXT_LIR(call) == pop); + return call; +} + +RegStorage X86Mir2Lir::GetPcAndAnchor(LIR** anchor, RegStorage r_tmp) { + if (pc_rel_base_reg_.Valid()) { + DCHECK(setup_pc_rel_base_reg_ != nullptr); + *anchor = NEXT_LIR(setup_pc_rel_base_reg_); + DCHECK(*anchor != nullptr); + DCHECK_EQ((*anchor)->opcode, kX86Pop32R); + pc_rel_base_reg_used_ = true; + return pc_rel_base_reg_; + } else { + RegStorage r_pc = r_tmp.Valid() ? r_tmp : AllocTempRef(); + LIR* load_pc = OpLoadPc(r_pc); + *anchor = NEXT_LIR(load_pc); + DCHECK(*anchor != nullptr); + DCHECK_EQ((*anchor)->opcode, kX86Pop32R); + return r_pc; + } } void X86Mir2Lir::OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset, @@ -1369,11 +1417,18 @@ void X86Mir2Lir::OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset, mov->flags.fixup = kFixupLabel; mov->operands[3] = WrapPointer(dex_file); mov->operands[4] = offset; + mov->target = mov; // Used for pc_insn_offset (not used by x86-64 relative patcher). dex_cache_access_insns_.push_back(mov); } else { - // TODO: Implement for 32-bit. 
- LOG(FATAL) << "Unimplemented."; - UNREACHABLE(); + // Get the PC to a register and get the anchor. Use r_dest for the temp if needed. + LIR* anchor; + RegStorage r_pc = GetPcAndAnchor(&anchor, r_dest); + LIR* mov = NewLIR3(kX86Mov32RM, r_dest.GetReg(), r_pc.GetReg(), kDummy32BitOffset); + mov->flags.fixup = kFixupLabel; + mov->operands[3] = WrapPointer(dex_file); + mov->operands[4] = offset; + mov->target = anchor; // Used for pc_insn_offset. + dex_cache_access_insns_.push_back(mov); } } diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc index 081f80fed6..a16e242d08 100755 --- a/compiler/dex/quick/x86/target_x86.cc +++ b/compiler/dex/quick/x86/target_x86.cc @@ -724,6 +724,14 @@ int X86Mir2Lir::NumReservableVectorRegisters(bool long_or_fp) { return long_or_fp ? num_vector_temps - 2 : num_vector_temps - 1; } +static dwarf::Reg DwarfCoreReg(bool is_x86_64, int num) { + return is_x86_64 ? dwarf::Reg::X86_64Core(num) : dwarf::Reg::X86Core(num); +} + +static dwarf::Reg DwarfFpReg(bool is_x86_64, int num) { + return is_x86_64 ? dwarf::Reg::X86_64Fp(num) : dwarf::Reg::X86Fp(num); +} + void X86Mir2Lir::SpillCoreRegs() { if (num_core_spills_ == 0) { return; @@ -734,11 +742,11 @@ void X86Mir2Lir::SpillCoreRegs() { frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_); OpSize size = cu_->target64 ? k64 : k32; const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32; - for (int reg = 0; mask; mask >>= 1, reg++) { - if (mask & 0x1) { - StoreBaseDisp(rs_rSP, offset, - cu_->target64 ? RegStorage::Solo64(reg) : RegStorage::Solo32(reg), - size, kNotVolatile); + for (int reg = 0; mask != 0u; mask >>= 1, reg++) { + if ((mask & 0x1) != 0u) { + RegStorage r_src = cu_->target64 ? RegStorage::Solo64(reg) : RegStorage::Solo32(reg); + StoreBaseDisp(rs_rSP, offset, r_src, size, kNotVolatile); + cfi_.RelOffset(DwarfCoreReg(cu_->target64, reg), offset); offset += GetInstructionSetPointerSize(cu_->instruction_set); } } @@ -753,10 +761,11 @@ void X86Mir2Lir::UnSpillCoreRegs() { int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_); OpSize size = cu_->target64 ? k64 : k32; const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32; - for (int reg = 0; mask; mask >>= 1, reg++) { - if (mask & 0x1) { - LoadBaseDisp(rs_rSP, offset, cu_->target64 ? RegStorage::Solo64(reg) : RegStorage::Solo32(reg), - size, kNotVolatile); + for (int reg = 0; mask != 0u; mask >>= 1, reg++) { + if ((mask & 0x1) != 0u) { + RegStorage r_dest = cu_->target64 ? RegStorage::Solo64(reg) : RegStorage::Solo32(reg); + LoadBaseDisp(rs_rSP, offset, r_dest, size, kNotVolatile); + cfi_.Restore(DwarfCoreReg(cu_->target64, reg)); offset += GetInstructionSetPointerSize(cu_->instruction_set); } } @@ -770,9 +779,10 @@ void X86Mir2Lir::SpillFPRegs() { int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * (num_fp_spills_ + num_core_spills_)); const RegStorage rs_rSP = cu_->target64 ? 
rs_rX86_SP_64 : rs_rX86_SP_32; - for (int reg = 0; mask; mask >>= 1, reg++) { - if (mask & 0x1) { + for (int reg = 0; mask != 0u; mask >>= 1, reg++) { + if ((mask & 0x1) != 0u) { StoreBaseDisp(rs_rSP, offset, RegStorage::FloatSolo64(reg), k64, kNotVolatile); + cfi_.RelOffset(DwarfFpReg(cu_->target64, reg), offset); offset += sizeof(double); } } @@ -785,10 +795,11 @@ void X86Mir2Lir::UnSpillFPRegs() { int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * (num_fp_spills_ + num_core_spills_)); const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32; - for (int reg = 0; mask; mask >>= 1, reg++) { - if (mask & 0x1) { + for (int reg = 0; mask != 0u; mask >>= 1, reg++) { + if ((mask & 0x1) != 0u) { LoadBaseDisp(rs_rSP, offset, RegStorage::FloatSolo64(reg), k64, kNotVolatile); + cfi_.Restore(DwarfFpReg(cu_->target64, reg)); offset += sizeof(double); } } @@ -824,7 +835,9 @@ RegisterClass X86Mir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volatil X86Mir2Lir::X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena) : Mir2Lir(cu, mir_graph, arena), in_to_reg_storage_x86_64_mapper_(this), in_to_reg_storage_x86_mapper_(this), - base_of_code_(nullptr), store_method_addr_(false), store_method_addr_used_(false), + pc_rel_base_reg_(RegStorage::InvalidReg()), + pc_rel_base_reg_used_(false), + setup_pc_rel_base_reg_(nullptr), method_address_insns_(arena->Adapter()), class_type_address_insns_(arena->Adapter()), call_method_insns_(arena->Adapter()), @@ -833,12 +846,11 @@ X86Mir2Lir::X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* method_address_insns_.reserve(100); class_type_address_insns_.reserve(100); call_method_insns_.reserve(100); - store_method_addr_used_ = false; - for (int i = 0; i < kX86Last; i++) { - DCHECK_EQ(X86Mir2Lir::EncodingMap[i].opcode, i) - << "Encoding order for " << X86Mir2Lir::EncodingMap[i].name - << " is wrong: expecting " << i << ", seeing " - << static_cast<int>(X86Mir2Lir::EncodingMap[i].opcode); + for (int i = 0; i < kX86Last; i++) { + DCHECK_EQ(X86Mir2Lir::EncodingMap[i].opcode, i) + << "Encoding order for " << X86Mir2Lir::EncodingMap[i].name + << " is wrong: expecting " << i << ", seeing " + << static_cast<int>(X86Mir2Lir::EncodingMap[i].opcode); } } @@ -923,14 +935,6 @@ void X86Mir2Lir::DumpRegLocation(RegLocation loc) { << ", orig: " << loc.orig_sreg; } -void X86Mir2Lir::Materialize() { - // A good place to put the analysis before starting. - AnalyzeMIR(); - - // Now continue with regular code generation. - Mir2Lir::Materialize(); -} - void X86Mir2Lir::LoadMethodAddress(const MethodReference& target_method, InvokeType type, SpecialTargetRegister symbolic_reg) { /* @@ -1105,7 +1109,8 @@ void X86Mir2Lir::InstallLiteralPools() { // The offset to patch is the last 4 bytes of the instruction. int patch_offset = p->offset + p->flags.size - 4; DCHECK(!p->flags.is_nop); - patches_.push_back(LinkerPatch::DexCacheArrayPatch(patch_offset, dex_file, p->offset, offset)); + patches_.push_back(LinkerPatch::DexCacheArrayPatch(patch_offset, dex_file, + p->target->offset, offset)); } // And do the normal processing. @@ -1315,6 +1320,11 @@ bool X86Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) { if (!cu_->target64) { // EDI is promotable in 32-bit mode. NewLIR1(kX86Push32R, rs_rDI.GetReg()); + cfi_.AdjustCFAOffset(4); + // Record cfi only if it is not already spilled. 
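// (If EDI is in the core spill mask, the prologue already emitted a
// DW_CFA_offset describing its save slot; describing this extra push too
// would make the unwinder restore EDI from the wrong slot.)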
+ if (!CoreSpillMaskContains(rs_rDI.GetReg())) { + cfi_.RelOffset(DwarfCoreReg(cu_->target64, rs_rDI.GetReg()), 0); + } } if (zero_based) { @@ -1410,8 +1420,13 @@ bool X86Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) { // And join up at the end. all_done->target = NewLIR0(kPseudoTargetLabel); - if (!cu_->target64) + if (!cu_->target64) { NewLIR1(kX86Pop32R, rs_rDI.GetReg()); + cfi_.AdjustCFAOffset(-4); + if (!CoreSpillMaskContains(rs_rDI.GetReg())) { + cfi_.Restore(DwarfCoreReg(cu_->target64, rs_rDI.GetReg())); + } + } // Out of line code returns here. if (slowpath_branch != nullptr) { @@ -1560,20 +1575,17 @@ void X86Mir2Lir::AppendOpcodeWithConst(X86OpCode opcode, int reg, MIR* mir) { LIR* load; ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral); if (cu_->target64) { - load = NewLIR3(opcode, reg, kRIPReg, 256 /* bogus */); + load = NewLIR3(opcode, reg, kRIPReg, kDummy32BitOffset); } else { - // Address the start of the method. - RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low); - if (rl_method.wide) { - rl_method = LoadValueWide(rl_method, kCoreReg); - } else { - rl_method = LoadValue(rl_method, kCoreReg); + // Get the PC to a register and get the anchor. + LIR* anchor; + RegStorage r_pc = GetPcAndAnchor(&anchor); + + load = NewLIR3(opcode, reg, r_pc.GetReg(), kDummy32BitOffset); + load->operands[4] = WrapPointer(anchor); + if (IsTemp(r_pc)) { + FreeTemp(r_pc); } - - load = NewLIR3(opcode, reg, rl_method.reg.GetReg(), 256 /* bogus */); - - // The literal pool needs position independent logic. - store_method_addr_used_ = true; } load->flags.fixup = kFixupLoad; load->target = data_target; diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc index 893b98a49d..efcb9eefb5 100644 --- a/compiler/dex/quick/x86/utility_x86.cc +++ b/compiler/dex/quick/x86/utility_x86.cc @@ -17,6 +17,7 @@ #include "codegen_x86.h" #include "base/logging.h" +#include "dex/mir_graph.h" #include "dex/quick/mir_to_lir-inl.h" #include "dex/dataflow_iterator-inl.h" #include "dex/quick/dex_file_method_inliner.h" @@ -574,7 +575,7 @@ LIR* X86Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) { DCHECK(r_dest.IsDouble()); if (value == 0) { return NewLIR2(kX86XorpdRR, low_reg_val, low_reg_val); - } else if (base_of_code_ != nullptr || cu_->target64) { + } else if (pc_rel_base_reg_.Valid() || cu_->target64) { // We will load the value from the literal area. LIR* data_target = ScanLiteralPoolWide(literal_list_, val_lo, val_hi); if (data_target == NULL) { @@ -589,17 +590,16 @@ LIR* X86Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) { if (cu_->target64) { res = NewLIR3(kX86MovsdRM, low_reg_val, kRIPReg, 256 /* bogus */); } else { - // Address the start of the method. - RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low); - if (rl_method.wide) { - rl_method = LoadValueWide(rl_method, kCoreReg); - } else { - rl_method = LoadValue(rl_method, kCoreReg); - } + // Get the PC to a register and get the anchor. 
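// (GetPcAndAnchor() returns the promoted base register when one is
// available; otherwise it materializes the PC into a temp, which is freed
// again below.)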
+ LIR* anchor; + RegStorage r_pc = GetPcAndAnchor(&anchor); - res = LoadBaseDisp(rl_method.reg, 256 /* bogus */, RegStorage::FloatSolo64(low_reg_val), + res = LoadBaseDisp(r_pc, kDummy32BitOffset, RegStorage::FloatSolo64(low_reg_val), kDouble, kNotVolatile); - store_method_addr_used_ = true; + res->operands[4] = WrapPointer(anchor); + if (IsTemp(r_pc)) { + FreeTemp(r_pc); + } } res->target = data_target; res->flags.fixup = kFixupLoad; @@ -954,82 +954,14 @@ LIR* X86Mir2Lir::OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg, RegS return branch; } -void X86Mir2Lir::AnalyzeMIR() { - // Assume we don't need a pointer to the base of the code. - cu_->NewTimingSplit("X86 MIR Analysis"); - store_method_addr_ = false; - - // Walk the MIR looking for interesting items. - PreOrderDfsIterator iter(mir_graph_); - BasicBlock* curr_bb = iter.Next(); - while (curr_bb != NULL) { - AnalyzeBB(curr_bb); - curr_bb = iter.Next(); - } - - // Did we need a pointer to the method code? Not in 64 bit mode. - base_of_code_ = nullptr; - - // store_method_addr_ must be false for x86_64, since RIP addressing is used. - CHECK(!(cu_->target64 && store_method_addr_)); - if (store_method_addr_) { - base_of_code_ = mir_graph_->GetNewCompilerTemp(kCompilerTempBackend, false); - DCHECK(base_of_code_ != nullptr); - } -} - -void X86Mir2Lir::AnalyzeBB(BasicBlock* bb) { - if (bb->block_type == kDead) { - // Ignore dead blocks +void X86Mir2Lir::AnalyzeMIR(RefCounts* core_counts, MIR* mir, uint32_t weight) { + if (cu_->target64) { + Mir2Lir::AnalyzeMIR(core_counts, mir, weight); return; } - for (MIR* mir = bb->first_mir_insn; mir != NULL; mir = mir->next) { - int opcode = mir->dalvikInsn.opcode; - if (MIR::DecodedInstruction::IsPseudoMirOp(opcode)) { - AnalyzeExtendedMIR(opcode, bb, mir); - } else { - AnalyzeMIR(opcode, bb, mir); - } - } -} - - -void X86Mir2Lir::AnalyzeExtendedMIR(int opcode, BasicBlock* bb, MIR* mir) { - switch (opcode) { - // Instructions referencing doubles. - case kMirOpFusedCmplDouble: - case kMirOpFusedCmpgDouble: - AnalyzeFPInstruction(opcode, bb, mir); - break; - case kMirOpConstVector: - if (!cu_->target64) { - store_method_addr_ = true; - } - break; - case kMirOpPackedMultiply: - case kMirOpPackedShiftLeft: - case kMirOpPackedSignedShiftRight: - case kMirOpPackedUnsignedShiftRight: - if (!cu_->target64) { - // Byte emulation requires constants from the literal pool. - OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vC >> 16); - if (opsize == kSignedByte || opsize == kUnsignedByte) { - store_method_addr_ = true; - } - } - break; - default: - // Ignore the rest. - break; - } -} - -void X86Mir2Lir::AnalyzeMIR(int opcode, BasicBlock* bb, MIR* mir) { - // Looking for - // - Do we need a pointer to the code (used for packed switches and double lits)? - // 64 bit uses RIP addressing instead. - + int opcode = mir->dalvikInsn.opcode; + bool uses_pc_rel_load = false; switch (opcode) { // Instructions referencing doubles. case Instruction::CMPL_DOUBLE: @@ -1045,34 +977,62 @@ void X86Mir2Lir::AnalyzeMIR(int opcode, BasicBlock* bb, MIR* mir) { case Instruction::MUL_DOUBLE_2ADDR: case Instruction::DIV_DOUBLE_2ADDR: case Instruction::REM_DOUBLE_2ADDR: - AnalyzeFPInstruction(opcode, bb, mir); + case kMirOpFusedCmplDouble: + case kMirOpFusedCmpgDouble: + uses_pc_rel_load = AnalyzeFPInstruction(opcode, mir); break; - // Packed switches and array fills need a pointer to the base of the method. - case Instruction::FILL_ARRAY_DATA: + // Packed switch needs the PC-relative pointer if it's large. 
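// (Switches at or below kSmallSwitchThreshold are presumably lowered to
// compare/branch sequences without a table, so they do not need the anchor.)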
case Instruction::PACKED_SWITCH: - if (!cu_->target64) { - store_method_addr_ = true; + if (mir_graph_->GetTable(mir, mir->dalvikInsn.vB)[1] > kSmallSwitchThreshold) { + uses_pc_rel_load = true; } break; + + case kMirOpConstVector: + uses_pc_rel_load = true; + break; + case kMirOpPackedMultiply: + case kMirOpPackedShiftLeft: + case kMirOpPackedSignedShiftRight: + case kMirOpPackedUnsignedShiftRight: + { + // Byte emulation requires constants from the literal pool. + OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vC >> 16); + if (opsize == kSignedByte || opsize == kUnsignedByte) { + uses_pc_rel_load = true; + } + } + break; + case Instruction::INVOKE_STATIC: case Instruction::INVOKE_STATIC_RANGE: - AnalyzeInvokeStatic(opcode, bb, mir); - break; + if (mir_graph_->GetMethodLoweringInfo(mir).IsIntrinsic()) { + uses_pc_rel_load = AnalyzeInvokeStaticIntrinsic(mir); + break; + } + FALLTHROUGH_INTENDED; default: - // Other instructions are not interesting yet. + Mir2Lir::AnalyzeMIR(core_counts, mir, weight); break; } + + if (uses_pc_rel_load) { + DCHECK(pc_rel_temp_ != nullptr); + core_counts[SRegToPMap(pc_rel_temp_->s_reg_low)].count += weight; + } } -void X86Mir2Lir::AnalyzeFPInstruction(int opcode, BasicBlock* bb, MIR* mir) { - UNUSED(bb); +bool X86Mir2Lir::AnalyzeFPInstruction(int opcode, MIR* mir) { + DCHECK(!cu_->target64); // Look at all the uses, and see if they are double constants. uint64_t attrs = MIRGraph::GetDataFlowAttributes(static_cast<Instruction::Code>(opcode)); int next_sreg = 0; if (attrs & DF_UA) { if (attrs & DF_A_WIDE) { - AnalyzeDoubleUse(mir_graph_->GetSrcWide(mir, next_sreg)); + if (AnalyzeDoubleUse(mir_graph_->GetSrcWide(mir, next_sreg))) { + return true; + } next_sreg += 2; } else { next_sreg++; @@ -1080,7 +1040,9 @@ void X86Mir2Lir::AnalyzeFPInstruction(int opcode, BasicBlock* bb, MIR* mir) { } if (attrs & DF_UB) { if (attrs & DF_B_WIDE) { - AnalyzeDoubleUse(mir_graph_->GetSrcWide(mir, next_sreg)); + if (AnalyzeDoubleUse(mir_graph_->GetSrcWide(mir, next_sreg))) { + return true; + } next_sreg += 2; } else { next_sreg++; @@ -1088,15 +1050,39 @@ void X86Mir2Lir::AnalyzeFPInstruction(int opcode, BasicBlock* bb, MIR* mir) { } if (attrs & DF_UC) { if (attrs & DF_C_WIDE) { - AnalyzeDoubleUse(mir_graph_->GetSrcWide(mir, next_sreg)); + if (AnalyzeDoubleUse(mir_graph_->GetSrcWide(mir, next_sreg))) { + return true; + } } } + return false; } -void X86Mir2Lir::AnalyzeDoubleUse(RegLocation use) { +inline bool X86Mir2Lir::AnalyzeDoubleUse(RegLocation use) { // If this is a double literal, we will want it in the literal pool on 32b platforms. - if (use.is_const && !cu_->target64) { - store_method_addr_ = true; + DCHECK(!cu_->target64); + return use.is_const; +} + +bool X86Mir2Lir::AnalyzeInvokeStaticIntrinsic(MIR* mir) { + // 64 bit RIP addressing doesn't need this analysis. + DCHECK(!cu_->target64); + + // Retrieve the type of the intrinsic. 
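// (Only intrinsics that load double constants from the literal pool need
// the anchor on 32-bit x86; the switch below returns true exactly for
// abs(double) and min/max(double).)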
+ MethodReference method_ref = mir_graph_->GetMethodLoweringInfo(mir).GetTargetMethod(); + DCHECK(cu_->compiler_driver->GetMethodInlinerMap() != nullptr); + DexFileMethodInliner* method_inliner = + cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(method_ref.dex_file); + InlineMethod method; + bool is_intrinsic = method_inliner->IsIntrinsic(method_ref.dex_method_index, &method); + DCHECK(is_intrinsic); + + switch (method.opcode) { + case kIntrinsicAbsDouble: + case kIntrinsicMinMaxDouble: + return true; + default: + return false; } } @@ -1128,37 +1114,47 @@ RegLocation X86Mir2Lir::UpdateLocWideTyped(RegLocation loc) { return loc; } -void X86Mir2Lir::AnalyzeInvokeStatic(int opcode, BasicBlock* bb, MIR* mir) { - UNUSED(opcode, bb); - - // 64 bit RIP addressing doesn't need store_method_addr_ set. +LIR* X86Mir2Lir::InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) { + UNUSED(r_tgt); // Call to absolute memory location doesn't need a temporary target register. if (cu_->target64) { - return; + return OpThreadMem(op, GetThreadOffset<8>(trampoline)); + } else { + return OpThreadMem(op, GetThreadOffset<4>(trampoline)); } +} - uint32_t index = mir->dalvikInsn.vB; - DCHECK(cu_->compiler_driver->GetMethodInlinerMap() != nullptr); - DexFileMethodInliner* method_inliner = - cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(cu_->dex_file); - InlineMethod method; - if (method_inliner->IsIntrinsic(index, &method)) { - switch (method.opcode) { - case kIntrinsicAbsDouble: - case kIntrinsicMinMaxDouble: - store_method_addr_ = true; - break; - default: - break; +void X86Mir2Lir::CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs) { + // Start with the default counts. + Mir2Lir::CountRefs(core_counts, fp_counts, num_regs); + + if (pc_rel_temp_ != nullptr) { + // Now, if the dex cache array base temp is used only once outside any loops (weight = 1), + // avoid the promotion; otherwise, boost the weight by a factor of 2, because the full + // PC-relative load sequence is 3 instructions long and promoting the PC base saves + // 2 instructions per use. + int p_map_idx = SRegToPMap(pc_rel_temp_->s_reg_low); + if (core_counts[p_map_idx].count == 1) { + core_counts[p_map_idx].count = 0; + } else { + core_counts[p_map_idx].count *= 2; } } } -LIR* X86Mir2Lir::InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) { - UNUSED(r_tgt); // Call to absolute memory location doesn't need a temporary target register. - if (cu_->target64) { - return OpThreadMem(op, GetThreadOffset<8>(trampoline)); - } else { - return OpThreadMem(op, GetThreadOffset<4>(trampoline)); +void X86Mir2Lir::DoPromotion() { + if (!cu_->target64) { + pc_rel_temp_ = mir_graph_->GetNewCompilerTemp(kCompilerTempBackend, false); + } + + Mir2Lir::DoPromotion(); + + if (pc_rel_temp_ != nullptr) { + // Now, if the dex cache array base temp is promoted, remember the register but + // always remove the temp's stack location to avoid unnecessarily bloating the stack.
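// (If the temp was not promoted to a physical register, pc_rel_base_reg_
// stays invalid and each use falls back to a local "call +0; pop" sequence
// via GetPcAndAnchor().)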
+ pc_rel_base_reg_ = mir_graph_->reg_location_[pc_rel_temp_->s_reg_low].reg; + DCHECK(!pc_rel_base_reg_.Valid() || !pc_rel_base_reg_.IsFloat()); + mir_graph_->RemoveLastCompilerTemp(kCompilerTempBackend, false, pc_rel_temp_); + pc_rel_temp_ = nullptr; } } diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h index 7dea09a579..57db0158e4 100644 --- a/compiler/dex/quick/x86/x86_lir.h +++ b/compiler/dex/quick/x86/x86_lir.h @@ -635,8 +635,6 @@ enum X86OpCode { kX86CallT, // call fs:[disp]; fs: is equal to Thread::Current(); lir operands - 0: disp kX86CallI, // call <relative> - 0: disp; Used for core.oat linking only kX86Ret, // ret; no lir operands - kX86StartOfMethod, // call 0; pop reg; sub reg, # - generate start of method into reg - // lir operands - 0: reg kX86PcRelLoadRA, // mov reg, [base + index * scale + PC relative displacement] // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: table kX86PcRelAdr, // mov reg, PC relative displacement; lir operands - 0: reg, 1: table @@ -670,7 +668,6 @@ enum X86EncodingKind { kRegMemCond, // RM instruction kind followed by a condition. kJmp, kJcc, kCall, // Branch instruction kinds. kPcRel, // Operation with displacement that is PC relative - kMacro, // An instruction composing multiple others kUnimplemented // Encoding used when an instruction isn't yet implemented. }; diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index c2b837512c..f263f6d329 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -40,6 +40,7 @@ #include "dex/verified_method.h" #include "dex/quick/dex_file_method_inliner.h" #include "driver/compiler_options.h" +#include "elf_writer_quick.h" #include "jni_internal.h" #include "object_lock.h" #include "profiler.h" @@ -72,6 +73,9 @@ namespace art { static constexpr bool kTimeCompileMethod = !kIsDebugBuild; +// Whether to produce 64-bit ELF files for 64-bit targets. Leave this off for now. +static constexpr bool kProduce64BitELFFiles = false; + static double Percentage(size_t x, size_t y) { return 100.0 * (static_cast<double>(x)) / (static_cast<double>(x + y)); } @@ -2368,7 +2372,11 @@ bool CompilerDriver::WriteElf(const std::string& android_root, OatWriter* oat_writer, art::File* file) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - return compiler_->WriteElf(file, oat_writer, dex_files, android_root, is_host); + if (kProduce64BitELFFiles && Is64BitInstructionSet(GetInstructionSet())) { + return art::ElfWriterQuick64::Create(file, oat_writer, dex_files, android_root, is_host, *this); + } else { + return art::ElfWriterQuick32::Create(file, oat_writer, dex_files, android_root, is_host, *this); + } } bool CompilerDriver::SkipCompilation(const std::string& method_name) { diff --git a/compiler/dwarf/debug_frame_opcode_writer.h b/compiler/dwarf/debug_frame_opcode_writer.h index cc4ef8fde1..d0d182106f 100644 --- a/compiler/dwarf/debug_frame_opcode_writer.h +++ b/compiler/dwarf/debug_frame_opcode_writer.h @@ -20,6 +20,7 @@ #include "dwarf.h" #include "register.h" #include "writer.h" +#include "utils.h" namespace art { namespace dwarf { @@ -41,45 +42,51 @@ class DebugFrameOpCodeWriter : private Writer<Allocator> { static constexpr int kCodeAlignmentFactor = 1; // Explicitly advance the program counter to the given location.
- void AdvancePC(int absolute_pc) { + void ALWAYS_INLINE AdvancePC(int absolute_pc) { DCHECK_GE(absolute_pc, current_pc_); - int delta = FactorCodeOffset(absolute_pc - current_pc_); - if (delta != 0) { - if (delta <= 0x3F) { - this->PushUint8(DW_CFA_advance_loc | delta); - } else if (delta <= UINT8_MAX) { - this->PushUint8(DW_CFA_advance_loc1); - this->PushUint8(delta); - } else if (delta <= UINT16_MAX) { - this->PushUint8(DW_CFA_advance_loc2); - this->PushUint16(delta); - } else { - this->PushUint8(DW_CFA_advance_loc4); - this->PushUint32(delta); + if (UNLIKELY(enabled_)) { + int delta = FactorCodeOffset(absolute_pc - current_pc_); + if (delta != 0) { + if (delta <= 0x3F) { + this->PushUint8(DW_CFA_advance_loc | delta); + } else if (delta <= UINT8_MAX) { + this->PushUint8(DW_CFA_advance_loc1); + this->PushUint8(delta); + } else if (delta <= UINT16_MAX) { + this->PushUint8(DW_CFA_advance_loc2); + this->PushUint16(delta); + } else { + this->PushUint8(DW_CFA_advance_loc4); + this->PushUint32(delta); + } } + current_pc_ = absolute_pc; } - current_pc_ = absolute_pc; } // Override this method to automatically advance the PC before each opcode. virtual void ImplicitlyAdvancePC() { } // Common alias in assemblers - spill relative to current stack pointer. - void RelOffset(Reg reg, int offset) { + void ALWAYS_INLINE RelOffset(Reg reg, int offset) { Offset(reg, offset - current_cfa_offset_); } // Common alias in assemblers - increase stack frame size. - void AdjustCFAOffset(int delta) { + void ALWAYS_INLINE AdjustCFAOffset(int delta) { DefCFAOffset(current_cfa_offset_ + delta); } // Custom alias - spill many registers based on bitmask. - void RelOffsetForMany(Reg reg_base, int offset, uint32_t reg_mask, - int reg_size) { + void ALWAYS_INLINE RelOffsetForMany(Reg reg_base, int offset, + uint32_t reg_mask, int reg_size) { DCHECK(reg_size == 4 || reg_size == 8); - for (int i = 0; reg_mask != 0u; reg_mask >>= 1, i++) { - if ((reg_mask & 1) != 0u) { + if (UNLIKELY(enabled_)) { + for (int i = 0; reg_mask != 0u; reg_mask >>= 1, i++) { + // Skip zero bits and go to the set bit. + int num_zeros = CTZ(reg_mask); + i += num_zeros; + reg_mask >>= num_zeros; RelOffset(Reg(reg_base.num() + i), offset); offset += reg_size; } @@ -87,171 +94,214 @@ class DebugFrameOpCodeWriter : private Writer<Allocator> { } // Custom alias - unspill many registers based on bitmask. - void RestoreMany(Reg reg_base, uint32_t reg_mask) { - for (int i = 0; reg_mask != 0u; reg_mask >>= 1, i++) { - if ((reg_mask & 1) != 0u) { + void ALWAYS_INLINE RestoreMany(Reg reg_base, uint32_t reg_mask) { + if (UNLIKELY(enabled_)) { + for (int i = 0; reg_mask != 0u; reg_mask >>= 1, i++) { + // Skip zero bits and go to the set bit. + int num_zeros = CTZ(reg_mask); + i += num_zeros; + reg_mask >>= num_zeros; Restore(Reg(reg_base.num() + i)); } } } - void Nop() { - this->PushUint8(DW_CFA_nop); + void ALWAYS_INLINE Nop() { + if (UNLIKELY(enabled_)) { + this->PushUint8(DW_CFA_nop); + } } - void Offset(Reg reg, int offset) { - ImplicitlyAdvancePC(); - int factored_offset = FactorDataOffset(offset); // May change sign. - if (factored_offset >= 0) { - if (0 <= reg.num() && reg.num() <= 0x3F) { - this->PushUint8(DW_CFA_offset | reg.num()); - this->PushUleb128(factored_offset); + void ALWAYS_INLINE Offset(Reg reg, int offset) { + if (UNLIKELY(enabled_)) { + ImplicitlyAdvancePC(); + int factored_offset = FactorDataOffset(offset); // May change sign. 
+ if (factored_offset >= 0) { + if (0 <= reg.num() && reg.num() <= 0x3F) { + this->PushUint8(DW_CFA_offset | reg.num()); + this->PushUleb128(factored_offset); + } else { + this->PushUint8(DW_CFA_offset_extended); + this->PushUleb128(reg.num()); + this->PushUleb128(factored_offset); + } } else { - this->PushUint8(DW_CFA_offset_extended); + uses_dwarf3_features_ = true; + this->PushUint8(DW_CFA_offset_extended_sf); this->PushUleb128(reg.num()); - this->PushUleb128(factored_offset); + this->PushSleb128(factored_offset); } - } else { - uses_dwarf3_features_ = true; - this->PushUint8(DW_CFA_offset_extended_sf); - this->PushUleb128(reg.num()); - this->PushSleb128(factored_offset); } } - void Restore(Reg reg) { - ImplicitlyAdvancePC(); - if (0 <= reg.num() && reg.num() <= 0x3F) { - this->PushUint8(DW_CFA_restore | reg.num()); - } else { - this->PushUint8(DW_CFA_restore_extended); - this->PushUleb128(reg.num()); + void ALWAYS_INLINE Restore(Reg reg) { + if (UNLIKELY(enabled_)) { + ImplicitlyAdvancePC(); + if (0 <= reg.num() && reg.num() <= 0x3F) { + this->PushUint8(DW_CFA_restore | reg.num()); + } else { + this->PushUint8(DW_CFA_restore_extended); + this->PushUleb128(reg.num()); + } } } - void Undefined(Reg reg) { - ImplicitlyAdvancePC(); - this->PushUint8(DW_CFA_undefined); - this->PushUleb128(reg.num()); + void ALWAYS_INLINE Undefined(Reg reg) { + if (UNLIKELY(enabled_)) { + ImplicitlyAdvancePC(); + this->PushUint8(DW_CFA_undefined); + this->PushUleb128(reg.num()); + } } - void SameValue(Reg reg) { - ImplicitlyAdvancePC(); - this->PushUint8(DW_CFA_same_value); - this->PushUleb128(reg.num()); + void ALWAYS_INLINE SameValue(Reg reg) { + if (UNLIKELY(enabled_)) { + ImplicitlyAdvancePC(); + this->PushUint8(DW_CFA_same_value); + this->PushUleb128(reg.num()); + } } // The previous value of "reg" is stored in register "new_reg". - void Register(Reg reg, Reg new_reg) { - ImplicitlyAdvancePC(); - this->PushUint8(DW_CFA_register); - this->PushUleb128(reg.num()); - this->PushUleb128(new_reg.num()); - } - - void RememberState() { - // Note that we do not need to advance the PC. - this->PushUint8(DW_CFA_remember_state); - } - - void RestoreState() { - ImplicitlyAdvancePC(); - this->PushUint8(DW_CFA_restore_state); + void ALWAYS_INLINE Register(Reg reg, Reg new_reg) { + if (UNLIKELY(enabled_)) { + ImplicitlyAdvancePC(); + this->PushUint8(DW_CFA_register); + this->PushUleb128(reg.num()); + this->PushUleb128(new_reg.num()); + } } - void DefCFA(Reg reg, int offset) { - ImplicitlyAdvancePC(); - if (offset >= 0) { - this->PushUint8(DW_CFA_def_cfa); - this->PushUleb128(reg.num()); - this->PushUleb128(offset); // Non-factored. 
- } else { - uses_dwarf3_features_ = true; - this->PushUint8(DW_CFA_def_cfa_sf); - this->PushUleb128(reg.num()); - this->PushSleb128(FactorDataOffset(offset)); + void ALWAYS_INLINE RememberState() { + if (UNLIKELY(enabled_)) { + ImplicitlyAdvancePC(); + this->PushUint8(DW_CFA_remember_state); } - current_cfa_offset_ = offset; } - void DefCFARegister(Reg reg) { - ImplicitlyAdvancePC(); - this->PushUint8(DW_CFA_def_cfa_register); - this->PushUleb128(reg.num()); + void ALWAYS_INLINE RestoreState() { + if (UNLIKELY(enabled_)) { + ImplicitlyAdvancePC(); + this->PushUint8(DW_CFA_restore_state); + } } - void DefCFAOffset(int offset) { - if (current_cfa_offset_ != offset) { + void ALWAYS_INLINE DefCFA(Reg reg, int offset) { + if (UNLIKELY(enabled_)) { ImplicitlyAdvancePC(); if (offset >= 0) { - this->PushUint8(DW_CFA_def_cfa_offset); + this->PushUint8(DW_CFA_def_cfa); + this->PushUleb128(reg.num()); this->PushUleb128(offset); // Non-factored. } else { uses_dwarf3_features_ = true; - this->PushUint8(DW_CFA_def_cfa_offset_sf); + this->PushUint8(DW_CFA_def_cfa_sf); + this->PushUleb128(reg.num()); this->PushSleb128(FactorDataOffset(offset)); } - current_cfa_offset_ = offset; } + current_cfa_offset_ = offset; } - void ValOffset(Reg reg, int offset) { - ImplicitlyAdvancePC(); - uses_dwarf3_features_ = true; - int factored_offset = FactorDataOffset(offset); // May change sign. - if (factored_offset >= 0) { - this->PushUint8(DW_CFA_val_offset); - this->PushUleb128(reg.num()); - this->PushUleb128(factored_offset); - } else { - this->PushUint8(DW_CFA_val_offset_sf); + void ALWAYS_INLINE DefCFARegister(Reg reg) { + if (UNLIKELY(enabled_)) { + ImplicitlyAdvancePC(); + this->PushUint8(DW_CFA_def_cfa_register); this->PushUleb128(reg.num()); - this->PushSleb128(factored_offset); } } - void DefCFAExpression(void* expr, int expr_size) { - ImplicitlyAdvancePC(); - uses_dwarf3_features_ = true; - this->PushUint8(DW_CFA_def_cfa_expression); - this->PushUleb128(expr_size); - this->PushData(expr, expr_size); + void ALWAYS_INLINE DefCFAOffset(int offset) { + if (UNLIKELY(enabled_)) { + if (current_cfa_offset_ != offset) { + ImplicitlyAdvancePC(); + if (offset >= 0) { + this->PushUint8(DW_CFA_def_cfa_offset); + this->PushUleb128(offset); // Non-factored. + } else { + uses_dwarf3_features_ = true; + this->PushUint8(DW_CFA_def_cfa_offset_sf); + this->PushSleb128(FactorDataOffset(offset)); + } + } + } + // Unconditional so that the user can still get and check the value. + current_cfa_offset_ = offset; } - void Expression(Reg reg, void* expr, int expr_size) { - ImplicitlyAdvancePC(); - uses_dwarf3_features_ = true; - this->PushUint8(DW_CFA_expression); - this->PushUleb128(reg.num()); - this->PushUleb128(expr_size); - this->PushData(expr, expr_size); + void ALWAYS_INLINE ValOffset(Reg reg, int offset) { + if (UNLIKELY(enabled_)) { + ImplicitlyAdvancePC(); + uses_dwarf3_features_ = true; + int factored_offset = FactorDataOffset(offset); // May change sign.
+ if (factored_offset >= 0) { + this->PushUint8(DW_CFA_val_offset); + this->PushUleb128(reg.num()); + this->PushUleb128(factored_offset); + } else { + this->PushUint8(DW_CFA_val_offset_sf); + this->PushUleb128(reg.num()); + this->PushSleb128(factored_offset); + } + } } - void ValExpression(Reg reg, void* expr, int expr_size) { - ImplicitlyAdvancePC(); - uses_dwarf3_features_ = true; - this->PushUint8(DW_CFA_val_expression); - this->PushUleb128(reg.num()); - this->PushUleb128(expr_size); - this->PushData(expr, expr_size); + void ALWAYS_INLINE DefCFAExpression(void* expr, int expr_size) { + if (UNLIKELY(enabled_)) { + ImplicitlyAdvancePC(); + uses_dwarf3_features_ = true; + this->PushUint8(DW_CFA_def_cfa_expression); + this->PushUleb128(expr_size); + this->PushData(expr, expr_size); + } } - int GetCurrentCFAOffset() const { - return current_cfa_offset_; + void ALWAYS_INLINE Expression(Reg reg, void* expr, int expr_size) { + if (UNLIKELY(enabled_)) { + ImplicitlyAdvancePC(); + uses_dwarf3_features_ = true; + this->PushUint8(DW_CFA_expression); + this->PushUleb128(reg.num()); + this->PushUleb128(expr_size); + this->PushData(expr, expr_size); + } } - void SetCurrentCFAOffset(int offset) { - current_cfa_offset_ = offset; + void ALWAYS_INLINE ValExpression(Reg reg, void* expr, int expr_size) { + if (UNLIKELY(enabled_)) { + ImplicitlyAdvancePC(); + uses_dwarf3_features_ = true; + this->PushUint8(DW_CFA_val_expression); + this->PushUleb128(reg.num()); + this->PushUleb128(expr_size); + this->PushData(expr, expr_size); + } } + bool IsEnabled() const { return enabled_; } + + void SetEnabled(bool value) { enabled_ = value; } + + int GetCurrentPC() const { return current_pc_; } + + int GetCurrentCFAOffset() const { return current_cfa_offset_; } + + void SetCurrentCFAOffset(int offset) { current_cfa_offset_ = offset; } + using Writer<Allocator>::data; - DebugFrameOpCodeWriter(const Allocator& alloc = Allocator()) + DebugFrameOpCodeWriter(bool enabled = true, + const Allocator& alloc = Allocator()) : Writer<Allocator>(&opcodes_), + enabled_(enabled), opcodes_(alloc), current_cfa_offset_(0), current_pc_(0), uses_dwarf3_features_(false) { + if (enabled) { + // Best guess based on a couple of observed outputs. + opcodes_.reserve(16); + } } virtual ~DebugFrameOpCodeWriter() { } @@ -267,6 +317,7 @@ class DebugFrameOpCodeWriter : private Writer<Allocator> { return offset / kCodeAlignmentFactor; } + bool enabled_; // If disabled, all writes are no-ops. std::vector<uint8_t, Allocator> opcodes_; int current_cfa_offset_; int current_pc_; diff --git a/compiler/dwarf/debug_frame_writer.h b/compiler/dwarf/debug_frame_writer.h deleted file mode 100644 index 6de45f5526..0000000000 --- a/compiler/dwarf/debug_frame_writer.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
- */ - -#ifndef ART_COMPILER_DWARF_DEBUG_FRAME_WRITER_H_ -#define ART_COMPILER_DWARF_DEBUG_FRAME_WRITER_H_ - -#include "debug_frame_opcode_writer.h" -#include "dwarf.h" -#include "writer.h" - -namespace art { -namespace dwarf { - -// Writer for the .eh_frame section (which extends .debug_frame specification). -template<typename Allocator = std::allocator<uint8_t>> -class DebugFrameWriter FINAL : private Writer<Allocator> { - public: - void WriteCIE(Reg return_address_register, - const uint8_t* initial_opcodes, - int initial_opcodes_size) { - DCHECK(cie_header_start_ == ~0u); - cie_header_start_ = this->data()->size(); - this->PushUint32(0); // Length placeholder. - this->PushUint32(0); // CIE id. - this->PushUint8(1); // Version. - this->PushString("zR"); - this->PushUleb128(DebugFrameOpCodeWriter<Allocator>::kCodeAlignmentFactor); - this->PushSleb128(DebugFrameOpCodeWriter<Allocator>::kDataAlignmentFactor); - this->PushUleb128(return_address_register.num()); // ubyte in DWARF2. - this->PushUleb128(1); // z: Augmentation data size. - if (use_64bit_address_) { - this->PushUint8(0x04); // R: ((DW_EH_PE_absptr << 4) | DW_EH_PE_udata8). - } else { - this->PushUint8(0x03); // R: ((DW_EH_PE_absptr << 4) | DW_EH_PE_udata4). - } - this->PushData(initial_opcodes, initial_opcodes_size); - this->Pad(use_64bit_address_ ? 8 : 4); - this->UpdateUint32(cie_header_start_, this->data()->size() - cie_header_start_ - 4); - } - - void WriteCIE(Reg return_address_register, - const DebugFrameOpCodeWriter<Allocator>& opcodes) { - WriteCIE(return_address_register, opcodes.data()->data(), opcodes.data()->size()); - } - - void WriteFDE(uint64_t initial_address, - uint64_t address_range, - const uint8_t* unwind_opcodes, - int unwind_opcodes_size) { - DCHECK(cie_header_start_ != ~0u); - size_t fde_header_start = this->data()->size(); - this->PushUint32(0); // Length placeholder. - this->PushUint32(this->data()->size() - cie_header_start_); // 'CIE_pointer' - if (use_64bit_address_) { - this->PushUint64(initial_address); - this->PushUint64(address_range); - } else { - this->PushUint32(initial_address); - this->PushUint32(address_range); - } - this->PushUleb128(0); // Augmentation data size. - this->PushData(unwind_opcodes, unwind_opcodes_size); - this->Pad(use_64bit_address_ ? 8 : 4); - this->UpdateUint32(fde_header_start, this->data()->size() - fde_header_start - 4); - } - - DebugFrameWriter(std::vector<uint8_t, Allocator>* buffer, bool use_64bit_address) - : Writer<Allocator>(buffer), - use_64bit_address_(use_64bit_address), - cie_header_start_(~0u) { - } - - private: - bool use_64bit_address_; - size_t cie_header_start_; - - DISALLOW_COPY_AND_ASSIGN(DebugFrameWriter); -}; - -} // namespace dwarf -} // namespace art - -#endif // ART_COMPILER_DWARF_DEBUG_FRAME_WRITER_H_ diff --git a/compiler/dwarf/debug_info_entry_writer.h b/compiler/dwarf/debug_info_entry_writer.h new file mode 100644 index 0000000000..c0350b6f8a --- /dev/null +++ b/compiler/dwarf/debug_info_entry_writer.h @@ -0,0 +1,248 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_DWARF_DEBUG_INFO_ENTRY_WRITER_H_ +#define ART_COMPILER_DWARF_DEBUG_INFO_ENTRY_WRITER_H_ + +#include <unordered_map> + +#include "dwarf.h" +#include "leb128.h" +#include "writer.h" + +namespace art { +namespace dwarf { + +// 32-bit FNV-1a hash function which we use to find duplicate abbreviations. +// See http://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function +template< typename Allocator > +struct FNVHash { + size_t operator()(const std::vector<uint8_t, Allocator>& v) const { + uint32_t hash = 2166136261u; + for (size_t i = 0; i < v.size(); i++) { + hash = (hash ^ v[i]) * 16777619u; + } + return hash; + } +}; + +/* + * Writer for debug information entries (DIE). + * It also handles generation of abbreviations. + * + * Usage: + * StartTag(DW_TAG_compile_unit, DW_CHILDREN_yes); + * WriteStrp(DW_AT_producer, "Compiler name", debug_str); + * StartTag(DW_TAG_subprogram, DW_CHILDREN_no); + * WriteStrp(DW_AT_name, "Foo", debug_str); + * EndTag(); + * EndTag(); + */ +template< typename Allocator = std::allocator<uint8_t> > +class DebugInfoEntryWriter FINAL : private Writer<Allocator> { + public: + // Start debugging information entry. + void StartTag(Tag tag, Children children) { + DCHECK(has_children) << "This tag can not have nested tags"; + if (inside_entry_) { + // Write abbrev code for the previous entry. + this->UpdateUleb128(abbrev_code_offset_, EndAbbrev()); + inside_entry_ = false; + } + StartAbbrev(tag, children); + // Abbrev code placeholder of sufficient size. + abbrev_code_offset_ = this->data()->size(); + this->PushUleb128(NextAbbrevCode()); + depth_++; + inside_entry_ = true; + has_children = (children == DW_CHILDREN_yes); + } + + // End debugging information entry. + void EndTag() { + DCHECK_GT(depth_, 0); + if (inside_entry_) { + // Write abbrev code for this tag. + this->UpdateUleb128(abbrev_code_offset_, EndAbbrev()); + inside_entry_ = false; + } + if (has_children) { + this->PushUint8(0); // End of children. + } + depth_--; + has_children = true; // Parent tag obviously has children. 
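+ // For example, the nested usage from the class comment above produces an
+ // entry stream of the shape:
+ //   <uleb128 abbrev code 1> <compile_unit attribute values>
+ //   <uleb128 abbrev code 2> <subprogram attribute values>
+ //   0x00
+ // where the trailing zero byte (pushed above) terminates the compile
+ // unit's list of children, as DWARF requires for DW_CHILDREN_yes entries.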
+ } + + void WriteAddr(Attribute attrib, uint64_t value) { + AddAbbrevAttribute(attrib, DW_FORM_addr); + if (is64bit_) { + this->PushUint64(value); + } else { + this->PushUint32(value); + } + } + + void WriteBlock(Attribute attrib, const void* ptr, int size) { + AddAbbrevAttribute(attrib, DW_FORM_block); + this->PushUleb128(size); + this->PushData(ptr, size); + } + + void WriteData1(Attribute attrib, uint8_t value) { + AddAbbrevAttribute(attrib, DW_FORM_data1); + this->PushUint8(value); + } + + void WriteData2(Attribute attrib, uint16_t value) { + AddAbbrevAttribute(attrib, DW_FORM_data2); + this->PushUint16(value); + } + + void WriteData4(Attribute attrib, uint32_t value) { + AddAbbrevAttribute(attrib, DW_FORM_data4); + this->PushUint32(value); + } + + void WriteData8(Attribute attrib, uint64_t value) { + AddAbbrevAttribute(attrib, DW_FORM_data8); + this->PushUint64(value); + } + + void WriteSdata(Attribute attrib, int value) { + AddAbbrevAttribute(attrib, DW_FORM_sdata); + this->PushSleb128(value); + } + + void WriteUdata(Attribute attrib, int value) { + AddAbbrevAttribute(attrib, DW_FORM_udata); + this->PushUleb128(value); + } + + void WriteUdata(Attribute attrib, uint32_t value) { + AddAbbrevAttribute(attrib, DW_FORM_udata); + this->PushUleb128(value); + } + + void WriteFlag(Attribute attrib, bool value) { + AddAbbrevAttribute(attrib, DW_FORM_flag); + this->PushUint8(value ? 1 : 0); + } + + void WriteRef4(Attribute attrib, int cu_offset) { + AddAbbrevAttribute(attrib, DW_FORM_ref4); + this->PushUint32(cu_offset); + } + + void WriteRef(Attribute attrib, int cu_offset) { + AddAbbrevAttribute(attrib, DW_FORM_ref_udata); + this->PushUleb128(cu_offset); + } + + void WriteString(Attribute attrib, const char* value) { + AddAbbrevAttribute(attrib, DW_FORM_string); + this->PushString(value); + } + + void WriteStrp(Attribute attrib, int address) { + AddAbbrevAttribute(attrib, DW_FORM_strp); + this->PushUint32(address); + } + + void WriteStrp(Attribute attrib, const char* value, std::vector<uint8_t>* debug_str) { + AddAbbrevAttribute(attrib, DW_FORM_strp); + int address = debug_str->size(); + debug_str->insert(debug_str->end(), value, value + strlen(value) + 1); + this->PushUint32(address); + } + + bool is64bit() const { return is64bit_; } + + using Writer<Allocator>::data; + + DebugInfoEntryWriter(bool is64bitArch, + std::vector<uint8_t, Allocator>* debug_abbrev, + const Allocator& alloc = Allocator()) + : Writer<Allocator>(&entries_), + debug_abbrev_(debug_abbrev), + current_abbrev_(alloc), + abbrev_codes_(alloc), + entries_(alloc), + is64bit_(is64bitArch) { + debug_abbrev_.PushUint8(0); // Add abbrev table terminator. + } + + ~DebugInfoEntryWriter() { + DCHECK_EQ(depth_, 0); + } + + private: + // Start abbreviation declaration. + void StartAbbrev(Tag tag, Children children) { + DCHECK(!inside_entry_); + current_abbrev_.clear(); + EncodeUnsignedLeb128(&current_abbrev_, tag); + current_abbrev_.push_back(children); + } + + // Add attribute specification. + void AddAbbrevAttribute(Attribute name, Form type) { + DCHECK(inside_entry_) << "Call StartTag before adding attributes."; + EncodeUnsignedLeb128(&current_abbrev_, name); + EncodeUnsignedLeb128(&current_abbrev_, type); + } + + int NextAbbrevCode() { + return 1 + abbrev_codes_.size(); + } + + // End abbreviation declaration and return its code.
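+ // Entries whose tag, children flag and attribute list serialize to the same
+ // byte vector share a single abbreviation: the map lookup below (keyed by
+ // FNVHash above) returns the existing code, so .debug_abbrev only grows
+ // when a genuinely new combination is declared.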
+ int EndAbbrev() { + DCHECK(inside_entry_); + auto it = abbrev_codes_.insert(std::make_pair(std::move(current_abbrev_), + NextAbbrevCode())); + int abbrev_code = it.first->second; + if (UNLIKELY(it.second)) { // Inserted new entry. + const std::vector<uint8_t, Allocator>& abbrev = it.first->first; + debug_abbrev_.Pop(); // Remove abbrev table terminator. + debug_abbrev_.PushUleb128(abbrev_code); + debug_abbrev_.PushData(abbrev.data(), abbrev.size()); + debug_abbrev_.PushUint8(0); // Attribute list end. + debug_abbrev_.PushUint8(0); // Attribute list end. + debug_abbrev_.PushUint8(0); // Add abbrev table terminator. + } + return abbrev_code; + } + + private: + // Fields for writing and deduplication of abbrevs. + Writer<Allocator> debug_abbrev_; + std::vector<uint8_t, Allocator> current_abbrev_; + std::unordered_map<std::vector<uint8_t, Allocator>, int, + FNVHash<Allocator> > abbrev_codes_; + + // Fields for writing of debugging information entries. + std::vector<uint8_t, Allocator> entries_; + bool is64bit_; + int depth_ = 0; + size_t abbrev_code_offset_ = 0; // Location to patch once we know the code. + bool inside_entry_ = false; // Entry ends at first child (if any). + bool has_children = true; +}; + +} // namespace dwarf +} // namespace art + +#endif // ART_COMPILER_DWARF_DEBUG_INFO_ENTRY_WRITER_H_ diff --git a/compiler/dwarf/debug_line_writer.h b/compiler/dwarf/debug_line_writer.h deleted file mode 100644 index 4b7d8d9d92..0000000000 --- a/compiler/dwarf/debug_line_writer.h +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_DWARF_DEBUG_LINE_WRITER_H_ -#define ART_COMPILER_DWARF_DEBUG_LINE_WRITER_H_ - -#include "debug_line_opcode_writer.h" -#include "dwarf.h" -#include "writer.h" -#include <string> - -namespace art { -namespace dwarf { - -// Writer for the .debug_line section (DWARF-3). -template<typename Allocator = std::allocator<uint8_t>> -class DebugLineWriter FINAL : private Writer<Allocator> { - public: - struct FileEntry { - std::string file_name; - int directory_index; - int modification_time; - int file_size; - }; - - void WriteTable(const std::vector<std::string>& include_directories, - const std::vector<FileEntry>& files, - const DebugLineOpCodeWriter<Allocator>& opcodes) { - size_t header_start = this->data()->size(); - this->PushUint32(0); // Section-length placeholder. - // Claim DWARF-2 version even though we use some DWARF-3 features. - // DWARF-2 consumers will ignore the unknown opcodes. - // This is what clang currently does. - this->PushUint16(2); // .debug_line version. - size_t header_length_pos = this->data()->size(); - this->PushUint32(0); // Header-length placeholder. - this->PushUint8(1 << opcodes.GetCodeFactorBits()); - this->PushUint8(DebugLineOpCodeWriter<Allocator>::kDefaultIsStmt ? 
1 : 0); - this->PushInt8(DebugLineOpCodeWriter<Allocator>::kLineBase); - this->PushUint8(DebugLineOpCodeWriter<Allocator>::kLineRange); - this->PushUint8(DebugLineOpCodeWriter<Allocator>::kOpcodeBase); - static const int opcode_lengths[DebugLineOpCodeWriter<Allocator>::kOpcodeBase] = { - 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1 }; - for (int i = 1; i < DebugLineOpCodeWriter<Allocator>::kOpcodeBase; i++) { - this->PushUint8(opcode_lengths[i]); - } - for (const std::string& directory : include_directories) { - this->PushData(directory.data(), directory.size() + 1); - } - this->PushUint8(0); // Terminate include_directories list. - for (const FileEntry& file : files) { - this->PushData(file.file_name.data(), file.file_name.size() + 1); - this->PushUleb128(file.directory_index); - this->PushUleb128(file.modification_time); - this->PushUleb128(file.file_size); - } - this->PushUint8(0); // Terminate file list. - this->UpdateUint32(header_length_pos, this->data()->size() - header_length_pos - 4); - this->PushData(opcodes.data()->data(), opcodes.data()->size()); - this->UpdateUint32(header_start, this->data()->size() - header_start - 4); - } - - explicit DebugLineWriter(std::vector<uint8_t, Allocator>* buffer) - : Writer<Allocator>(buffer) { - } - - private: - DISALLOW_COPY_AND_ASSIGN(DebugLineWriter); -}; - -} // namespace dwarf -} // namespace art - -#endif // ART_COMPILER_DWARF_DEBUG_LINE_WRITER_H_ diff --git a/compiler/dwarf/dwarf_test.cc b/compiler/dwarf/dwarf_test.cc index f3553bcc99..ec18e96b4b 100644 --- a/compiler/dwarf/dwarf_test.cc +++ b/compiler/dwarf/dwarf_test.cc @@ -17,9 +17,9 @@ #include "dwarf_test.h" #include "dwarf/debug_frame_opcode_writer.h" -#include "dwarf/debug_frame_writer.h" +#include "dwarf/debug_info_entry_writer.h" #include "dwarf/debug_line_opcode_writer.h" -#include "dwarf/debug_line_writer.h" +#include "dwarf/headers.h" #include "gtest/gtest.h" namespace art { @@ -118,22 +118,20 @@ TEST_F(DwarfTest, DebugFrame) { DW_CHECK_NEXT("DW_CFA_restore: r2 (edx)"); DW_CHECK_NEXT("DW_CFA_restore: r5 (ebp)"); - DebugFrameWriter<> eh_frame(&eh_frame_data_, is64bit); DebugFrameOpCodeWriter<> initial_opcodes; - eh_frame.WriteCIE(Reg(is64bit ? 16 : 8), // Return address register. - initial_opcodes); // Initial opcodes. - eh_frame.WriteFDE(0x01000000, 0x01000000, - opcodes.data()->data(), opcodes.data()->size()); + WriteEhFrameCIE(is64bit, Reg(is64bit ? 16 : 8), initial_opcodes, &eh_frame_data_); + WriteEhFrameFDE(is64bit, 0, 0x01000000, 0x01000000, opcodes.data(), &eh_frame_data_); CheckObjdumpOutput(is64bit, "-W"); } -TEST_F(DwarfTest, DebugFrame64) { - const bool is64bit = true; - DebugFrameWriter<> eh_frame(&eh_frame_data_, is64bit); - DebugFrameOpCodeWriter<> no_opcodes; - eh_frame.WriteCIE(Reg(16), no_opcodes); - eh_frame.WriteFDE(0x0100000000000000, 0x0200000000000000, - no_opcodes.data()->data(), no_opcodes.data()->size()); +// TODO: objdump seems to have trouble with 64bit CIE length. 
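+// (64-bit DWARF encodes the initial length as the escape 0xffffffff followed
+// by a 64-bit length - see WriteEhFrameCIE/WriteEhFrameFDE in dwarf/headers.h
+// below - and some objdump builds appear not to decode that escape.)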
+TEST_F(DwarfTest, DISABLED_DebugFrame64) { + constexpr bool is64bit = true; + DebugFrameOpCodeWriter<> initial_opcodes; + WriteEhFrameCIE(is64bit, Reg(16), initial_opcodes, &eh_frame_data_); + DebugFrameOpCodeWriter<> opcodes; + WriteEhFrameFDE(is64bit, 0, 0x0100000000000000, 0x0200000000000000, + opcodes.data(), &eh_frame_data_); DW_CHECK("FDE cie=00000000 pc=100000000000000..300000000000000"); CheckObjdumpOutput(is64bit, "-W"); } @@ -147,7 +145,7 @@ TEST_F(DwarfTest, DebugLine) { include_directories.push_back("/path/to/source"); DW_CHECK("/path/to/source"); - std::vector<DebugLineWriter<>::FileEntry> files { + std::vector<FileEntry> files { { "file0.c", 0, 1000, 2000 }, { "file1.c", 1, 1000, 2000 }, { "file2.c", 1, 1000, 2000 }, @@ -186,8 +184,7 @@ TEST_F(DwarfTest, DebugLine) { DW_CHECK_NEXT("Entry\tDir\tTime\tSize\tName"); DW_CHECK_NEXT("1\t0\t1000\t2000\tfile.c"); - DebugLineWriter<> debug_line(&debug_line_data_); - debug_line.WriteTable(include_directories, files, opcodes); + WriteDebugLineTable(include_directories, files, opcodes, &debug_line_data_); CheckObjdumpOutput(is64bit, "-W"); } @@ -221,14 +218,63 @@ TEST_F(DwarfTest, DebugLineSpecialOpcodes) { EXPECT_LT(opcodes.data()->size(), num_rows * 3); std::vector<std::string> directories; - std::vector<DebugLineWriter<>::FileEntry> files { - { "file.c", 0, 1000, 2000 }, - }; - DebugLineWriter<> debug_line(&debug_line_data_); - debug_line.WriteTable(directories, files, opcodes); + std::vector<FileEntry> files { { "file.c", 0, 1000, 2000 } }; // NOLINT + WriteDebugLineTable(directories, files, opcodes, &debug_line_data_); CheckObjdumpOutput(is64bit, "-W -WL"); } +TEST_F(DwarfTest, DebugInfo) { + constexpr bool is64bit = false; + DebugInfoEntryWriter<> info(is64bit, &debug_abbrev_data_); + DW_CHECK("Contents of the .debug_info section:"); + info.StartTag(dwarf::DW_TAG_compile_unit, dwarf::DW_CHILDREN_yes); + DW_CHECK("Abbrev Number: 1 (DW_TAG_compile_unit)"); + info.WriteStrp(dwarf::DW_AT_producer, "Compiler name", &debug_str_data_); + DW_CHECK_NEXT("DW_AT_producer : (indirect string, offset: 0x0): Compiler name"); + info.WriteAddr(dwarf::DW_AT_low_pc, 0x01000000); + DW_CHECK_NEXT("DW_AT_low_pc : 0x1000000"); + info.WriteAddr(dwarf::DW_AT_high_pc, 0x02000000); + DW_CHECK_NEXT("DW_AT_high_pc : 0x2000000"); + info.StartTag(dwarf::DW_TAG_subprogram, dwarf::DW_CHILDREN_no); + DW_CHECK("Abbrev Number: 2 (DW_TAG_subprogram)"); + info.WriteStrp(dwarf::DW_AT_name, "Foo", &debug_str_data_); + DW_CHECK_NEXT("DW_AT_name : (indirect string, offset: 0xe): Foo"); + info.WriteAddr(dwarf::DW_AT_low_pc, 0x01010000); + DW_CHECK_NEXT("DW_AT_low_pc : 0x1010000"); + info.WriteAddr(dwarf::DW_AT_high_pc, 0x01020000); + DW_CHECK_NEXT("DW_AT_high_pc : 0x1020000"); + info.EndTag(); // DW_TAG_subprogram + info.StartTag(dwarf::DW_TAG_subprogram, dwarf::DW_CHILDREN_no); + DW_CHECK("Abbrev Number: 2 (DW_TAG_subprogram)"); + info.WriteStrp(dwarf::DW_AT_name, "Bar", &debug_str_data_); + DW_CHECK_NEXT("DW_AT_name : (indirect string, offset: 0x12): Bar"); + info.WriteAddr(dwarf::DW_AT_low_pc, 0x01020000); + DW_CHECK_NEXT("DW_AT_low_pc : 0x1020000"); + info.WriteAddr(dwarf::DW_AT_high_pc, 0x01030000); + DW_CHECK_NEXT("DW_AT_high_pc : 0x1030000"); + info.EndTag(); // DW_TAG_subprogram + info.EndTag(); // DW_TAG_compile_unit + // Test that the previous children list was properly terminated and that an + // entry with no actual children works. + info.StartTag(dwarf::DW_TAG_compile_unit, dwarf::DW_CHILDREN_yes); + info.EndTag(); // DW_TAG_compile_unit + + // The abbrev table is just a by-product, but check it as well.
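+ // The empty compilation unit has an empty attribute list, so it cannot
+ // share abbreviation 1 and EndAbbrev() allocates a new code (3) for it.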
+ DW_CHECK("Abbrev Number: 3 (DW_TAG_compile_unit)"); + DW_CHECK("Contents of the .debug_abbrev section:"); + DW_CHECK("1 DW_TAG_compile_unit [has children]"); + DW_CHECK_NEXT("DW_AT_producer DW_FORM_strp"); + DW_CHECK_NEXT("DW_AT_low_pc DW_FORM_addr"); + DW_CHECK_NEXT("DW_AT_high_pc DW_FORM_addr"); + DW_CHECK("2 DW_TAG_subprogram [no children]"); + DW_CHECK_NEXT("DW_AT_name DW_FORM_strp"); + DW_CHECK_NEXT("DW_AT_low_pc DW_FORM_addr"); + DW_CHECK_NEXT("DW_AT_high_pc DW_FORM_addr"); + DW_CHECK("3 DW_TAG_compile_unit [has children]"); + + dwarf::WriteDebugInfoCU(0 /* debug_abbrev_offset */, info, &debug_info_data_); + CheckObjdumpOutput(is64bit, "-W"); +} + #endif // HAVE_ANDROID_OS } // namespace dwarf diff --git a/compiler/dwarf/headers.h b/compiler/dwarf/headers.h new file mode 100644 index 0000000000..d866b91ae7 --- /dev/null +++ b/compiler/dwarf/headers.h @@ -0,0 +1,167 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_DWARF_HEADERS_H_ +#define ART_COMPILER_DWARF_HEADERS_H_ + +#include "debug_frame_opcode_writer.h" +#include "debug_info_entry_writer.h" +#include "debug_line_opcode_writer.h" +#include "register.h" +#include "writer.h" + +namespace art { +namespace dwarf { + +// Write common information entry (CIE) to .eh_frame section. +template<typename Allocator> +void WriteEhFrameCIE(bool is64bit, Reg return_address_register, + const DebugFrameOpCodeWriter<Allocator>& opcodes, + std::vector<uint8_t>* eh_frame) { + Writer<> writer(eh_frame); + size_t cie_header_start_ = writer.data()->size(); + if (is64bit) { + // TODO: This is not related to being 64bit. + writer.PushUint32(0xffffffff); + writer.PushUint64(0); // Length placeholder. + writer.PushUint64(0); // CIE id. + } else { + writer.PushUint32(0); // Length placeholder. + writer.PushUint32(0); // CIE id. + } + writer.PushUint8(1); // Version. + writer.PushString("zR"); + writer.PushUleb128(DebugFrameOpCodeWriter<Allocator>::kCodeAlignmentFactor); + writer.PushSleb128(DebugFrameOpCodeWriter<Allocator>::kDataAlignmentFactor); + writer.PushUleb128(return_address_register.num()); // ubyte in DWARF2. + writer.PushUleb128(1); // z: Augmentation data size. + if (is64bit) { + writer.PushUint8(0x04); // R: ((DW_EH_PE_absptr << 4) | DW_EH_PE_udata8). + } else { + writer.PushUint8(0x03); // R: ((DW_EH_PE_absptr << 4) | DW_EH_PE_udata4). + } + writer.PushData(opcodes.data()); + writer.Pad(is64bit ? 8 : 4); + if (is64bit) { + writer.UpdateUint64(cie_header_start_ + 4, writer.data()->size() - cie_header_start_ - 12); + } else { + writer.UpdateUint32(cie_header_start_, writer.data()->size() - cie_header_start_ - 4); + } +} + +// Write frame description entry (FDE) to .eh_frame section. 
+template<typename Allocator> +void WriteEhFrameFDE(bool is64bit, size_t cie_offset, + uint64_t initial_address, uint64_t address_range, + const std::vector<uint8_t, Allocator>* opcodes, + std::vector<uint8_t>* eh_frame) { + Writer<> writer(eh_frame); + size_t fde_header_start = writer.data()->size(); + if (is64bit) { + // TODO: This is not related to being 64bit. + writer.PushUint32(0xffffffff); + writer.PushUint64(0); // Length placeholder. + uint64_t cie_pointer = writer.data()->size() - cie_offset; + writer.PushUint64(cie_pointer); + } else { + writer.PushUint32(0); // Length placeholder. + uint32_t cie_pointer = writer.data()->size() - cie_offset; + writer.PushUint32(cie_pointer); + } + if (is64bit) { + writer.PushUint64(initial_address); + writer.PushUint64(address_range); + } else { + writer.PushUint32(initial_address); + writer.PushUint32(address_range); + } + writer.PushUleb128(0); // Augmentation data size. + writer.PushData(opcodes); + writer.Pad(is64bit ? 8 : 4); + if (is64bit) { + writer.UpdateUint64(fde_header_start + 4, writer.data()->size() - fde_header_start - 12); + } else { + writer.UpdateUint32(fde_header_start, writer.data()->size() - fde_header_start - 4); + } +} + +// Write compilation unit (CU) to .debug_info section. +template<typename Allocator> +void WriteDebugInfoCU(uint32_t debug_abbrev_offset, + const DebugInfoEntryWriter<Allocator>& entries, + std::vector<uint8_t>* debug_info) { + Writer<> writer(debug_info); + size_t start = writer.data()->size(); + writer.PushUint32(0); // Length placeholder. + writer.PushUint16(3); // Version. + writer.PushUint32(debug_abbrev_offset); + writer.PushUint8(entries.is64bit() ? 8 : 4); + writer.PushData(entries.data()); + writer.UpdateUint32(start, writer.data()->size() - start - 4); +} + +struct FileEntry { + std::string file_name; + int directory_index; + int modification_time; + int file_size; +}; + +// Write line table to .debug_line section. +template<typename Allocator> +void WriteDebugLineTable(const std::vector<std::string>& include_directories, + const std::vector<FileEntry>& files, + const DebugLineOpCodeWriter<Allocator>& opcodes, + std::vector<uint8_t>* debug_line) { + Writer<> writer(debug_line); + size_t header_start = writer.data()->size(); + writer.PushUint32(0); // Section-length placeholder. + // Claim DWARF-2 version even though we use some DWARF-3 features. + // DWARF-2 consumers will ignore the unknown opcodes. + // This is what clang currently does. + writer.PushUint16(2); // .debug_line version. + size_t header_length_pos = writer.data()->size(); + writer.PushUint32(0); // Header-length placeholder. + writer.PushUint8(1 << opcodes.GetCodeFactorBits()); + writer.PushUint8(DebugLineOpCodeWriter<Allocator>::kDefaultIsStmt ? 1 : 0); + writer.PushInt8(DebugLineOpCodeWriter<Allocator>::kLineBase); + writer.PushUint8(DebugLineOpCodeWriter<Allocator>::kLineRange); + writer.PushUint8(DebugLineOpCodeWriter<Allocator>::kOpcodeBase); + static const int opcode_lengths[DebugLineOpCodeWriter<Allocator>::kOpcodeBase] = { + 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1 }; + for (int i = 1; i < DebugLineOpCodeWriter<Allocator>::kOpcodeBase; i++) { + writer.PushUint8(opcode_lengths[i]); + } + for (const std::string& directory : include_directories) { + writer.PushData(directory.data(), directory.size() + 1); + } + writer.PushUint8(0); // Terminate include_directories list. 
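+ // Each file entry that follows is a null-terminated name plus three
+ // ULEB128s: directory index (1-based; 0 means the compilation directory),
+ // modification time, and file size (callers pass 0 when unavailable).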
+ for (const FileEntry& file : files) { + writer.PushData(file.file_name.data(), file.file_name.size() + 1); + writer.PushUleb128(file.directory_index); + writer.PushUleb128(file.modification_time); + writer.PushUleb128(file.file_size); + } + writer.PushUint8(0); // Terminate file list. + writer.UpdateUint32(header_length_pos, writer.data()->size() - header_length_pos - 4); + writer.PushData(opcodes.data()->data(), opcodes.data()->size()); + writer.UpdateUint32(header_start, writer.data()->size() - header_start - 4); +} + +} // namespace dwarf +} // namespace art + +#endif // ART_COMPILER_DWARF_HEADERS_H_ diff --git a/compiler/dwarf/writer.h b/compiler/dwarf/writer.h index d8e29f0986..3b9c55866a 100644 --- a/compiler/dwarf/writer.h +++ b/compiler/dwarf/writer.h @@ -116,6 +116,11 @@ class Writer { data_->insert(data_->end(), p, p + size); } + template<typename Allocator2> + void PushData(const std::vector<uint8_t, Allocator2>* buffer) { + data_->insert(data_->end(), buffer->begin(), buffer->end()); + } + void UpdateUint32(size_t offset, uint32_t value) { DCHECK_LT(offset + 3, data_->size()); (*data_)[offset + 0] = (value >> 0) & 0xFF; @@ -136,6 +141,15 @@ (*data_)[offset + 7] = (value >> 56) & 0xFF; } + void UpdateUleb128(size_t offset, uint32_t value) { + DCHECK_LE(offset + UnsignedLeb128Size(value), data_->size()); + UpdateUnsignedLeb128(data_->data() + offset, value); + } + + void Pop() { + return data_->pop_back(); + } + void Pad(int alignment) { DCHECK_NE(alignment, 0); data_->resize(RoundUp(data_->size(), alignment), 0); diff --git a/compiler/elf_writer_debug.cc b/compiler/elf_writer_debug.cc new file mode 100644 index 0000000000..5e8e24b035 --- /dev/null +++ b/compiler/elf_writer_debug.cc @@ -0,0 +1,360 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "elf_writer_debug.h" + +#include "compiled_method.h" +#include "driver/compiler_driver.h" +#include "dex_file-inl.h" +#include "dwarf/headers.h" +#include "dwarf/register.h" +#include "oat_writer.h" + +namespace art { +namespace dwarf { + +static void WriteEhFrameCIE(InstructionSet isa, std::vector<uint8_t>* eh_frame) { + // Scratch registers should be marked as undefined. This tells the + // debugger that their values in the previous frame are not recoverable. + bool is64bit = Is64BitInstructionSet(isa); + switch (isa) { + case kArm: + case kThumb2: { + DebugFrameOpCodeWriter<> opcodes; + opcodes.DefCFA(Reg::ArmCore(13), 0); // R13(SP). + // core registers. + for (int reg = 0; reg < 13; reg++) { + if (reg < 4 || reg == 12) { + opcodes.Undefined(Reg::ArmCore(reg)); + } else { + opcodes.SameValue(Reg::ArmCore(reg)); + } + } + // fp registers. + for (int reg = 0; reg < 32; reg++) { + if (reg < 16) { + opcodes.Undefined(Reg::ArmFp(reg)); + } else { + opcodes.SameValue(Reg::ArmFp(reg)); + } + } + auto return_address_reg = Reg::ArmCore(14); // R14(LR).
+ WriteEhFrameCIE(is64bit, return_address_reg, opcodes, eh_frame); + return; + } + case kArm64: { + DebugFrameOpCodeWriter<> opcodes; + opcodes.DefCFA(Reg::Arm64Core(31), 0); // R31(SP). + // core registers. + for (int reg = 0; reg < 30; reg++) { + if (reg < 8 || reg == 16 || reg == 17) { + opcodes.Undefined(Reg::Arm64Core(reg)); + } else { + opcodes.SameValue(Reg::Arm64Core(reg)); + } + } + // fp registers. + for (int reg = 0; reg < 32; reg++) { + if (reg < 8 || reg >= 16) { + opcodes.Undefined(Reg::Arm64Fp(reg)); + } else { + opcodes.SameValue(Reg::Arm64Fp(reg)); + } + } + auto return_address_reg = Reg::Arm64Core(30); // R30(LR). + WriteEhFrameCIE(is64bit, return_address_reg, opcodes, eh_frame); + return; + } + case kMips: + case kMips64: { + DebugFrameOpCodeWriter<> opcodes; + opcodes.DefCFA(Reg::MipsCore(29), 0); // R29(SP). + // core registers. + for (int reg = 1; reg < 26; reg++) { + if (reg < 16 || reg == 24 || reg == 25) { // AT, V*, A*, T*. + opcodes.Undefined(Reg::MipsCore(reg)); + } else { + opcodes.SameValue(Reg::MipsCore(reg)); + } + } + auto return_address_reg = Reg::MipsCore(31); // R31(RA). + WriteEhFrameCIE(is64bit, return_address_reg, opcodes, eh_frame); + return; + } + case kX86: { + DebugFrameOpCodeWriter<> opcodes; + opcodes.DefCFA(Reg::X86Core(4), 4); // R4(ESP). + opcodes.Offset(Reg::X86Core(8), -4); // R8(EIP). + // core registers. + for (int reg = 0; reg < 8; reg++) { + if (reg <= 3) { + opcodes.Undefined(Reg::X86Core(reg)); + } else if (reg == 4) { + // Stack pointer. + } else { + opcodes.SameValue(Reg::X86Core(reg)); + } + } + // fp registers. + for (int reg = 0; reg < 8; reg++) { + opcodes.Undefined(Reg::X86Fp(reg)); + } + auto return_address_reg = Reg::X86Core(8); // R8(EIP). + WriteEhFrameCIE(is64bit, return_address_reg, opcodes, eh_frame); + return; + } + case kX86_64: { + DebugFrameOpCodeWriter<> opcodes; + opcodes.DefCFA(Reg::X86_64Core(4), 8); // R4(RSP). + opcodes.Offset(Reg::X86_64Core(16), -8); // R16(RIP). + // core registers. + for (int reg = 0; reg < 16; reg++) { + if (reg == 4) { + // Stack pointer. + } else if (reg < 12 && reg != 3 && reg != 5) { // except EBX and EBP. + opcodes.Undefined(Reg::X86_64Core(reg)); + } else { + opcodes.SameValue(Reg::X86_64Core(reg)); + } + } + // fp registers. + for (int reg = 0; reg < 16; reg++) { + if (reg < 12) { + opcodes.Undefined(Reg::X86_64Fp(reg)); + } else { + opcodes.SameValue(Reg::X86_64Fp(reg)); + } + } + auto return_address_reg = Reg::X86_64Core(16); // R16(RIP). + WriteEhFrameCIE(is64bit, return_address_reg, opcodes, eh_frame); + return; + } + case kNone: + break; + } + LOG(FATAL) << "Can not write CIE frame for ISA " << isa; + UNREACHABLE(); +} + +/* + * @brief Generate the DWARF sections. + * @param oat_writer The Oat file Writer. + * @param eh_frame Call Frame Information. + * @param debug_info Compilation unit information. + * @param debug_abbrev Abbreviations used to generate dbg_info. + * @param debug_str Debug strings. + * @param debug_line Line number table. 
+ */ +void WriteDebugSections(const CompilerDriver* compiler, + const OatWriter* oat_writer, + uint32_t text_section_offset, + std::vector<uint8_t>* eh_frame, + std::vector<uint8_t>* debug_info, + std::vector<uint8_t>* debug_abbrev, + std::vector<uint8_t>* debug_str, + std::vector<uint8_t>* debug_line) { + const std::vector<OatWriter::DebugInfo>& method_infos = oat_writer->GetMethodDebugInfo(); + const InstructionSet isa = compiler->GetInstructionSet(); + uint32_t cunit_low_pc = static_cast<uint32_t>(-1); + uint32_t cunit_high_pc = 0; + for (auto method_info : method_infos) { + cunit_low_pc = std::min(cunit_low_pc, method_info.low_pc_); + cunit_high_pc = std::max(cunit_high_pc, method_info.high_pc_); + } + + // Write .eh_frame section. + size_t cie_offset = eh_frame->size(); + WriteEhFrameCIE(isa, eh_frame); + for (const OatWriter::DebugInfo& mi : method_infos) { + const SwapVector<uint8_t>* opcodes = mi.compiled_method_->GetCFIInfo(); + if (opcodes != nullptr) { + WriteEhFrameFDE(Is64BitInstructionSet(isa), cie_offset, + text_section_offset + mi.low_pc_, mi.high_pc_ - mi.low_pc_, + opcodes, eh_frame); + } + } + + // Write .debug_info section. + size_t debug_abbrev_offset = debug_abbrev->size(); + DebugInfoEntryWriter<> info(false /* 32 bit */, debug_abbrev); + info.StartTag(DW_TAG_compile_unit, DW_CHILDREN_yes); + info.WriteStrp(DW_AT_producer, "Android dex2oat", debug_str); + info.WriteData1(DW_AT_language, DW_LANG_Java); + info.WriteAddr(DW_AT_low_pc, cunit_low_pc + text_section_offset); + info.WriteAddr(DW_AT_high_pc, cunit_high_pc + text_section_offset); + info.WriteData4(DW_AT_stmt_list, debug_line->size()); + for (auto method_info : method_infos) { + std::string method_name = PrettyMethod(method_info.dex_method_index_, + *method_info.dex_file_, true); + if (method_info.deduped_) { + // TODO We should place the DEDUPED tag on the first instance of a deduplicated symbol + // so that it will show up in a debuggerd crash report. + method_name += " [ DEDUPED ]"; + } + info.StartTag(DW_TAG_subprogram, DW_CHILDREN_no); + info.WriteStrp(DW_AT_name, method_name.data(), debug_str); + info.WriteAddr(DW_AT_low_pc, method_info.low_pc_ + text_section_offset); + info.WriteAddr(DW_AT_high_pc, method_info.high_pc_ + text_section_offset); + info.EndTag(); // DW_TAG_subprogram + } + info.EndTag(); // DW_TAG_compile_unit + WriteDebugInfoCU(debug_abbrev_offset, info, debug_info); + + // TODO: in gdb info functions <regexp> - reports Java functions, but + // source file is <unknown> because .debug_line is formed as one + // compilation unit. To fix this it is possible to generate + // a separate compilation unit for every distinct Java source. + // Each of these compilation units can have several non-adjacent + // method ranges. + + // Write .debug_line section. + std::vector<FileEntry> files; + std::unordered_map<std::string, size_t> files_map; + std::vector<std::string> directories; + std::unordered_map<std::string, size_t> directories_map; + int code_factor_bits_ = 0; + int dwarf_isa = -1; + switch (isa) { + case kArm: // arm actually means thumb2. + case kThumb2: + code_factor_bits_ = 1; // 16-bit instructions + dwarf_isa = 1; // DW_ISA_ARM_thumb.
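+ // (A code factor of 1 bit means line-program addresses advance in units
+ // of 2-byte Thumb2 halfwords, halving the size of address-advance operands.)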
+ break; + case kArm64: + case kMips: + case kMips64: + code_factor_bits_ = 2; // 32-bit instructions + break; + case kNone: + case kX86: + case kX86_64: + break; + } + DebugLineOpCodeWriter<> opcodes(false /* 32bit */, code_factor_bits_); + opcodes.SetAddress(text_section_offset + cunit_low_pc); + if (dwarf_isa != -1) { + opcodes.SetISA(dwarf_isa); + } + for (const OatWriter::DebugInfo& mi : method_infos) { + // Addresses in the line table should be unique and increasing. + if (mi.deduped_) { + continue; + } + + struct DebugInfoCallbacks { + static bool NewPosition(void* ctx, uint32_t address, uint32_t line) { + auto* context = reinterpret_cast<DebugInfoCallbacks*>(ctx); + context->dex2line_.push_back({address, static_cast<int32_t>(line)}); + return false; + } + DefaultSrcMap dex2line_; + } debug_info_callbacks; + + const DexFile* dex = mi.dex_file_; + if (mi.code_item_ != nullptr) { + dex->DecodeDebugInfo(mi.code_item_, + (mi.access_flags_ & kAccStatic) != 0, + mi.dex_method_index_, + DebugInfoCallbacks::NewPosition, + nullptr, + &debug_info_callbacks); + } + + // Get and deduplicate directory and filename. + int file_index = 0; // 0 - primary source file of the compilation. + auto& dex_class_def = dex->GetClassDef(mi.class_def_index_); + const char* source_file = dex->GetSourceFile(dex_class_def); + if (source_file != nullptr) { + std::string file_name(source_file); + size_t file_name_slash = file_name.find_last_of('/'); + std::string class_name(dex->GetClassDescriptor(dex_class_def)); + size_t class_name_slash = class_name.find_last_of('/'); + std::string full_path(file_name); + + // Guess directory from package name. + int directory_index = 0; // 0 - current directory of the compilation. + if (file_name_slash == std::string::npos && // Just filename. + class_name.front() == 'L' && // Type descriptor for a class. + class_name_slash != std::string::npos) { // Has package name. + std::string package_name = class_name.substr(1, class_name_slash - 1); + auto it = directories_map.find(package_name); + if (it == directories_map.end()) { + directory_index = 1 + directories.size(); + directories_map.emplace(package_name, directory_index); + directories.push_back(package_name); + } else { + directory_index = it->second; + } + full_path = package_name + "/" + file_name; + } + + // Add file entry. + auto it2 = files_map.find(full_path); + if (it2 == files_map.end()) { + file_index = 1 + files.size(); + files_map.emplace(full_path, file_index); + files.push_back(FileEntry { + file_name, + directory_index, + 0, // Modification time - NA. + 0, // File size - NA. + }); + } else { + file_index = it2->second; + } + } + opcodes.SetFile(file_index); + + // Generate mapping opcodes from PC to Java lines. + const DefaultSrcMap& dex2line_map = debug_info_callbacks.dex2line_; + uint32_t low_pc = text_section_offset + mi.low_pc_; + if (file_index != 0 && !dex2line_map.empty()) { + bool first = true; + for (SrcMapElem pc2dex : mi.compiled_method_->GetSrcMappingTable()) { + uint32_t pc = pc2dex.from_; + int dex_pc = pc2dex.to_; + auto dex2line = dex2line_map.Find(static_cast<uint32_t>(dex_pc)); + if (dex2line.first) { + int line = dex2line.second; + if (first) { + first = false; + if (pc > 0) { + // Assume that any preceding code is prologue. + int first_line = dex2line_map.front().to_; + // Prologue is not a sensible place for a breakpoint. 
+ opcodes.NegateStmt(); + opcodes.AddRow(low_pc, first_line); + opcodes.NegateStmt(); + opcodes.SetPrologueEnd(); + } + opcodes.AddRow(low_pc + pc, line); + } else if (line != opcodes.CurrentLine()) { + opcodes.AddRow(low_pc + pc, line); + } + } + } + } else { + // line 0 - instruction cannot be attributed to any source line. + opcodes.AddRow(low_pc, 0); + } + } + opcodes.AdvancePC(text_section_offset + cunit_high_pc); + opcodes.EndSequence(); + WriteDebugLineTable(directories, files, opcodes, debug_line); +} + +} // namespace dwarf +} // namespace art diff --git a/compiler/elf_writer_debug.h b/compiler/elf_writer_debug.h new file mode 100644 index 0000000000..39a99d6d38 --- /dev/null +++ b/compiler/elf_writer_debug.h @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_ELF_WRITER_DEBUG_H_ +#define ART_COMPILER_ELF_WRITER_DEBUG_H_ + +#include <vector> + +#include "oat_writer.h" + +namespace art { +namespace dwarf { + +void WriteDebugSections(const CompilerDriver* compiler, + const OatWriter* oat_writer, + uint32_t text_section_offset, + std::vector<uint8_t>* eh_frame_data, + std::vector<uint8_t>* debug_info_data, + std::vector<uint8_t>* debug_abbrev_data, + std::vector<uint8_t>* debug_str_data, + std::vector<uint8_t>* debug_line_data); + +} // namespace dwarf +} // namespace art + +#endif // ART_COMPILER_ELF_WRITER_DEBUG_H_ diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc index 1bd83b6c85..e9af25f293 100644 --- a/compiler/elf_writer_quick.cc +++ b/compiler/elf_writer_quick.cc @@ -22,14 +22,13 @@ #include "base/unix_file/fd_file.h" #include "buffered_output_stream.h" #include "compiled_method.h" +#include "dex_file-inl.h" #include "driver/compiler_driver.h" #include "driver/compiler_options.h" -#include "dwarf.h" -#include "dwarf/debug_frame_writer.h" -#include "dwarf/debug_line_writer.h" #include "elf_builder.h" #include "elf_file.h" #include "elf_utils.h" +#include "elf_writer_debug.h" #include "file_output_stream.h" #include "globals.h" #include "leb128.h" @@ -39,42 +38,6 @@ namespace art { -static void PushByte(std::vector<uint8_t>* buf, int data) { - buf->push_back(data & 0xff); -} - -static uint32_t PushStr(std::vector<uint8_t>* buf, const char* str, const char* def = nullptr) { - if (str == nullptr) { - str = def; - } - - uint32_t offset = buf->size(); - for (size_t i = 0; str[i] != '\0'; ++i) { - buf->push_back(str[i]); - } - buf->push_back('\0'); - return offset; -} - -static uint32_t PushStr(std::vector<uint8_t>* buf, const std::string &str) { - uint32_t offset = buf->size(); - buf->insert(buf->end(), str.begin(), str.end()); - buf->push_back('\0'); - return offset; -} - -static void UpdateWord(std::vector<uint8_t>* buf, int offset, int data) { - (*buf)[offset+0] = data; - (*buf)[offset+1] = data >> 8; - (*buf)[offset+2] = data >> 16; - (*buf)[offset+3] = data >> 24; -} - -static void PushHalf(std::vector<uint8_t>* buf, int data) { - 
buf->push_back(data & 0xff); - buf->push_back((data >> 8) & 0xff); -} - template <typename Elf_Word, typename Elf_Sword, typename Elf_Addr, typename Elf_Dyn, typename Elf_Sym, typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr> @@ -147,7 +110,8 @@ bool ElfWriterQuick<Elf_Word, Elf_Sword, Elf_Addr, Elf_Dyn, return false; } - if (compiler_driver_->GetCompilerOptions().GetIncludeDebugSymbols()) { + if (compiler_driver_->GetCompilerOptions().GetIncludeDebugSymbols() && + !oat_writer->GetMethodDebugInfo().empty()) { WriteDebugSymbols(compiler_driver_, builder.get(), oat_writer); } @@ -167,341 +131,6 @@ bool ElfWriterQuick<Elf_Word, Elf_Sword, Elf_Addr, Elf_Dyn, return builder->Write(); } -// TODO: rewriting it using DexFile::DecodeDebugInfo needs unneeded stuff. -static void GetLineInfoForJava(const uint8_t* dbgstream, DefaultSrcMap* dex2line) { - if (dbgstream == nullptr) { - return; - } - - int adjopcode; - uint32_t dex_offset = 0; - uint32_t java_line = DecodeUnsignedLeb128(&dbgstream); - - // skip parameters - for (uint32_t param_count = DecodeUnsignedLeb128(&dbgstream); param_count != 0; --param_count) { - DecodeUnsignedLeb128(&dbgstream); - } - - for (bool is_end = false; is_end == false; ) { - uint8_t opcode = *dbgstream; - dbgstream++; - switch (opcode) { - case DexFile::DBG_END_SEQUENCE: - is_end = true; - break; - - case DexFile::DBG_ADVANCE_PC: - dex_offset += DecodeUnsignedLeb128(&dbgstream); - break; - - case DexFile::DBG_ADVANCE_LINE: - java_line += DecodeSignedLeb128(&dbgstream); - break; - - case DexFile::DBG_START_LOCAL: - case DexFile::DBG_START_LOCAL_EXTENDED: - DecodeUnsignedLeb128(&dbgstream); - DecodeUnsignedLeb128(&dbgstream); - DecodeUnsignedLeb128(&dbgstream); - - if (opcode == DexFile::DBG_START_LOCAL_EXTENDED) { - DecodeUnsignedLeb128(&dbgstream); - } - break; - - case DexFile::DBG_END_LOCAL: - case DexFile::DBG_RESTART_LOCAL: - DecodeUnsignedLeb128(&dbgstream); - break; - - case DexFile::DBG_SET_PROLOGUE_END: - case DexFile::DBG_SET_EPILOGUE_BEGIN: - case DexFile::DBG_SET_FILE: - break; - - default: - adjopcode = opcode - DexFile::DBG_FIRST_SPECIAL; - dex_offset += adjopcode / DexFile::DBG_LINE_RANGE; - java_line += DexFile::DBG_LINE_BASE + (adjopcode % DexFile::DBG_LINE_RANGE); - dex2line->push_back({dex_offset, static_cast<int32_t>(java_line)}); - break; - } - } -} - -/* - * @brief Generate the DWARF debug_info and debug_abbrev sections - * @param oat_writer The Oat file Writer. - * @param dbg_info Compilation unit information. - * @param dbg_abbrev Abbreviations used to generate dbg_info. - * @param dbg_str Debug strings. - */ -static void FillInCFIInformation(OatWriter* oat_writer, - std::vector<uint8_t>* dbg_info, - std::vector<uint8_t>* dbg_abbrev, - std::vector<uint8_t>* dbg_str, - std::vector<uint8_t>* dbg_line, - uint32_t text_section_offset) { - const std::vector<OatWriter::DebugInfo>& method_infos = oat_writer->GetCFIMethodInfo(); - - uint32_t producer_str_offset = PushStr(dbg_str, "Android dex2oat"); - - constexpr bool use_64bit_addresses = false; - - // Create the debug_abbrev section with boilerplate information. - // We only care about low_pc and high_pc right now for the compilation - // unit and methods. - - // Tag 1: Compilation unit: DW_TAG_compile_unit. - PushByte(dbg_abbrev, 1); - PushByte(dbg_abbrev, dwarf::DW_TAG_compile_unit); - - // There are children (the methods). - PushByte(dbg_abbrev, dwarf::DW_CHILDREN_yes); - - // DW_AT_producer DW_FORM_data1. 
- // REVIEW: we can get rid of dbg_str section if - // DW_FORM_string (immediate string) was used everywhere instead of - // DW_FORM_strp (ref to string from .debug_str section). - // DW_FORM_strp makes sense only if we reuse the strings. - PushByte(dbg_abbrev, dwarf::DW_AT_producer); - PushByte(dbg_abbrev, dwarf::DW_FORM_strp); - - // DW_LANG_Java DW_FORM_data1. - PushByte(dbg_abbrev, dwarf::DW_AT_language); - PushByte(dbg_abbrev, dwarf::DW_FORM_data1); - - // DW_AT_low_pc DW_FORM_addr. - PushByte(dbg_abbrev, dwarf::DW_AT_low_pc); - PushByte(dbg_abbrev, dwarf::DW_FORM_addr); - - // DW_AT_high_pc DW_FORM_addr. - PushByte(dbg_abbrev, dwarf::DW_AT_high_pc); - PushByte(dbg_abbrev, dwarf::DW_FORM_addr); - - if (dbg_line != nullptr) { - // DW_AT_stmt_list DW_FORM_sec_offset. - PushByte(dbg_abbrev, dwarf::DW_AT_stmt_list); - PushByte(dbg_abbrev, dwarf::DW_FORM_data4); - } - - // End of DW_TAG_compile_unit. - PushByte(dbg_abbrev, 0); // DW_AT. - PushByte(dbg_abbrev, 0); // DW_FORM. - - // Tag 2: Compilation unit: DW_TAG_subprogram. - PushByte(dbg_abbrev, 2); - PushByte(dbg_abbrev, dwarf::DW_TAG_subprogram); - - // There are no children. - PushByte(dbg_abbrev, dwarf::DW_CHILDREN_no); - - // Name of the method. - PushByte(dbg_abbrev, dwarf::DW_AT_name); - PushByte(dbg_abbrev, dwarf::DW_FORM_strp); - - // DW_AT_low_pc DW_FORM_addr. - PushByte(dbg_abbrev, dwarf::DW_AT_low_pc); - PushByte(dbg_abbrev, dwarf::DW_FORM_addr); - - // DW_AT_high_pc DW_FORM_addr. - PushByte(dbg_abbrev, dwarf::DW_AT_high_pc); - PushByte(dbg_abbrev, dwarf::DW_FORM_addr); - - // End of DW_TAG_subprogram. - PushByte(dbg_abbrev, 0); // DW_AT. - PushByte(dbg_abbrev, 0); // DW_FORM. - - // End of abbrevs for compilation unit - PushByte(dbg_abbrev, 0); - - // Start the debug_info section with the header information - // 'unit_length' will be filled in later. - int cunit_length = dbg_info->size(); - Push32(dbg_info, 0); - - // 'version' - 3. - PushHalf(dbg_info, 3); - - // Offset into .debug_abbrev section (always 0). - Push32(dbg_info, 0); - - // Address size: 4 or 8. - PushByte(dbg_info, use_64bit_addresses ? 8 : 4); - - // Start the description for the compilation unit. - // This uses tag 1. - PushByte(dbg_info, 1); - - // The producer is Android dex2oat. - Push32(dbg_info, producer_str_offset); - - // The language is Java. - PushByte(dbg_info, dwarf::DW_LANG_Java); - - // low_pc and high_pc. - uint32_t cunit_low_pc = static_cast<uint32_t>(-1); - uint32_t cunit_high_pc = 0; - for (auto method_info : method_infos) { - cunit_low_pc = std::min(cunit_low_pc, method_info.low_pc_); - cunit_high_pc = std::max(cunit_high_pc, method_info.high_pc_); - } - Push32(dbg_info, cunit_low_pc + text_section_offset); - Push32(dbg_info, cunit_high_pc + text_section_offset); - - if (dbg_line != nullptr) { - // Line number table offset. - Push32(dbg_info, dbg_line->size()); - } - - for (auto method_info : method_infos) { - // Start a new TAG: subroutine (2). - PushByte(dbg_info, 2); - - // Enter name, low_pc, high_pc. - Push32(dbg_info, PushStr(dbg_str, method_info.method_name_)); - Push32(dbg_info, method_info.low_pc_ + text_section_offset); - Push32(dbg_info, method_info.high_pc_ + text_section_offset); - } - - if (dbg_line != nullptr) { - // TODO: in gdb info functions <regexp> - reports Java functions, but - // source file is <unknown> because .debug_line is formed as one - // compilation unit. To fix this it is possible to generate - // a separate compilation unit for every distinct Java source. 
- // Each of the these compilation units can have several non-adjacent - // method ranges. - - std::vector<dwarf::DebugLineWriter<>::FileEntry> files; - std::unordered_map<std::string, size_t> files_map; - std::vector<std::string> directories; - std::unordered_map<std::string, size_t> directories_map; - - int code_factor_bits_ = 0; - int isa = -1; - switch (oat_writer->GetOatHeader().GetInstructionSet()) { - case kArm: // arm actually means thumb2. - case kThumb2: - code_factor_bits_ = 1; // 16-bit instuctions - isa = 1; // DW_ISA_ARM_thumb. - break; - case kArm64: - case kMips: - case kMips64: - code_factor_bits_ = 2; // 32-bit instructions - break; - case kNone: - case kX86: - case kX86_64: - break; - } - - dwarf::DebugLineOpCodeWriter<> opcodes(use_64bit_addresses, code_factor_bits_); - opcodes.SetAddress(text_section_offset + cunit_low_pc); - if (isa != -1) { - opcodes.SetISA(isa); - } - DefaultSrcMap dex2line_map; - for (size_t i = 0; i < method_infos.size(); i++) { - const OatWriter::DebugInfo& method_info = method_infos[i]; - - // Addresses in the line table should be unique and increasing. - if (method_info.deduped_) { - continue; - } - - // Get and deduplicate directory and filename. - int file_index = 0; // 0 - primary source file of the compilation. - if (method_info.src_file_name_ != nullptr) { - std::string file_name(method_info.src_file_name_); - size_t file_name_slash = file_name.find_last_of('/'); - std::string class_name(method_info.class_descriptor_); - size_t class_name_slash = class_name.find_last_of('/'); - std::string full_path(file_name); - - // Guess directory from package name. - int directory_index = 0; // 0 - current directory of the compilation. - if (file_name_slash == std::string::npos && // Just filename. - class_name.front() == 'L' && // Type descriptor for a class. - class_name_slash != std::string::npos) { // Has package name. - std::string package_name = class_name.substr(1, class_name_slash - 1); - auto it = directories_map.find(package_name); - if (it == directories_map.end()) { - directory_index = 1 + directories.size(); - directories_map.emplace(package_name, directory_index); - directories.push_back(package_name); - } else { - directory_index = it->second; - } - full_path = package_name + "/" + file_name; - } - - // Add file entry. - auto it2 = files_map.find(full_path); - if (it2 == files_map.end()) { - file_index = 1 + files.size(); - files_map.emplace(full_path, file_index); - files.push_back(dwarf::DebugLineWriter<>::FileEntry { - file_name, - directory_index, - 0, // Modification time - NA. - 0, // File size - NA. - }); - } else { - file_index = it2->second; - } - } - opcodes.SetFile(file_index); - - // Generate mapping opcodes from PC to Java lines. - dex2line_map.clear(); - GetLineInfoForJava(method_info.dbgstream_, &dex2line_map); - uint32_t low_pc = text_section_offset + method_info.low_pc_; - if (file_index != 0 && !dex2line_map.empty()) { - bool first = true; - for (SrcMapElem pc2dex : method_info.compiled_method_->GetSrcMappingTable()) { - uint32_t pc = pc2dex.from_; - int dex = pc2dex.to_; - auto dex2line = dex2line_map.Find(static_cast<uint32_t>(dex)); - if (dex2line.first) { - int line = dex2line.second; - if (first) { - first = false; - if (pc > 0) { - // Assume that any preceding code is prologue. - int first_line = dex2line_map.front().to_; - // Prologue is not a sensible place for a breakpoint. 
- opcodes.NegateStmt(); - opcodes.AddRow(low_pc, first_line); - opcodes.NegateStmt(); - opcodes.SetPrologueEnd(); - } - opcodes.AddRow(low_pc + pc, line); - } else if (line != opcodes.CurrentLine()) { - opcodes.AddRow(low_pc + pc, line); - } - } - } - } else { - // line 0 - instruction cannot be attributed to any source line. - opcodes.AddRow(low_pc, 0); - } - } - - opcodes.AdvancePC(text_section_offset + cunit_high_pc); - opcodes.EndSequence(); - - dwarf::DebugLineWriter<> dbg_line_writer(dbg_line); - dbg_line_writer.WriteTable(directories, files, opcodes); - } - - // One byte terminator. - PushByte(dbg_info, 0); - - // We have now walked all the methods. Fill in lengths. - UpdateWord(dbg_info, cunit_length, dbg_info->size() - cunit_length - 4); -} - template <typename Elf_Word, typename Elf_Sword, typename Elf_Addr, typename Elf_Dyn, typename Elf_Sym, typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr> @@ -511,18 +140,22 @@ static void WriteDebugSymbols(const CompilerDriver* compiler_driver, ElfBuilder<Elf_Word, Elf_Sword, Elf_Addr, Elf_Dyn, Elf_Sym, Elf_Ehdr, Elf_Phdr, Elf_Shdr>* builder, OatWriter* oat_writer) { - UNUSED(compiler_driver); - Elf_Addr text_section_address = builder->GetTextBuilder().GetSection()->sh_addr; - // Iterate over the compiled methods. - const std::vector<OatWriter::DebugInfo>& method_info = oat_writer->GetCFIMethodInfo(); + const std::vector<OatWriter::DebugInfo>& method_info = oat_writer->GetMethodDebugInfo(); ElfSymtabBuilder<Elf_Word, Elf_Sword, Elf_Addr, Elf_Sym, Elf_Shdr>* symtab = builder->GetSymtabBuilder(); for (auto it = method_info.begin(); it != method_info.end(); ++it) { + std::string name = PrettyMethod(it->dex_method_index_, *it->dex_file_, true); + if (it->deduped_) { + // TODO We should place the DEDUPED tag on the first instance of a deduplicated symbol + // so that it will show up in a debuggerd crash report. + name += " [ DEDUPED ]"; + } + uint32_t low_pc = it->low_pc_; // Add in code delta, e.g., thumb bit 0 for Thumb2 code. low_pc += it->compiled_method_->CodeDelta(); - symtab->AddSymbol(it->method_name_, &builder->GetTextBuilder(), low_pc, + symtab->AddSymbol(name, &builder->GetTextBuilder(), low_pc, true, it->high_pc_ - it->low_pc_, STB_GLOBAL, STT_FUNC); // Conforming to aaelf, add $t mapping symbol to indicate start of a sequence of thumb2 @@ -533,43 +166,27 @@ static void WriteDebugSymbols(const CompilerDriver* compiler_driver, } } - bool hasLineInfo = false; - for (auto& dbg_info : oat_writer->GetCFIMethodInfo()) { - if (dbg_info.dbgstream_ != nullptr && - !dbg_info.compiled_method_->GetSrcMappingTable().empty()) { - hasLineInfo = true; - break; - } - } - - if (hasLineInfo) { - ElfRawSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr> debug_info(".debug_info", - SHT_PROGBITS, - 0, nullptr, 0, 1, 0); - ElfRawSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr> debug_abbrev(".debug_abbrev", - SHT_PROGBITS, - 0, nullptr, 0, 1, 0); - ElfRawSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr> debug_str(".debug_str", - SHT_PROGBITS, - 0, nullptr, 0, 1, 0); - ElfRawSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr> debug_line(".debug_line", - SHT_PROGBITS, - 0, nullptr, 0, 1, 0); - - FillInCFIInformation(oat_writer, debug_info.GetBuffer(), - debug_abbrev.GetBuffer(), debug_str.GetBuffer(), - hasLineInfo ? 
debug_line.GetBuffer() : nullptr, - text_section_address); - - builder->RegisterRawSection(debug_info); - builder->RegisterRawSection(debug_abbrev); - - if (hasLineInfo) { - builder->RegisterRawSection(debug_line); - } - - builder->RegisterRawSection(debug_str); - } + typedef ElfRawSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr> Section; + Section eh_frame(".eh_frame", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, 4, 0); + Section debug_info(".debug_info", SHT_PROGBITS, 0, nullptr, 0, 1, 0); + Section debug_abbrev(".debug_abbrev", SHT_PROGBITS, 0, nullptr, 0, 1, 0); + Section debug_str(".debug_str", SHT_PROGBITS, 0, nullptr, 0, 1, 0); + Section debug_line(".debug_line", SHT_PROGBITS, 0, nullptr, 0, 1, 0); + + dwarf::WriteDebugSections(compiler_driver, + oat_writer, + builder->GetTextBuilder().GetSection()->sh_addr, + eh_frame.GetBuffer(), + debug_info.GetBuffer(), + debug_abbrev.GetBuffer(), + debug_str.GetBuffer(), + debug_line.GetBuffer()); + + builder->RegisterRawSection(eh_frame); + builder->RegisterRawSection(debug_info); + builder->RegisterRawSection(debug_abbrev); + builder->RegisterRawSection(debug_str); + builder->RegisterRawSection(debug_line); } // Explicit instantiations diff --git a/compiler/jni/jni_cfi_test.cc b/compiler/jni/jni_cfi_test.cc new file mode 100644 index 0000000000..3a0d520e47 --- /dev/null +++ b/compiler/jni/jni_cfi_test.cc @@ -0,0 +1,93 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <memory> +#include <vector> + +#include "arch/instruction_set.h" +#include "cfi_test.h" +#include "gtest/gtest.h" +#include "jni/quick/calling_convention.h" +#include "utils/assembler.h" + +#include "jni/jni_cfi_test_expected.inc" + +namespace art { + +// Run the tests only on host. +#ifndef HAVE_ANDROID_OS + +class JNICFITest : public CFITest { + public: + // Enable this flag to generate the expected outputs. + static constexpr bool kGenerateExpected = false; + + void TestImpl(InstructionSet isa, const char* isa_str, + const std::vector<uint8_t>& expected_asm, + const std::vector<uint8_t>& expected_cfi) { + // Description of simple method. + const bool is_static = true; + const bool is_synchronized = false; + const char* shorty = "IIFII"; + std::unique_ptr<JniCallingConvention> jni_conv( + JniCallingConvention::Create(is_static, is_synchronized, shorty, isa)); + std::unique_ptr<ManagedRuntimeCallingConvention> mr_conv( + ManagedRuntimeCallingConvention::Create(is_static, is_synchronized, shorty, isa)); + const int frame_size(jni_conv->FrameSize()); + const std::vector<ManagedRegister>& callee_save_regs = jni_conv->CalleeSaveRegisters(); + + // Assemble the method. 
+ std::unique_ptr<Assembler> jni_asm(Assembler::Create(isa)); + jni_asm->BuildFrame(frame_size, mr_conv->MethodRegister(), + callee_save_regs, mr_conv->EntrySpills()); + jni_asm->IncreaseFrameSize(32); + jni_asm->DecreaseFrameSize(32); + jni_asm->RemoveFrame(frame_size, callee_save_regs); + jni_asm->EmitSlowPaths(); + std::vector<uint8_t> actual_asm(jni_asm->CodeSize()); + MemoryRegion code(&actual_asm[0], actual_asm.size()); + jni_asm->FinalizeInstructions(code); + ASSERT_EQ(jni_asm->cfi().GetCurrentCFAOffset(), frame_size); + const std::vector<uint8_t>& actual_cfi = *(jni_asm->cfi().data()); + + if (kGenerateExpected) { + GenerateExpected(stdout, isa, isa_str, actual_asm, actual_cfi); + } else { + EXPECT_EQ(expected_asm, actual_asm); + EXPECT_EQ(expected_cfi, actual_cfi); + } + } +}; + +#define TEST_ISA(isa) \ + TEST_F(JNICFITest, isa) { \ + std::vector<uint8_t> expected_asm(expected_asm_##isa, \ + expected_asm_##isa + arraysize(expected_asm_##isa)); \ + std::vector<uint8_t> expected_cfi(expected_cfi_##isa, \ + expected_cfi_##isa + arraysize(expected_cfi_##isa)); \ + TestImpl(isa, #isa, expected_asm, expected_cfi); \ + } + +TEST_ISA(kThumb2) +TEST_ISA(kArm64) +TEST_ISA(kX86) +TEST_ISA(kX86_64) +TEST_ISA(kMips) +TEST_ISA(kMips64) + +#endif // HAVE_ANDROID_OS + +} // namespace art diff --git a/compiler/jni/jni_cfi_test_expected.inc b/compiler/jni/jni_cfi_test_expected.inc new file mode 100644 index 0000000000..47e6f106ca --- /dev/null +++ b/compiler/jni/jni_cfi_test_expected.inc @@ -0,0 +1,505 @@ +static constexpr uint8_t expected_asm_kThumb2[] = { + 0x2D, 0xE9, 0xE0, 0x4D, 0x2D, 0xED, 0x10, 0x8A, 0x89, 0xB0, 0x00, 0x90, + 0xCD, 0xF8, 0x84, 0x10, 0x8D, 0xED, 0x22, 0x0A, 0xCD, 0xF8, 0x8C, 0x20, + 0xCD, 0xF8, 0x90, 0x30, 0x88, 0xB0, 0x08, 0xB0, 0x09, 0xB0, 0xBD, 0xEC, + 0x10, 0x8A, 0xBD, 0xE8, 0xE0, 0x8D, +}; +static constexpr uint8_t expected_cfi_kThumb2[] = { + 0x44, 0x0E, 0x1C, 0x85, 0x07, 0x86, 0x06, 0x87, 0x05, 0x88, 0x04, 0x8A, + 0x03, 0x8B, 0x02, 0x8E, 0x01, 0x44, 0x0E, 0x5C, 0x05, 0x50, 0x17, 0x05, + 0x51, 0x16, 0x05, 0x52, 0x15, 0x05, 0x53, 0x14, 0x05, 0x54, 0x13, 0x05, + 0x55, 0x12, 0x05, 0x56, 0x11, 0x05, 0x57, 0x10, 0x05, 0x58, 0x0F, 0x05, + 0x59, 0x0E, 0x05, 0x5A, 0x0D, 0x05, 0x5B, 0x0C, 0x05, 0x5C, 0x0B, 0x05, + 0x5D, 0x0A, 0x05, 0x5E, 0x09, 0x05, 0x5F, 0x08, 0x42, 0x0E, 0x80, 0x01, + 0x54, 0x0E, 0xA0, 0x01, 0x42, 0x0E, 0x80, 0x01, 0x0A, 0x42, 0x0E, 0x5C, + 0x44, 0x0E, 0x1C, 0x06, 0x50, 0x06, 0x51, 0x06, 0x52, 0x06, 0x53, 0x06, + 0x54, 0x06, 0x55, 0x06, 0x56, 0x06, 0x57, 0x06, 0x58, 0x06, 0x59, 0x06, + 0x5A, 0x06, 0x5B, 0x06, 0x5C, 0x06, 0x5D, 0x06, 0x5E, 0x06, 0x5F, 0x44, + 0x0B, 0x0E, 0x80, 0x01, +}; +// 0x00000000: push {r5, r6, r7, r8, r10, r11, lr} +// 0x00000004: .cfi_def_cfa_offset: 28 +// 0x00000004: .cfi_offset: r5 at cfa-28 +// 0x00000004: .cfi_offset: r6 at cfa-24 +// 0x00000004: .cfi_offset: r7 at cfa-20 +// 0x00000004: .cfi_offset: r8 at cfa-16 +// 0x00000004: .cfi_offset: r10 at cfa-12 +// 0x00000004: .cfi_offset: r11 at cfa-8 +// 0x00000004: .cfi_offset: r14 at cfa-4 +// 0x00000004: vpush.f32 {s16-s31} +// 0x00000008: .cfi_def_cfa_offset: 92 +// 0x00000008: .cfi_offset_extended: r80 at cfa-92 +// 0x00000008: .cfi_offset_extended: r81 at cfa-88 +// 0x00000008: .cfi_offset_extended: r82 at cfa-84 +// 0x00000008: .cfi_offset_extended: r83 at cfa-80 +// 0x00000008: .cfi_offset_extended: r84 at cfa-76 +// 0x00000008: .cfi_offset_extended: r85 at cfa-72 +// 0x00000008: .cfi_offset_extended: r86 at cfa-68 +// 0x00000008: .cfi_offset_extended: r87 at cfa-64 +// 
0x00000008: .cfi_offset_extended: r88 at cfa-60 +// 0x00000008: .cfi_offset_extended: r89 at cfa-56 +// 0x00000008: .cfi_offset_extended: r90 at cfa-52 +// 0x00000008: .cfi_offset_extended: r91 at cfa-48 +// 0x00000008: .cfi_offset_extended: r92 at cfa-44 +// 0x00000008: .cfi_offset_extended: r93 at cfa-40 +// 0x00000008: .cfi_offset_extended: r94 at cfa-36 +// 0x00000008: .cfi_offset_extended: r95 at cfa-32 +// 0x00000008: sub sp, sp, #36 +// 0x0000000a: .cfi_def_cfa_offset: 128 +// 0x0000000a: str r0, [sp, #0] +// 0x0000000c: str.w r1, [sp, #132] +// 0x00000010: vstr.f32 s0, [sp, #136] +// 0x00000014: str.w r2, [sp, #140] +// 0x00000018: str.w r3, [sp, #144] +// 0x0000001c: sub sp, sp, #32 +// 0x0000001e: .cfi_def_cfa_offset: 160 +// 0x0000001e: add sp, sp, #32 +// 0x00000020: .cfi_def_cfa_offset: 128 +// 0x00000020: .cfi_remember_state +// 0x00000020: add sp, sp, #36 +// 0x00000022: .cfi_def_cfa_offset: 92 +// 0x00000022: vpop.f32 {s16-s31} +// 0x00000026: .cfi_def_cfa_offset: 28 +// 0x00000026: .cfi_restore_extended: r80 +// 0x00000026: .cfi_restore_extended: r81 +// 0x00000026: .cfi_restore_extended: r82 +// 0x00000026: .cfi_restore_extended: r83 +// 0x00000026: .cfi_restore_extended: r84 +// 0x00000026: .cfi_restore_extended: r85 +// 0x00000026: .cfi_restore_extended: r86 +// 0x00000026: .cfi_restore_extended: r87 +// 0x00000026: .cfi_restore_extended: r88 +// 0x00000026: .cfi_restore_extended: r89 +// 0x00000026: .cfi_restore_extended: r90 +// 0x00000026: .cfi_restore_extended: r91 +// 0x00000026: .cfi_restore_extended: r92 +// 0x00000026: .cfi_restore_extended: r93 +// 0x00000026: .cfi_restore_extended: r94 +// 0x00000026: .cfi_restore_extended: r95 +// 0x00000026: pop {r5, r6, r7, r8, r10, r11, pc} +// 0x0000002a: .cfi_restore_state +// 0x0000002a: .cfi_def_cfa_offset: 128 + +static constexpr uint8_t expected_asm_kArm64[] = { + 0xFF, 0x03, 0x03, 0xD1, 0xFE, 0x5F, 0x00, 0xF9, 0xFD, 0x5B, 0x00, 0xF9, + 0xFC, 0x57, 0x00, 0xF9, 0xFB, 0x53, 0x00, 0xF9, 0xFA, 0x4F, 0x00, 0xF9, + 0xF9, 0x4B, 0x00, 0xF9, 0xF8, 0x47, 0x00, 0xF9, 0xF7, 0x43, 0x00, 0xF9, + 0xF6, 0x3F, 0x00, 0xF9, 0xF5, 0x3B, 0x00, 0xF9, 0xF4, 0x37, 0x00, 0xF9, + 0xEF, 0x33, 0x00, 0xFD, 0xEE, 0x2F, 0x00, 0xFD, 0xED, 0x2B, 0x00, 0xFD, + 0xEC, 0x27, 0x00, 0xFD, 0xEB, 0x23, 0x00, 0xFD, 0xEA, 0x1F, 0x00, 0xFD, + 0xE9, 0x1B, 0x00, 0xFD, 0xE8, 0x17, 0x00, 0xFD, 0xF5, 0x03, 0x12, 0xAA, + 0xE0, 0x03, 0x00, 0xB9, 0xE1, 0xC7, 0x00, 0xB9, 0xE0, 0xCB, 0x00, 0xBD, + 0xE2, 0xCF, 0x00, 0xB9, 0xE3, 0xD3, 0x00, 0xB9, 0xFF, 0x83, 0x00, 0xD1, + 0xFF, 0x83, 0x00, 0x91, 0xF2, 0x03, 0x15, 0xAA, 0xFE, 0x5F, 0x40, 0xF9, + 0xFD, 0x5B, 0x40, 0xF9, 0xFC, 0x57, 0x40, 0xF9, 0xFB, 0x53, 0x40, 0xF9, + 0xFA, 0x4F, 0x40, 0xF9, 0xF9, 0x4B, 0x40, 0xF9, 0xF8, 0x47, 0x40, 0xF9, + 0xF7, 0x43, 0x40, 0xF9, 0xF6, 0x3F, 0x40, 0xF9, 0xF5, 0x3B, 0x40, 0xF9, + 0xF4, 0x37, 0x40, 0xF9, 0xEF, 0x33, 0x40, 0xFD, 0xEE, 0x2F, 0x40, 0xFD, + 0xED, 0x2B, 0x40, 0xFD, 0xEC, 0x27, 0x40, 0xFD, 0xEB, 0x23, 0x40, 0xFD, + 0xEA, 0x1F, 0x40, 0xFD, 0xE9, 0x1B, 0x40, 0xFD, 0xE8, 0x17, 0x40, 0xFD, + 0xFF, 0x03, 0x03, 0x91, 0xC0, 0x03, 0x5F, 0xD6, +}; +static constexpr uint8_t expected_cfi_kArm64[] = { + 0x44, 0x0E, 0xC0, 0x01, 0x44, 0x9E, 0x02, 0x44, 0x9D, 0x04, 0x44, 0x9C, + 0x06, 0x44, 0x9B, 0x08, 0x44, 0x9A, 0x0A, 0x44, 0x99, 0x0C, 0x44, 0x98, + 0x0E, 0x44, 0x97, 0x10, 0x44, 0x96, 0x12, 0x44, 0x95, 0x14, 0x44, 0x94, + 0x16, 0x44, 0x05, 0x4F, 0x18, 0x44, 0x05, 0x4E, 0x1A, 0x44, 0x05, 0x4D, + 0x1C, 0x44, 0x05, 0x4C, 0x1E, 0x44, 0x05, 0x4B, 0x20, 0x44, 0x05, 0x4A, + 0x22, 0x44, 0x05, 0x49, 
0x24, 0x44, 0x05, 0x48, 0x26, 0x5C, 0x0E, 0xE0, + 0x01, 0x44, 0x0E, 0xC0, 0x01, 0x0A, 0x48, 0xDE, 0x44, 0xDD, 0x44, 0xDC, + 0x44, 0xDB, 0x44, 0xDA, 0x44, 0xD9, 0x44, 0xD8, 0x44, 0xD7, 0x44, 0xD6, + 0x44, 0xD5, 0x44, 0xD4, 0x44, 0x06, 0x4F, 0x44, 0x06, 0x4E, 0x44, 0x06, + 0x4D, 0x44, 0x06, 0x4C, 0x44, 0x06, 0x4B, 0x44, 0x06, 0x4A, 0x44, 0x06, + 0x49, 0x44, 0x06, 0x48, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0xC0, 0x01, +}; +// 0x00000000: sub sp, sp, #0xc0 (192) +// 0x00000004: .cfi_def_cfa_offset: 192 +// 0x00000004: str lr, [sp, #184] +// 0x00000008: .cfi_offset: r30 at cfa-8 +// 0x00000008: str x29, [sp, #176] +// 0x0000000c: .cfi_offset: r29 at cfa-16 +// 0x0000000c: str x28, [sp, #168] +// 0x00000010: .cfi_offset: r28 at cfa-24 +// 0x00000010: str x27, [sp, #160] +// 0x00000014: .cfi_offset: r27 at cfa-32 +// 0x00000014: str x26, [sp, #152] +// 0x00000018: .cfi_offset: r26 at cfa-40 +// 0x00000018: str x25, [sp, #144] +// 0x0000001c: .cfi_offset: r25 at cfa-48 +// 0x0000001c: str x24, [sp, #136] +// 0x00000020: .cfi_offset: r24 at cfa-56 +// 0x00000020: str x23, [sp, #128] +// 0x00000024: .cfi_offset: r23 at cfa-64 +// 0x00000024: str x22, [sp, #120] +// 0x00000028: .cfi_offset: r22 at cfa-72 +// 0x00000028: str x21, [sp, #112] +// 0x0000002c: .cfi_offset: r21 at cfa-80 +// 0x0000002c: str x20, [sp, #104] +// 0x00000030: .cfi_offset: r20 at cfa-88 +// 0x00000030: str d15, [sp, #96] +// 0x00000034: .cfi_offset_extended: r79 at cfa-96 +// 0x00000034: str d14, [sp, #88] +// 0x00000038: .cfi_offset_extended: r78 at cfa-104 +// 0x00000038: str d13, [sp, #80] +// 0x0000003c: .cfi_offset_extended: r77 at cfa-112 +// 0x0000003c: str d12, [sp, #72] +// 0x00000040: .cfi_offset_extended: r76 at cfa-120 +// 0x00000040: str d11, [sp, #64] +// 0x00000044: .cfi_offset_extended: r75 at cfa-128 +// 0x00000044: str d10, [sp, #56] +// 0x00000048: .cfi_offset_extended: r74 at cfa-136 +// 0x00000048: str d9, [sp, #48] +// 0x0000004c: .cfi_offset_extended: r73 at cfa-144 +// 0x0000004c: str d8, [sp, #40] +// 0x00000050: .cfi_offset_extended: r72 at cfa-152 +// 0x00000050: mov x21, tr +// 0x00000054: str w0, [sp] +// 0x00000058: str w1, [sp, #196] +// 0x0000005c: str s0, [sp, #200] +// 0x00000060: str w2, [sp, #204] +// 0x00000064: str w3, [sp, #208] +// 0x00000068: sub sp, sp, #0x20 (32) +// 0x0000006c: .cfi_def_cfa_offset: 224 +// 0x0000006c: add sp, sp, #0x20 (32) +// 0x00000070: .cfi_def_cfa_offset: 192 +// 0x00000070: .cfi_remember_state +// 0x00000070: mov tr, x21 +// 0x00000074: ldr lr, [sp, #184] +// 0x00000078: .cfi_restore: r30 +// 0x00000078: ldr x29, [sp, #176] +// 0x0000007c: .cfi_restore: r29 +// 0x0000007c: ldr x28, [sp, #168] +// 0x00000080: .cfi_restore: r28 +// 0x00000080: ldr x27, [sp, #160] +// 0x00000084: .cfi_restore: r27 +// 0x00000084: ldr x26, [sp, #152] +// 0x00000088: .cfi_restore: r26 +// 0x00000088: ldr x25, [sp, #144] +// 0x0000008c: .cfi_restore: r25 +// 0x0000008c: ldr x24, [sp, #136] +// 0x00000090: .cfi_restore: r24 +// 0x00000090: ldr x23, [sp, #128] +// 0x00000094: .cfi_restore: r23 +// 0x00000094: ldr x22, [sp, #120] +// 0x00000098: .cfi_restore: r22 +// 0x00000098: ldr x21, [sp, #112] +// 0x0000009c: .cfi_restore: r21 +// 0x0000009c: ldr x20, [sp, #104] +// 0x000000a0: .cfi_restore: r20 +// 0x000000a0: ldr d15, [sp, #96] +// 0x000000a4: .cfi_restore_extended: r79 +// 0x000000a4: ldr d14, [sp, #88] +// 0x000000a8: .cfi_restore_extended: r78 +// 0x000000a8: ldr d13, [sp, #80] +// 0x000000ac: .cfi_restore_extended: r77 +// 0x000000ac: ldr d12, [sp, #72] +// 0x000000b0: 
.cfi_restore_extended: r76 +// 0x000000b0: ldr d11, [sp, #64] +// 0x000000b4: .cfi_restore_extended: r75 +// 0x000000b4: ldr d10, [sp, #56] +// 0x000000b8: .cfi_restore_extended: r74 +// 0x000000b8: ldr d9, [sp, #48] +// 0x000000bc: .cfi_restore_extended: r73 +// 0x000000bc: ldr d8, [sp, #40] +// 0x000000c0: .cfi_restore_extended: r72 +// 0x000000c0: add sp, sp, #0xc0 (192) +// 0x000000c4: .cfi_def_cfa_offset: 0 +// 0x000000c4: ret +// 0x000000c8: .cfi_restore_state +// 0x000000c8: .cfi_def_cfa_offset: 192 + +static constexpr uint8_t expected_asm_kX86[] = { + 0x57, 0x56, 0x55, 0x83, 0xC4, 0xE4, 0x50, 0x89, 0x4C, 0x24, 0x34, 0xF3, + 0x0F, 0x11, 0x44, 0x24, 0x38, 0x89, 0x54, 0x24, 0x3C, 0x89, 0x5C, 0x24, + 0x40, 0x83, 0xC4, 0xE0, 0x83, 0xC4, 0x20, 0x83, 0xC4, 0x20, 0x5D, 0x5E, + 0x5F, 0xC3, +}; +static constexpr uint8_t expected_cfi_kX86[] = { + 0x41, 0x0E, 0x08, 0x87, 0x02, 0x41, 0x0E, 0x0C, 0x86, 0x03, 0x41, 0x0E, + 0x10, 0x85, 0x04, 0x43, 0x0E, 0x2C, 0x41, 0x0E, 0x30, 0x55, 0x0E, 0x50, + 0x43, 0x0E, 0x30, 0x0A, 0x43, 0x0E, 0x10, 0x41, 0x0E, 0x0C, 0xC5, 0x41, + 0x0E, 0x08, 0xC6, 0x41, 0x0E, 0x04, 0xC7, 0x41, 0x0B, 0x0E, 0x30, +}; +// 0x00000000: push edi +// 0x00000001: .cfi_def_cfa_offset: 8 +// 0x00000001: .cfi_offset: r7 at cfa-8 +// 0x00000001: push esi +// 0x00000002: .cfi_def_cfa_offset: 12 +// 0x00000002: .cfi_offset: r6 at cfa-12 +// 0x00000002: push ebp +// 0x00000003: .cfi_def_cfa_offset: 16 +// 0x00000003: .cfi_offset: r5 at cfa-16 +// 0x00000003: add esp, -28 +// 0x00000006: .cfi_def_cfa_offset: 44 +// 0x00000006: push eax +// 0x00000007: .cfi_def_cfa_offset: 48 +// 0x00000007: mov [esp + 52], ecx +// 0x0000000b: movss [esp + 56], xmm0 +// 0x00000011: mov [esp + 60], edx +// 0x00000015: mov [esp + 64], ebx +// 0x00000019: add esp, -32 +// 0x0000001c: .cfi_def_cfa_offset: 80 +// 0x0000001c: add esp, 32 +// 0x0000001f: .cfi_def_cfa_offset: 48 +// 0x0000001f: .cfi_remember_state +// 0x0000001f: add esp, 32 +// 0x00000022: .cfi_def_cfa_offset: 16 +// 0x00000022: pop ebp +// 0x00000023: .cfi_def_cfa_offset: 12 +// 0x00000023: .cfi_restore: r5 +// 0x00000023: pop esi +// 0x00000024: .cfi_def_cfa_offset: 8 +// 0x00000024: .cfi_restore: r6 +// 0x00000024: pop edi +// 0x00000025: .cfi_def_cfa_offset: 4 +// 0x00000025: .cfi_restore: r7 +// 0x00000025: ret +// 0x00000026: .cfi_restore_state +// 0x00000026: .cfi_def_cfa_offset: 48 + +static constexpr uint8_t expected_asm_kX86_64[] = { + 0x41, 0x57, 0x41, 0x56, 0x41, 0x55, 0x41, 0x54, 0x55, 0x53, 0x48, 0x83, + 0xEC, 0x48, 0xF2, 0x44, 0x0F, 0x11, 0x7C, 0x24, 0x40, 0xF2, 0x44, 0x0F, + 0x11, 0x74, 0x24, 0x38, 0xF2, 0x44, 0x0F, 0x11, 0x6C, 0x24, 0x30, 0xF2, + 0x44, 0x0F, 0x11, 0x64, 0x24, 0x28, 0x89, 0x3C, 0x24, 0x89, 0xB4, 0x24, + 0x84, 0x00, 0x00, 0x00, 0xF3, 0x0F, 0x11, 0x84, 0x24, 0x88, 0x00, 0x00, + 0x00, 0x89, 0x94, 0x24, 0x8C, 0x00, 0x00, 0x00, 0x89, 0x8C, 0x24, 0x90, + 0x00, 0x00, 0x00, 0x48, 0x83, 0xC4, 0xE0, 0x48, 0x83, 0xC4, 0x20, 0xF2, + 0x44, 0x0F, 0x10, 0x64, 0x24, 0x28, 0xF2, 0x44, 0x0F, 0x10, 0x6C, 0x24, + 0x30, 0xF2, 0x44, 0x0F, 0x10, 0x74, 0x24, 0x38, 0xF2, 0x44, 0x0F, 0x10, + 0x7C, 0x24, 0x40, 0x48, 0x83, 0xC4, 0x48, 0x5B, 0x5D, 0x41, 0x5C, 0x41, + 0x5D, 0x41, 0x5E, 0x41, 0x5F, 0xC3, +}; +static constexpr uint8_t expected_cfi_kX86_64[] = { + 0x42, 0x0E, 0x10, 0x8F, 0x04, 0x42, 0x0E, 0x18, 0x8E, 0x06, 0x42, 0x0E, + 0x20, 0x8D, 0x08, 0x42, 0x0E, 0x28, 0x8C, 0x0A, 0x41, 0x0E, 0x30, 0x86, + 0x0C, 0x41, 0x0E, 0x38, 0x83, 0x0E, 0x44, 0x0E, 0x80, 0x01, 0x47, 0xA0, + 0x10, 0x47, 0x9F, 0x12, 0x47, 0x9E, 0x14, 0x47, 0x9D, 0x16, 0x65, 
0x0E, + 0xA0, 0x01, 0x44, 0x0E, 0x80, 0x01, 0x0A, 0x47, 0xDD, 0x47, 0xDE, 0x47, + 0xDF, 0x47, 0xE0, 0x44, 0x0E, 0x38, 0x41, 0x0E, 0x30, 0xC3, 0x41, 0x0E, + 0x28, 0xC6, 0x42, 0x0E, 0x20, 0xCC, 0x42, 0x0E, 0x18, 0xCD, 0x42, 0x0E, + 0x10, 0xCE, 0x42, 0x0E, 0x08, 0xCF, 0x41, 0x0B, 0x0E, 0x80, 0x01, +}; +// 0x00000000: push r15 +// 0x00000002: .cfi_def_cfa_offset: 16 +// 0x00000002: .cfi_offset: r15 at cfa-16 +// 0x00000002: push r14 +// 0x00000004: .cfi_def_cfa_offset: 24 +// 0x00000004: .cfi_offset: r14 at cfa-24 +// 0x00000004: push r13 +// 0x00000006: .cfi_def_cfa_offset: 32 +// 0x00000006: .cfi_offset: r13 at cfa-32 +// 0x00000006: push r12 +// 0x00000008: .cfi_def_cfa_offset: 40 +// 0x00000008: .cfi_offset: r12 at cfa-40 +// 0x00000008: push rbp +// 0x00000009: .cfi_def_cfa_offset: 48 +// 0x00000009: .cfi_offset: r6 at cfa-48 +// 0x00000009: push rbx +// 0x0000000a: .cfi_def_cfa_offset: 56 +// 0x0000000a: .cfi_offset: r3 at cfa-56 +// 0x0000000a: subq rsp, 72 +// 0x0000000e: .cfi_def_cfa_offset: 128 +// 0x0000000e: movsd [rsp + 64], xmm15 +// 0x00000015: .cfi_offset: r32 at cfa-64 +// 0x00000015: movsd [rsp + 56], xmm14 +// 0x0000001c: .cfi_offset: r31 at cfa-72 +// 0x0000001c: movsd [rsp + 48], xmm13 +// 0x00000023: .cfi_offset: r30 at cfa-80 +// 0x00000023: movsd [rsp + 40], xmm12 +// 0x0000002a: .cfi_offset: r29 at cfa-88 +// 0x0000002a: mov [rsp], edi +// 0x0000002d: mov [rsp + 132], esi +// 0x00000034: movss [rsp + 136], xmm0 +// 0x0000003d: mov [rsp + 140], edx +// 0x00000044: mov [rsp + 144], ecx +// 0x0000004b: addq rsp, -32 +// 0x0000004f: .cfi_def_cfa_offset: 160 +// 0x0000004f: addq rsp, 32 +// 0x00000053: .cfi_def_cfa_offset: 128 +// 0x00000053: .cfi_remember_state +// 0x00000053: movsd xmm12, [rsp + 40] +// 0x0000005a: .cfi_restore: r29 +// 0x0000005a: movsd xmm13, [rsp + 48] +// 0x00000061: .cfi_restore: r30 +// 0x00000061: movsd xmm14, [rsp + 56] +// 0x00000068: .cfi_restore: r31 +// 0x00000068: movsd xmm15, [rsp + 64] +// 0x0000006f: .cfi_restore: r32 +// 0x0000006f: addq rsp, 72 +// 0x00000073: .cfi_def_cfa_offset: 56 +// 0x00000073: pop rbx +// 0x00000074: .cfi_def_cfa_offset: 48 +// 0x00000074: .cfi_restore: r3 +// 0x00000074: pop rbp +// 0x00000075: .cfi_def_cfa_offset: 40 +// 0x00000075: .cfi_restore: r6 +// 0x00000075: pop r12 +// 0x00000077: .cfi_def_cfa_offset: 32 +// 0x00000077: .cfi_restore: r12 +// 0x00000077: pop r13 +// 0x00000079: .cfi_def_cfa_offset: 24 +// 0x00000079: .cfi_restore: r13 +// 0x00000079: pop r14 +// 0x0000007b: .cfi_def_cfa_offset: 16 +// 0x0000007b: .cfi_restore: r14 +// 0x0000007b: pop r15 +// 0x0000007d: .cfi_def_cfa_offset: 8 +// 0x0000007d: .cfi_restore: r15 +// 0x0000007d: ret +// 0x0000007e: .cfi_restore_state +// 0x0000007e: .cfi_def_cfa_offset: 128 + +static constexpr uint8_t expected_asm_kMips[] = { + 0xC0, 0xFF, 0xBD, 0x27, 0x3C, 0x00, 0xBF, 0xAF, 0x38, 0x00, 0xB8, 0xAF, + 0x34, 0x00, 0xAF, 0xAF, 0x30, 0x00, 0xAE, 0xAF, 0x2C, 0x00, 0xAD, 0xAF, + 0x28, 0x00, 0xAC, 0xAF, 0x24, 0x00, 0xAB, 0xAF, 0x20, 0x00, 0xAA, 0xAF, + 0x1C, 0x00, 0xA9, 0xAF, 0x18, 0x00, 0xA8, 0xAF, 0x00, 0x00, 0xA4, 0xAF, + 0x44, 0x00, 0xA5, 0xAF, 0x48, 0x00, 0xA6, 0xAF, 0x4C, 0x00, 0xA7, 0xAF, + 0xE0, 0xFF, 0xBD, 0x27, 0x20, 0x00, 0xBD, 0x27, 0x18, 0x00, 0xA8, 0x8F, + 0x1C, 0x00, 0xA9, 0x8F, 0x20, 0x00, 0xAA, 0x8F, 0x24, 0x00, 0xAB, 0x8F, + 0x28, 0x00, 0xAC, 0x8F, 0x2C, 0x00, 0xAD, 0x8F, 0x30, 0x00, 0xAE, 0x8F, + 0x34, 0x00, 0xAF, 0x8F, 0x38, 0x00, 0xB8, 0x8F, 0x3C, 0x00, 0xBF, 0x8F, + 0x40, 0x00, 0xBD, 0x27, 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00, +}; 
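// A reading aid, not generated output: the raw expected_cfi arrays in this file use the
// standard DWARF call-frame encoding, in which the high two bits of the first byte select
// the primary opcode. Hand-decoding the start and end of the kMips array below:
//   0x44        DW_CFA_advance_loc +4     (0x40 | delta, in code-alignment units)
//   0x0E 0x40   DW_CFA_def_cfa_offset 64  (matches ".cfi_def_cfa_offset: 64" in the listing)
//   0x44        DW_CFA_advance_loc +4
//   0x9F 0x01   DW_CFA_offset r31, 1      (0x80 | reg; 1 * data-alignment factor -4 = cfa-4)
//   ...
//   0x0A        DW_CFA_remember_state
//   0x44 0xC8   DW_CFA_advance_loc +4; DW_CFA_restore r8 (0xC0 | reg)
//   ...
//   0x0B        DW_CFA_restore_state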
+static constexpr uint8_t expected_cfi_kMips[] = { + 0x44, 0x0E, 0x40, 0x44, 0x9F, 0x01, 0x44, 0x98, 0x02, 0x44, 0x8F, 0x03, + 0x44, 0x8E, 0x04, 0x44, 0x8D, 0x05, 0x44, 0x8C, 0x06, 0x44, 0x8B, 0x07, + 0x44, 0x8A, 0x08, 0x44, 0x89, 0x09, 0x44, 0x88, 0x0A, 0x54, 0x0E, 0x60, + 0x44, 0x0E, 0x40, 0x0A, 0x44, 0xC8, 0x44, 0xC9, 0x44, 0xCA, 0x44, 0xCB, + 0x44, 0xCC, 0x44, 0xCD, 0x44, 0xCE, 0x44, 0xCF, 0x44, 0xD8, 0x44, 0xDF, + 0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40, +}; +// 0x00000000: addiu r29, r29, -64 +// 0x00000004: .cfi_def_cfa_offset: 64 +// 0x00000004: sw r31, +60(r29) +// 0x00000008: .cfi_offset: r31 at cfa-4 +// 0x00000008: sw r24, +56(r29) +// 0x0000000c: .cfi_offset: r24 at cfa-8 +// 0x0000000c: sw r15, +52(r29) +// 0x00000010: .cfi_offset: r15 at cfa-12 +// 0x00000010: sw r14, +48(r29) +// 0x00000014: .cfi_offset: r14 at cfa-16 +// 0x00000014: sw r13, +44(r29) +// 0x00000018: .cfi_offset: r13 at cfa-20 +// 0x00000018: sw r12, +40(r29) +// 0x0000001c: .cfi_offset: r12 at cfa-24 +// 0x0000001c: sw r11, +36(r29) +// 0x00000020: .cfi_offset: r11 at cfa-28 +// 0x00000020: sw r10, +32(r29) +// 0x00000024: .cfi_offset: r10 at cfa-32 +// 0x00000024: sw r9, +28(r29) +// 0x00000028: .cfi_offset: r9 at cfa-36 +// 0x00000028: sw r8, +24(r29) +// 0x0000002c: .cfi_offset: r8 at cfa-40 +// 0x0000002c: sw r4, +0(r29) +// 0x00000030: sw r5, +68(r29) +// 0x00000034: sw r6, +72(r29) +// 0x00000038: sw r7, +76(r29) +// 0x0000003c: addiu r29, r29, -32 +// 0x00000040: .cfi_def_cfa_offset: 96 +// 0x00000040: addiu r29, r29, 32 +// 0x00000044: .cfi_def_cfa_offset: 64 +// 0x00000044: .cfi_remember_state +// 0x00000044: lw r8, +24(r29) +// 0x00000048: .cfi_restore: r8 +// 0x00000048: lw r9, +28(r29) +// 0x0000004c: .cfi_restore: r9 +// 0x0000004c: lw r10, +32(r29) +// 0x00000050: .cfi_restore: r10 +// 0x00000050: lw r11, +36(r29) +// 0x00000054: .cfi_restore: r11 +// 0x00000054: lw r12, +40(r29) +// 0x00000058: .cfi_restore: r12 +// 0x00000058: lw r13, +44(r29) +// 0x0000005c: .cfi_restore: r13 +// 0x0000005c: lw r14, +48(r29) +// 0x00000060: .cfi_restore: r14 +// 0x00000060: lw r15, +52(r29) +// 0x00000064: .cfi_restore: r15 +// 0x00000064: lw r24, +56(r29) +// 0x00000068: .cfi_restore: r24 +// 0x00000068: lw r31, +60(r29) +// 0x0000006c: .cfi_restore: r31 +// 0x0000006c: addiu r29, r29, 64 +// 0x00000070: .cfi_def_cfa_offset: 0 +// 0x00000070: jalr r0, r31 +// 0x00000074: nop +// 0x00000078: .cfi_restore_state +// 0x00000078: .cfi_def_cfa_offset: 64 + +static constexpr uint8_t expected_asm_kMips64[] = { + 0xA0, 0xFF, 0xBD, 0x67, 0x58, 0x00, 0xBF, 0xFF, 0x50, 0x00, 0xBE, 0xFF, + 0x48, 0x00, 0xBC, 0xFF, 0x40, 0x00, 0xB7, 0xFF, 0x38, 0x00, 0xB6, 0xFF, + 0x30, 0x00, 0xB5, 0xFF, 0x28, 0x00, 0xB4, 0xFF, 0x20, 0x00, 0xB3, 0xFF, + 0x18, 0x00, 0xB2, 0xFF, 0x00, 0x00, 0xA4, 0xAF, 0x64, 0x00, 0xA5, 0xAF, + 0x68, 0x00, 0xAE, 0xE7, 0x6C, 0x00, 0xA7, 0xAF, 0x70, 0x00, 0xA8, 0xAF, + 0xE0, 0xFF, 0xBD, 0x67, 0x20, 0x00, 0xBD, 0x67, 0x18, 0x00, 0xB2, 0xDF, + 0x20, 0x00, 0xB3, 0xDF, 0x28, 0x00, 0xB4, 0xDF, 0x30, 0x00, 0xB5, 0xDF, + 0x38, 0x00, 0xB6, 0xDF, 0x40, 0x00, 0xB7, 0xDF, 0x48, 0x00, 0xBC, 0xDF, + 0x50, 0x00, 0xBE, 0xDF, 0x58, 0x00, 0xBF, 0xDF, 0x60, 0x00, 0xBD, 0x67, + 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00, +}; +static constexpr uint8_t expected_cfi_kMips64[] = { + 0x44, 0x0E, 0x60, 0x44, 0x9F, 0x02, 0x44, 0x9E, 0x04, 0x44, 0x9C, 0x06, + 0x44, 0x97, 0x08, 0x44, 0x96, 0x0A, 0x44, 0x95, 0x0C, 0x44, 0x94, 0x0E, + 0x44, 0x93, 0x10, 0x44, 0x92, 0x12, 0x58, 0x0E, 0x80, 0x01, 0x44, 0x0E, + 0x60, 0x0A, 0x44, 0xD2, 
0x44, 0xD3, 0x44, 0xD4, 0x44, 0xD5, 0x44, 0xD6, + 0x44, 0xD7, 0x44, 0xDC, 0x44, 0xDE, 0x44, 0xDF, 0x44, 0x0E, 0x00, 0x48, + 0x0B, 0x0E, 0x60, +}; +// 0x00000000: daddiu r29, r29, -96 +// 0x00000004: .cfi_def_cfa_offset: 96 +// 0x00000004: sd r31, +88(r29) +// 0x00000008: .cfi_offset: r31 at cfa-8 +// 0x00000008: sd r30, +80(r29) +// 0x0000000c: .cfi_offset: r30 at cfa-16 +// 0x0000000c: sd r28, +72(r29) +// 0x00000010: .cfi_offset: r28 at cfa-24 +// 0x00000010: sd r23, +64(r29) +// 0x00000014: .cfi_offset: r23 at cfa-32 +// 0x00000014: sd r22, +56(r29) +// 0x00000018: .cfi_offset: r22 at cfa-40 +// 0x00000018: sd r21, +48(r29) +// 0x0000001c: .cfi_offset: r21 at cfa-48 +// 0x0000001c: sd r20, +40(r29) +// 0x00000020: .cfi_offset: r20 at cfa-56 +// 0x00000020: sd r19, +32(r29) +// 0x00000024: .cfi_offset: r19 at cfa-64 +// 0x00000024: sd r18, +24(r29) +// 0x00000028: .cfi_offset: r18 at cfa-72 +// 0x00000028: sw r4, +0(r29) +// 0x0000002c: sw r5, +100(r29) +// 0x00000030: swc1 f14, +104(r29) +// 0x00000034: sw r7, +108(r29) +// 0x00000038: sw r8, +112(r29) +// 0x0000003c: daddiu r29, r29, -32 +// 0x00000040: .cfi_def_cfa_offset: 128 +// 0x00000040: daddiu r29, r29, 32 +// 0x00000044: .cfi_def_cfa_offset: 96 +// 0x00000044: .cfi_remember_state +// 0x00000044: ld r18, +24(r29) +// 0x00000048: .cfi_restore: r18 +// 0x00000048: ld r19, +32(r29) +// 0x0000004c: .cfi_restore: r19 +// 0x0000004c: ld r20, +40(r29) +// 0x00000050: .cfi_restore: r20 +// 0x00000050: ld r21, +48(r29) +// 0x00000054: .cfi_restore: r21 +// 0x00000054: ld r22, +56(r29) +// 0x00000058: .cfi_restore: r22 +// 0x00000058: ld r23, +64(r29) +// 0x0000005c: .cfi_restore: r23 +// 0x0000005c: ld r28, +72(r29) +// 0x00000060: .cfi_restore: r28 +// 0x00000060: ld r30, +80(r29) +// 0x00000064: .cfi_restore: r30 +// 0x00000064: ld r31, +88(r29) +// 0x00000068: .cfi_restore: r31 +// 0x00000068: daddiu r29, r29, 96 +// 0x0000006c: .cfi_def_cfa_offset: 0 +// 0x0000006c: jr r31 +// 0x00000070: nop +// 0x00000074: .cfi_restore_state +// 0x00000074: .cfi_def_cfa_offset: 96 + diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc index 45e2fd0ffe..8a14038074 100644 --- a/compiler/jni/quick/jni_compiler.cc +++ b/compiler/jni/quick/jni_compiler.cc @@ -28,6 +28,7 @@ #include "compiled_method.h" #include "dex_file-inl.h" #include "driver/compiler_driver.h" +#include "driver/compiler_options.h" #include "entrypoints/quick/quick_entrypoints.h" #include "jni_env_ext.h" #include "mirror/art_method.h" @@ -93,6 +94,7 @@ CompiledMethod* ArtJniCompileMethodInternal(CompilerDriver* driver, // Assembler that holds generated instructions std::unique_ptr<Assembler> jni_asm(Assembler::Create(instruction_set)); + jni_asm->cfi().SetEnabled(driver->GetCompilerOptions().GetIncludeDebugSymbols()); // Offsets into data structures // TODO: if cross compiling these offsets are for the host not the target @@ -104,6 +106,7 @@ CompiledMethod* ArtJniCompileMethodInternal(CompilerDriver* driver, const size_t frame_size(main_jni_conv->FrameSize()); const std::vector<ManagedRegister>& callee_save_regs = main_jni_conv->CalleeSaveRegisters(); __ BuildFrame(frame_size, mr_conv->MethodRegister(), callee_save_regs, mr_conv->EntrySpills()); + DCHECK_EQ(jni_asm->cfi().GetCurrentCFAOffset(), static_cast<int>(frame_size)); // 2. Set up the HandleScope mr_conv->ResetIterator(FrameOffset(frame_size)); @@ -423,7 +426,9 @@ CompiledMethod* ArtJniCompileMethodInternal(CompilerDriver* driver, // 16. 
Remove activation - need to restore callee save registers since the GC may have changed // them. + DCHECK_EQ(jni_asm->cfi().GetCurrentCFAOffset(), static_cast<int>(frame_size)); __ RemoveFrame(frame_size, callee_save_regs); + DCHECK_EQ(jni_asm->cfi().GetCurrentCFAOffset(), static_cast<int>(frame_size)); // 17. Finalize code generation __ EmitSlowPaths(); @@ -432,13 +437,18 @@ CompiledMethod* ArtJniCompileMethodInternal(CompilerDriver* driver, MemoryRegion code(&managed_code[0], managed_code.size()); __ FinalizeInstructions(code); - return CompiledMethod::SwapAllocCompiledMethodCFI(driver, - instruction_set, - ArrayRef<const uint8_t>(managed_code), - frame_size, - main_jni_conv->CoreSpillMask(), - main_jni_conv->FpSpillMask(), - ArrayRef<const uint8_t>()); + return CompiledMethod::SwapAllocCompiledMethod(driver, + instruction_set, + ArrayRef<const uint8_t>(managed_code), + frame_size, + main_jni_conv->CoreSpillMask(), + main_jni_conv->FpSpillMask(), + nullptr, // src_mapping_table. + ArrayRef<const uint8_t>(), // mapping_table. + ArrayRef<const uint8_t>(), // vmap_table. + ArrayRef<const uint8_t>(), // native_gc_map. + ArrayRef<const uint8_t>(*jni_asm->cfi().data()), + ArrayRef<const LinkerPatch>()); } // Copy a single parameter from the managed to the JNI calling convention diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc index 1cbe481321..72ddf07089 100644 --- a/compiler/linker/arm64/relative_patcher_arm64.cc +++ b/compiler/linker/arm64/relative_patcher_arm64.cc @@ -258,12 +258,36 @@ bool Arm64RelativePatcher::NeedsErratum843419Thunk(ArrayRef<const uint8_t> code, if ((patch_offset & 0xff8) == 0xff8) { // ...ff8 or ...ffc uint32_t adrp = GetInsn(code, literal_offset); DCHECK_EQ(adrp & 0xff000000, 0x90000000); - // TODO: Improve the check. For now, we're just checking if the next insn is - // the LDR using the result of the ADRP, otherwise we implement the workaround. + uint32_t next_offset = patch_offset + 4u; uint32_t next_insn = GetInsn(code, literal_offset + 4u); - bool ok = (next_insn & 0xffc00000) == 0xb9400000 && // LDR <Wt>, [<Xn>, #pimm] - (((next_insn >> 5) ^ adrp) & 0x1f) == 0; // <Xn> == ADRP destination reg - return !ok; + + // Below we avoid patching sequences where the adrp is followed by a load which can easily + // be proved to be aligned. + + // First check if the next insn is the LDR using the result of the ADRP. + // LDR <Wt>, [<Xn>, #pimm], where <Xn> == ADRP destination reg. + if ((next_insn & 0xffc00000) == 0xb9400000 && + (((next_insn >> 5) ^ adrp) & 0x1f) == 0) { + return false; + } + + // LDR <Wt>, <label> is always aligned and thus it doesn't cause boundary crossing. + if ((next_insn & 0xff000000) == 0x18000000) { + return false; + } + + // LDR <Xt>, <label> is aligned iff the pc + displacement is a multiple of 8. + if ((next_insn & 0xff000000) == 0x58000000) { + bool is_aligned_load = (((next_offset >> 2) ^ (next_insn >> 5)) & 1) == 0; + return !is_aligned_load; + } + + // LDR <Wt>, [SP, #<pimm>] and LDR <Xt>, [SP, #<pimm>] are always aligned loads, as SP is + // guaranteed to be 128-bit aligned and <pimm> is a multiple of the load size.
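// A worked instance of the LDR <Xt>, <label> check above, using the displacements exercised
// by the tests in relative_patcher_arm64_test.cc: with the ADRP at offset 0xff8, we get
// next_offset = 0xffc; a byte displacement of 0x1234 puts the literal at 0xffc + 0x1234 =
// 0x2230, which is 8-byte aligned, so no thunk is needed; a displacement of 0x1238 puts it
// at 0x2234, which is unaligned, so the thunk is required. The XOR trick works because the
// pc and the scaled imm19 are both multiples of 4, so bit 2 of their sum is just bit 2 of
// next_offset XOR bit 0 of the imm19 field. Note also that the SP-relative mask 0xbfc003e0
// below ignores bit 30 (the size bit) and therefore matches both the W and the X form.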
+ if ((next_insn & 0xbfc003e0) == 0xb94003e0) { + return false; + } + return true; } return false; } diff --git a/compiler/linker/arm64/relative_patcher_arm64_test.cc b/compiler/linker/arm64/relative_patcher_arm64_test.cc index b36e6d0b07..21f93672ad 100644 --- a/compiler/linker/arm64/relative_patcher_arm64_test.cc +++ b/compiler/linker/arm64/relative_patcher_arm64_test.cc @@ -42,6 +42,15 @@ class Arm64RelativePatcherTest : public RelativePatcherTest { // LDUR x2, [sp, #4], i.e. unaligned load crossing 64-bit boundary (assuming aligned sp). static constexpr uint32_t kLdurInsn = 0xf840405fu; + // LDR w12, <label> and LDR x12, <label>. Bits 5-23 contain label displacement in 4-byte units. + static constexpr uint32_t kLdrWPcRelInsn = 0x1800000cu; + static constexpr uint32_t kLdrXPcRelInsn = 0x5800000cu; + + // LDR w13, [SP, #<pimm>] and LDR x13, [SP, #<pimm>]. Bits 10-21 contain displacement from SP + // in units of 4-bytes (for 32-bit load) or 8-bytes (for 64-bit load). + static constexpr uint32_t kLdrWSpRelInsn = 0xb94003edu; + static constexpr uint32_t kLdrXSpRelInsn = 0xf94003edu; + uint32_t Create2MethodsWithGap(const ArrayRef<const uint8_t>& method1_code, const ArrayRef<const LinkerPatch>& method1_patches, const ArrayRef<const uint8_t>& last_method_code, @@ -260,20 +269,43 @@ class Arm64RelativePatcherTest : public RelativePatcherTest { } } - void TestAdrpLdurLdr(uint32_t adrp_offset, bool has_thunk, - uint32_t dex_cache_arrays_begin, uint32_t element_offset) { + void TestAdrpInsn2Ldr(uint32_t insn2, uint32_t adrp_offset, bool has_thunk, + uint32_t dex_cache_arrays_begin, uint32_t element_offset) { uint32_t method1_offset = CompiledCode::AlignCode(kTrampolineSize, kArm64) + sizeof(OatQuickMethodHeader); ASSERT_LT(method1_offset, adrp_offset); ASSERT_EQ(adrp_offset & 3u, 0u); uint32_t num_nops = (adrp_offset - method1_offset) / 4u; if (has_thunk) { - TestNopsAdrpInsn2LdrHasThunk(num_nops, kLdurInsn, dex_cache_arrays_begin, element_offset); + TestNopsAdrpInsn2LdrHasThunk(num_nops, insn2, dex_cache_arrays_begin, element_offset); } else { - TestNopsAdrpInsn2Ldr(num_nops, kLdurInsn, dex_cache_arrays_begin, element_offset); + TestNopsAdrpInsn2Ldr(num_nops, insn2, dex_cache_arrays_begin, element_offset); } ASSERT_EQ(method1_offset, GetMethodOffset(1u)); // If this fails, num_nops is wrong. 
} + + void TestAdrpLdurLdr(uint32_t adrp_offset, bool has_thunk, + uint32_t dex_cache_arrays_begin, uint32_t element_offset) { + TestAdrpInsn2Ldr(kLdurInsn, adrp_offset, has_thunk, dex_cache_arrays_begin, element_offset); + } + + void TestAdrpLdrPcRelLdr(uint32_t pcrel_ldr_insn, int32_t pcrel_disp, + uint32_t adrp_offset, bool has_thunk, + uint32_t dex_cache_arrays_begin, uint32_t element_offset) { + ASSERT_LT(pcrel_disp, 0x100000); + ASSERT_GE(pcrel_disp, -0x100000); + ASSERT_EQ(pcrel_disp & 0x3, 0); + uint32_t insn2 = pcrel_ldr_insn | (((static_cast<uint32_t>(pcrel_disp) >> 2) & 0x7ffffu) << 5); + TestAdrpInsn2Ldr(insn2, adrp_offset, has_thunk, dex_cache_arrays_begin, element_offset); + } + + void TestAdrpLdrSpRelLdr(uint32_t sprel_ldr_insn, uint32_t sprel_disp_in_load_units, + uint32_t adrp_offset, bool has_thunk, + uint32_t dex_cache_arrays_begin, uint32_t element_offset) { + ASSERT_LT(sprel_disp_in_load_units, 0x1000u); + uint32_t insn2 = sprel_ldr_insn | ((sprel_disp_in_load_units & 0xfffu) << 10); + TestAdrpInsn2Ldr(insn2, adrp_offset, has_thunk, dex_cache_arrays_begin, element_offset); + } }; const uint8_t Arm64RelativePatcherTest::kCallRawCode[] = { @@ -509,5 +541,42 @@ TEST_F(Arm64RelativePatcherTestDenver64, DexCacheReference0x1000) { TestAdrpLdurLdr(0x1000u, false, 0x12345678u, 0x1234u); } +#define TEST_FOR_OFFSETS(test, disp1, disp2) \ + test(0xff4u, disp1) test(0xff8u, disp1) test(0xffcu, disp1) test(0x1000u, disp1) \ + test(0xff4u, disp2) test(0xff8u, disp2) test(0xffcu, disp2) test(0x1000u, disp2) + +// LDR <Wt>, <label> is always aligned. We should never have to use a fixup. +#define LDRW_PCREL_TEST(adrp_offset, disp) \ + TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference ## adrp_offset ## WPcRel ## disp) { \ + TestAdrpLdrPcRelLdr(kLdrWPcRelInsn, disp, adrp_offset, false, 0x12345678u, 0x1234u); \ + } + +TEST_FOR_OFFSETS(LDRW_PCREL_TEST, 0x1234, 0x1238) + +// LDR <Xt>, <label> is aligned when offset + displacement is a multiple of 8. +#define LDRX_PCREL_TEST(adrp_offset, disp) \ + TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference ## adrp_offset ## XPcRel ## disp) { \ + bool unaligned = ((adrp_offset + 4u + static_cast<uint32_t>(disp)) & 7u) != 0; \ + bool has_thunk = (adrp_offset == 0xff8u || adrp_offset == 0xffcu) && unaligned; \ + TestAdrpLdrPcRelLdr(kLdrXPcRelInsn, disp, adrp_offset, has_thunk, 0x12345678u, 0x1234u); \ + } + +TEST_FOR_OFFSETS(LDRX_PCREL_TEST, 0x1234, 0x1238) + +// LDR <Wt>, [SP, #<pimm>] and LDR <Xt>, [SP, #<pimm>] are always aligned. No fixup needed. 
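// Note on the encodings used below: TestAdrpLdrSpRelLdr shifts the displacement into the
// pimm field at bits 10-21, so these macros pass it in load-size units, disp >> 2 for the
// W form and disp >> 3 for the X form. For example, LDRW_SPREL_TEST(0xff4u, 4) ends up
// testing insn2 = 0xb94003ed | (1 << 10). No thunk is ever expected for SP-relative loads.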
+#define LDRW_SPREL_TEST(adrp_offset, disp) \ + TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference ## adrp_offset ## WSpRel ## disp) { \ + TestAdrpLdrSpRelLdr(kLdrWSpRelInsn, disp >> 2, adrp_offset, false, 0x12345678u, 0x1234u); \ + } + +TEST_FOR_OFFSETS(LDRW_SPREL_TEST, 0, 4) + +#define LDRX_SPREL_TEST(adrp_offset, disp) \ + TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference ## adrp_offset ## XSpRel ## disp) { \ + TestAdrpLdrSpRelLdr(kLdrXSpRelInsn, disp >> 3, adrp_offset, false, 0x12345678u, 0x1234u); \ + } + +TEST_FOR_OFFSETS(LDRX_SPREL_TEST, 0, 8) + } // namespace linker } // namespace art diff --git a/compiler/linker/x86/relative_patcher_x86.cc b/compiler/linker/x86/relative_patcher_x86.cc index 246cf11dae..315585d9e7 100644 --- a/compiler/linker/x86/relative_patcher_x86.cc +++ b/compiler/linker/x86/relative_patcher_x86.cc @@ -16,14 +16,43 @@ #include "linker/x86/relative_patcher_x86.h" +#include "compiled_method.h" + namespace art { namespace linker { -void X86RelativePatcher::PatchDexCacheReference(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, - const LinkerPatch& patch ATTRIBUTE_UNUSED, - uint32_t patch_offset ATTRIBUTE_UNUSED, - uint32_t target_offset ATTRIBUTE_UNUSED) { - LOG(FATAL) << "Unexpected relative dex cache array patch."; +void X86RelativePatcher::PatchDexCacheReference(std::vector<uint8_t>* code, + const LinkerPatch& patch, + uint32_t patch_offset, + uint32_t target_offset) { + uint32_t anchor_literal_offset = patch.PcInsnOffset(); + uint32_t literal_offset = patch.LiteralOffset(); + + // Check that the anchor points to pop in a "call +0; pop <reg>" sequence. + DCHECK_GE(anchor_literal_offset, 5u); + DCHECK_LT(anchor_literal_offset, code->size()); + DCHECK_EQ((*code)[anchor_literal_offset - 5u], 0xe8u); + DCHECK_EQ((*code)[anchor_literal_offset - 4u], 0x00u); + DCHECK_EQ((*code)[anchor_literal_offset - 3u], 0x00u); + DCHECK_EQ((*code)[anchor_literal_offset - 2u], 0x00u); + DCHECK_EQ((*code)[anchor_literal_offset - 1u], 0x00u); + DCHECK_EQ((*code)[anchor_literal_offset] & 0xf8u, 0x58u); + + // Check that the patched data contains kDummy32BitOffset. + constexpr int kDummy32BitOffset = 256; // Must match X86Mir2Lir::kDummy32BitOffset. + DCHECK_LE(literal_offset, code->size()); + DCHECK_EQ((*code)[literal_offset + 0u], static_cast<uint8_t>(kDummy32BitOffset >> 0)); + DCHECK_EQ((*code)[literal_offset + 1u], static_cast<uint8_t>(kDummy32BitOffset >> 8)); + DCHECK_EQ((*code)[literal_offset + 2u], static_cast<uint8_t>(kDummy32BitOffset >> 16)); + DCHECK_EQ((*code)[literal_offset + 3u], static_cast<uint8_t>(kDummy32BitOffset >> 24)); + + // Apply patch. 
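// Illustration with assumed numbers, not taken from the patch: if the method is linked so
// that patch_offset = 0x2006, with literal_offset = 6 and anchor_literal_offset = 5, then
// anchor_offset = 0x2006 - 6 + 5 = 0x2005. That is exactly the address which the
// "call +0; pop <reg>" sequence leaves in <reg> at run time, so storing
// diff = target_offset - anchor_offset makes the patched "mov eax, [<reg> + diff]"
// dereference target_offset.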
+ uint32_t anchor_offset = patch_offset - literal_offset + anchor_literal_offset; + uint32_t diff = target_offset - anchor_offset; + (*code)[literal_offset + 0u] = static_cast<uint8_t>(diff >> 0); + (*code)[literal_offset + 1u] = static_cast<uint8_t>(diff >> 8); + (*code)[literal_offset + 2u] = static_cast<uint8_t>(diff >> 16); + (*code)[literal_offset + 3u] = static_cast<uint8_t>(diff >> 24); } } // namespace linker diff --git a/compiler/linker/x86/relative_patcher_x86_test.cc b/compiler/linker/x86/relative_patcher_x86_test.cc index 15ac47e7a4..7acc33004a 100644 --- a/compiler/linker/x86/relative_patcher_x86_test.cc +++ b/compiler/linker/x86/relative_patcher_x86_test.cc @@ -101,5 +101,35 @@ TEST_F(X86RelativePatcherTest, CallTrampoline) { EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); } +TEST_F(X86RelativePatcherTest, DexCacheReference) { + dex_cache_arrays_begin_ = 0x12345678; + constexpr size_t kElementOffset = 0x1234; + static const uint8_t raw_code[] = { + 0xe8, 0x00, 0x00, 0x00, 0x00, // call +0 + 0x5b, // pop ebx + 0x8b, 0x83, 0x00, 0x01, 0x00, 0x00, // mov eax, [ebx + 256 (kDummy32BitValue)] + }; + constexpr uint32_t anchor_offset = 5u; // After call +0. + ArrayRef<const uint8_t> code(raw_code); + LinkerPatch patches[] = { + LinkerPatch::DexCacheArrayPatch(code.size() - 4u, nullptr, anchor_offset, kElementOffset), + }; + AddCompiledMethod(MethodRef(1u), code, ArrayRef<const LinkerPatch>(patches)); + Link(); + + auto result = method_offset_map_.FindMethodOffset(MethodRef(1u)); + ASSERT_TRUE(result.first); + uint32_t diff = + dex_cache_arrays_begin_ + kElementOffset - (result.second + anchor_offset); + static const uint8_t expected_code[] = { + 0xe8, 0x00, 0x00, 0x00, 0x00, // call +0 + 0x5b, // pop ebx + 0x8b, 0x83, // mov eax, [ebx + diff] + static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8), + static_cast<uint8_t>(diff >> 16), static_cast<uint8_t>(diff >> 24) + }; + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); +} + } // namespace linker } // namespace art diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc index 7120920773..5b4cc54858 100644 --- a/compiler/oat_writer.cc +++ b/compiler/oat_writer.cc @@ -450,24 +450,18 @@ class OatWriter::InitCodeMethodVisitor : public OatDexMethodVisitor { if (writer_->compiler_driver_->GetCompilerOptions().GetIncludeDebugSymbols()) { // Record debug information for this function if we are doing that. - - std::string name = PrettyMethod(it.GetMemberIndex(), *dex_file_, true); - if (deduped) { - // TODO We should place the DEDUPED tag on the first instance of a deduplicated symbol - // so that it will show up in a debuggerd crash report. - name += " [ DEDUPED ]"; - } - const uint32_t quick_code_start = quick_code_offset - writer_->oat_header_->GetExecutableOffset() - thumb_offset; - const DexFile::CodeItem *code_item = it.GetMethodCodeItem(); - const DexFile::ClassDef& class_def = dex_file_->GetClassDef(class_def_index_); - writer_->method_info_.push_back(DebugInfo(name, deduped, - dex_file_->GetClassDescriptor(class_def), - dex_file_->GetSourceFile(class_def), - quick_code_start, quick_code_start + code_size, - code_item == nullptr ? 
nullptr : dex_file_->GetDebugInfoStream(code_item), - compiled_method)); + writer_->method_info_.push_back(DebugInfo { + dex_file_, + class_def_index_, + it.GetMemberIndex(), + it.GetMethodAccessFlags(), + it.GetMethodCodeItem(), + deduped, + quick_code_start, + quick_code_start + code_size, + compiled_method}); } if (kIsDebugBuild) { diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h index c472000f37..51bc9b4483 100644 --- a/compiler/oat_writer.h +++ b/compiler/oat_writer.h @@ -115,26 +115,18 @@ class OatWriter { ~OatWriter(); struct DebugInfo { - DebugInfo(const std::string& method_name, bool deduped, - const char* class_descriptor, const char* src_file_name, - uint32_t low_pc, uint32_t high_pc, - const uint8_t* dbgstream, CompiledMethod* compiled_method) - : method_name_(method_name), deduped_(deduped), - class_descriptor_(class_descriptor), src_file_name_(src_file_name), - low_pc_(low_pc), high_pc_(high_pc), - dbgstream_(dbgstream), compiled_method_(compiled_method) { - } - std::string method_name_; // Note: this name is a pretty-printed name. - bool deduped_; - const char* class_descriptor_; - const char* src_file_name_; - uint32_t low_pc_; - uint32_t high_pc_; - const uint8_t* dbgstream_; + const DexFile* dex_file_; + size_t class_def_index_; + uint32_t dex_method_index_; + uint32_t access_flags_; + const DexFile::CodeItem *code_item_; + bool deduped_; + uint32_t low_pc_; + uint32_t high_pc_; CompiledMethod* compiled_method_; }; - const std::vector<DebugInfo>& GetCFIMethodInfo() const { + const std::vector<DebugInfo>& GetMethodDebugInfo() const { return method_info_; } diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index da28dc7ecb..8736374306 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -82,6 +82,7 @@ void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline) HGraphVisitor* instruction_visitor = GetInstructionVisitor(); DCHECK_EQ(current_block_index_, 0u); GenerateFrameEntry(); + DCHECK_EQ(GetAssembler()->cfi().GetCurrentCFAOffset(), static_cast<int>(frame_size_)); for (size_t e = block_order_->Size(); current_block_index_ < e; ++current_block_index_) { HBasicBlock* block = block_order_->Get(current_block_index_); // Don't generate code for an empty block. 
Its predecessors will branch to its successor @@ -415,7 +416,16 @@ void CodeGenerator::BuildNativeGCMap( } } -void CodeGenerator::BuildMappingTable(std::vector<uint8_t>* data, DefaultSrcMap* src_map) const { +void CodeGenerator::BuildSourceMap(DefaultSrcMap* src_map) const { + for (size_t i = 0; i < pc_infos_.Size(); i++) { + struct PcInfo pc_info = pc_infos_.Get(i); + uint32_t pc2dex_offset = pc_info.native_pc; + int32_t pc2dex_dalvik_offset = pc_info.dex_pc; + src_map->push_back(SrcMapElem({pc2dex_offset, pc2dex_dalvik_offset})); + } +} + +void CodeGenerator::BuildMappingTable(std::vector<uint8_t>* data) const { uint32_t pc2dex_data_size = 0u; uint32_t pc2dex_entries = pc_infos_.Size(); uint32_t pc2dex_offset = 0u; @@ -425,19 +435,12 @@ void CodeGenerator::BuildMappingTable(std::vector<uint8_t>* data, DefaultSrcMap* uint32_t dex2pc_offset = 0u; int32_t dex2pc_dalvik_offset = 0; - if (src_map != nullptr) { - src_map->reserve(pc2dex_entries); - } - for (size_t i = 0; i < pc2dex_entries; i++) { struct PcInfo pc_info = pc_infos_.Get(i); pc2dex_data_size += UnsignedLeb128Size(pc_info.native_pc - pc2dex_offset); pc2dex_data_size += SignedLeb128Size(pc_info.dex_pc - pc2dex_dalvik_offset); pc2dex_offset = pc_info.native_pc; pc2dex_dalvik_offset = pc_info.dex_pc; - if (src_map != nullptr) { - src_map->push_back(SrcMapElem({pc2dex_offset, pc2dex_dalvik_offset})); - } } // Walk over the blocks and find which ones correspond to catch block entries. diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 07ca6b1ccf..b888aca264 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -205,7 +205,8 @@ class CodeGenerator { slow_paths_.Add(slow_path); } - void BuildMappingTable(std::vector<uint8_t>* vector, DefaultSrcMap* src_map) const; + void BuildSourceMap(DefaultSrcMap* src_map) const; + void BuildMappingTable(std::vector<uint8_t>* vector) const; void BuildVMapTable(std::vector<uint8_t>* vector) const; void BuildNativeGCMap( std::vector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const; @@ -425,6 +426,8 @@ class CodeGenerator { StackMapStream stack_map_stream_; + friend class OptimizingCFITest; + DISALLOW_COPY_AND_ASSIGN(CodeGenerator); }; diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index cfc798a34e..a799a519c0 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -513,6 +513,14 @@ void CodeGeneratorARM::ComputeSpillMask() { } } +static dwarf::Reg DWARFReg(Register reg) { + return dwarf::Reg::ArmCore(static_cast<int>(reg)); +} + +static dwarf::Reg DWARFReg(SRegister reg) { + return dwarf::Reg::ArmFp(static_cast<int>(reg)); +} + void CodeGeneratorARM::GenerateFrameEntry() { bool skip_overflow_check = IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm); @@ -531,12 +539,19 @@ void CodeGeneratorARM::GenerateFrameEntry() { // PC is in the list of callee-save to mimic Quick, but we need to push // LR at entry instead. 
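// How the bookkeeping below lines up, judging from the kThumb2 expectations earlier in this
// change: AdjustCFAOffset records the stack growth caused by the push, and RelOffsetForMany
// walks the set bits of push_mask, assigning each spilled register a slot at successive
// word offsets below the new CFA. That is what produces the ".cfi_offset: r5 at cfa-28"
// through ".cfi_offset: r14 at cfa-4" lines for push {r5, r6, r7, r8, r10, r11, lr}.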
- __ PushList((core_spill_mask_ & (~(1 << PC))) | 1 << LR); + uint32_t push_mask = (core_spill_mask_ & (~(1 << PC))) | 1 << LR; + __ PushList(push_mask); + __ cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(push_mask)); + __ cfi().RelOffsetForMany(DWARFReg(Register(0)), 0, push_mask, kArmWordSize); if (fpu_spill_mask_ != 0) { SRegister start_register = SRegister(LeastSignificantBit(fpu_spill_mask_)); __ vpushs(start_register, POPCOUNT(fpu_spill_mask_)); + __ cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(fpu_spill_mask_)); + __ cfi().RelOffsetForMany(DWARFReg(SRegister(0)), 0, fpu_spill_mask_, kArmWordSize); } - __ AddConstant(SP, -(GetFrameSize() - FrameEntrySpillSize())); + int adjust = GetFrameSize() - FrameEntrySpillSize(); + __ AddConstant(SP, -adjust); + __ cfi().AdjustCFAOffset(adjust); __ StoreToOffset(kStoreWord, R0, SP, 0); } @@ -545,10 +560,14 @@ void CodeGeneratorARM::GenerateFrameExit() { __ bx(LR); return; } - __ AddConstant(SP, GetFrameSize() - FrameEntrySpillSize()); + int adjust = GetFrameSize() - FrameEntrySpillSize(); + __ AddConstant(SP, adjust); + __ cfi().AdjustCFAOffset(-adjust); if (fpu_spill_mask_ != 0) { SRegister start_register = SRegister(LeastSignificantBit(fpu_spill_mask_)); __ vpops(start_register, POPCOUNT(fpu_spill_mask_)); + __ cfi().AdjustCFAOffset(-kArmPointerSize * POPCOUNT(fpu_spill_mask_)); + __ cfi().RestoreMany(DWARFReg(SRegister(0)), fpu_spill_mask_); } __ PopList(core_spill_mask_); } @@ -1190,7 +1209,10 @@ void LocationsBuilderARM::VisitReturnVoid(HReturnVoid* ret) { void InstructionCodeGeneratorARM::VisitReturnVoid(HReturnVoid* ret) { UNUSED(ret); + __ cfi().RememberState(); codegen_->GenerateFrameExit(); + __ cfi().RestoreState(); + __ cfi().DefCFAOffset(codegen_->GetFrameSize()); } void LocationsBuilderARM::VisitReturn(HReturn* ret) { @@ -1201,7 +1223,10 @@ void LocationsBuilderARM::VisitReturn(HReturn* ret) { void InstructionCodeGeneratorARM::VisitReturn(HReturn* ret) { UNUSED(ret); + __ cfi().RememberState(); codegen_->GenerateFrameExit(); + __ cfi().RestoreState(); + __ cfi().DefCFAOffset(codegen_->GetFrameSize()); } void LocationsBuilderARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 439e85ca6c..5fe8adc86a 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -465,20 +465,67 @@ void CodeGeneratorARM64::GenerateFrameEntry() { // ... : reserved frame space. // sp[0] : current method. 
__ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex)); - __ PokeCPURegList(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize()); - __ PokeCPURegList(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize()); + GetAssembler()->cfi().AdjustCFAOffset(frame_size); + SpillRegisters(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize()); + SpillRegisters(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize()); } } void CodeGeneratorARM64::GenerateFrameExit() { if (!HasEmptyFrame()) { int frame_size = GetFrameSize(); - __ PeekCPURegList(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize()); - __ PeekCPURegList(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize()); + UnspillRegisters(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize()); + UnspillRegisters(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize()); __ Drop(frame_size); + GetAssembler()->cfi().AdjustCFAOffset(-frame_size); } } +static inline dwarf::Reg DWARFReg(CPURegister reg) { + if (reg.IsFPRegister()) { + return dwarf::Reg::Arm64Fp(reg.code()); + } else { + DCHECK_LT(reg.code(), 31u); // X0 - X30. + return dwarf::Reg::Arm64Core(reg.code()); + } +} + +void CodeGeneratorARM64::SpillRegisters(vixl::CPURegList registers, int offset) { + int size = registers.RegisterSizeInBytes(); + while (registers.Count() >= 2) { + const CPURegister& dst0 = registers.PopLowestIndex(); + const CPURegister& dst1 = registers.PopLowestIndex(); + __ Stp(dst0, dst1, MemOperand(__ StackPointer(), offset)); + GetAssembler()->cfi().RelOffset(DWARFReg(dst0), offset); + GetAssembler()->cfi().RelOffset(DWARFReg(dst1), offset + size); + offset += 2 * size; + } + if (!registers.IsEmpty()) { + const CPURegister& dst0 = registers.PopLowestIndex(); + __ Str(dst0, MemOperand(__ StackPointer(), offset)); + GetAssembler()->cfi().RelOffset(DWARFReg(dst0), offset); + } + DCHECK(registers.IsEmpty()); +} + +void CodeGeneratorARM64::UnspillRegisters(vixl::CPURegList registers, int offset) { + int size = registers.RegisterSizeInBytes(); + while (registers.Count() >= 2) { + const CPURegister& dst0 = registers.PopLowestIndex(); + const CPURegister& dst1 = registers.PopLowestIndex(); + __ Ldp(dst0, dst1, MemOperand(__ StackPointer(), offset)); + GetAssembler()->cfi().Restore(DWARFReg(dst0)); + GetAssembler()->cfi().Restore(DWARFReg(dst1)); + offset += 2 * size; + } + if (!registers.IsEmpty()) { + const CPURegister& dst0 = registers.PopLowestIndex(); + __ Ldr(dst0, MemOperand(__ StackPointer(), offset)); + GetAssembler()->cfi().Restore(DWARFReg(dst0)); + } + DCHECK(registers.IsEmpty()); +} + void CodeGeneratorARM64::Bind(HBasicBlock* block) { __ Bind(GetLabelOf(block)); } @@ -1659,11 +1706,26 @@ void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruct Register lhs = InputRegisterAt(condition, 0); Operand rhs = InputOperandAt(condition, 1); Condition arm64_cond = ARM64Condition(condition->GetCondition()); - if ((arm64_cond == eq || arm64_cond == ne) && rhs.IsImmediate() && (rhs.immediate() == 0)) { - if (arm64_cond == eq) { - __ Cbz(lhs, true_target); - } else { - __ Cbnz(lhs, true_target); + if ((arm64_cond != gt && arm64_cond != le) && rhs.IsImmediate() && (rhs.immediate() == 0)) { + switch (arm64_cond) { + case eq: + __ Cbz(lhs, true_target); + break; + case ne: + __ Cbnz(lhs, true_target); + break; + case lt: + // Test the sign bit and branch accordingly. + __ Tbnz(lhs, (lhs.IsX() ? 
kXRegSize : kWRegSize) - 1, true_target); + break; + case ge: + // Test the sign bit and branch accordingly. + __ Tbz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, true_target); + break; + default: + // Without the `static_cast` the compiler throws an error for + // `-Werror=sign-promo`. + LOG(FATAL) << "Unexpected condition: " << static_cast<int>(arm64_cond); } } else { __ Cmp(lhs, rhs); @@ -2403,8 +2465,11 @@ void LocationsBuilderARM64::VisitReturn(HReturn* instruction) { void InstructionCodeGeneratorARM64::VisitReturn(HReturn* instruction) { UNUSED(instruction); + GetAssembler()->cfi().RememberState(); codegen_->GenerateFrameExit(); __ Ret(); + GetAssembler()->cfi().RestoreState(); + GetAssembler()->cfi().DefCFAOffset(codegen_->GetFrameSize()); } void LocationsBuilderARM64::VisitReturnVoid(HReturnVoid* instruction) { @@ -2413,8 +2478,11 @@ void LocationsBuilderARM64::VisitReturnVoid(HReturnVoid* instruction) { void InstructionCodeGeneratorARM64::VisitReturnVoid(HReturnVoid* instruction) { UNUSED(instruction); + GetAssembler()->cfi().RememberState(); codegen_->GenerateFrameExit(); __ Ret(); + GetAssembler()->cfi().RestoreState(); + GetAssembler()->cfi().DefCFAOffset(codegen_->GetFrameSize()); } void LocationsBuilderARM64::VisitShl(HShl* shl) { diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 7edb129880..9430e31037 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -227,6 +227,8 @@ class CodeGeneratorARM64 : public CodeGenerator { void GenerateFrameEntry() OVERRIDE; void GenerateFrameExit() OVERRIDE; + void SpillRegisters(vixl::CPURegList registers, int offset); + void UnspillRegisters(vixl::CPURegList registers, int offset); vixl::CPURegList GetFramePreservedCoreRegisters() const { return vixl::CPURegList(vixl::CPURegister::kRegister, vixl::kXRegSize, diff --git a/compiler/optimizing/code_generator_utils.cc b/compiler/optimizing/code_generator_utils.cc new file mode 100644 index 0000000000..921c1d86c2 --- /dev/null +++ b/compiler/optimizing/code_generator_utils.cc @@ -0,0 +1,97 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "code_generator_utils.h" + +#include "base/logging.h" + +namespace art { + +void CalculateMagicAndShiftForDivRem(int64_t divisor, bool is_long, + int64_t* magic, int* shift) { + // It does not make sense to calculate magic and shift for a zero divisor. + DCHECK_NE(divisor, 0); + + /* Implementation according to H. S. Warren's "Hacker's Delight" (Addison Wesley, 2002) + * Chapter 10 and T. Granlund and P. L. Montgomery's "Division by Invariant Integers Using + * Multiplication" (PLDI 1994). + * The magic number M and shift S can be calculated in the following way: + * Let nc be the most positive value of numerator(n) such that nc = kd - 1, + * where divisor(d) >= 2. + * Let nc be the most negative value of numerator(n) such that nc = kd + 1, + * where divisor(d) <= -2.
+ * Thus nc can be calculated like: + * nc = exp + exp % d - 1, where d >= 2 and exp = 2^31 for int or 2^63 for long + * nc = -exp + (exp + 1) % d, where d <= -2 and exp = 2^31 for int or 2^63 for long + + * So the shift p is the smallest p satisfying + * 2^p > nc * (d - 2^p % d), where d >= 2 + * 2^p > nc * (d + 2^p % d), where d <= -2. + + * The magic number M is calculated by + * M = (2^p + d - 2^p % d) / d, where d >= 2 + * M = (2^p - d - 2^p % d) / d, where d <= -2. + + * Notice that p is always bigger than or equal to 32 (resp. 64), so we just return p - 32 + * (resp. p - 64) as the shift number S. + */ + + int64_t p = is_long ? 63 : 31; + const uint64_t exp = is_long ? (UINT64_C(1) << 63) : (UINT32_C(1) << 31); + + // Initialize the computations. + uint64_t abs_d = (divisor >= 0) ? divisor : -divisor; + uint64_t sign_bit = is_long ? static_cast<uint64_t>(divisor) >> 63 : + static_cast<uint32_t>(divisor) >> 31; + uint64_t tmp = exp + sign_bit; + uint64_t abs_nc = tmp - 1 - (tmp % abs_d); + uint64_t quotient1 = exp / abs_nc; + uint64_t remainder1 = exp % abs_nc; + uint64_t quotient2 = exp / abs_d; + uint64_t remainder2 = exp % abs_d; + + /* + * To avoid handling positive and negative divisors separately, "Hacker's Delight" + * introduces a method that handles the two cases together. + */ + uint64_t delta; + do { + p++; + quotient1 = 2 * quotient1; + remainder1 = 2 * remainder1; + if (remainder1 >= abs_nc) { + quotient1++; + remainder1 = remainder1 - abs_nc; + } + quotient2 = 2 * quotient2; + remainder2 = 2 * remainder2; + if (remainder2 >= abs_d) { + quotient2++; + remainder2 = remainder2 - abs_d; + } + delta = abs_d - remainder2; + } while (quotient1 < delta || (quotient1 == delta && remainder1 == 0)); + + *magic = (divisor > 0) ? (quotient2 + 1) : (-quotient2 - 1); + + if (!is_long) { + *magic = static_cast<int>(*magic); + } + + *shift = is_long ? p - 64 : p - 32; +} + +} // namespace art diff --git a/compiler/optimizing/code_generator_utils.h b/compiler/optimizing/code_generator_utils.h new file mode 100644 index 0000000000..59b495c2c9 --- /dev/null +++ b/compiler/optimizing/code_generator_utils.h @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_UTILS_H_ +#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_UTILS_H_ + +#include <cstdint> + +namespace art { + +// Computes the magic number and the shift needed in the div/rem by constant algorithm, as out +// arguments `magic` and `shift` +void CalculateMagicAndShiftForDivRem(int64_t divisor, bool is_long, int64_t* magic, int* shift); + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_UTILS_H_ diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 92b62e2c84..a6fb07fa98 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -16,6 +16,7 @@ #include "code_generator_x86.h" +#include "code_generator_utils.h" #include "entrypoints/quick/quick_entrypoints.h" #include "entrypoints/quick/quick_entrypoints_enum.h" #include "gc/accounting/card_table.h" @@ -459,7 +460,12 @@ InstructionCodeGeneratorX86::InstructionCodeGeneratorX86(HGraph* graph, CodeGene assembler_(codegen->GetAssembler()), codegen_(codegen) {} +static dwarf::Reg DWARFReg(Register reg) { + return dwarf::Reg::X86Core(static_cast<int>(reg)); +} + void CodeGeneratorX86::GenerateFrameEntry() { + __ cfi().SetCurrentCFAOffset(kX86WordSize); // return address __ Bind(&frame_entry_label_); bool skip_overflow_check = IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86); @@ -478,10 +484,14 @@ void CodeGeneratorX86::GenerateFrameEntry() { Register reg = kCoreCalleeSaves[i]; if (allocated_registers_.ContainsCoreRegister(reg)) { __ pushl(reg); + __ cfi().AdjustCFAOffset(kX86WordSize); + __ cfi().RelOffset(DWARFReg(reg), 0); } } - __ subl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize())); + int adjust = GetFrameSize() - FrameEntrySpillSize(); + __ subl(ESP, Immediate(adjust)); + __ cfi().AdjustCFAOffset(adjust); __ movl(Address(ESP, kCurrentMethodStackOffset), EAX); } @@ -490,12 +500,16 @@ void CodeGeneratorX86::GenerateFrameExit() { return; } - __ addl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize())); + int adjust = GetFrameSize() - FrameEntrySpillSize(); + __ addl(ESP, Immediate(adjust)); + __ cfi().AdjustCFAOffset(-adjust); for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) { Register reg = kCoreCalleeSaves[i]; if (allocated_registers_.ContainsCoreRegister(reg)) { __ popl(reg); + __ cfi().AdjustCFAOffset(-static_cast<int>(kX86WordSize)); + __ cfi().Restore(DWARFReg(reg)); } } } @@ -1102,8 +1116,11 @@ void LocationsBuilderX86::VisitReturnVoid(HReturnVoid* ret) { void InstructionCodeGeneratorX86::VisitReturnVoid(HReturnVoid* ret) { UNUSED(ret); + __ cfi().RememberState(); codegen_->GenerateFrameExit(); __ ret(); + __ cfi().RestoreState(); + __ cfi().DefCFAOffset(codegen_->GetFrameSize()); } void LocationsBuilderX86::VisitReturn(HReturn* ret) { @@ -1161,8 +1178,11 @@ void InstructionCodeGeneratorX86::VisitReturn(HReturn* ret) { LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType(); } } + __ cfi().RememberState(); codegen_->GenerateFrameExit(); __ ret(); + __ cfi().RestoreState(); + __ cfi().DefCFAOffset(codegen_->GetFrameSize()); } void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { @@ -2278,6 +2298,133 @@ void InstructionCodeGeneratorX86::GenerateRemFP(HRem *rem) { __ addl(ESP, Immediate(2 * elem_size)); } + +void InstructionCodeGeneratorX86::DivRemOneOrMinusOne(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + + LocationSummary* locations = 
instruction->GetLocations(); + DCHECK(locations->InAt(1).IsConstant()); + DCHECK(locations->InAt(1).GetConstant()->IsIntConstant()); + + Register out_register = locations->Out().AsRegister<Register>(); + Register input_register = locations->InAt(0).AsRegister<Register>(); + int32_t imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); + + DCHECK(imm == 1 || imm == -1); + + if (instruction->IsRem()) { + __ xorl(out_register, out_register); + } else { + __ movl(out_register, input_register); + if (imm == -1) { + __ negl(out_register); + } + } +} + + +void InstructionCodeGeneratorX86::DivByPowerOfTwo(HDiv* instruction) { + LocationSummary* locations = instruction->GetLocations(); + + Register out_register = locations->Out().AsRegister<Register>(); + Register input_register = locations->InAt(0).AsRegister<Register>(); + int32_t imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); + + DCHECK(IsPowerOfTwo(std::abs(imm))); + Register num = locations->GetTemp(0).AsRegister<Register>(); + + __ leal(num, Address(input_register, std::abs(imm) - 1)); + __ testl(input_register, input_register); + __ cmovl(kGreaterEqual, num, input_register); + int shift = CTZ(imm); + __ sarl(num, Immediate(shift)); + + if (imm < 0) { + __ negl(num); + } + + __ movl(out_register, num); +} + +void InstructionCodeGeneratorX86::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + + LocationSummary* locations = instruction->GetLocations(); + int imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); + + Register eax = locations->InAt(0).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); + Register num; + Register edx; + + if (instruction->IsDiv()) { + edx = locations->GetTemp(0).AsRegister<Register>(); + num = locations->GetTemp(1).AsRegister<Register>(); + } else { + edx = locations->Out().AsRegister<Register>(); + num = locations->GetTemp(0).AsRegister<Register>(); + } + + DCHECK_EQ(EAX, eax); + DCHECK_EQ(EDX, edx); + if (instruction->IsDiv()) { + DCHECK_EQ(EAX, out); + } else { + DCHECK_EQ(EDX, out); + } + + int64_t magic; + int shift; + CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift); + + Label ndiv; + Label end; + // If numerator is 0, the result is 0, no computation needed. + __ testl(eax, eax); + __ j(kNotEqual, &ndiv); + + __ xorl(out, out); + __ jmp(&end); + + __ Bind(&ndiv); + + // Save the numerator. + __ movl(num, eax); + + // EAX = magic + __ movl(eax, Immediate(magic)); + + // EDX:EAX = magic * numerator + __ imull(num); + + if (imm > 0 && magic < 0) { + // EDX += num + __ addl(edx, num); + } else if (imm < 0 && magic > 0) { + __ subl(edx, num); + } + + // Shift if needed. + if (shift != 0) { + __ sarl(edx, Immediate(shift)); + } + + // EDX += 1 if EDX < 0 + __ movl(eax, edx); + __ shrl(edx, Immediate(31)); + __ addl(edx, eax); + + if (instruction->IsRem()) { + __ movl(eax, num); + __ imull(edx, Immediate(imm)); + __ subl(eax, edx); + __ movl(edx, eax); + } else { + __ movl(eax, edx); + } + __ Bind(&end); +} + void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instruction) { DCHECK(instruction->IsDiv() || instruction->IsRem()); @@ -2289,28 +2436,42 @@ void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instr switch (instruction->GetResultType()) { case Primitive::kPrimInt: { - Register second_reg = second.AsRegister<Register>(); DCHECK_EQ(EAX, first.AsRegister<Register>()); DCHECK_EQ(is_div ? 
EAX : EDX, out.AsRegister<Register>()); - SlowPathCodeX86* slow_path = - new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86(out.AsRegister<Register>(), - is_div); - codegen_->AddSlowPath(slow_path); + if (instruction->InputAt(1)->IsIntConstant()) { + int32_t imm = second.GetConstant()->AsIntConstant()->GetValue(); - // 0x80000000/-1 triggers an arithmetic exception! - // Dividing by -1 is actually negation and -0x800000000 = 0x80000000 so - // it's safe to just use negl instead of more complex comparisons. + if (imm == 0) { + // Do not generate anything for 0; DivZeroCheck guarantees this point is never reached. + } else if (imm == 1 || imm == -1) { + DivRemOneOrMinusOne(instruction); + } else if (is_div && IsPowerOfTwo(std::abs(imm))) { + DivByPowerOfTwo(instruction->AsDiv()); + } else { + DCHECK(imm <= -2 || imm >= 2); + GenerateDivRemWithAnyConstant(instruction); + } + } else { + SlowPathCodeX86* slow_path = + new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86(out.AsRegister<Register>(), + is_div); + codegen_->AddSlowPath(slow_path); - __ cmpl(second_reg, Immediate(-1)); - __ j(kEqual, slow_path->GetEntryLabel()); + Register second_reg = second.AsRegister<Register>(); + // 0x80000000/-1 triggers an arithmetic exception! + // Dividing by -1 is actually negation and -0x80000000 = 0x80000000 so + // it's safe to just use negl instead of more complex comparisons. - // edx:eax <- sign-extended of eax - __ cdq(); - // eax = quotient, edx = remainder - __ idivl(second_reg); + __ cmpl(second_reg, Immediate(-1)); + __ j(kEqual, slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); + // edx:eax <- sign-extended of eax + __ cdq(); + // eax = quotient, edx = remainder + __ idivl(second_reg); + __ Bind(slow_path->GetExitLabel()); + } break; } @@ -2350,10 +2511,16 @@ void LocationsBuilderX86::VisitDiv(HDiv* div) { switch (div->GetResultType()) { case Primitive::kPrimInt: { locations->SetInAt(0, Location::RegisterLocation(EAX)); - locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1))); locations->SetOut(Location::SameAsFirstInput()); // Intel uses edx:eax as the dividend. locations->AddTemp(Location::RegisterLocation(EDX)); + // We need to save the numerator while we tweak eax and edx. As we are using imul in a way + // which enforces results to be in EAX and EDX, things are simpler if we use EAX also as + // output and request another temp. + if (div->InputAt(1)->IsIntConstant()) { + locations->AddTemp(Location::RequiresRegister()); + } break; } case Primitive::kPrimLong: { @@ -2411,6 +2578,7 @@ void InstructionCodeGeneratorX86::VisitDiv(HDiv* div) { void LocationsBuilderX86::VisitRem(HRem* rem) { Primitive::Type type = rem->GetResultType(); + LocationSummary::CallKind call_kind = (rem->GetResultType() == Primitive::kPrimLong) ? LocationSummary::kCall : LocationSummary::kNoCall; @@ -2419,8 +2587,14 @@ void LocationsBuilderX86::VisitRem(HRem* rem) { switch (type) { case Primitive::kPrimInt: { locations->SetInAt(0, Location::RegisterLocation(EAX)); - locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1))); locations->SetOut(Location::RegisterLocation(EDX)); + // We need to save the numerator while we tweak eax and edx. As we are using imul in a way + // which enforces results to be in EAX and EDX, things are simpler if we use EDX also as + // output and request another temp.
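The new comments above allude to a constraint of the one-operand multiply that the constant-division sequence relies on; a short reminder of the plain x86 semantics (an editorial aside, not part of the patch):

  // imull src   =>   EDX:EAX = EAX * src   (full 64-bit signed product)
  // The one-operand form pins both halves: EAX must hold the magic constant
  // when the multiply executes, EDX receives the high half that the fixup
  // code then works on, and the original numerator therefore has to be
  // parked in the extra temp requested on the next line.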
+ if (rem->InputAt(1)->IsIntConstant()) { + locations->AddTemp(Location::RequiresRegister()); + } break; } case Primitive::kPrimLong: { @@ -2538,16 +2712,16 @@ void LocationsBuilderX86::HandleShift(HBinaryOperation* op) { switch (op->GetResultType()) { case Primitive::kPrimInt: { - locations->SetInAt(0, Location::RequiresRegister()); - // The shift count needs to be in CL. + locations->SetInAt(0, Location::Any()); + // The shift count needs to be in CL or a constant. locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, op->InputAt(1))); locations->SetOut(Location::SameAsFirstInput()); break; } case Primitive::kPrimLong: { locations->SetInAt(0, Location::RequiresRegister()); - // The shift count needs to be in CL. - locations->SetInAt(1, Location::RegisterLocation(ECX)); + // The shift count needs to be in CL or a constant. + locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, op->InputAt(1))); locations->SetOut(Location::SameAsFirstInput()); break; } @@ -2566,38 +2740,87 @@ void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) { switch (op->GetResultType()) { case Primitive::kPrimInt: { - Register first_reg = first.AsRegister<Register>(); - if (second.IsRegister()) { - Register second_reg = second.AsRegister<Register>(); - DCHECK_EQ(ECX, second_reg); - if (op->IsShl()) { - __ shll(first_reg, second_reg); - } else if (op->IsShr()) { - __ sarl(first_reg, second_reg); + if (first.IsRegister()) { + Register first_reg = first.AsRegister<Register>(); + if (second.IsRegister()) { + Register second_reg = second.AsRegister<Register>(); + DCHECK_EQ(ECX, second_reg); + if (op->IsShl()) { + __ shll(first_reg, second_reg); + } else if (op->IsShr()) { + __ sarl(first_reg, second_reg); + } else { + __ shrl(first_reg, second_reg); + } } else { - __ shrl(first_reg, second_reg); + int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue; + if (shift == 0) { + return; + } + Immediate imm(shift); + if (op->IsShl()) { + __ shll(first_reg, imm); + } else if (op->IsShr()) { + __ sarl(first_reg, imm); + } else { + __ shrl(first_reg, imm); + } } } else { - Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue); - if (op->IsShl()) { - __ shll(first_reg, imm); - } else if (op->IsShr()) { - __ sarl(first_reg, imm); + DCHECK(first.IsStackSlot()) << first; + Address addr(ESP, first.GetStackIndex()); + if (second.IsRegister()) { + Register second_reg = second.AsRegister<Register>(); + DCHECK_EQ(ECX, second_reg); + if (op->IsShl()) { + __ shll(addr, second_reg); + } else if (op->IsShr()) { + __ sarl(addr, second_reg); + } else { + __ shrl(addr, second_reg); + } } else { - __ shrl(first_reg, imm); + int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue; + if (shift == 0) { + return; + } + Immediate imm(shift); + if (op->IsShl()) { + __ shll(addr, imm); + } else if (op->IsShr()) { + __ sarl(addr, imm); + } else { + __ shrl(addr, imm); + } } } + break; } case Primitive::kPrimLong: { - Register second_reg = second.AsRegister<Register>(); - DCHECK_EQ(ECX, second_reg); - if (op->IsShl()) { - GenerateShlLong(first, second_reg); - } else if (op->IsShr()) { - GenerateShrLong(first, second_reg); + if (second.IsRegister()) { + Register second_reg = second.AsRegister<Register>(); + DCHECK_EQ(ECX, second_reg); + if (op->IsShl()) { + GenerateShlLong(first, second_reg); + } else if (op->IsShr()) { + GenerateShrLong(first, second_reg); + } else { + GenerateUShrLong(first, second_reg); + } } else { - 
GenerateUShrLong(first, second_reg); + // Shift by a constant. + int shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftValue; + // Nothing to do if the shift is 0, as the input is already the output. + if (shift != 0) { + if (op->IsShl()) { + GenerateShlLong(first, shift); + } else if (op->IsShr()) { + GenerateShrLong(first, shift); + } else { + GenerateUShrLong(first, shift); + } + } } break; } @@ -2606,6 +2829,26 @@ void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) { } } +void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, int shift) { + Register low = loc.AsRegisterPairLow<Register>(); + Register high = loc.AsRegisterPairHigh<Register>(); + if (shift == 32) { + // Shift by 32 is easy. High gets low, and low gets 0. + codegen_->EmitParallelMoves( + loc.ToLow(), loc.ToHigh(), + Location::ConstantLocation(GetGraph()->GetIntConstant(0)), loc.ToLow()); + } else if (shift > 32) { + // Low part becomes 0. High part is low part << (shift-32). + __ movl(high, low); + __ shll(high, Immediate(shift - 32)); + __ xorl(low, low); + } else { + // Between 1 and 31. + __ shld(high, low, Immediate(shift)); + __ shll(low, Immediate(shift)); + } +} + void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, Register shifter) { Label done; __ shld(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>(), shifter); @@ -2617,6 +2860,27 @@ void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, Register __ Bind(&done); } +void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, int shift) { + Register low = loc.AsRegisterPairLow<Register>(); + Register high = loc.AsRegisterPairHigh<Register>(); + if (shift == 32) { + // Need to copy the sign. + DCHECK_NE(low, high); + __ movl(low, high); + __ sarl(high, Immediate(31)); + } else if (shift > 32) { + DCHECK_NE(low, high); + // High part becomes sign. Low part is the high part shifted arithmetically by shift - 32. + __ movl(low, high); + __ sarl(high, Immediate(31)); + __ sarl(low, Immediate(shift - 32)); + } else { + // Between 1 and 31. + __ shrd(low, high, Immediate(shift)); + __ sarl(high, Immediate(shift)); + } +} + void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, Register shifter) { Label done; __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter); @@ -2628,6 +2892,26 @@ void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, Register __ Bind(&done); } +void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, int shift) { + Register low = loc.AsRegisterPairLow<Register>(); + Register high = loc.AsRegisterPairHigh<Register>(); + if (shift == 32) { + // Shift by 32 is easy. Low gets high, and high gets 0. + codegen_->EmitParallelMoves( + loc.ToHigh(), loc.ToLow(), + Location::ConstantLocation(GetGraph()->GetIntConstant(0)), loc.ToHigh()); + } else if (shift > 32) { + // Low part is high >> (shift - 32). High part becomes 0. + __ movl(low, high); + __ shrl(low, Immediate(shift - 32)); + __ xorl(high, high); + } else { + // Between 1 and 31. + __ shrd(low, high, Immediate(shift)); + __ shrl(high, Immediate(shift)); + } +} + void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, Register shifter) { Label done; __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter); @@ -3388,7 +3672,13 @@ void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) { // Ensure the value is in a byte register.
locations->SetInAt(2, Location::ByteRegisterOrConstant(EAX, instruction->InputAt(2))); } else { - locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2))); + bool is_fp_type = (value_type == Primitive::kPrimFloat) + || (value_type == Primitive::kPrimDouble); + if (is_fp_type) { + locations->SetInAt(2, Location::RequiresFpuRegister()); + } else { + locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2))); + } } // Temporary registers for the write barrier. if (needs_write_barrier) { @@ -3667,23 +3957,43 @@ X86Assembler* ParallelMoveResolverX86::GetAssembler() const { } void ParallelMoveResolverX86::MoveMemoryToMemory32(int dst, int src) { - ScratchRegisterScope ensure_scratch( - this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters()); - Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister()); - int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0; - __ movl(temp_reg, Address(ESP, src + stack_offset)); - __ movl(Address(ESP, dst + stack_offset), temp_reg); + ScratchRegisterScope possible_scratch( + this, kNoRegister, codegen_->GetNumberOfCoreRegisters()); + int temp = possible_scratch.GetRegister(); + if (temp == kNoRegister) { + // Use the stack. + __ pushl(Address(ESP, src)); + __ popl(Address(ESP, dst)); + } else { + Register temp_reg = static_cast<Register>(temp); + __ movl(temp_reg, Address(ESP, src)); + __ movl(Address(ESP, dst), temp_reg); + } } void ParallelMoveResolverX86::MoveMemoryToMemory64(int dst, int src) { - ScratchRegisterScope ensure_scratch( - this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters()); - Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister()); - int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0; - __ movl(temp_reg, Address(ESP, src + stack_offset)); - __ movl(Address(ESP, dst + stack_offset), temp_reg); - __ movl(temp_reg, Address(ESP, src + stack_offset + kX86WordSize)); - __ movl(Address(ESP, dst + stack_offset + kX86WordSize), temp_reg); + ScratchRegisterScope possible_scratch( + this, kNoRegister, codegen_->GetNumberOfCoreRegisters()); + int temp = possible_scratch.GetRegister(); + if (temp == kNoRegister) { + // Use the stack instead. + // Push src low word. + __ pushl(Address(ESP, src)); + // Push src high word. Stack offset = 4. + __ pushl(Address(ESP, src + 4 /* offset */ + kX86WordSize /* high */)); + + // Pop into dst high word. Stack offset = 8. + // Pop with ESP address uses the 'after increment' value of ESP. + __ popl(Address(ESP, dst + 4 /* offset */ + kX86WordSize /* high */)); + // Finally dst low word. Stack offset = 4. + __ popl(Address(ESP, dst)); + } else { + Register temp_reg = static_cast<Register>(temp); + __ movl(temp_reg, Address(ESP, src)); + __ movl(Address(ESP, dst), temp_reg); + __ movl(temp_reg, Address(ESP, src + kX86WordSize)); + __ movl(Address(ESP, dst + kX86WordSize), temp_reg); + } } void ParallelMoveResolverX86::EmitMove(size_t index) { @@ -3748,10 +4058,18 @@ void ParallelMoveResolverX86::EmitMove(size_t index) { __ xorps(dest, dest); } else { ScratchRegisterScope ensure_scratch( - this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters()); - Register temp = static_cast<Register>(ensure_scratch.GetRegister()); - __ movl(temp, Immediate(value)); - __ movd(dest, temp); + this, kNoRegister, codegen_->GetNumberOfCoreRegisters()); + int temp_reg = ensure_scratch.GetRegister(); + if (temp_reg == kNoRegister) { + // Avoid spilling/restoring a scratch register by using the stack. 
+ __ pushl(Immediate(value)); + __ movss(dest, Address(ESP, 0)); + __ addl(ESP, Immediate(4)); + } else { + Register temp = static_cast<Register>(temp_reg); + __ movl(temp, Immediate(value)); + __ movd(dest, temp); + } } } else { DCHECK(destination.IsStackSlot()) << destination; @@ -3800,42 +4118,96 @@ void ParallelMoveResolverX86::EmitMove(size_t index) { } } -void ParallelMoveResolverX86::Exchange(Register reg, int mem) { - Register suggested_scratch = reg == EAX ? EBX : EAX; - ScratchRegisterScope ensure_scratch( - this, reg, suggested_scratch, codegen_->GetNumberOfCoreRegisters()); +void ParallelMoveResolverX86::Exchange(Register reg1, Register reg2) { + // Prefer to avoid xchg as it isn't speedy on smaller processors. + ScratchRegisterScope possible_scratch( + this, reg1, codegen_->GetNumberOfCoreRegisters()); + int temp_reg = possible_scratch.GetRegister(); + if (temp_reg == kNoRegister || temp_reg == reg2) { + __ pushl(reg1); + __ movl(reg1, reg2); + __ popl(reg2); + } else { + Register temp = static_cast<Register>(temp_reg); + __ movl(temp, reg1); + __ movl(reg1, reg2); + __ movl(reg2, temp); + } +} - int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0; - __ movl(static_cast<Register>(ensure_scratch.GetRegister()), Address(ESP, mem + stack_offset)); - __ movl(Address(ESP, mem + stack_offset), reg); - __ movl(reg, static_cast<Register>(ensure_scratch.GetRegister())); +void ParallelMoveResolverX86::Exchange(Register reg, int mem) { + ScratchRegisterScope possible_scratch( + this, reg, codegen_->GetNumberOfCoreRegisters()); + int temp_reg = possible_scratch.GetRegister(); + if (temp_reg == kNoRegister) { + __ pushl(Address(ESP, mem)); + __ movl(Address(ESP, mem + kX86WordSize), reg); + __ popl(reg); + } else { + Register temp = static_cast<Register>(temp_reg); + __ movl(temp, Address(ESP, mem)); + __ movl(Address(ESP, mem), reg); + __ movl(reg, temp); + } } void ParallelMoveResolverX86::Exchange32(XmmRegister reg, int mem) { - ScratchRegisterScope ensure_scratch( - this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters()); - - Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister()); - int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0; - __ movl(temp_reg, Address(ESP, mem + stack_offset)); - __ movss(Address(ESP, mem + stack_offset), reg); - __ movd(reg, temp_reg); + ScratchRegisterScope possible_scratch( + this, kNoRegister, codegen_->GetNumberOfCoreRegisters()); + int temp_reg = possible_scratch.GetRegister(); + if (temp_reg == kNoRegister) { + __ pushl(Address(ESP, mem)); + __ movss(Address(ESP, mem + kX86WordSize), reg); + __ movss(reg, Address(ESP, 0)); + __ addl(ESP, Immediate(kX86WordSize)); + } else { + Register temp = static_cast<Register>(temp_reg); + __ movl(temp, Address(ESP, mem)); + __ movss(Address(ESP, mem), reg); + __ movd(reg, temp); + } } void ParallelMoveResolverX86::Exchange(int mem1, int mem2) { - ScratchRegisterScope ensure_scratch1( - this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters()); - - Register suggested_scratch = ensure_scratch1.GetRegister() == EAX ? EBX : EAX; - ScratchRegisterScope ensure_scratch2( - this, ensure_scratch1.GetRegister(), suggested_scratch, codegen_->GetNumberOfCoreRegisters()); - - int stack_offset = ensure_scratch1.IsSpilled() ? kX86WordSize : 0; - stack_offset += ensure_scratch2.IsSpilled() ? 
kX86WordSize : 0; - __ movl(static_cast<Register>(ensure_scratch1.GetRegister()), Address(ESP, mem1 + stack_offset)); - __ movl(static_cast<Register>(ensure_scratch2.GetRegister()), Address(ESP, mem2 + stack_offset)); - __ movl(Address(ESP, mem2 + stack_offset), static_cast<Register>(ensure_scratch1.GetRegister())); - __ movl(Address(ESP, mem1 + stack_offset), static_cast<Register>(ensure_scratch2.GetRegister())); + ScratchRegisterScope possible_scratch1( + this, kNoRegister, codegen_->GetNumberOfCoreRegisters()); + int temp_reg1 = possible_scratch1.GetRegister(); + if (temp_reg1 == kNoRegister) { + // No free registers. Use the stack. + __ pushl(Address(ESP, mem1)); + __ pushl(Address(ESP, mem2 + kX86WordSize)); + // Pop with ESP address uses the 'after increment' value of ESP. + __ popl(Address(ESP, mem1 + kX86WordSize)); + __ popl(Address(ESP, mem2)); + } else { + // Got the first one. Try for a second. + ScratchRegisterScope possible_scratch2( + this, temp_reg1, codegen_->GetNumberOfCoreRegisters()); + int temp_reg2 = possible_scratch2.GetRegister(); + if (temp_reg2 == kNoRegister) { + Register temp = static_cast<Register>(temp_reg1); + // Bummer. Only have one free register to use. + // Save mem1 on the stack. + __ pushl(Address(ESP, mem1)); + + // Copy mem2 into mem1. + __ movl(temp, Address(ESP, mem2 + kX86WordSize)); + __ movl(Address(ESP, mem1 + kX86WordSize), temp); + + // Now pop mem1 into mem2. + // Pop with ESP address uses the 'after increment' value of ESP. + __ popl(Address(ESP, mem2)); + } else { + // Great. We have 2 registers to play with. + Register temp1 = static_cast<Register>(temp_reg1); + Register temp2 = static_cast<Register>(temp_reg2); + DCHECK_NE(temp1, temp2); + __ movl(temp1, Address(ESP, mem1)); + __ movl(temp2, Address(ESP, mem2)); + __ movl(Address(ESP, mem2), temp1); + __ movl(Address(ESP, mem1), temp2); + } + } } void ParallelMoveResolverX86::EmitSwap(size_t index) { @@ -3844,7 +4216,7 @@ void ParallelMoveResolverX86::EmitSwap(size_t index) { Location destination = move->GetDestination(); if (source.IsRegister() && destination.IsRegister()) { - __ xchgl(destination.AsRegister<Register>(), source.AsRegister<Register>()); + Exchange(destination.AsRegister<Register>(), source.AsRegister<Register>()); } else if (source.IsRegister() && destination.IsStackSlot()) { Exchange(source.AsRegister<Register>(), destination.GetStackIndex()); } else if (source.IsStackSlot() && destination.IsRegister()) { diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 0cc3c6533a..8c56e35329 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -106,6 +106,7 @@ class ParallelMoveResolverX86 : public ParallelMoveResolver { X86Assembler* GetAssembler() const; private: + void Exchange(Register reg1, Register Reg2); void Exchange(Register reg, int mem); void Exchange(int mem1, int mem2); void Exchange32(XmmRegister reg, int mem); @@ -163,11 +164,17 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { void GenerateClassInitializationCheck(SlowPathCodeX86* slow_path, Register class_reg); void HandleBitwiseOperation(HBinaryOperation* instruction); void GenerateDivRemIntegral(HBinaryOperation* instruction); + void DivRemOneOrMinusOne(HBinaryOperation* instruction); + void DivByPowerOfTwo(HDiv* instruction); + void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); void GenerateRemFP(HRem *rem); void HandleShift(HBinaryOperation* instruction); void GenerateShlLong(const 
Location& loc, Register shifter); void GenerateShrLong(const Location& loc, Register shifter); void GenerateUShrLong(const Location& loc, Register shifter); + void GenerateShlLong(const Location& loc, int shift); + void GenerateShrLong(const Location& loc, int shift); + void GenerateUShrLong(const Location& loc, int shift); void GenerateMemoryBarrier(MemBarrierKind kind); void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index cdbc7780a8..01b24ea33f 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -16,6 +16,7 @@ #include "code_generator_x86_64.h" +#include "code_generator_utils.h" #include "entrypoints/quick/quick_entrypoints.h" #include "gc/accounting/card_table.h" #include "intrinsics.h" @@ -428,7 +429,8 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, location_builder_(graph, this), instruction_visitor_(graph, this), move_resolver_(graph->GetArena(), this), - isa_features_(isa_features) { + isa_features_(isa_features), + constant_area_start_(0) { AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister)); } @@ -481,7 +483,15 @@ void CodeGeneratorX86_64::SetupBlockedRegisters(bool is_baseline) const { } } +static dwarf::Reg DWARFReg(Register reg) { + return dwarf::Reg::X86_64Core(static_cast<int>(reg)); +} +static dwarf::Reg DWARFReg(FloatRegister reg) { + return dwarf::Reg::X86_64Fp(static_cast<int>(reg)); +} + void CodeGeneratorX86_64::GenerateFrameEntry() { + __ cfi().SetCurrentCFAOffset(kX86_64WordSize); // return address __ Bind(&frame_entry_label_); bool skip_overflow_check = IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64); @@ -501,17 +511,22 @@ void CodeGeneratorX86_64::GenerateFrameEntry() { Register reg = kCoreCalleeSaves[i]; if (allocated_registers_.ContainsCoreRegister(reg)) { __ pushq(CpuRegister(reg)); + __ cfi().AdjustCFAOffset(kX86_64WordSize); + __ cfi().RelOffset(DWARFReg(reg), 0); } } - __ subq(CpuRegister(RSP), Immediate(GetFrameSize() - GetCoreSpillSize())); + int adjust = GetFrameSize() - GetCoreSpillSize(); + __ subq(CpuRegister(RSP), Immediate(adjust)); + __ cfi().AdjustCFAOffset(adjust); uint32_t xmm_spill_location = GetFpuSpillStart(); size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize(); for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) { if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) { - __ movsd(Address(CpuRegister(RSP), xmm_spill_location + (xmm_spill_slot_size * i)), - XmmRegister(kFpuCalleeSaves[i])); + int offset = xmm_spill_location + (xmm_spill_slot_size * i); + __ movsd(Address(CpuRegister(RSP), offset), XmmRegister(kFpuCalleeSaves[i])); + __ cfi().RelOffset(DWARFReg(kFpuCalleeSaves[i]), offset); } } @@ -526,17 +541,22 @@ void CodeGeneratorX86_64::GenerateFrameExit() { size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize(); for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) { if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) { - __ movsd(XmmRegister(kFpuCalleeSaves[i]), - Address(CpuRegister(RSP), xmm_spill_location + (xmm_spill_slot_size * i))); + int offset = xmm_spill_location + (xmm_spill_slot_size * i); + __ movsd(XmmRegister(kFpuCalleeSaves[i]), Address(CpuRegister(RSP), offset)); + __ cfi().Restore(DWARFReg(kFpuCalleeSaves[i])); } } - __ 
addq(CpuRegister(RSP), Immediate(GetFrameSize() - GetCoreSpillSize())); + int adjust = GetFrameSize() - GetCoreSpillSize(); + __ addq(CpuRegister(RSP), Immediate(adjust)); + __ cfi().AdjustCFAOffset(-adjust); for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) { Register reg = kCoreCalleeSaves[i]; if (allocated_registers_.ContainsCoreRegister(reg)) { __ popq(CpuRegister(reg)); + __ cfi().AdjustCFAOffset(-static_cast<int>(kX86_64WordSize)); + __ cfi().Restore(DWARFReg(reg)); } } } @@ -1123,8 +1143,11 @@ void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) { void InstructionCodeGeneratorX86_64::VisitReturnVoid(HReturnVoid* ret) { UNUSED(ret); + __ cfi().RememberState(); codegen_->GenerateFrameExit(); __ ret(); + __ cfi().RestoreState(); + __ cfi().DefCFAOffset(codegen_->GetFrameSize()); } void LocationsBuilderX86_64::VisitReturn(HReturn* ret) { @@ -1175,8 +1198,11 @@ void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) { LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType(); } } + __ cfi().RememberState(); codegen_->GenerateFrameExit(); __ ret(); + __ cfi().RestoreState(); + __ cfi().DefCFAOffset(codegen_->GetFrameSize()); } Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) { @@ -1951,7 +1977,7 @@ void LocationsBuilderX86_64::VisitAdd(HAdd* add) { case Primitive::kPrimDouble: case Primitive::kPrimFloat: { locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::Any()); locations->SetOut(Location::SameAsFirstInput()); break; } @@ -2015,12 +2041,30 @@ void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) { } case Primitive::kPrimFloat: { - __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + if (second.IsFpuRegister()) { + __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + } else if (second.IsConstant()) { + __ addss(first.AsFpuRegister<XmmRegister>(), + codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue())); + } else { + DCHECK(second.IsStackSlot()); + __ addss(first.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), second.GetStackIndex())); + } break; } case Primitive::kPrimDouble: { - __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + if (second.IsFpuRegister()) { + __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + } else if (second.IsConstant()) { + __ addsd(first.AsFpuRegister<XmmRegister>(), + codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue())); + } else { + DCHECK(second.IsDoubleStackSlot()); + __ addsd(first.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), second.GetStackIndex())); + } break; } @@ -2048,7 +2092,7 @@ void LocationsBuilderX86_64::VisitSub(HSub* sub) { case Primitive::kPrimFloat: case Primitive::kPrimDouble: { locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::Any()); locations->SetOut(Location::SameAsFirstInput()); break; } @@ -2086,12 +2130,30 @@ void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) { } case Primitive::kPrimFloat: { - __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + if (second.IsFpuRegister()) { + __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + } else if (second.IsConstant()) { + __ 
subss(first.AsFpuRegister<XmmRegister>(), + codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue())); + } else { + DCHECK(second.IsStackSlot()); + __ subss(first.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), second.GetStackIndex())); + } break; } case Primitive::kPrimDouble: { - __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + if (second.IsFpuRegister()) { + __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + } else if (second.IsConstant()) { + __ subsd(first.AsFpuRegister<XmmRegister>(), + codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue())); + } else { + DCHECK(second.IsDoubleStackSlot()); + __ subsd(first.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), second.GetStackIndex())); + } break; } @@ -2124,7 +2186,7 @@ void LocationsBuilderX86_64::VisitMul(HMul* mul) { case Primitive::kPrimFloat: case Primitive::kPrimDouble: { locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::Any()); locations->SetOut(Location::SameAsFirstInput()); break; } @@ -2169,13 +2231,31 @@ void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) { case Primitive::kPrimFloat: { DCHECK(first.Equals(locations->Out())); - __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + if (second.IsFpuRegister()) { + __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + } else if (second.IsConstant()) { + __ mulss(first.AsFpuRegister<XmmRegister>(), + codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue())); + } else { + DCHECK(second.IsStackSlot()); + __ mulss(first.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), second.GetStackIndex())); + } break; } case Primitive::kPrimDouble: { DCHECK(first.Equals(locations->Out())); - __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + if (second.IsFpuRegister()) { + __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + } else if (second.IsConstant()) { + __ mulsd(first.AsFpuRegister<XmmRegister>(), + codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue())); + } else { + DCHECK(second.IsDoubleStackSlot()); + __ mulsd(first.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), second.GetStackIndex())); + } break; } @@ -2259,6 +2339,216 @@ void InstructionCodeGeneratorX86_64::GenerateRemFP(HRem *rem) { __ addq(CpuRegister(RSP), Immediate(2 * elem_size)); } +void InstructionCodeGeneratorX86_64::DivRemOneOrMinusOne(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + + LocationSummary* locations = instruction->GetLocations(); + Location second = locations->InAt(1); + DCHECK(second.IsConstant()); + + CpuRegister output_register = locations->Out().AsRegister<CpuRegister>(); + CpuRegister input_register = locations->InAt(0).AsRegister<CpuRegister>(); + int64_t imm = Int64FromConstant(second.GetConstant()); + + DCHECK(imm == 1 || imm == -1); + + switch (instruction->GetResultType()) { + case Primitive::kPrimInt: { + if (instruction->IsRem()) { + __ xorl(output_register, output_register); + } else { + __ movl(output_register, input_register); + if (imm == -1) { + __ negl(output_register); + } + } + break; + } + + case Primitive::kPrimLong: { + if (instruction->IsRem()) { + __ xorq(output_register, output_register); + } else { + 
__ movq(output_register, input_register); + if (imm == -1) { + __ negq(output_register); + } + } + break; + } + + default: + LOG(FATAL) << "Unexpected type for div by (-)1 " << instruction->GetResultType(); + } +} + +void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) { + LocationSummary* locations = instruction->GetLocations(); + Location second = locations->InAt(1); + + CpuRegister output_register = locations->Out().AsRegister<CpuRegister>(); + CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>(); + + int64_t imm = Int64FromConstant(second.GetConstant()); + + DCHECK(IsPowerOfTwo(std::abs(imm))); + + CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>(); + + if (instruction->GetResultType() == Primitive::kPrimInt) { + __ leal(tmp, Address(numerator, std::abs(imm) - 1)); + __ testl(numerator, numerator); + __ cmov(kGreaterEqual, tmp, numerator); + int shift = CTZ(imm); + __ sarl(tmp, Immediate(shift)); + + if (imm < 0) { + __ negl(tmp); + } + + __ movl(output_register, tmp); + } else { + DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong); + CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>(); + + __ movq(rdx, Immediate(std::abs(imm) - 1)); + __ addq(rdx, numerator); + __ testq(numerator, numerator); + __ cmov(kGreaterEqual, rdx, numerator); + int shift = CTZ(imm); + __ sarq(rdx, Immediate(shift)); + + if (imm < 0) { + __ negq(rdx); + } + + __ movq(output_register, rdx); + } +} + +void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + + LocationSummary* locations = instruction->GetLocations(); + Location second = locations->InAt(1); + + CpuRegister numerator = instruction->IsDiv() ? locations->GetTemp(1).AsRegister<CpuRegister>() + : locations->GetTemp(0).AsRegister<CpuRegister>(); + CpuRegister eax = locations->InAt(0).AsRegister<CpuRegister>(); + CpuRegister edx = instruction->IsDiv() ? locations->GetTemp(0).AsRegister<CpuRegister>() + : locations->Out().AsRegister<CpuRegister>(); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + + DCHECK_EQ(RAX, eax.AsRegister()); + DCHECK_EQ(RDX, edx.AsRegister()); + if (instruction->IsDiv()) { + DCHECK_EQ(RAX, out.AsRegister()); + } else { + DCHECK_EQ(RDX, out.AsRegister()); + } + + int64_t magic; + int shift; + + // TODO: can these branches be written as one? 
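Both branches below emit the same multiply-and-fixup pattern at two widths. As a sanity check on the 32-bit sequence, here is a scalar model for division by 7, for which CalculateMagicAndShiftForDivRem produces magic = 0x92492493 and shift = 2; this is an illustrative sketch, not ART code:

  #include <cstdint>

  // Mirrors the emitted sequence: take imull's high half, then addl(edx, num)
  // because the magic for 7 is negative while the divisor is positive, then
  // sarl by the shift, then add the sign bit so negative quotients round
  // toward zero the way idiv does.
  int32_t DivBy7(int32_t n) {
    const int32_t magic = static_cast<int32_t>(0x92492493);
    int32_t hi = static_cast<int32_t>((static_cast<int64_t>(n) * magic) >> 32);  // EDX
    hi += n;                                // the imm > 0 && magic < 0 case
    hi >>= 2;                               // sarl edx, shift (arithmetic on mainstream compilers)
    hi += static_cast<uint32_t>(hi) >> 31;  // +1 when negative
    return hi;                              // DivBy7(21) == 3, DivBy7(-21) == -3
  }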
+ if (instruction->GetResultType() == Primitive::kPrimInt) { + int imm = second.GetConstant()->AsIntConstant()->GetValue(); + + CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift); + + __ movl(numerator, eax); + + Label no_div; + Label end; + __ testl(eax, eax); + __ j(kNotEqual, &no_div); + + __ xorl(out, out); + __ jmp(&end); + + __ Bind(&no_div); + + __ movl(eax, Immediate(magic)); + __ imull(numerator); + + if (imm > 0 && magic < 0) { + __ addl(edx, numerator); + } else if (imm < 0 && magic > 0) { + __ subl(edx, numerator); + } + + if (shift != 0) { + __ sarl(edx, Immediate(shift)); + } + + __ movl(eax, edx); + __ shrl(edx, Immediate(31)); + __ addl(edx, eax); + + if (instruction->IsRem()) { + __ movl(eax, numerator); + __ imull(edx, Immediate(imm)); + __ subl(eax, edx); + __ movl(edx, eax); + } else { + __ movl(eax, edx); + } + __ Bind(&end); + } else { + int64_t imm = second.GetConstant()->AsLongConstant()->GetValue(); + + DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong); + + CpuRegister rax = eax; + CpuRegister rdx = edx; + + CalculateMagicAndShiftForDivRem(imm, true /* is_long */, &magic, &shift); + + // Save the numerator. + __ movq(numerator, rax); + + // RAX = magic + __ movq(rax, Immediate(magic)); + + // RDX:RAX = magic * numerator + __ imulq(numerator); + + if (imm > 0 && magic < 0) { + // RDX += numerator + __ addq(rdx, numerator); + } else if (imm < 0 && magic > 0) { + // RDX -= numerator + __ subq(rdx, numerator); + } + + // Shift if needed. + if (shift != 0) { + __ sarq(rdx, Immediate(shift)); + } + + // RDX += 1 if RDX < 0 + __ movq(rax, rdx); + __ shrq(rdx, Immediate(63)); + __ addq(rdx, rax); + + if (instruction->IsRem()) { + __ movq(rax, numerator); + + if (IsInt<32>(imm)) { + __ imulq(rdx, Immediate(static_cast<int32_t>(imm))); + } else { + __ movq(numerator, Immediate(imm)); + __ imulq(rdx, numerator); + } + + __ subq(rax, rdx); + __ movq(rdx, rax); + } else { + __ movq(rax, rdx); + } + } +} + void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) { DCHECK(instruction->IsDiv() || instruction->IsRem()); Primitive::Type type = instruction->GetResultType(); @@ -2267,37 +2557,52 @@ void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* in bool is_div = instruction->IsDiv(); LocationSummary* locations = instruction->GetLocations(); - CpuRegister out_reg = locations->Out().AsRegister<CpuRegister>(); - CpuRegister second_reg = locations->InAt(1).AsRegister<CpuRegister>(); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + Location second = locations->InAt(1); DCHECK_EQ(RAX, locations->InAt(0).AsRegister<CpuRegister>().AsRegister()); - DCHECK_EQ(is_div ? RAX : RDX, out_reg.AsRegister()); + DCHECK_EQ(is_div ? RAX : RDX, out.AsRegister()); - SlowPathCodeX86_64* slow_path = - new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86_64( - out_reg.AsRegister(), type, is_div); - codegen_->AddSlowPath(slow_path); + if (second.IsConstant()) { + int64_t imm = Int64FromConstant(second.GetConstant()); - // 0x80000000(00000000)/-1 triggers an arithmetic exception! - // Dividing by -1 is actually negation and -0x800000000(00000000) = 0x80000000(00000000) - // so it's safe to just use negl instead of more complex comparisons. 
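The comment block above names the single overflowing case; spelled out:

  // INT_MIN / -1 is the one quotient that overflows: +2^31 is not
  // representable, so idiv raises the same #DE fault as division by zero.
  // Java defines Integer.MIN_VALUE / -1 == Integer.MIN_VALUE, which is
  // exactly what two's-complement negation yields, so the slow path can
  // simply negate instead of dividing.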
- if (type == Primitive::kPrimInt) { - __ cmpl(second_reg, Immediate(-1)); - __ j(kEqual, slow_path->GetEntryLabel()); - // edx:eax <- sign-extended of eax - __ cdq(); - // eax = quotient, edx = remainder - __ idivl(second_reg); + if (imm == 0) { + // Do not generate anything. DivZeroCheck would prevent any code from being executed. + } else if (imm == 1 || imm == -1) { + DivRemOneOrMinusOne(instruction); + } else if (instruction->IsDiv() && IsPowerOfTwo(std::abs(imm))) { + DivByPowerOfTwo(instruction->AsDiv()); + } else { + DCHECK(imm <= -2 || imm >= 2); + GenerateDivRemWithAnyConstant(instruction); + } } else { - __ cmpq(second_reg, Immediate(-1)); - __ j(kEqual, slow_path->GetEntryLabel()); - // rdx:rax <- sign-extended of rax - __ cqo(); - // rax = quotient, rdx = remainder - __ idivq(second_reg); - } + SlowPathCodeX86_64* slow_path = + new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86_64( + out.AsRegister(), type, is_div); + codegen_->AddSlowPath(slow_path); - __ Bind(slow_path->GetExitLabel()); + CpuRegister second_reg = second.AsRegister<CpuRegister>(); + // 0x80000000(00000000)/-1 triggers an arithmetic exception! + // Dividing by -1 is actually negation and -0x80000000(00000000) = 0x80000000(00000000) + // so it's safe to just use negl instead of more complex comparisons. + if (type == Primitive::kPrimInt) { + __ cmpl(second_reg, Immediate(-1)); + __ j(kEqual, slow_path->GetEntryLabel()); + // edx:eax <- sign-extended of eax + __ cdq(); + // eax = quotient, edx = remainder + __ idivl(second_reg); + } else { + __ cmpq(second_reg, Immediate(-1)); + __ j(kEqual, slow_path->GetEntryLabel()); + // rdx:rax <- sign-extended of rax + __ cqo(); + // rax = quotient, rdx = remainder + __ idivq(second_reg); + } + __ Bind(slow_path->GetExitLabel()); + } } void LocationsBuilderX86_64::VisitDiv(HDiv* div) { @@ -2307,17 +2612,23 @@ void LocationsBuilderX86_64::VisitDiv(HDiv* div) { case Primitive::kPrimInt: case Primitive::kPrimLong: { locations->SetInAt(0, Location::RegisterLocation(RAX)); - locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1))); locations->SetOut(Location::SameAsFirstInput()); // Intel uses edx:eax as the dividend. locations->AddTemp(Location::RegisterLocation(RDX)); + // We need to save the numerator while we tweak rax and rdx. As we are using imul in a way + // which enforces results to be in RAX and RDX, things are simpler if we use RAX also as + // output and request another temp.
+ if (div->InputAt(1)->IsConstant()) { + locations->AddTemp(Location::RequiresRegister()); + } break; } case Primitive::kPrimFloat: case Primitive::kPrimDouble: { locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::Any()); locations->SetOut(Location::SameAsFirstInput()); break; } @@ -2342,12 +2653,30 @@ void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) { } case Primitive::kPrimFloat: { - __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + if (second.IsFpuRegister()) { + __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + } else if (second.IsConstant()) { + __ divss(first.AsFpuRegister<XmmRegister>(), + codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue())); + } else { + DCHECK(second.IsStackSlot()); + __ divss(first.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), second.GetStackIndex())); + } break; } case Primitive::kPrimDouble: { - __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + if (second.IsFpuRegister()) { + __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); + } else if (second.IsConstant()) { + __ divsd(first.AsFpuRegister<XmmRegister>(), + codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue())); + } else { + DCHECK(second.IsDoubleStackSlot()); + __ divsd(first.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), second.GetStackIndex())); + } break; } @@ -2365,9 +2694,15 @@ void LocationsBuilderX86_64::VisitRem(HRem* rem) { case Primitive::kPrimInt: case Primitive::kPrimLong: { locations->SetInAt(0, Location::RegisterLocation(RAX)); - locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1))); // Intel uses rdx:rax as the dividend and puts the remainder in rdx locations->SetOut(Location::RegisterLocation(RDX)); + // We need to save the numerator while we tweak eax and edx. As we are using imul in a way + // which enforces results to be in RAX and RDX, things are simpler if we use EAX also as + // output and request another temp. + if (rem->InputAt(1)->IsConstant()) { + locations->AddTemp(Location::RequiresRegister()); + } break; } @@ -3486,15 +3821,27 @@ void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg, int mem) { void ParallelMoveResolverX86_64::Exchange64(int mem1, int mem2) { ScratchRegisterScope ensure_scratch( - this, TMP, RAX, codegen_->GetNumberOfCoreRegisters()); + this, TMP, codegen_->GetNumberOfCoreRegisters()); - int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0; - __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset)); - __ movq(CpuRegister(ensure_scratch.GetRegister()), - Address(CpuRegister(RSP), mem2 + stack_offset)); - __ movq(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP)); - __ movq(Address(CpuRegister(RSP), mem1 + stack_offset), - CpuRegister(ensure_scratch.GetRegister())); + int temp_reg = ensure_scratch.GetRegister(); + if (temp_reg == kNoRegister) { + // Use the stack as a temporary. + // Save mem1 on the stack. + __ pushq(Address(CpuRegister(RSP), mem1)); + + // Copy mem2 into mem1. + __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem2 + kX86_64WordSize)); + __ movq(Address(CpuRegister(RSP), mem1 + kX86_64WordSize), CpuRegister(TMP)); + + // Now pop mem1 into mem2. 
+ __ popq(Address(CpuRegister(RSP), mem2)); + } else { + CpuRegister temp = CpuRegister(temp_reg); + __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem1)); + __ movq(temp, Address(CpuRegister(RSP), mem2)); + __ movq(Address(CpuRegister(RSP), mem2), CpuRegister(TMP)); + __ movq(Address(CpuRegister(RSP), mem1), temp); + } } void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) { @@ -3503,6 +3850,13 @@ void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) { __ movd(reg, CpuRegister(TMP)); } +void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg1, CpuRegister reg2) { + // Prefer to avoid xchg as it isn't speedy on smaller processors. + __ movq(CpuRegister(TMP), reg1); + __ movq(reg1, reg2); + __ movq(reg2, CpuRegister(TMP)); +} + void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) { __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem)); __ movsd(Address(CpuRegister(RSP), mem), reg); @@ -3515,7 +3869,7 @@ void ParallelMoveResolverX86_64::EmitSwap(size_t index) { Location destination = move->GetDestination(); if (source.IsRegister() && destination.IsRegister()) { - __ xchgq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>()); + Exchange64(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>()); } else if (source.IsRegister() && destination.IsStackSlot()) { Exchange32(source.AsRegister<CpuRegister>(), destination.GetStackIndex()); } else if (source.IsStackSlot() && destination.IsRegister()) { @@ -3880,5 +4234,66 @@ void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction) { LOG(FATAL) << "Unreachable"; } +void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) { + // Generate the constant area if needed. + X86_64Assembler* assembler = GetAssembler(); + if (!assembler->IsConstantAreaEmpty()) { + // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8 + // byte values. If used for vectors at a later time, this will need to be + // updated to 16 bytes with the appropriate offset. + assembler->Align(4, 0); + constant_area_start_ = assembler->CodeSize(); + assembler->AddConstantArea(); + } + + // And finish up. + CodeGenerator::Finalize(allocator); +} + +/** + * Class to handle late fixup of offsets into constant area. + */ +class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocMisc> { + public: + RIPFixup(const CodeGeneratorX86_64& codegen, int offset) + : codegen_(codegen), offset_into_constant_area_(offset) {} + + private: + void Process(const MemoryRegion& region, int pos) OVERRIDE { + // Patch the correct offset for the instruction. We use the address of the + // 'next' instruction, which is 'pos' (patch the 4 bytes before). + int constant_offset = codegen_.ConstantAreaStart() + offset_into_constant_area_; + int relative_position = constant_offset - pos; + + // Patch in the right value. + region.StoreUnaligned<int32_t>(pos - 4, relative_position); + } + + const CodeGeneratorX86_64& codegen_; + + // Location in constant area that the fixup refers to. 
+ int offset_into_constant_area_; +}; + +Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) { + AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddDouble(v)); + return Address::RIP(fixup); +} + +Address CodeGeneratorX86_64::LiteralFloatAddress(float v) { + AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddFloat(v)); + return Address::RIP(fixup); +} + +Address CodeGeneratorX86_64::LiteralInt32Address(int32_t v) { + AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddInt32(v)); + return Address::RIP(fixup); +} + +Address CodeGeneratorX86_64::LiteralInt64Address(int64_t v) { + AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddInt64(v)); + return Address::RIP(fixup); +} + } // namespace x86_64 } // namespace art diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 375c0b03b9..61bf6ac71d 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -118,6 +118,7 @@ class ParallelMoveResolverX86_64 : public ParallelMoveResolver { void Exchange32(CpuRegister reg, int mem); void Exchange32(XmmRegister reg, int mem); void Exchange32(int mem1, int mem2); + void Exchange64(CpuRegister reg1, CpuRegister reg2); void Exchange64(CpuRegister reg, int mem); void Exchange64(XmmRegister reg, int mem); void Exchange64(int mem1, int mem2); @@ -173,6 +174,9 @@ class InstructionCodeGeneratorX86_64 : public HGraphVisitor { void GenerateClassInitializationCheck(SlowPathCodeX86_64* slow_path, CpuRegister class_reg); void HandleBitwiseOperation(HBinaryOperation* operation); void GenerateRemFP(HRem *rem); + void DivRemOneOrMinusOne(HBinaryOperation* instruction); + void DivByPowerOfTwo(HDiv* instruction); + void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); void GenerateDivRemIntegral(HBinaryOperation* instruction); void HandleShift(HBinaryOperation* operation); void GenerateMemoryBarrier(MemBarrierKind kind); @@ -243,6 +247,7 @@ class CodeGeneratorX86_64 : public CodeGenerator { Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE; void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE; void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE; + void Finalize(CodeAllocator* allocator) OVERRIDE; InstructionSet GetInstructionSet() const OVERRIDE { return InstructionSet::kX86_64; @@ -274,6 +279,15 @@ class CodeGeneratorX86_64 : public CodeGenerator { return isa_features_; } + int ConstantAreaStart() const { + return constant_area_start_; + } + + Address LiteralDoubleAddress(double v); + Address LiteralFloatAddress(float v); + Address LiteralInt32Address(int32_t v); + Address LiteralInt64Address(int64_t v); + private: // Labels for each block that will be compiled. GrowableArray<Label> block_labels_; @@ -284,6 +298,10 @@ class CodeGeneratorX86_64 : public CodeGenerator { X86_64Assembler assembler_; const X86_64InstructionSetFeatures& isa_features_; + // Offset to the start of the constant area in the assembled code. + // Used for fixups to the constant area. 
+ int constant_area_start_; + DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86_64); }; diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index 56ec8a7ed1..afbc490150 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -24,9 +24,21 @@ namespace art { class InstructionSimplifierVisitor : public HGraphVisitor { public: InstructionSimplifierVisitor(HGraph* graph, OptimizingCompilerStats* stats) - : HGraphVisitor(graph), stats_(stats) {} + : HGraphVisitor(graph), + stats_(stats) {} + + void Run(); private: + void RecordSimplification() { + simplification_occurred_ = true; + simplifications_at_current_position_++; + if (stats_) { + stats_->RecordStat(kInstructionSimplifications); + } + } + + bool TryMoveNegOnInputsAfterBinop(HBinaryOperation* binop); void VisitShift(HBinaryOperation* shift); void VisitSuspendCheck(HSuspendCheck* check) OVERRIDE; @@ -40,6 +52,8 @@ class InstructionSimplifierVisitor : public HGraphVisitor { void VisitAnd(HAnd* instruction) OVERRIDE; void VisitDiv(HDiv* instruction) OVERRIDE; void VisitMul(HMul* instruction) OVERRIDE; + void VisitNeg(HNeg* instruction) OVERRIDE; + void VisitNot(HNot* instruction) OVERRIDE; void VisitOr(HOr* instruction) OVERRIDE; void VisitShl(HShl* instruction) OVERRIDE; void VisitShr(HShr* instruction) OVERRIDE; @@ -48,11 +62,38 @@ class InstructionSimplifierVisitor : public HGraphVisitor { void VisitXor(HXor* instruction) OVERRIDE; OptimizingCompilerStats* stats_; + bool simplification_occurred_ = false; + int simplifications_at_current_position_ = 0; + // We ensure we do not loop infinitely. The value is a finger in the air guess + // that should allow enough simplification. + static constexpr int kMaxSamePositionSimplifications = 10; }; void InstructionSimplifier::Run() { InstructionSimplifierVisitor visitor(graph_, stats_); - visitor.VisitInsertionOrder(); + visitor.Run(); +} + +void InstructionSimplifierVisitor::Run() { + for (HReversePostOrderIterator it(*GetGraph()); !it.Done();) { + // The simplification of an instruction to another instruction may yield + // possibilities for other simplifications. So although we perform a reverse + // post order visit, we sometimes need to revisit an instruction index. + simplification_occurred_ = false; + VisitBasicBlock(it.Current()); + if (simplification_occurred_ && + (simplifications_at_current_position_ < kMaxSamePositionSimplifications)) { + // New simplifications may be applicable to the instruction at the + // current index, so don't advance the iterator. + continue; + } + if (simplifications_at_current_position_ >= kMaxSamePositionSimplifications) { + LOG(WARNING) << "Too many simplifications (" << simplifications_at_current_position_ + << ") occurred at the current position."; + } + simplifications_at_current_position_ = 0; + it.Advance(); + } } namespace { @@ -63,6 +104,35 @@ bool AreAllBitsSet(HConstant* constant) { } // namespace +// Returns true if the code was simplified to use only one negation operation +// after the binary operation instead of one on each of the inputs. 
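+// For example, assuming each input negation has a single use, `(-a) + (-b)`
+// becomes `-(a + b)`, trading two negations for one.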
+bool InstructionSimplifierVisitor::TryMoveNegOnInputsAfterBinop(HBinaryOperation* binop) { + DCHECK(binop->IsAdd() || binop->IsSub()); + DCHECK(binop->GetLeft()->IsNeg() && binop->GetRight()->IsNeg()); + HNeg* left_neg = binop->GetLeft()->AsNeg(); + HNeg* right_neg = binop->GetRight()->AsNeg(); + if (!left_neg->HasOnlyOneNonEnvironmentUse() || + !right_neg->HasOnlyOneNonEnvironmentUse()) { + return false; + } + // Replace code looking like + // NEG tmp1, a + // NEG tmp2, b + // ADD dst, tmp1, tmp2 + // with + // ADD tmp, a, b + // NEG dst, tmp + binop->ReplaceInput(left_neg->GetInput(), 0); + binop->ReplaceInput(right_neg->GetInput(), 1); + left_neg->GetBlock()->RemoveInstruction(left_neg); + right_neg->GetBlock()->RemoveInstruction(right_neg); + HNeg* neg = new (GetGraph()->GetArena()) HNeg(binop->GetType(), binop); + binop->GetBlock()->InsertInstructionBefore(neg, binop->GetNext()); + binop->ReplaceWithExceptInReplacementAtIndex(neg, 0); + RecordSimplification(); + return true; +} + void InstructionSimplifierVisitor::VisitShift(HBinaryOperation* instruction) { DCHECK(instruction->IsShl() || instruction->IsShr() || instruction->IsUShr()); HConstant* input_cst = instruction->GetConstantRight(); @@ -182,6 +252,36 @@ void InstructionSimplifierVisitor::VisitAdd(HAdd* instruction) { // src instruction->ReplaceWith(input_other); instruction->GetBlock()->RemoveInstruction(instruction); + return; + } + + HInstruction* left = instruction->GetLeft(); + HInstruction* right = instruction->GetRight(); + bool left_is_neg = left->IsNeg(); + bool right_is_neg = right->IsNeg(); + + if (left_is_neg && right_is_neg) { + if (TryMoveNegOnInputsAfterBinop(instruction)) { + return; + } + } + + HNeg* neg = left_is_neg ? left->AsNeg() : right->AsNeg(); + if ((left_is_neg ^ right_is_neg) && neg->HasOnlyOneNonEnvironmentUse()) { + // Replace code looking like + // NEG tmp, b + // ADD dst, a, tmp + // with + // SUB dst, a, b + // We do not perform the optimization if the input negation has environment + // uses or multiple non-environment uses as it could lead to worse code. In + // particular, we do not want the live range of `b` to be extended if we are + // not sure the initial 'NEG' instruction can be removed. + HInstruction* other = left_is_neg ? right : left; + HSub* sub = new(GetGraph()->GetArena()) HSub(instruction->GetType(), other, neg->GetInput()); + instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, sub); + RecordSimplification(); + neg->GetBlock()->RemoveInstruction(neg); } } @@ -201,7 +301,7 @@ void InstructionSimplifierVisitor::VisitAnd(HAnd* instruction) { // We assume that GVN has run before, so we only perform a pointer comparison. // If for some reason the values are equal but the pointers are different, we - // are still correct and only miss an optimisation opportunity. + // are still correct and only miss an optimization opportunity. 
   if (instruction->GetLeft() == instruction->GetRight()) {
     // Replace code looking like
     //    AND dst, src, src
@@ -235,6 +335,7 @@ void InstructionSimplifierVisitor::VisitDiv(HDiv* instruction) {
     //    NEG dst, src
     instruction->GetBlock()->ReplaceAndRemoveInstructionWith(
         instruction, (new (GetGraph()->GetArena()) HNeg(type, input_other)));
+    RecordSimplification();
   }
 }
@@ -267,6 +368,7 @@ void InstructionSimplifierVisitor::VisitMul(HMul* instruction) {
     //    NEG dst, src
     HNeg* neg = new (allocator) HNeg(type, input_other);
     block->ReplaceAndRemoveInstructionWith(instruction, neg);
+    RecordSimplification();
     return;
   }
@@ -280,6 +382,7 @@ void InstructionSimplifierVisitor::VisitMul(HMul* instruction) {
     // The 'int' and 'long' cases are handled below.
     block->ReplaceAndRemoveInstructionWith(instruction,
                                            new (allocator) HAdd(type, input_other, input_other));
+    RecordSimplification();
     return;
   }
@@ -295,7 +398,72 @@ void InstructionSimplifierVisitor::VisitMul(HMul* instruction) {
       HIntConstant* shift = GetGraph()->GetIntConstant(WhichPowerOf2(factor));
       HShl* shl = new(allocator) HShl(type, input_other, shift);
       block->ReplaceAndRemoveInstructionWith(instruction, shl);
+      RecordSimplification();
+    }
+  }
+}
+
+void InstructionSimplifierVisitor::VisitNeg(HNeg* instruction) {
+  HInstruction* input = instruction->GetInput();
+  if (input->IsNeg()) {
+    // Replace code looking like
+    //    NEG tmp, src
+    //    NEG dst, tmp
+    // with
+    //    src
+    HNeg* previous_neg = input->AsNeg();
+    instruction->ReplaceWith(previous_neg->GetInput());
+    instruction->GetBlock()->RemoveInstruction(instruction);
+    // We perform the optimization even if the input negation has environment
+    // uses since it allows removing the current instruction. But we only
+    // delete the input negation if it does not have any uses left.
+    if (!previous_neg->HasUses()) {
+      previous_neg->GetBlock()->RemoveInstruction(previous_neg);
+    }
+    RecordSimplification();
+    return;
+  }
+
+  if (input->IsSub() && input->HasOnlyOneNonEnvironmentUse()) {
+    // Replace code looking like
+    //    SUB tmp, a, b
+    //    NEG dst, tmp
+    // with
+    //    SUB dst, b, a
+    // We do not perform the optimization if the input subtraction has
+    // environment uses or multiple non-environment uses as it could lead to
+    // worse code. In particular, we do not want the live ranges of `a` and `b`
+    // to be extended if we are not sure the initial 'SUB' instruction can be
+    // removed.
+    HSub* sub = input->AsSub();
+    HSub* new_sub =
+        new (GetGraph()->GetArena()) HSub(instruction->GetType(), sub->GetRight(), sub->GetLeft());
+    instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, new_sub);
+    if (!sub->HasUses()) {
+      sub->GetBlock()->RemoveInstruction(sub);
+    }
+    RecordSimplification();
+  }
+}
+
+void InstructionSimplifierVisitor::VisitNot(HNot* instruction) {
+  HInstruction* input = instruction->GetInput();
+  if (input->IsNot()) {
+    // Replace code looking like
+    //    NOT tmp, src
+    //    NOT dst, tmp
+    // with
+    //    src
+    // We perform the optimization even if the input NOT has environment
+    // uses since it allows removing the current instruction. But we only
+    // delete the input NOT if it does not have any uses left.
+ HNot* previous_not = input->AsNot(); + instruction->ReplaceWith(previous_not->GetInput()); + instruction->GetBlock()->RemoveInstruction(instruction); + if (!previous_not->HasUses()) { + previous_not->GetBlock()->RemoveInstruction(previous_not); } + RecordSimplification(); } } @@ -315,7 +483,7 @@ void InstructionSimplifierVisitor::VisitOr(HOr* instruction) { // We assume that GVN has run before, so we only perform a pointer comparison. // If for some reason the values are equal but the pointers are different, we - // are still correct and only miss an optimisation opportunity. + // are still correct and only miss an optimization opportunity. if (instruction->GetLeft() == instruction->GetRight()) { // Replace code looking like // OR dst, src, src @@ -356,20 +524,61 @@ void InstructionSimplifierVisitor::VisitSub(HSub* instruction) { HBasicBlock* block = instruction->GetBlock(); ArenaAllocator* allocator = GetGraph()->GetArena(); - if (instruction->GetLeft()->IsConstant()) { - int64_t left = Int64FromConstant(instruction->GetLeft()->AsConstant()); - if (left == 0) { + HInstruction* left = instruction->GetLeft(); + HInstruction* right = instruction->GetRight(); + if (left->IsConstant()) { + if (Int64FromConstant(left->AsConstant()) == 0) { // Replace code looking like // SUB dst, 0, src // with // NEG dst, src - // Note that we cannot optimise `0.0 - x` to `-x` for floating-point. When + // Note that we cannot optimize `0.0 - x` to `-x` for floating-point. When // `x` is `0.0`, the former expression yields `0.0`, while the later // yields `-0.0`. - HNeg* neg = new (allocator) HNeg(type, instruction->GetRight()); + HNeg* neg = new (allocator) HNeg(type, right); block->ReplaceAndRemoveInstructionWith(instruction, neg); + RecordSimplification(); + return; + } + } + + if (left->IsNeg() && right->IsNeg()) { + if (TryMoveNegOnInputsAfterBinop(instruction)) { + return; } } + + if (right->IsNeg() && right->HasOnlyOneNonEnvironmentUse()) { + // Replace code looking like + // NEG tmp, b + // SUB dst, a, tmp + // with + // ADD dst, a, b + HAdd* add = new(GetGraph()->GetArena()) HAdd(type, left, right->AsNeg()->GetInput()); + instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, add); + RecordSimplification(); + right->GetBlock()->RemoveInstruction(right); + return; + } + + if (left->IsNeg() && left->HasOnlyOneNonEnvironmentUse()) { + // Replace code looking like + // NEG tmp, a + // SUB dst, tmp, b + // with + // ADD tmp, a, b + // NEG dst, tmp + // The second version is not intrinsically better, but enables more + // transformations. 
+ HAdd* add = new(GetGraph()->GetArena()) HAdd(type, left->AsNeg()->GetInput(), right); + instruction->GetBlock()->InsertInstructionBefore(add, instruction); + HNeg* neg = new (GetGraph()->GetArena()) HNeg(instruction->GetType(), add); + instruction->GetBlock()->InsertInstructionBefore(neg, instruction); + instruction->ReplaceWith(neg); + instruction->GetBlock()->RemoveInstruction(instruction); + RecordSimplification(); + left->GetBlock()->RemoveInstruction(left); + } } void InstructionSimplifierVisitor::VisitUShr(HUShr* instruction) { @@ -397,6 +606,7 @@ void InstructionSimplifierVisitor::VisitXor(HXor* instruction) { // NOT dst, src HNot* bitwise_not = new (GetGraph()->GetArena()) HNot(instruction->GetType(), input_other); instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, bitwise_not); + RecordSimplification(); return; } } diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 5122a00d92..cbf94f0f81 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -298,25 +298,27 @@ static void CreateFloatToFloatPlusTemps(ArenaAllocator* arena, HInvoke* invoke) // TODO: Allow x86 to work with memory. This requires assembler support, see below. // locations->SetInAt(0, Location::Any()); // X86 can work on memory directly. locations->SetOut(Location::SameAsFirstInput()); - locations->AddTemp(Location::RequiresRegister()); // Immediate constant. - locations->AddTemp(Location::RequiresFpuRegister()); // FP version of above. + locations->AddTemp(Location::RequiresFpuRegister()); // FP reg to hold mask. } -static void MathAbsFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) { +static void MathAbsFP(LocationSummary* locations, + bool is64bit, + X86_64Assembler* assembler, + CodeGeneratorX86_64* codegen) { Location output = locations->Out(); - CpuRegister cpu_temp = locations->GetTemp(0).AsRegister<CpuRegister>(); if (output.IsFpuRegister()) { // In-register - XmmRegister xmm_temp = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); + XmmRegister xmm_temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + // TODO: Can mask directly with constant area using pand if we can guarantee + // that the literal is aligned on a 16 byte boundary. This will avoid a + // temporary. 
if (is64bit) { - __ movq(cpu_temp, Immediate(INT64_C(0x7FFFFFFFFFFFFFFF))); - __ movd(xmm_temp, cpu_temp); + __ movsd(xmm_temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF))); __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp); } else { - __ movl(cpu_temp, Immediate(INT64_C(0x7FFFFFFF))); - __ movd(xmm_temp, cpu_temp); + __ movss(xmm_temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF))); __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp); } } else { @@ -341,7 +343,7 @@ void IntrinsicLocationsBuilderX86_64::VisitMathAbsDouble(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), true, GetAssembler()); + MathAbsFP(invoke->GetLocations(), true, GetAssembler(), codegen_); } void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) { @@ -349,7 +351,7 @@ void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), false, GetAssembler()); + MathAbsFP(invoke->GetLocations(), false, GetAssembler(), codegen_); } static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) { @@ -399,8 +401,11 @@ void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) { GenAbsInteger(invoke->GetLocations(), true, GetAssembler()); } -static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double, - X86_64Assembler* assembler) { +static void GenMinMaxFP(LocationSummary* locations, + bool is_min, + bool is_double, + X86_64Assembler* assembler, + CodeGeneratorX86_64* codegen) { Location op1_loc = locations->InAt(0); Location op2_loc = locations->InAt(1); Location out_loc = locations->Out(); @@ -427,7 +432,7 @@ static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double, // // This removes one jmp, but needs to copy one input (op1) to out. // - // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath? + // TODO: This is straight from Quick. Make NaN an out-of-line slowpath? XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>(); @@ -461,14 +466,11 @@ static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double, // NaN handling. __ Bind(&nan); - CpuRegister cpu_temp = locations->GetTemp(0).AsRegister<CpuRegister>(); - // TODO: Literal pool. Trades 64b immediate in CPU reg for direct memory access. if (is_double) { - __ movq(cpu_temp, Immediate(INT64_C(0x7FF8000000000000))); + __ movsd(out, codegen->LiteralInt64Address(INT64_C(0x7FF8000000000000))); } else { - __ movl(cpu_temp, Immediate(INT64_C(0x7FC00000))); + __ movss(out, codegen->LiteralInt32Address(INT32_C(0x7FC00000))); } - __ movd(out, cpu_temp, is_double); __ jmp(&done); // out := op2; @@ -483,7 +485,7 @@ static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double, __ Bind(&done); } -static void CreateFPFPToFPPlusTempLocations(ArenaAllocator* arena, HInvoke* invoke) { +static void CreateFPFPToFP(ArenaAllocator* arena, HInvoke* invoke) { LocationSummary* locations = new (arena) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); @@ -492,39 +494,38 @@ static void CreateFPFPToFPPlusTempLocations(ArenaAllocator* arena, HInvoke* invo // The following is sub-optimal, but all we can do for now. It would be fine to also accept // the second input to be the output (we can simply swap inputs). 
locations->SetOut(Location::SameAsFirstInput()); - locations->AddTemp(Location::RequiresRegister()); // Immediate constant. } void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPPlusTempLocations(arena_, invoke); + CreateFPFPToFP(arena_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler()); + GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler(), codegen_); } void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) { - CreateFPFPToFPPlusTempLocations(arena_, invoke); + CreateFPFPToFP(arena_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler()); + GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler(), codegen_); } void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPPlusTempLocations(arena_, invoke); + CreateFPFPToFP(arena_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler()); + GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler(), codegen_); } void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) { - CreateFPFPToFPPlusTempLocations(arena_, invoke); + CreateFPFPToFP(arena_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler()); + GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler(), codegen_); } static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long, diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index f764eb421f..5f50494482 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -1177,6 +1177,9 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> { bool HasUses() const { return !uses_.IsEmpty() || !env_uses_.IsEmpty(); } bool HasEnvironmentUses() const { return !env_uses_.IsEmpty(); } bool HasNonEnvironmentUses() const { return !uses_.IsEmpty(); } + bool HasOnlyOneNonEnvironmentUse() const { + return !HasEnvironmentUses() && GetUses().HasOnlyOneUse(); + } // Does this instruction strictly dominate `other_instruction`? // Returns false if this instruction and `other_instruction` are the same. @@ -1214,6 +1217,13 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> { void ReplaceWith(HInstruction* instruction); void ReplaceInput(HInstruction* replacement, size_t index); + // This is almost the same as doing `ReplaceWith()`. But in this helper, the + // uses of this instruction by `other` are *not* updated. + void ReplaceWithExceptInReplacementAtIndex(HInstruction* other, size_t use_index) { + ReplaceWith(other); + other->ReplaceInput(this, use_index); + } + // Move `this` instruction before `cursor`. void MoveBefore(HInstruction* cursor); diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc new file mode 100644 index 0000000000..6d986ba7d3 --- /dev/null +++ b/compiler/optimizing/optimizing_cfi_test.cc @@ -0,0 +1,127 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <memory> +#include <vector> + +#include "arch/instruction_set.h" +#include "cfi_test.h" +#include "gtest/gtest.h" +#include "optimizing/code_generator.h" +#include "utils/assembler.h" + +#include "optimizing/optimizing_cfi_test_expected.inc" + +namespace art { + +// Run the tests only on host. +#ifndef HAVE_ANDROID_OS + +class OptimizingCFITest : public CFITest { + public: + // Enable this flag to generate the expected outputs. + static constexpr bool kGenerateExpected = false; + + void TestImpl(InstructionSet isa, const char* isa_str, + const std::vector<uint8_t>& expected_asm, + const std::vector<uint8_t>& expected_cfi) { + // Setup simple context. + ArenaPool pool; + ArenaAllocator allocator(&pool); + CompilerOptions opts; + std::unique_ptr<const InstructionSetFeatures> isa_features; + std::string error; + isa_features.reset(InstructionSetFeatures::FromVariant(isa, "default", &error)); + HGraph graph(&allocator); + // Generate simple frame with some spills. + std::unique_ptr<CodeGenerator> code_gen( + CodeGenerator::Create(&graph, isa, *isa_features.get(), opts)); + const int frame_size = 64; + int core_reg = 0; + int fp_reg = 0; + for (int i = 0; i < 2; i++) { // Two registers of each kind. + for (; core_reg < 32; core_reg++) { + if (code_gen->IsCoreCalleeSaveRegister(core_reg)) { + auto location = Location::RegisterLocation(core_reg); + code_gen->AddAllocatedRegister(location); + core_reg++; + break; + } + } + for (; fp_reg < 32; fp_reg++) { + if (code_gen->IsFloatingPointCalleeSaveRegister(fp_reg)) { + auto location = Location::FpuRegisterLocation(fp_reg); + code_gen->AddAllocatedRegister(location); + fp_reg++; + break; + } + } + } + code_gen->ComputeSpillMask(); + code_gen->SetFrameSize(frame_size); + code_gen->GenerateFrameEntry(); + code_gen->GetInstructionVisitor()->VisitReturnVoid(new (&allocator) HReturnVoid()); + // Get the outputs. 
+ InternalCodeAllocator code_allocator; + code_gen->Finalize(&code_allocator); + const std::vector<uint8_t>& actual_asm = code_allocator.GetMemory(); + Assembler* opt_asm = code_gen->GetAssembler(); + const std::vector<uint8_t>& actual_cfi = *(opt_asm->cfi().data()); + + if (kGenerateExpected) { + GenerateExpected(stdout, isa, isa_str, actual_asm, actual_cfi); + } else { + EXPECT_EQ(expected_asm, actual_asm); + EXPECT_EQ(expected_cfi, actual_cfi); + } + } + + private: + class InternalCodeAllocator : public CodeAllocator { + public: + InternalCodeAllocator() {} + + virtual uint8_t* Allocate(size_t size) { + memory_.resize(size); + return memory_.data(); + } + + const std::vector<uint8_t>& GetMemory() { return memory_; } + + private: + std::vector<uint8_t> memory_; + + DISALLOW_COPY_AND_ASSIGN(InternalCodeAllocator); + }; +}; + +#define TEST_ISA(isa) \ + TEST_F(OptimizingCFITest, isa) { \ + std::vector<uint8_t> expected_asm(expected_asm_##isa, \ + expected_asm_##isa + arraysize(expected_asm_##isa)); \ + std::vector<uint8_t> expected_cfi(expected_cfi_##isa, \ + expected_cfi_##isa + arraysize(expected_cfi_##isa)); \ + TestImpl(isa, #isa, expected_asm, expected_cfi); \ + } + +TEST_ISA(kThumb2) +TEST_ISA(kArm64) +TEST_ISA(kX86) +TEST_ISA(kX86_64) + +#endif // HAVE_ANDROID_OS + +} // namespace art diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc new file mode 100644 index 0000000000..2125f6eb01 --- /dev/null +++ b/compiler/optimizing/optimizing_cfi_test_expected.inc @@ -0,0 +1,141 @@ +static constexpr uint8_t expected_asm_kThumb2[] = { + 0x60, 0xB5, 0x2D, 0xED, 0x02, 0x8A, 0x8B, 0xB0, 0x00, 0x90, 0x0B, 0xB0, + 0xBD, 0xEC, 0x02, 0x8A, 0x60, 0xBD, +}; +static constexpr uint8_t expected_cfi_kThumb2[] = { + 0x42, 0x0E, 0x0C, 0x85, 0x03, 0x86, 0x02, 0x8E, 0x01, 0x44, 0x0E, 0x14, + 0x05, 0x50, 0x05, 0x05, 0x51, 0x04, 0x42, 0x0E, 0x40, 0x42, 0x0A, 0x42, + 0x0E, 0x14, 0x44, 0x0E, 0x0C, 0x06, 0x50, 0x06, 0x51, 0x42, 0x0B, 0x0E, + 0x40, +}; +// 0x00000000: push {r5, r6, lr} +// 0x00000002: .cfi_def_cfa_offset: 12 +// 0x00000002: .cfi_offset: r5 at cfa-12 +// 0x00000002: .cfi_offset: r6 at cfa-8 +// 0x00000002: .cfi_offset: r14 at cfa-4 +// 0x00000002: vpush.f32 {s16-s17} +// 0x00000006: .cfi_def_cfa_offset: 20 +// 0x00000006: .cfi_offset_extended: r80 at cfa-20 +// 0x00000006: .cfi_offset_extended: r81 at cfa-16 +// 0x00000006: sub sp, sp, #44 +// 0x00000008: .cfi_def_cfa_offset: 64 +// 0x00000008: str r0, [sp, #0] +// 0x0000000a: .cfi_remember_state +// 0x0000000a: add sp, sp, #44 +// 0x0000000c: .cfi_def_cfa_offset: 20 +// 0x0000000c: vpop.f32 {s16-s17} +// 0x00000010: .cfi_def_cfa_offset: 12 +// 0x00000010: .cfi_restore_extended: r80 +// 0x00000010: .cfi_restore_extended: r81 +// 0x00000010: pop {r5, r6, pc} +// 0x00000012: .cfi_restore_state +// 0x00000012: .cfi_def_cfa_offset: 64 + +static constexpr uint8_t expected_asm_kArm64[] = { + 0xE0, 0x0F, 0x1C, 0xB8, 0xF3, 0xD3, 0x02, 0xA9, 0xFE, 0x1F, 0x00, 0xF9, + 0xE8, 0xA7, 0x01, 0x6D, 0xE8, 0xA7, 0x41, 0x6D, 0xF3, 0xD3, 0x42, 0xA9, + 0xFE, 0x1F, 0x40, 0xF9, 0xFF, 0x03, 0x01, 0x91, 0xC0, 0x03, 0x5F, 0xD6, +}; +static constexpr uint8_t expected_cfi_kArm64[] = { + 0x44, 0x0E, 0x40, 0x44, 0x93, 0x06, 0x94, 0x04, 0x44, 0x9E, 0x02, 0x44, + 0x05, 0x48, 0x0A, 0x05, 0x49, 0x08, 0x0A, 0x44, 0x06, 0x48, 0x06, 0x49, + 0x44, 0xD3, 0xD4, 0x44, 0xDE, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0x40, +}; +// 0x00000000: str w0, [sp, #-64]! 
+// 0x00000004: .cfi_def_cfa_offset: 64 +// 0x00000004: stp x19, x20, [sp, #40] +// 0x00000008: .cfi_offset: r19 at cfa-24 +// 0x00000008: .cfi_offset: r20 at cfa-16 +// 0x00000008: str lr, [sp, #56] +// 0x0000000c: .cfi_offset: r30 at cfa-8 +// 0x0000000c: stp d8, d9, [sp, #24] +// 0x00000010: .cfi_offset_extended: r72 at cfa-40 +// 0x00000010: .cfi_offset_extended: r73 at cfa-32 +// 0x00000010: .cfi_remember_state +// 0x00000010: ldp d8, d9, [sp, #24] +// 0x00000014: .cfi_restore_extended: r72 +// 0x00000014: .cfi_restore_extended: r73 +// 0x00000014: ldp x19, x20, [sp, #40] +// 0x00000018: .cfi_restore: r19 +// 0x00000018: .cfi_restore: r20 +// 0x00000018: ldr lr, [sp, #56] +// 0x0000001c: .cfi_restore: r30 +// 0x0000001c: add sp, sp, #0x40 (64) +// 0x00000020: .cfi_def_cfa_offset: 0 +// 0x00000020: ret +// 0x00000024: .cfi_restore_state +// 0x00000024: .cfi_def_cfa_offset: 64 + +static constexpr uint8_t expected_asm_kX86[] = { + 0x56, 0x55, 0x83, 0xEC, 0x34, 0x89, 0x04, 0x24, 0x83, 0xC4, 0x34, 0x5D, + 0x5E, 0xC3, +}; +static constexpr uint8_t expected_cfi_kX86[] = { + 0x41, 0x0E, 0x08, 0x86, 0x02, 0x41, 0x0E, 0x0C, 0x85, 0x03, 0x43, 0x0E, + 0x40, 0x43, 0x0A, 0x43, 0x0E, 0x0C, 0x41, 0x0E, 0x08, 0xC5, 0x41, 0x0E, + 0x04, 0xC6, 0x41, 0x0B, 0x0E, 0x40, +}; +// 0x00000000: push esi +// 0x00000001: .cfi_def_cfa_offset: 8 +// 0x00000001: .cfi_offset: r6 at cfa-8 +// 0x00000001: push ebp +// 0x00000002: .cfi_def_cfa_offset: 12 +// 0x00000002: .cfi_offset: r5 at cfa-12 +// 0x00000002: sub esp, 52 +// 0x00000005: .cfi_def_cfa_offset: 64 +// 0x00000005: mov [esp], eax +// 0x00000008: .cfi_remember_state +// 0x00000008: add esp, 52 +// 0x0000000b: .cfi_def_cfa_offset: 12 +// 0x0000000b: pop ebp +// 0x0000000c: .cfi_def_cfa_offset: 8 +// 0x0000000c: .cfi_restore: r5 +// 0x0000000c: pop esi +// 0x0000000d: .cfi_def_cfa_offset: 4 +// 0x0000000d: .cfi_restore: r6 +// 0x0000000d: ret +// 0x0000000e: .cfi_restore_state +// 0x0000000e: .cfi_def_cfa_offset: 64 + +static constexpr uint8_t expected_asm_kX86_64[] = { + 0x55, 0x53, 0x48, 0x83, 0xEC, 0x28, 0xF2, 0x44, 0x0F, 0x11, 0x6C, 0x24, + 0x20, 0xF2, 0x44, 0x0F, 0x11, 0x64, 0x24, 0x18, 0x89, 0x3C, 0x24, 0xF2, + 0x44, 0x0F, 0x10, 0x64, 0x24, 0x18, 0xF2, 0x44, 0x0F, 0x10, 0x6C, 0x24, + 0x20, 0x48, 0x83, 0xC4, 0x28, 0x5B, 0x5D, 0xC3, +}; +static constexpr uint8_t expected_cfi_kX86_64[] = { + 0x41, 0x0E, 0x10, 0x86, 0x04, 0x41, 0x0E, 0x18, 0x83, 0x06, 0x44, 0x0E, + 0x40, 0x47, 0x9E, 0x08, 0x47, 0x9D, 0x0A, 0x43, 0x0A, 0x47, 0xDD, 0x47, + 0xDE, 0x44, 0x0E, 0x18, 0x41, 0x0E, 0x10, 0xC3, 0x41, 0x0E, 0x08, 0xC6, + 0x41, 0x0B, 0x0E, 0x40, +}; +// 0x00000000: push rbp +// 0x00000001: .cfi_def_cfa_offset: 16 +// 0x00000001: .cfi_offset: r6 at cfa-16 +// 0x00000001: push rbx +// 0x00000002: .cfi_def_cfa_offset: 24 +// 0x00000002: .cfi_offset: r3 at cfa-24 +// 0x00000002: subq rsp, 40 +// 0x00000006: .cfi_def_cfa_offset: 64 +// 0x00000006: movsd [rsp + 32], xmm13 +// 0x0000000d: .cfi_offset: r30 at cfa-32 +// 0x0000000d: movsd [rsp + 24], xmm12 +// 0x00000014: .cfi_offset: r29 at cfa-40 +// 0x00000014: mov [rsp], edi +// 0x00000017: .cfi_remember_state +// 0x00000017: movsd xmm12, [rsp + 24] +// 0x0000001e: .cfi_restore: r29 +// 0x0000001e: movsd xmm13, [rsp + 32] +// 0x00000025: .cfi_restore: r30 +// 0x00000025: addq rsp, 40 +// 0x00000029: .cfi_def_cfa_offset: 24 +// 0x00000029: pop rbx +// 0x0000002a: .cfi_def_cfa_offset: 16 +// 0x0000002a: .cfi_restore: r3 +// 0x0000002a: pop rbp +// 0x0000002b: .cfi_def_cfa_offset: 8 +// 0x0000002b: .cfi_restore: r6 +// 
0x0000002b: ret +// 0x0000002c: .cfi_restore_state +// 0x0000002c: .cfi_def_cfa_offset: 64 + diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 12798edac5..a428c75c8c 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -50,6 +50,7 @@ #include "ssa_builder.h" #include "ssa_phi_elimination.h" #include "ssa_liveness_analysis.h" +#include "utils/assembler.h" #include "reference_type_propagation.h" namespace art { @@ -199,20 +200,6 @@ class OptimizingCompiler FINAL : public Compiler { InstructionSetPointerSize(GetCompilerDriver()->GetInstructionSet()))); } - bool WriteElf(art::File* file, - OatWriter* oat_writer, - const std::vector<const art::DexFile*>& dex_files, - const std::string& android_root, - bool is_host) const OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - if (kProduce64BitELFFiles && Is64BitInstructionSet(GetCompilerDriver()->GetInstructionSet())) { - return art::ElfWriterQuick64::Create(file, oat_writer, dex_files, android_root, is_host, - *GetCompilerDriver()); - } else { - return art::ElfWriterQuick32::Create(file, oat_writer, dex_files, android_root, is_host, - *GetCompilerDriver()); - } - } - void InitCompilationUnit(CompilationUnit& cu) const OVERRIDE; void Init() OVERRIDE; @@ -370,6 +357,9 @@ static ArrayRef<const uint8_t> AlignVectorSize(std::vector<uint8_t>& vector) { return ArrayRef<const uint8_t>(vector); } +// TODO: The function below uses too much stack space. +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wframe-larger-than=" CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph, CodeGenerator* codegen, @@ -395,12 +385,17 @@ CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph, CodeVectorAllocator allocator; codegen->CompileOptimized(&allocator); + DefaultSrcMap src_mapping_table; + if (compiler_driver->GetCompilerOptions().GetIncludeDebugSymbols()) { + codegen->BuildSourceMap(&src_mapping_table); + } + std::vector<uint8_t> stack_map; codegen->BuildStackMaps(&stack_map); compilation_stats_.RecordStat(MethodCompilationStat::kCompiledOptimized); - return CompiledMethod::SwapAllocCompiledMethodStackMap( + return CompiledMethod::SwapAllocCompiledMethod( compiler_driver, codegen->GetInstructionSet(), ArrayRef<const uint8_t>(allocator.GetMemory()), @@ -410,9 +405,15 @@ CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph, codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(), codegen->GetCoreSpillMask(), codegen->GetFpuSpillMask(), - ArrayRef<const uint8_t>(stack_map)); + &src_mapping_table, + ArrayRef<const uint8_t>(), // mapping_table. + ArrayRef<const uint8_t>(stack_map), + ArrayRef<const uint8_t>(), // native_gc_map. + ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()), + ArrayRef<const LinkerPatch>()); } +#pragma GCC diagnostic pop CompiledMethod* OptimizingCompiler::CompileBaseline( CodeGenerator* codegen, @@ -422,9 +423,11 @@ CompiledMethod* OptimizingCompiler::CompileBaseline( codegen->CompileBaseline(&allocator); std::vector<uint8_t> mapping_table; + codegen->BuildMappingTable(&mapping_table); DefaultSrcMap src_mapping_table; - bool include_debug_symbol = compiler_driver->GetCompilerOptions().GetIncludeDebugSymbols(); - codegen->BuildMappingTable(&mapping_table, include_debug_symbol ? 
&src_mapping_table : nullptr); + if (compiler_driver->GetCompilerOptions().GetIncludeDebugSymbols()) { + codegen->BuildSourceMap(&src_mapping_table); + } std::vector<uint8_t> vmap_table; codegen->BuildVMapTable(&vmap_table); std::vector<uint8_t> gc_map; @@ -445,7 +448,8 @@ CompiledMethod* OptimizingCompiler::CompileBaseline( AlignVectorSize(mapping_table), AlignVectorSize(vmap_table), AlignVectorSize(gc_map), - ArrayRef<const uint8_t>()); + ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()), + ArrayRef<const LinkerPatch>()); } CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_item, @@ -511,6 +515,8 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite compilation_stats_.RecordStat(MethodCompilationStat::kNotCompiledNoCodegen); return nullptr; } + codegen->GetAssembler()->cfi().SetEnabled( + compiler_driver->GetCompilerOptions().GetIncludeDebugSymbols()); PassInfoPrinter pass_info_printer(graph, method_name.c_str(), diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h index b97a66719d..4d5b8d0639 100644 --- a/compiler/optimizing/optimizing_compiler_stats.h +++ b/compiler/optimizing/optimizing_compiler_stats.h @@ -47,6 +47,7 @@ enum MethodCompilationStat { kNotCompiledUnhandledInstruction, kRemovedCheckedCast, kRemovedNullCheck, + kInstructionSimplifications, kLastStat }; @@ -110,6 +111,7 @@ class OptimizingCompilerStats { case kNotCompiledUnhandledInstruction : return "kNotCompiledUnhandledInstruction"; case kRemovedCheckedCast: return "kRemovedCheckedCast"; case kRemovedNullCheck: return "kRemovedNullCheck"; + case kInstructionSimplifications: return "kInstructionSimplifications"; default: LOG(FATAL) << "invalid stat"; } return ""; diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc index 9df8f5640d..4936685367 100644 --- a/compiler/optimizing/parallel_move_resolver.cc +++ b/compiler/optimizing/parallel_move_resolver.cc @@ -269,6 +269,20 @@ int ParallelMoveResolver::AllocateScratchRegister(int blocked, } +int ParallelMoveResolver::AllocateScratchRegister(int blocked, + int register_count) { + int scratch = -1; + for (int reg = 0; reg < register_count; ++reg) { + if ((blocked != reg) && IsScratchLocation(Location::RegisterLocation(reg))) { + scratch = reg; + break; + } + } + + return scratch; +} + + ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope( ParallelMoveResolver* resolver, int blocked, int if_scratch, int number_of_registers) : resolver_(resolver), @@ -282,6 +296,16 @@ ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope( } +ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope( + ParallelMoveResolver* resolver, int blocked, int number_of_registers) + : resolver_(resolver), + reg_(kNoRegister), + spilled_(false) { + // We don't want to spill a register if none are free. 
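+  // Unlike the spilling constructor above, GetRegister() may return
+  // kNoRegister here, so callers must provide a fallback such as the
+  // stack-based temporary in ParallelMoveResolverX86_64::Exchange64.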
+ reg_ = resolver_->AllocateScratchRegister(blocked, number_of_registers); +} + + ParallelMoveResolver::ScratchRegisterScope::~ScratchRegisterScope() { if (spilled_) { resolver_->RestoreScratch(reg_); diff --git a/compiler/optimizing/parallel_move_resolver.h b/compiler/optimizing/parallel_move_resolver.h index 3fa1b37afd..173cffc71e 100644 --- a/compiler/optimizing/parallel_move_resolver.h +++ b/compiler/optimizing/parallel_move_resolver.h @@ -42,10 +42,15 @@ class ParallelMoveResolver : public ValueObject { protected: class ScratchRegisterScope : public ValueObject { public: + // Spill a scratch register if no regs are free. ScratchRegisterScope(ParallelMoveResolver* resolver, int blocked, int if_scratch, int number_of_registers); + // Grab a scratch register only if available. + ScratchRegisterScope(ParallelMoveResolver* resolver, + int blocked, + int number_of_registers); ~ScratchRegisterScope(); int GetRegister() const { return reg_; } @@ -62,6 +67,8 @@ class ParallelMoveResolver : public ValueObject { // Allocate a scratch register for performing a move. The method will try to use // a register that is the destination of a move, but that move has not been emitted yet. int AllocateScratchRegister(int blocked, int if_scratch, int register_count, bool* spilled); + // As above, but return -1 if no free register. + int AllocateScratchRegister(int blocked, int register_count); // Emit a move. virtual void EmitMove(size_t index) = 0; diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc index 8059289096..c41066027d 100644 --- a/compiler/utils/arm/assembler_arm.cc +++ b/compiler/utils/arm/assembler_arm.cc @@ -370,40 +370,46 @@ void ArmAssembler::Pad(uint32_t bytes) { } } +static dwarf::Reg DWARFReg(Register reg) { + return dwarf::Reg::ArmCore(static_cast<int>(reg)); +} + +static dwarf::Reg DWARFReg(SRegister reg) { + return dwarf::Reg::ArmFp(static_cast<int>(reg)); +} + constexpr size_t kFramePointerSize = 4; void ArmAssembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, const std::vector<ManagedRegister>& callee_save_regs, const ManagedRegisterEntrySpills& entry_spills) { + CHECK_EQ(buffer_.Size(), 0U); // Nothing emitted yet CHECK_ALIGNED(frame_size, kStackAlignment); CHECK_EQ(R0, method_reg.AsArm().AsCoreRegister()); // Push callee saves and link register. 
- RegList push_list = 1 << LR; - size_t pushed_values = 1; - int32_t min_s = kNumberOfSRegisters; - int32_t max_s = -1; - for (size_t i = 0; i < callee_save_regs.size(); i++) { - if (callee_save_regs.at(i).AsArm().IsCoreRegister()) { - Register reg = callee_save_regs.at(i).AsArm().AsCoreRegister(); - push_list |= 1 << reg; - pushed_values++; + RegList core_spill_mask = 1 << LR; + uint32_t fp_spill_mask = 0; + for (const ManagedRegister& reg : callee_save_regs) { + if (reg.AsArm().IsCoreRegister()) { + core_spill_mask |= 1 << reg.AsArm().AsCoreRegister(); } else { - CHECK(callee_save_regs.at(i).AsArm().IsSRegister()); - min_s = std::min(static_cast<int>(callee_save_regs.at(i).AsArm().AsSRegister()), min_s); - max_s = std::max(static_cast<int>(callee_save_regs.at(i).AsArm().AsSRegister()), max_s); + fp_spill_mask |= 1 << reg.AsArm().AsSRegister(); } } - PushList(push_list); - if (max_s != -1) { - pushed_values += 1 + max_s - min_s; - vpushs(static_cast<SRegister>(min_s), 1 + max_s - min_s); + PushList(core_spill_mask); + cfi_.AdjustCFAOffset(POPCOUNT(core_spill_mask) * kFramePointerSize); + cfi_.RelOffsetForMany(DWARFReg(Register(0)), 0, core_spill_mask, kFramePointerSize); + if (fp_spill_mask != 0) { + vpushs(SRegister(CTZ(fp_spill_mask)), POPCOUNT(fp_spill_mask)); + cfi_.AdjustCFAOffset(POPCOUNT(fp_spill_mask) * kFramePointerSize); + cfi_.RelOffsetForMany(DWARFReg(SRegister(0)), 0, fp_spill_mask, kFramePointerSize); } // Increase frame to required size. + int pushed_values = POPCOUNT(core_spill_mask) + POPCOUNT(fp_spill_mask); CHECK_GT(frame_size, pushed_values * kFramePointerSize); // Must at least have space for Method*. - size_t adjust = frame_size - (pushed_values * kFramePointerSize); - IncreaseFrameSize(adjust); + IncreaseFrameSize(frame_size - pushed_values * kFramePointerSize); // handles CFI as well. // Write out Method*. StoreToOffset(kStoreWord, R0, SP, 0); @@ -432,46 +438,46 @@ void ArmAssembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, void ArmAssembler::RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs) { CHECK_ALIGNED(frame_size, kStackAlignment); + cfi_.RememberState(); + // Compute callee saves to pop and PC. - RegList pop_list = 1 << PC; - size_t pop_values = 1; - int32_t min_s = kNumberOfSRegisters; - int32_t max_s = -1; - for (size_t i = 0; i < callee_save_regs.size(); i++) { - if (callee_save_regs.at(i).AsArm().IsCoreRegister()) { - Register reg = callee_save_regs.at(i).AsArm().AsCoreRegister(); - pop_list |= 1 << reg; - pop_values++; + RegList core_spill_mask = 1 << PC; + uint32_t fp_spill_mask = 0; + for (const ManagedRegister& reg : callee_save_regs) { + if (reg.AsArm().IsCoreRegister()) { + core_spill_mask |= 1 << reg.AsArm().AsCoreRegister(); } else { - CHECK(callee_save_regs.at(i).AsArm().IsSRegister()); - min_s = std::min(static_cast<int>(callee_save_regs.at(i).AsArm().AsSRegister()), min_s); - max_s = std::max(static_cast<int>(callee_save_regs.at(i).AsArm().AsSRegister()), max_s); + fp_spill_mask |= 1 << reg.AsArm().AsSRegister(); } } - if (max_s != -1) { - pop_values += 1 + max_s - min_s; - } - // Decrease frame to start of callee saves. + int pop_values = POPCOUNT(core_spill_mask) + POPCOUNT(fp_spill_mask); CHECK_GT(frame_size, pop_values * kFramePointerSize); - size_t adjust = frame_size - (pop_values * kFramePointerSize); - DecreaseFrameSize(adjust); + DecreaseFrameSize(frame_size - (pop_values * kFramePointerSize)); // handles CFI as well. 
-  if (max_s != -1) {
-    vpops(static_cast<SRegister>(min_s), 1 + max_s - min_s);
+  if (fp_spill_mask != 0) {
+    vpops(SRegister(CTZ(fp_spill_mask)), POPCOUNT(fp_spill_mask));
+    cfi_.AdjustCFAOffset(-kFramePointerSize * POPCOUNT(fp_spill_mask));
+    cfi_.RestoreMany(DWARFReg(SRegister(0)), fp_spill_mask);
   }
 
   // Pop callee saves and PC.
-  PopList(pop_list);
+  PopList(core_spill_mask);
+
+  // The CFI should be restored for any code that follows the exit block.
+  cfi_.RestoreState();
+  cfi_.DefCFAOffset(frame_size);
 }
 
 void ArmAssembler::IncreaseFrameSize(size_t adjust) {
   AddConstant(SP, -adjust);
+  cfi_.AdjustCFAOffset(adjust);
 }
 
 void ArmAssembler::DecreaseFrameSize(size_t adjust) {
   AddConstant(SP, adjust);
+  cfi_.AdjustCFAOffset(-adjust);
 }
 
 void ArmAssembler::Store(FrameOffset dest, ManagedRegister msrc, size_t size) {
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index 6286b106aa..3b42f63509 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -373,24 +373,34 @@ void Thumb2Assembler::ldrsh(Register rd, const Address& ad, Condition cond) {
 
 void Thumb2Assembler::ldrd(Register rd, const Address& ad, Condition cond) {
+  ldrd(rd, Register(rd + 1), ad, cond);
+}
+
+
+void Thumb2Assembler::ldrd(Register rd, Register rd2, const Address& ad, Condition cond) {
   CheckCondition(cond);
-  CHECK_EQ(rd % 2, 0);
+  // Encoding T1.
   // This is different from other loads. The encoding is like ARM.
   int32_t encoding = B31 | B30 | B29 | B27 | B22 | B20 |
       static_cast<int32_t>(rd) << 12 |
-      (static_cast<int32_t>(rd) + 1) << 8 |
+      static_cast<int32_t>(rd2) << 8 |
       ad.encodingThumbLdrdStrd();
   Emit32(encoding);
 }
 
 
 void Thumb2Assembler::strd(Register rd, const Address& ad, Condition cond) {
+  strd(rd, Register(rd + 1), ad, cond);
+}
+
+
+void Thumb2Assembler::strd(Register rd, Register rd2, const Address& ad, Condition cond) {
   CheckCondition(cond);
-  CHECK_EQ(rd % 2, 0);
+  // Encoding T1.
   // This is different from other loads. The encoding is like ARM.
   int32_t encoding = B31 | B30 | B29 | B27 | B22 |
       static_cast<int32_t>(rd) << 12 |
-      (static_cast<int32_t>(rd) + 1) << 8 |
+      static_cast<int32_t>(rd2) << 8 |
       ad.encodingThumbLdrdStrd();
   Emit32(encoding);
 }
@@ -2613,14 +2623,16 @@ void Thumb2Assembler::StoreToOffset(StoreOperandType type,
   Register tmp_reg = kNoRegister;
   if (!Address::CanHoldStoreOffsetThumb(type, offset)) {
     CHECK_NE(base, IP);
-    if (reg != IP) {
+    if (reg != IP &&
+        (type != kStoreWordPair || reg + 1 != IP)) {
       tmp_reg = IP;
     } else {
-      // Be careful not to use IP twice (for `reg` and to build the
-      // Address object used by the store instruction(s) below).
-      // Instead, save R5 on the stack (or R6 if R5 is not available),
-      // use it as secondary temporary register, and restore it after
-      // the store instruction has been emitted.
+      // Be careful not to use IP twice (for `reg` (or `reg` + 1 in
+      // the case of a word-pair store) and to build the Address
+      // object used by the store instruction(s) below). Instead,
+      // save R5 on the stack (or R6 if R5 is not available), use it
+      // as secondary temporary register, and restore it after the
+      // store instruction has been emitted.
       tmp_reg = base != R5 ?
          R5 : R6;
       Push(tmp_reg);
       if (base == SP) {
diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h
index 81dd13894f..e33c240dbf 100644
--- a/compiler/utils/arm/assembler_thumb2.h
+++ b/compiler/utils/arm/assembler_thumb2.h
@@ -135,9 +135,17 @@ class Thumb2Assembler FINAL : public ArmAssembler {
   void ldrsb(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
   void ldrsh(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
 
+  // Load/store register dual instructions using registers `rd` and `rd` + 1.
   void ldrd(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
   void strd(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
 
+  // Load/store register dual instructions using registers `rd` and `rd2`.
+  // Note that contrary to the ARM A1 encoding, the Thumb-2 T1 encoding
+  // does not require `rd` to be even, nor `rd2` to be equal to `rd` + 1.
+  void ldrd(Register rd, Register rd2, const Address& ad, Condition cond);
+  void strd(Register rd, Register rd2, const Address& ad, Condition cond);
+
+
   void ldm(BlockAddressMode am,
            Register base,
            RegList regs,
            Condition cond = AL) OVERRIDE;
   void stm(BlockAddressMode am,
            Register base,
diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc
index 62e0b90e73..5f5561a499 100644
--- a/compiler/utils/arm/assembler_thumb2_test.cc
+++ b/compiler/utils/arm/assembler_thumb2_test.cc
@@ -291,4 +291,59 @@ TEST_F(AssemblerThumb2Test, StoreWordToNonThumbOffset) {
   DriverStr(expected, "StoreWordToNonThumbOffset");
 }
 
+TEST_F(AssemblerThumb2Test, StoreWordPairToThumbOffset) {
+  arm::StoreOperandType type = arm::kStoreWordPair;
+  int32_t offset = 1020;
+  ASSERT_TRUE(arm::Address::CanHoldStoreOffsetThumb(type, offset));
+
+  __ StoreToOffset(type, arm::R0, arm::SP, offset);
+  // We cannot use IP (i.e. R12) as first source register, as it would
+  // force us to use SP (i.e. R13) as second source register, which
+  // would have an "unpredictable" effect according to the ARMv7
+  // specification (the T1 encoding describes the result as
+  // UNPREDICTABLE when one of the source registers is R13).
+  //
+  // So we use (R11, IP) (i.e. (R11, R12)) as source registers in the
+  // following instructions.
+  __ StoreToOffset(type, arm::R11, arm::SP, offset);
+  __ StoreToOffset(type, arm::R11, arm::R5, offset);
+
+  const char* expected =
+      "strd r0, r1, [sp, #1020]\n"
+      "strd r11, ip, [sp, #1020]\n"
+      "strd r11, ip, [r5, #1020]\n";
+  DriverStr(expected, "StoreWordPairToThumbOffset");
+}
+
+TEST_F(AssemblerThumb2Test, StoreWordPairToNonThumbOffset) {
+  arm::StoreOperandType type = arm::kStoreWordPair;
+  int32_t offset = 1024;
+  ASSERT_FALSE(arm::Address::CanHoldStoreOffsetThumb(type, offset));
+
+  __ StoreToOffset(type, arm::R0, arm::SP, offset);
+  // Same comment as in AssemblerThumb2Test.StoreWordPairToThumbOffset
+  // regarding the use of (R11, IP) (i.e. (R11, R12)) as source
+  // registers in the following instructions.
+ __ StoreToOffset(type, arm::R11, arm::SP, offset); + __ StoreToOffset(type, arm::R11, arm::R5, offset); + + const char* expected = + "mov ip, #1024\n" // LoadImmediate(ip, 1024) + "add ip, ip, sp\n" + "strd r0, r1, [ip, #0]\n" + + "str r5, [sp, #-4]!\n" // Push(r5) + "movw r5, #1028\n" // LoadImmediate(r5, 1024 + kRegisterSize) + "add r5, r5, sp\n" + "strd r11, ip, [r5, #0]\n" + "ldr r5, [sp], #4\n" // Pop(r5) + + "str r6, [sp, #-4]!\n" // Push(r6) + "mov r6, #1024\n" // LoadImmediate(r6, 1024) + "add r6, r6, r5\n" + "strd r11, ip, [r6, #0]\n" + "ldr r6, [sp], #4\n"; // Pop(r6) + DriverStr(expected, "StoreWordPairToNonThumbOffset"); +} + } // namespace art diff --git a/compiler/utils/arm/managed_register_arm.h b/compiler/utils/arm/managed_register_arm.h index a496c87150..5fde9e8856 100644 --- a/compiler/utils/arm/managed_register_arm.h +++ b/compiler/utils/arm/managed_register_arm.h @@ -19,6 +19,7 @@ #include "base/logging.h" #include "constants_arm.h" +#include "dwarf/register.h" #include "utils/managed_register.h" namespace art { diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc index 58c73674da..fbd04114e4 100644 --- a/compiler/utils/arm64/assembler_arm64.cc +++ b/compiler/utils/arm64/assembler_arm64.cc @@ -63,12 +63,14 @@ void Arm64Assembler::GetCurrentThread(FrameOffset offset, ManagedRegister /* scr void Arm64Assembler::IncreaseFrameSize(size_t adjust) { CHECK_ALIGNED(adjust, kStackAlignment); AddConstant(SP, -adjust); + cfi().AdjustCFAOffset(adjust); } // See Arm64 PCS Section 5.2.2.1. void Arm64Assembler::DecreaseFrameSize(size_t adjust) { CHECK_ALIGNED(adjust, kStackAlignment); AddConstant(SP, adjust); + cfi().AdjustCFAOffset(-adjust); } void Arm64Assembler::AddConstant(XRegister rd, int32_t value, Condition cond) { @@ -638,6 +640,14 @@ void Arm64Assembler::EmitExceptionPoll(Arm64Exception *exception) { ___ Brk(); } +static dwarf::Reg DWARFReg(XRegister reg) { + return dwarf::Reg::Arm64Core(static_cast<int>(reg)); +} + +static dwarf::Reg DWARFReg(DRegister reg) { + return dwarf::Reg::Arm64Fp(static_cast<int>(reg)); +} + constexpr size_t kFramePointerSize = 8; constexpr unsigned int kJniRefSpillRegsSize = 11 + 8; @@ -660,45 +670,20 @@ void Arm64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, // TUNING: Use stp. // Note: Must match Arm64JniCallingConvention::CoreSpillMask(). 
size_t reg_offset = frame_size; - reg_offset -= 8; - StoreToOffset(LR, SP, reg_offset); - reg_offset -= 8; - StoreToOffset(X29, SP, reg_offset); - reg_offset -= 8; - StoreToOffset(X28, SP, reg_offset); - reg_offset -= 8; - StoreToOffset(X27, SP, reg_offset); - reg_offset -= 8; - StoreToOffset(X26, SP, reg_offset); - reg_offset -= 8; - StoreToOffset(X25, SP, reg_offset); - reg_offset -= 8; - StoreToOffset(X24, SP, reg_offset); - reg_offset -= 8; - StoreToOffset(X23, SP, reg_offset); - reg_offset -= 8; - StoreToOffset(X22, SP, reg_offset); - reg_offset -= 8; - StoreToOffset(X21, SP, reg_offset); - reg_offset -= 8; - StoreToOffset(X20, SP, reg_offset); - - reg_offset -= 8; - StoreDToOffset(D15, SP, reg_offset); - reg_offset -= 8; - StoreDToOffset(D14, SP, reg_offset); - reg_offset -= 8; - StoreDToOffset(D13, SP, reg_offset); - reg_offset -= 8; - StoreDToOffset(D12, SP, reg_offset); - reg_offset -= 8; - StoreDToOffset(D11, SP, reg_offset); - reg_offset -= 8; - StoreDToOffset(D10, SP, reg_offset); - reg_offset -= 8; - StoreDToOffset(D9, SP, reg_offset); - reg_offset -= 8; - StoreDToOffset(D8, SP, reg_offset); + static constexpr XRegister x_spills[] = { + LR, X29, X28, X27, X26, X25, X24, X23, X22, X21, X20 }; + for (size_t i = 0; i < arraysize(x_spills); i++) { + XRegister reg = x_spills[i]; + reg_offset -= 8; + StoreToOffset(reg, SP, reg_offset); + cfi_.RelOffset(DWARFReg(reg), reg_offset); + } + for (int d = 15; d >= 8; d--) { + DRegister reg = static_cast<DRegister>(d); + reg_offset -= 8; + StoreDToOffset(reg, SP, reg_offset); + cfi_.RelOffset(DWARFReg(reg), reg_offset); + } // Move TR(Caller saved) to ETR(Callee saved). The original (ETR)X21 has been saved on stack. // This way we make sure that TR is not trashed by native code. @@ -734,6 +719,7 @@ void Arm64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, void Arm64Assembler::RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs) { CHECK_ALIGNED(frame_size, kStackAlignment); + cfi_.RememberState(); // For now we only check that the size of the frame is greater than the spill size. CHECK_EQ(callee_save_regs.size(), kJniRefSpillRegsSize); @@ -748,51 +734,30 @@ void Arm64Assembler::RemoveFrame(size_t frame_size, const std::vector<ManagedReg // TUNING: Use ldp. // Note: Must match Arm64JniCallingConvention::CoreSpillMask(). 
   size_t reg_offset = frame_size;
-  reg_offset -= 8;
-  LoadFromOffset(LR, SP, reg_offset);
-  reg_offset -= 8;
-  LoadFromOffset(X29, SP, reg_offset);
-  reg_offset -= 8;
-  LoadFromOffset(X28, SP, reg_offset);
-  reg_offset -= 8;
-  LoadFromOffset(X27, SP, reg_offset);
-  reg_offset -= 8;
-  LoadFromOffset(X26, SP, reg_offset);
-  reg_offset -= 8;
-  LoadFromOffset(X25, SP, reg_offset);
-  reg_offset -= 8;
-  LoadFromOffset(X24, SP, reg_offset);
-  reg_offset -= 8;
-  LoadFromOffset(X23, SP, reg_offset);
-  reg_offset -= 8;
-  LoadFromOffset(X22, SP, reg_offset);
-  reg_offset -= 8;
-  LoadFromOffset(X21, SP, reg_offset);
-  reg_offset -= 8;
-  LoadFromOffset(X20, SP, reg_offset);
-
-  reg_offset -= 8;
-  LoadDFromOffset(D15, SP, reg_offset);
-  reg_offset -= 8;
-  LoadDFromOffset(D14, SP, reg_offset);
-  reg_offset -= 8;
-  LoadDFromOffset(D13, SP, reg_offset);
-  reg_offset -= 8;
-  LoadDFromOffset(D12, SP, reg_offset);
-  reg_offset -= 8;
-  LoadDFromOffset(D11, SP, reg_offset);
-  reg_offset -= 8;
-  LoadDFromOffset(D10, SP, reg_offset);
-  reg_offset -= 8;
-  LoadDFromOffset(D9, SP, reg_offset);
-  reg_offset -= 8;
-  LoadDFromOffset(D8, SP, reg_offset);
+  static constexpr XRegister x_spills[] = {
+      LR, X29, X28, X27, X26, X25, X24, X23, X22, X21, X20 };
+  for (size_t i = 0; i < arraysize(x_spills); i++) {
+    XRegister reg = x_spills[i];
+    reg_offset -= 8;
+    LoadFromOffset(reg, SP, reg_offset);
+    cfi_.Restore(DWARFReg(reg));
+  }
+  for (int d = 15; d >= 8; d--) {
+    DRegister reg = static_cast<DRegister>(d);
+    reg_offset -= 8;
+    LoadDFromOffset(reg, SP, reg_offset);
+    cfi_.Restore(DWARFReg(reg));
+  }
 
   // Decrease frame size to start of callee saved regs.
   DecreaseFrameSize(frame_size);
 
   // Pop callee saved and return to LR.
   ___ Ret();
+
+  // The CFI should be restored for any code that follows the exit block.
+  cfi_.RestoreState();
+  cfi_.DefCFAOffset(frame_size);
 }
 
 }  // namespace arm64
diff --git a/compiler/utils/arm64/managed_register_arm64.h b/compiler/utils/arm64/managed_register_arm64.h
index e1d6f3179d..62c1d4dbee 100644
--- a/compiler/utils/arm64/managed_register_arm64.h
+++ b/compiler/utils/arm64/managed_register_arm64.h
@@ -19,6 +19,7 @@
 #include "base/logging.h"
 #include "constants_arm64.h"
+#include "dwarf/register.h"
 #include "utils/managed_register.h"
 
 namespace art {
diff --git a/compiler/utils/assembler.cc b/compiler/utils/assembler.cc
index 5340dd3a25..36342c61c5 100644
--- a/compiler/utils/assembler.cc
+++ b/compiler/utils/assembler.cc
@@ -105,6 +105,9 @@ void AssemblerBuffer::ExtendCapacity() {
   CHECK_EQ(Size(), old_size);
 }
 
+void DebugFrameOpCodeWriterForAssembler::ImplicitlyAdvancePC() {
+  this->AdvancePC(assembler_->CodeSize());
+}
 
 Assembler* Assembler::Create(InstructionSet instruction_set) {
   switch (instruction_set) {
diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h
index 323f93cb42..ebafd3dd1e 100644
--- a/compiler/utils/assembler.h
+++ b/compiler/utils/assembler.h
@@ -29,6 +29,7 @@
 #include "offsets.h"
 #include "x86/constants_x86.h"
 #include "x86_64/constants_x86_64.h"
+#include "dwarf/debug_frame_opcode_writer.h"
 
 namespace art {
 
@@ -354,6 +355,23 @@ class AssemblerBuffer {
   friend class AssemblerFixup;
 };
 
+// The purpose of this class is to ensure that we do not have to explicitly
+// call the AdvancePC method (which is good for convenience and correctness).
+class DebugFrameOpCodeWriterForAssembler FINAL
+    : public dwarf::DebugFrameOpCodeWriter<> {
+ public:
+  // This method is called by the opcode writers.
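+  // It forwards the assembler's current CodeSize() as the new PC (see the
+  // definition in assembler.cc above), so the opcode stream stays in sync
+  // with the emitted code without explicit AdvancePC calls at call sites.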
+ virtual void ImplicitlyAdvancePC() FINAL; + + explicit DebugFrameOpCodeWriterForAssembler(Assembler* buffer) + : dwarf::DebugFrameOpCodeWriter<>(), + assembler_(buffer) { + } + + private: + Assembler* assembler_; +}; + class Assembler { public: static Assembler* Create(InstructionSet instruction_set); @@ -506,10 +524,18 @@ class Assembler { virtual ~Assembler() {} + /** + * @brief Buffer of DWARF's Call Frame Information opcodes. + * @details It is used by debuggers and other tools to unwind the call stack. + */ + DebugFrameOpCodeWriterForAssembler& cfi() { return cfi_; } + protected: - Assembler() : buffer_() {} + Assembler() : buffer_(), cfi_(this) {} AssemblerBuffer buffer_; + + DebugFrameOpCodeWriterForAssembler cfi_; }; } // namespace art diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h index b13edb68bf..3fe1a31d70 100644 --- a/compiler/utils/assembler_test.h +++ b/compiler/utils/assembler_test.h @@ -44,7 +44,9 @@ static std::string tmpnam_; enum class RegisterView { // private kUsePrimaryName, - kUseSecondaryName + kUseSecondaryName, + kUseTertiaryName, + kUseQuaternaryName, }; template<typename Ass, typename Reg, typename FPReg, typename Imm> @@ -97,6 +99,15 @@ class AssemblerTest : public testing::Test { fmt); } + std::string Repeatrb(void (Ass::*f)(Reg, Reg), std::string fmt) { + return RepeatTemplatedRegisters<Reg, Reg>(f, + GetRegisters(), + GetRegisters(), + &AssemblerTest::GetRegName<RegisterView::kUseSecondaryName>, + &AssemblerTest::GetRegName<RegisterView::kUseQuaternaryName>, + fmt); + } + std::string RepeatRr(void (Ass::*f)(Reg, Reg), std::string fmt) { return RepeatTemplatedRegisters<Reg, Reg>(f, GetRegisters(), @@ -240,6 +251,18 @@ class AssemblerTest : public testing::Test { UNREACHABLE(); } + // Tertiary register names are the tertiary view on registers, e.g., 16b on 64b systems. + virtual std::string GetTertiaryRegisterName(const Reg& reg ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL) << "Architecture does not support tertiary registers"; + UNREACHABLE(); + } + + // Quaternary register names are the quaternary view on registers, e.g., 8b on 64b systems. 
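For concreteness, the four views line up with the x86-64 names registered in the updated assembler_x86_64_test.cc further down: primary rax, secondary eax, tertiary ax, quaternary al (and r8, r8d, r8w, r8b for the extended registers). The quaternary view is what the new byte-instruction tests (setcc, movzxb, movsxb) print.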
+ virtual std::string GetQuaternaryRegisterName(const Reg& reg ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL) << "Architecture does not support quaternary registers"; + UNREACHABLE(); + } + std::string GetRegisterName(const Reg& reg) { return GetRegName<RegisterView::kUsePrimaryName>(reg); } @@ -520,6 +543,14 @@ class AssemblerTest : public testing::Test { case RegisterView::kUseSecondaryName: sreg << GetSecondaryRegisterName(reg); break; + + case RegisterView::kUseTertiaryName: + sreg << GetTertiaryRegisterName(reg); + break; + + case RegisterView::kUseQuaternaryName: + sreg << GetQuaternaryRegisterName(reg); + break; } return sreg.str(); } diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc index b5437b0eda..709a911f6a 100644 --- a/compiler/utils/mips/assembler_mips.cc +++ b/compiler/utils/mips/assembler_mips.cc @@ -536,6 +536,10 @@ void MipsAssembler::StoreDToOffset(DRegister reg, Register base, int32_t offset) Sdc1(reg, base, offset); } +static dwarf::Reg DWARFReg(Register reg) { + return dwarf::Reg::MipsCore(static_cast<int>(reg)); +} + constexpr size_t kFramePointerSize = 4; void MipsAssembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, @@ -549,10 +553,12 @@ void MipsAssembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, // Push callee saves and return address int stack_offset = frame_size - kFramePointerSize; StoreToOffset(kStoreWord, RA, SP, stack_offset); + cfi_.RelOffset(DWARFReg(RA), stack_offset); for (int i = callee_save_regs.size() - 1; i >= 0; --i) { stack_offset -= kFramePointerSize; Register reg = callee_save_regs.at(i).AsMips().AsCoreRegister(); StoreToOffset(kStoreWord, reg, SP, stack_offset); + cfi_.RelOffset(DWARFReg(reg), stack_offset); } // Write out Method*. @@ -568,31 +574,40 @@ void MipsAssembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, void MipsAssembler::RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs) { CHECK_ALIGNED(frame_size, kStackAlignment); + cfi_.RememberState(); // Pop callee saves and return address int stack_offset = frame_size - (callee_save_regs.size() * kFramePointerSize) - kFramePointerSize; for (size_t i = 0; i < callee_save_regs.size(); ++i) { Register reg = callee_save_regs.at(i).AsMips().AsCoreRegister(); LoadFromOffset(kLoadWord, reg, SP, stack_offset); + cfi_.Restore(DWARFReg(reg)); stack_offset += kFramePointerSize; } LoadFromOffset(kLoadWord, RA, SP, stack_offset); + cfi_.Restore(DWARFReg(RA)); // Decrease frame to required size. DecreaseFrameSize(frame_size); // Then jump to the return address. Jr(RA); + + // The CFI should be restored for any code that follows the exit block. 
+ cfi_.RestoreState(); + cfi_.DefCFAOffset(frame_size); } void MipsAssembler::IncreaseFrameSize(size_t adjust) { CHECK_ALIGNED(adjust, kStackAlignment); AddConstant(SP, SP, -adjust); + cfi_.AdjustCFAOffset(adjust); } void MipsAssembler::DecreaseFrameSize(size_t adjust) { CHECK_ALIGNED(adjust, kStackAlignment); AddConstant(SP, SP, adjust); + cfi_.AdjustCFAOffset(-adjust); } void MipsAssembler::Store(FrameOffset dest, ManagedRegister msrc, size_t size) { diff --git a/compiler/utils/mips/managed_register_mips.h b/compiler/utils/mips/managed_register_mips.h index dd55cc4e6a..40d39e3386 100644 --- a/compiler/utils/mips/managed_register_mips.h +++ b/compiler/utils/mips/managed_register_mips.h @@ -18,6 +18,7 @@ #define ART_COMPILER_UTILS_MIPS_MANAGED_REGISTER_MIPS_H_ #include "constants_mips.h" +#include "dwarf/register.h" #include "utils/managed_register.h" namespace art { diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc index 388d274894..282ab96ce4 100644 --- a/compiler/utils/mips64/assembler_mips64.cc +++ b/compiler/utils/mips64/assembler_mips64.cc @@ -568,6 +568,10 @@ void Mips64Assembler::StoreFpuToOffset(StoreOperandType type, FpuRegister reg, G } } +static dwarf::Reg DWARFReg(GpuRegister reg) { + return dwarf::Reg::Mips64Core(static_cast<int>(reg)); +} + constexpr size_t kFramePointerSize = 8; void Mips64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, @@ -581,10 +585,12 @@ void Mips64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, // Push callee saves and return address int stack_offset = frame_size - kFramePointerSize; StoreToOffset(kStoreDoubleword, RA, SP, stack_offset); + cfi_.RelOffset(DWARFReg(RA), stack_offset); for (int i = callee_save_regs.size() - 1; i >= 0; --i) { stack_offset -= kFramePointerSize; GpuRegister reg = callee_save_regs.at(i).AsMips64().AsGpuRegister(); StoreToOffset(kStoreDoubleword, reg, SP, stack_offset); + cfi_.RelOffset(DWARFReg(reg), stack_offset); } // Write out Method*. @@ -612,31 +618,40 @@ void Mips64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, void Mips64Assembler::RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs) { CHECK_ALIGNED(frame_size, kStackAlignment); + cfi_.RememberState(); // Pop callee saves and return address int stack_offset = frame_size - (callee_save_regs.size() * kFramePointerSize) - kFramePointerSize; for (size_t i = 0; i < callee_save_regs.size(); ++i) { GpuRegister reg = callee_save_regs.at(i).AsMips64().AsGpuRegister(); LoadFromOffset(kLoadDoubleword, reg, SP, stack_offset); + cfi_.Restore(DWARFReg(reg)); stack_offset += kFramePointerSize; } LoadFromOffset(kLoadDoubleword, RA, SP, stack_offset); + cfi_.Restore(DWARFReg(RA)); // Decrease frame to required size. DecreaseFrameSize(frame_size); // Then jump to the return address. Jr(RA); + + // The CFI should be restored for any code that follows the exit block. 
+ cfi_.RestoreState(); + cfi_.DefCFAOffset(frame_size); } void Mips64Assembler::IncreaseFrameSize(size_t adjust) { CHECK_ALIGNED(adjust, kStackAlignment); AddConstant64(SP, SP, -adjust); + cfi_.AdjustCFAOffset(adjust); } void Mips64Assembler::DecreaseFrameSize(size_t adjust) { CHECK_ALIGNED(adjust, kStackAlignment); AddConstant64(SP, SP, adjust); + cfi_.AdjustCFAOffset(-adjust); } void Mips64Assembler::Store(FrameOffset dest, ManagedRegister msrc, size_t size) { diff --git a/compiler/utils/mips64/managed_register_mips64.h b/compiler/utils/mips64/managed_register_mips64.h index 924a928389..4c4705bbfb 100644 --- a/compiler/utils/mips64/managed_register_mips64.h +++ b/compiler/utils/mips64/managed_register_mips64.h @@ -18,6 +18,7 @@ #define ART_COMPILER_UTILS_MIPS64_MANAGED_REGISTER_MIPS64_H_ #include "constants_mips64.h" +#include "dwarf/register.h" #include "utils/managed_register.h" namespace art { diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index 4cca529258..f8bba07f84 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -1285,32 +1285,62 @@ void X86Assembler::decl(const Address& address) { void X86Assembler::shll(Register reg, const Immediate& imm) { - EmitGenericShift(4, reg, imm); + EmitGenericShift(4, Operand(reg), imm); } void X86Assembler::shll(Register operand, Register shifter) { - EmitGenericShift(4, operand, shifter); + EmitGenericShift(4, Operand(operand), shifter); +} + + +void X86Assembler::shll(const Address& address, const Immediate& imm) { + EmitGenericShift(4, address, imm); +} + + +void X86Assembler::shll(const Address& address, Register shifter) { + EmitGenericShift(4, address, shifter); } void X86Assembler::shrl(Register reg, const Immediate& imm) { - EmitGenericShift(5, reg, imm); + EmitGenericShift(5, Operand(reg), imm); } void X86Assembler::shrl(Register operand, Register shifter) { - EmitGenericShift(5, operand, shifter); + EmitGenericShift(5, Operand(operand), shifter); +} + + +void X86Assembler::shrl(const Address& address, const Immediate& imm) { + EmitGenericShift(5, address, imm); +} + + +void X86Assembler::shrl(const Address& address, Register shifter) { + EmitGenericShift(5, address, shifter); } void X86Assembler::sarl(Register reg, const Immediate& imm) { - EmitGenericShift(7, reg, imm); + EmitGenericShift(7, Operand(reg), imm); } void X86Assembler::sarl(Register operand, Register shifter) { - EmitGenericShift(7, operand, shifter); + EmitGenericShift(7, Operand(operand), shifter); +} + + +void X86Assembler::sarl(const Address& address, const Immediate& imm) { + EmitGenericShift(7, address, imm); +} + + +void X86Assembler::sarl(const Address& address, Register shifter) { + EmitGenericShift(7, address, shifter); } @@ -1323,6 +1353,15 @@ void X86Assembler::shld(Register dst, Register src, Register shifter) { } +void X86Assembler::shld(Register dst, Register src, const Immediate& imm) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x0F); + EmitUint8(0xA4); + EmitRegisterOperand(src, dst); + EmitUint8(imm.value() & 0xFF); +} + + void X86Assembler::shrd(Register dst, Register src, Register shifter) { DCHECK_EQ(ECX, shifter); AssemblerBuffer::EnsureCapacity ensured(&buffer_); @@ -1332,6 +1371,15 @@ void X86Assembler::shrd(Register dst, Register src, Register shifter) { } +void X86Assembler::shrd(Register dst, Register src, const Immediate& imm) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x0F); + EmitUint8(0xAC); + EmitRegisterOperand(src, 
dst); + EmitUint8(imm.value() & 0xFF); +} + + void X86Assembler::negl(Register reg) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xF7); @@ -1615,28 +1663,32 @@ void X86Assembler::EmitLabelLink(Label* label) { void X86Assembler::EmitGenericShift(int reg_or_opcode, - Register reg, + const Operand& operand, const Immediate& imm) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); CHECK(imm.is_int8()); if (imm.value() == 1) { EmitUint8(0xD1); - EmitOperand(reg_or_opcode, Operand(reg)); + EmitOperand(reg_or_opcode, operand); } else { EmitUint8(0xC1); - EmitOperand(reg_or_opcode, Operand(reg)); + EmitOperand(reg_or_opcode, operand); EmitUint8(imm.value() & 0xFF); } } void X86Assembler::EmitGenericShift(int reg_or_opcode, - Register operand, + const Operand& operand, Register shifter) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); CHECK_EQ(shifter, ECX); EmitUint8(0xD3); - EmitOperand(reg_or_opcode, Operand(operand)); + EmitOperand(reg_or_opcode, operand); +} + +static dwarf::Reg DWARFReg(Register reg) { + return dwarf::Reg::X86Core(static_cast<int>(reg)); } constexpr size_t kFramePointerSize = 4; @@ -1645,12 +1697,15 @@ void X86Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, const std::vector<ManagedRegister>& spill_regs, const ManagedRegisterEntrySpills& entry_spills) { DCHECK_EQ(buffer_.Size(), 0U); // Nothing emitted yet. + cfi_.SetCurrentCFAOffset(4); // Return address on stack. CHECK_ALIGNED(frame_size, kStackAlignment); int gpr_count = 0; for (int i = spill_regs.size() - 1; i >= 0; --i) { Register spill = spill_regs.at(i).AsX86().AsCpuRegister(); pushl(spill); gpr_count++; + cfi_.AdjustCFAOffset(kFramePointerSize); + cfi_.RelOffset(DWARFReg(spill), 0); } // return address then method on stack. @@ -1658,7 +1713,10 @@ void X86Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, sizeof(StackReference<mirror::ArtMethod>) /*method*/ - kFramePointerSize /*return address*/; addl(ESP, Immediate(-adjust)); + cfi_.AdjustCFAOffset(adjust); pushl(method_reg.AsX86().AsCpuRegister()); + cfi_.AdjustCFAOffset(kFramePointerSize); + DCHECK_EQ(static_cast<size_t>(cfi_.GetCurrentCFAOffset()), frame_size); for (size_t i = 0; i < entry_spills.size(); ++i) { ManagedRegisterSpill spill = entry_spills.at(i); @@ -1680,25 +1738,33 @@ void X86Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, void X86Assembler::RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& spill_regs) { CHECK_ALIGNED(frame_size, kStackAlignment); + cfi_.RememberState(); int adjust = frame_size - (spill_regs.size() * kFramePointerSize) - sizeof(StackReference<mirror::ArtMethod>); addl(ESP, Immediate(adjust)); + cfi_.AdjustCFAOffset(-adjust); for (size_t i = 0; i < spill_regs.size(); ++i) { - x86::X86ManagedRegister spill = spill_regs.at(i).AsX86(); - DCHECK(spill.IsCpuRegister()); - popl(spill.AsCpuRegister()); + Register spill = spill_regs.at(i).AsX86().AsCpuRegister(); + popl(spill); + cfi_.AdjustCFAOffset(-static_cast<int>(kFramePointerSize)); + cfi_.Restore(DWARFReg(spill)); } ret(); + // The CFI should be restored for any code that follows the exit block. 
+ cfi_.RestoreState(); + cfi_.DefCFAOffset(frame_size); } void X86Assembler::IncreaseFrameSize(size_t adjust) { CHECK_ALIGNED(adjust, kStackAlignment); addl(ESP, Immediate(-adjust)); + cfi_.AdjustCFAOffset(adjust); } void X86Assembler::DecreaseFrameSize(size_t adjust) { CHECK_ALIGNED(adjust, kStackAlignment); addl(ESP, Immediate(adjust)); + cfi_.AdjustCFAOffset(-adjust); } void X86Assembler::Store(FrameOffset offs, ManagedRegister msrc, size_t size) { diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h index f3675aeceb..37acb6ef16 100644 --- a/compiler/utils/x86/assembler_x86.h +++ b/compiler/utils/x86/assembler_x86.h @@ -429,12 +429,20 @@ class X86Assembler FINAL : public Assembler { void shll(Register reg, const Immediate& imm); void shll(Register operand, Register shifter); + void shll(const Address& address, const Immediate& imm); + void shll(const Address& address, Register shifter); void shrl(Register reg, const Immediate& imm); void shrl(Register operand, Register shifter); + void shrl(const Address& address, const Immediate& imm); + void shrl(const Address& address, Register shifter); void sarl(Register reg, const Immediate& imm); void sarl(Register operand, Register shifter); + void sarl(const Address& address, const Immediate& imm); + void sarl(const Address& address, Register shifter); void shld(Register dst, Register src, Register shifter); + void shld(Register dst, Register src, const Immediate& imm); void shrd(Register dst, Register src, Register shifter); + void shrd(Register dst, Register src, const Immediate& imm); void negl(Register reg); void notl(Register reg); @@ -619,8 +627,8 @@ class X86Assembler FINAL : public Assembler { void EmitLabelLink(Label* label); void EmitNearLabelLink(Label* label); - void EmitGenericShift(int rm, Register reg, const Immediate& imm); - void EmitGenericShift(int rm, Register operand, Register shifter); + void EmitGenericShift(int rm, const Operand& operand, const Immediate& imm); + void EmitGenericShift(int rm, const Operand& operand, Register shifter); DISALLOW_COPY_AND_ASSIGN(X86Assembler); }; diff --git a/compiler/utils/x86/managed_register_x86.h b/compiler/utils/x86/managed_register_x86.h index 09d2b4919d..4e8c41e217 100644 --- a/compiler/utils/x86/managed_register_x86.h +++ b/compiler/utils/x86/managed_register_x86.h @@ -18,6 +18,7 @@ #define ART_COMPILER_UTILS_X86_MANAGED_REGISTER_X86_H_ #include "constants_x86.h" +#include "dwarf/register.h" #include "utils/managed_register.h" namespace art { diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index 2e0d9e1840..638659d635 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -209,7 +209,9 @@ void X86_64Assembler::movzxb(CpuRegister dst, CpuRegister src) { void X86_64Assembler::movzxb(CpuRegister dst, const Address& src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); - EmitOptionalByteRegNormalizingRex32(dst, src); + // Byte register is only in the source register form, so we don't use + // EmitOptionalByteRegNormalizingRex32(dst, src); + EmitOptionalRex32(dst, src); EmitUint8(0x0F); EmitUint8(0xB6); EmitOperand(dst.LowBits(), src); @@ -227,7 +229,9 @@ void X86_64Assembler::movsxb(CpuRegister dst, CpuRegister src) { void X86_64Assembler::movsxb(CpuRegister dst, const Address& src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); - EmitOptionalByteRegNormalizingRex32(dst, src); + // Byte register is only in the source register form, so we don't 
use + // EmitOptionalByteRegNormalizingRex32(dst, src); + EmitOptionalRex32(dst, src); EmitUint8(0x0F); EmitUint8(0xBE); EmitOperand(dst.LowBits(), src); @@ -1620,6 +1624,14 @@ void X86_64Assembler::imull(CpuRegister reg) { } +void X86_64Assembler::imulq(CpuRegister reg) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitRex64(reg); + EmitUint8(0xF7); + EmitOperand(5, Operand(reg)); +} + + void X86_64Assembler::imull(const Address& address) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitOptionalRex32(address); @@ -1967,6 +1979,10 @@ void X86_64Assembler::EmitOperand(uint8_t reg_or_opcode, const Operand& operand) for (int i = 1; i < length; i++) { EmitUint8(operand.encoding_[i]); } + AssemblerFixup* fixup = operand.GetFixup(); + if (fixup != nullptr) { + EmitFixup(fixup); + } } @@ -2165,11 +2181,18 @@ void X86_64Assembler::EmitRex64(CpuRegister dst, const Operand& operand) { } void X86_64Assembler::EmitOptionalByteRegNormalizingRex32(CpuRegister dst, CpuRegister src) { - EmitOptionalRex(true, false, dst.NeedsRex(), false, src.NeedsRex()); + // For src, SPL, BPL, SIL, DIL need the rex prefix. + bool force = src.AsRegister() > 3; + EmitOptionalRex(force, false, dst.NeedsRex(), false, src.NeedsRex()); } void X86_64Assembler::EmitOptionalByteRegNormalizingRex32(CpuRegister dst, const Operand& operand) { - uint8_t rex = 0x40 | operand.rex(); // REX.0000 + uint8_t rex = operand.rex(); + // For dst, SPL, BPL, SIL, DIL need the rex prefix. + bool force = dst.AsRegister() > 3; + if (force) { + rex |= 0x40; // REX.0000 + } if (dst.NeedsRex()) { rex |= 0x44; // REX.0R00 } @@ -2178,12 +2201,20 @@ void X86_64Assembler::EmitOptionalByteRegNormalizingRex32(CpuRegister dst, const } } +static dwarf::Reg DWARFReg(Register reg) { + return dwarf::Reg::X86_64Core(static_cast<int>(reg)); +} +static dwarf::Reg DWARFReg(FloatRegister reg) { + return dwarf::Reg::X86_64Fp(static_cast<int>(reg)); +} + constexpr size_t kFramePointerSize = 8; void X86_64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, const std::vector<ManagedRegister>& spill_regs, const ManagedRegisterEntrySpills& entry_spills) { DCHECK_EQ(buffer_.Size(), 0U); // Nothing emitted yet. + cfi_.SetCurrentCFAOffset(8); // Return address on stack. CHECK_ALIGNED(frame_size, kStackAlignment); int gpr_count = 0; for (int i = spill_regs.size() - 1; i >= 0; --i) { @@ -2191,6 +2222,8 @@ void X86_64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, if (spill.IsCpuRegister()) { pushq(spill.AsCpuRegister()); gpr_count++; + cfi_.AdjustCFAOffset(kFramePointerSize); + cfi_.RelOffset(DWARFReg(spill.AsCpuRegister().AsRegister()), 0); } } // return address then method on stack. 
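The EmitOptionalByteRegNormalizingRex32 changes a few hunks up encode a classic x86-64 subtlety, hence the `> 3` test: without any REX prefix, byte-register encodings 4-7 still name the legacy high registers AH/CH/DH/BH, while the presence of a REX prefix, even the otherwise-empty 0x40, switches them to SPL/BPL/SIL/DIL. Encodings 0-3 (AL/CL/DL/BL) read the same either way. A worked byte-level example:

    // movzbl %dh,  %eax   ->    0f b6 c6   (no REX: rm=6 selects DH)
    // movzbl %sil, %eax   -> 40 0f b6 c6   (REX.40 present: rm=6 now selects SIL)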
@@ -2198,6 +2231,7 @@ void X86_64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, - (gpr_count * kFramePointerSize) - kFramePointerSize /*return address*/; subq(CpuRegister(RSP), Immediate(rest_of_frame)); + cfi_.AdjustCFAOffset(rest_of_frame); // spill xmms int64_t offset = rest_of_frame; @@ -2206,6 +2240,7 @@ void X86_64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, if (spill.IsXmmRegister()) { offset -= sizeof(double); movsd(Address(CpuRegister(RSP), offset), spill.AsXmmRegister()); + cfi_.RelOffset(DWARFReg(spill.AsXmmRegister().AsFloatRegister()), offset); } } @@ -2237,6 +2272,7 @@ void X86_64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, void X86_64Assembler::RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& spill_regs) { CHECK_ALIGNED(frame_size, kStackAlignment); + cfi_.RememberState(); int gpr_count = 0; // unspill xmms int64_t offset = static_cast<int64_t>(frame_size) - (spill_regs.size() * kFramePointerSize) - 2 * kFramePointerSize; @@ -2245,28 +2281,38 @@ void X86_64Assembler::RemoveFrame(size_t frame_size, if (spill.IsXmmRegister()) { offset += sizeof(double); movsd(spill.AsXmmRegister(), Address(CpuRegister(RSP), offset)); + cfi_.Restore(DWARFReg(spill.AsXmmRegister().AsFloatRegister())); } else { gpr_count++; } } - addq(CpuRegister(RSP), Immediate(static_cast<int64_t>(frame_size) - (gpr_count * kFramePointerSize) - kFramePointerSize)); + int adjust = static_cast<int>(frame_size) - (gpr_count * kFramePointerSize) - kFramePointerSize; + addq(CpuRegister(RSP), Immediate(adjust)); + cfi_.AdjustCFAOffset(-adjust); for (size_t i = 0; i < spill_regs.size(); ++i) { x86_64::X86_64ManagedRegister spill = spill_regs.at(i).AsX86_64(); if (spill.IsCpuRegister()) { popq(spill.AsCpuRegister()); + cfi_.AdjustCFAOffset(-static_cast<int>(kFramePointerSize)); + cfi_.Restore(DWARFReg(spill.AsCpuRegister().AsRegister())); } } ret(); + // The CFI should be restored for any code that follows the exit block. + cfi_.RestoreState(); + cfi_.DefCFAOffset(frame_size); } void X86_64Assembler::IncreaseFrameSize(size_t adjust) { CHECK_ALIGNED(adjust, kStackAlignment); addq(CpuRegister(RSP), Immediate(-static_cast<int64_t>(adjust))); + cfi_.AdjustCFAOffset(adjust); } void X86_64Assembler::DecreaseFrameSize(size_t adjust) { CHECK_ALIGNED(adjust, kStackAlignment); addq(CpuRegister(RSP), Immediate(adjust)); + cfi_.AdjustCFAOffset(-adjust); } void X86_64Assembler::Store(FrameOffset offs, ManagedRegister msrc, size_t size) { @@ -2694,5 +2740,55 @@ void X86_64ExceptionSlowPath::Emit(Assembler *sasm) { #undef __ } +void X86_64Assembler::AddConstantArea() { + const std::vector<int32_t>& area = constant_area_.GetBuffer(); + for (size_t i = 0, e = area.size(); i < e; i++) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitInt32(area[i]); + } +} + +int ConstantArea::AddInt32(int32_t v) { + for (size_t i = 0, e = buffer_.size(); i < e; i++) { + if (v == buffer_[i]) { + return i * elem_size_; + } + } + + // Didn't match anything. + int result = buffer_.size() * elem_size_; + buffer_.push_back(v); + return result; +} + +int ConstantArea::AddInt64(int64_t v) { + int32_t v_low = v; + int32_t v_high = v >> 32; + if (buffer_.size() > 1) { + // Ensure we don't pass the end of the buffer. + for (size_t i = 0, e = buffer_.size() - 1; i < e; i++) { + if (v_low == buffer_[i] && v_high == buffer_[i + 1]) { + return i * elem_size_; + } + } + } + + // Didn't match anything. 
+ int result = buffer_.size() * elem_size_; + buffer_.push_back(v_low); + buffer_.push_back(v_high); + return result; +} + +int ConstantArea::AddDouble(double v) { + // Treat the value as a 64-bit integer value. + return AddInt64(bit_cast<int64_t, double>(v)); +} + +int ConstantArea::AddFloat(float v) { + // Treat the value as a 32-bit integer value. + return AddInt32(bit_cast<int32_t, float>(v)); +} + } // namespace x86_64 } // namespace art diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index a786a6cbff..15b8b15c74 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -97,9 +97,13 @@ class Operand : public ValueObject { && (reg.NeedsRex() == ((rex_ & 1) != 0)); // REX.000B bits match. } + AssemblerFixup* GetFixup() const { + return fixup_; + } + protected: // Operand can be sub classed (e.g: Address). - Operand() : rex_(0), length_(0) { } + Operand() : rex_(0), length_(0), fixup_(nullptr) { } void SetModRM(uint8_t mod_in, CpuRegister rm_in) { CHECK_EQ(mod_in & ~3, 0); @@ -136,12 +140,17 @@ class Operand : public ValueObject { length_ += disp_size; } + void SetFixup(AssemblerFixup* fixup) { + fixup_ = fixup; + } + private: uint8_t rex_; uint8_t length_; uint8_t encoding_[6]; + AssemblerFixup* fixup_; - explicit Operand(CpuRegister reg) : rex_(0), length_(0) { SetModRM(3, reg); } + explicit Operand(CpuRegister reg) : rex_(0), length_(0), fixup_(nullptr) { SetModRM(3, reg); } // Get the operand encoding byte at the given index. uint8_t encoding_at(int index_in) const { @@ -226,12 +235,25 @@ class Address : public Operand { result.SetSIB(TIMES_1, CpuRegister(RSP), CpuRegister(RBP)); result.SetDisp32(addr); } else { + // RIP addressing is done using RBP as the base register. + // The value in RBP isn't used. Instead the offset is added to RIP. result.SetModRM(0, CpuRegister(RBP)); result.SetDisp32(addr); } return result; } + // An RIP relative address that will be fixed up later. + static Address RIP(AssemblerFixup* fixup) { + Address result; + // RIP addressing is done using RBP as the base register. + // The value in RBP isn't used. Instead the offset is added to RIP. + result.SetModRM(0, CpuRegister(RBP)); + result.SetDisp32(0); + result.SetFixup(fixup); + return result; + } + // If no_rip is true then the Absolute address isn't RIP relative. static Address Absolute(ThreadOffset<8> addr, bool no_rip = false) { return Absolute(addr.Int32Value(), no_rip); @@ -242,6 +264,43 @@ class Address : public Operand { }; +/** + * Class to handle constant area values. + */ +class ConstantArea { + public: + ConstantArea() {} + + // Add a double to the constant area, returning the offset into + // the constant area where the literal resides. + int AddDouble(double v); + + // Add a float to the constant area, returning the offset into + // the constant area where the literal resides. + int AddFloat(float v); + + // Add an int32_t to the constant area, returning the offset into + // the constant area where the literal resides. + int AddInt32(int32_t v); + + // Add an int64_t to the constant area, returning the offset into + // the constant area where the literal resides. 
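On the "RBP as the base register" comments in Address::RIP above: in 64-bit mode, the ModRM pattern mod=00, rm=101 (which legacy x86 used for an absolute disp32) is redefined to mean RIP + disp32. Address::RIP emits that pattern with a zero displacement and attaches the fixup, which presumably rewrites the four displacement bytes once the target's final position is known. A worked encoding, assuming the standard SSE2 load form:

    // movsd 0x2a(%rip), %xmm0
    //   f2 0f 10 05 2a 00 00 00
    //            ^^ ModRM: mod=00, reg=xmm0, rm=101 -> RIP-relative
    //               ^^^^^^^^^^^ disp32: the four bytes a RIP fixup would patch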
+ int AddInt64(int64_t v); + + int GetSize() const { + return buffer_.size() * elem_size_; + } + + const std::vector<int32_t>& GetBuffer() const { + return buffer_; + } + + private: + static constexpr size_t elem_size_ = sizeof(int32_t); + std::vector<int32_t> buffer_; +}; + + class X86_64Assembler FINAL : public Assembler { public: X86_64Assembler() {} @@ -468,6 +527,7 @@ class X86_64Assembler FINAL : public Assembler { void imull(CpuRegister reg, const Immediate& imm); void imull(CpuRegister reg, const Address& address); + void imulq(CpuRegister src); void imulq(CpuRegister dst, CpuRegister src); void imulq(CpuRegister reg, const Immediate& imm); void imulq(CpuRegister reg, const Address& address); @@ -668,6 +728,28 @@ class X86_64Assembler FINAL : public Assembler { // and branch to a ExceptionSlowPath if it is. void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE; + // Add a double to the constant area, returning the offset into + // the constant area where the literal resides. + int AddDouble(double v) { return constant_area_.AddDouble(v); } + + // Add a float to the constant area, returning the offset into + // the constant area where the literal resides. + int AddFloat(float v) { return constant_area_.AddFloat(v); } + + // Add an int32_t to the constant area, returning the offset into + // the constant area where the literal resides. + int AddInt32(int32_t v) { return constant_area_.AddInt32(v); } + + // Add an int64_t to the constant area, returning the offset into + // the constant area where the literal resides. + int AddInt64(int64_t v) { return constant_area_.AddInt64(v); } + + // Add the contents of the constant area to the assembler buffer. + void AddConstantArea(); + + // Is the constant area empty? Return true if there are no literals in the constant area. 
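Taken together with Address::RIP above, the intended flow in a code generator is presumably along these lines (a sketch, not code from this change; the fixup plumbing that computes the final displacement is assumed):

    // int off = AddDouble(3.14);         // deduplicated byte offset into the literal pool
    // movsd(xmm0, Address::RIP(fixup));  // fixup later resolves to constant-area base + off
    // ...at the end of code generation:
    // if (!IsConstantAreaEmpty()) {
    //   AddConstantArea();               // append the pooled literals after the code
    // }

One subtlety of AddInt64: its pair-wise scan compares overlapping 32-bit slots, so a new 64-bit literal can be satisfied by bytes straddling two previously added values; that is safe because only the bit pattern at the returned offset matters.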
+ bool IsConstantAreaEmpty() const { return constant_area_.GetSize() == 0; } + private: void EmitUint8(uint8_t value); void EmitInt32(int32_t value); @@ -713,6 +795,8 @@ class X86_64Assembler FINAL : public Assembler { void EmitOptionalByteRegNormalizingRex32(CpuRegister dst, CpuRegister src); void EmitOptionalByteRegNormalizingRex32(CpuRegister dst, const Operand& operand); + ConstantArea constant_area_; + DISALLOW_COPY_AND_ASSIGN(X86_64Assembler); }; diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc index a79bd09687..116190a832 100644 --- a/compiler/utils/x86_64/assembler_x86_64_test.cc +++ b/compiler/utils/x86_64/assembler_x86_64_test.cc @@ -174,6 +174,40 @@ class AssemblerX86_64Test : public AssemblerTest<x86_64::X86_64Assembler, x86_64 secondary_register_names_.emplace(x86_64::CpuRegister(x86_64::R14), "r14d"); secondary_register_names_.emplace(x86_64::CpuRegister(x86_64::R15), "r15d"); + tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::RAX), "ax"); + tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::RBX), "bx"); + tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::RCX), "cx"); + tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::RDX), "dx"); + tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::RBP), "bp"); + tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::RSP), "sp"); + tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::RSI), "si"); + tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::RDI), "di"); + tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::R8), "r8w"); + tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::R9), "r9w"); + tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::R10), "r10w"); + tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::R11), "r11w"); + tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::R12), "r12w"); + tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::R13), "r13w"); + tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::R14), "r14w"); + tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::R15), "r15w"); + + quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::RAX), "al"); + quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::RBX), "bl"); + quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::RCX), "cl"); + quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::RDX), "dl"); + quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::RBP), "bpl"); + quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::RSP), "spl"); + quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::RSI), "sil"); + quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::RDI), "dil"); + quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R8), "r8b"); + quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R9), "r9b"); + quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R10), "r10b"); + quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R11), "r11b"); + quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R12), "r12b"); + quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R13), "r13b"); + quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R14), "r14b"); + quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R15), "r15b"); + fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM0)); fp_registers_.push_back(new 
x86_64::XmmRegister(x86_64::XMM1)); fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM2)); @@ -216,9 +250,21 @@ class AssemblerX86_64Test : public AssemblerTest<x86_64::X86_64Assembler, x86_64 return secondary_register_names_[reg]; } + std::string GetTertiaryRegisterName(const x86_64::CpuRegister& reg) OVERRIDE { + CHECK(tertiary_register_names_.find(reg) != tertiary_register_names_.end()); + return tertiary_register_names_[reg]; + } + + std::string GetQuaternaryRegisterName(const x86_64::CpuRegister& reg) OVERRIDE { + CHECK(quaternary_register_names_.find(reg) != quaternary_register_names_.end()); + return quaternary_register_names_[reg]; + } + private: std::vector<x86_64::CpuRegister*> registers_; std::map<x86_64::CpuRegister, std::string, X86_64CpuRegisterCompare> secondary_register_names_; + std::map<x86_64::CpuRegister, std::string, X86_64CpuRegisterCompare> tertiary_register_names_; + std::map<x86_64::CpuRegister, std::string, X86_64CpuRegisterCompare> quaternary_register_names_; std::vector<x86_64::XmmRegister*> fp_registers_; }; @@ -269,6 +315,10 @@ TEST_F(AssemblerX86_64Test, AddlImm) { DriverStr(Repeatri(&x86_64::X86_64Assembler::addl, 4U, "add ${imm}, %{reg}"), "addli"); } +TEST_F(AssemblerX86_64Test, ImulqReg1) { + DriverStr(RepeatR(&x86_64::X86_64Assembler::imulq, "imulq %{reg}"), "imulq"); +} + TEST_F(AssemblerX86_64Test, ImulqRegs) { DriverStr(RepeatRR(&x86_64::X86_64Assembler::imulq, "imulq %{reg2}, %{reg1}"), "imulq"); } @@ -874,31 +924,12 @@ std::string setcc_test_fn(AssemblerX86_64Test::Base* assembler_test, "l", "ge", "le" }; std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters(); - - std::string byte_regs[16]; - byte_regs[x86_64::RAX] = "al"; - byte_regs[x86_64::RBX] = "bl"; - byte_regs[x86_64::RCX] = "cl"; - byte_regs[x86_64::RDX] = "dl"; - byte_regs[x86_64::RBP] = "bpl"; - byte_regs[x86_64::RSP] = "spl"; - byte_regs[x86_64::RSI] = "sil"; - byte_regs[x86_64::RDI] = "dil"; - byte_regs[x86_64::R8] = "r8b"; - byte_regs[x86_64::R9] = "r9b"; - byte_regs[x86_64::R10] = "r10b"; - byte_regs[x86_64::R11] = "r11b"; - byte_regs[x86_64::R12] = "r12b"; - byte_regs[x86_64::R13] = "r13b"; - byte_regs[x86_64::R14] = "r14b"; - byte_regs[x86_64::R15] = "r15b"; - std::ostringstream str; for (auto reg : registers) { for (size_t i = 0; i < 15; ++i) { assembler->setcc(static_cast<x86_64::Condition>(i), *reg); - str << "set" << suffixes[i] << " %" << byte_regs[reg->AsRegister()] << "\n"; + str << "set" << suffixes[i] << " %" << assembler_test->GetQuaternaryRegisterName(*reg) << "\n"; } } @@ -1029,4 +1060,12 @@ TEST_F(AssemblerX86_64Test, DecreaseFrame) { DriverFn(&decreaseframe_test_fn, "DecreaseFrame"); } +TEST_F(AssemblerX86_64Test, MovzxbRegs) { + DriverStr(Repeatrb(&x86_64::X86_64Assembler::movzxb, "movzbl %{reg2}, %{reg1}"), "movzxb"); +} + +TEST_F(AssemblerX86_64Test, MovsxbRegs) { + DriverStr(Repeatrb(&x86_64::X86_64Assembler::movsxb, "movsbl %{reg2}, %{reg1}"), "movsxb"); +} + } // namespace art diff --git a/compiler/utils/x86_64/managed_register_x86_64.h b/compiler/utils/x86_64/managed_register_x86_64.h index 822659fffc..47bbb44fc8 100644 --- a/compiler/utils/x86_64/managed_register_x86_64.h +++ b/compiler/utils/x86_64/managed_register_x86_64.h @@ -18,6 +18,7 @@ #define ART_COMPILER_UTILS_X86_64_MANAGED_REGISTER_X86_64_H_ #include "constants_x86_64.h" +#include "dwarf/register.h" #include "utils/managed_register.h" namespace art { diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc index 2e1b7aef04..10949e4524 100644 --- a/dex2oat/dex2oat.cc +++ 
b/dex2oat/dex2oat.cc @@ -1503,11 +1503,16 @@ class Dex2Oat FINAL { return failure_count; } - // Returns true if dex_files has a dex with the named location. + // Returns true if dex_files has a dex with the named location. We compare canonical locations, + // so that relative and absolute paths will match. Not caching the canonical locations isn't + // very efficient, but under normal circumstances the list is small and this code isn't + // performance-sensitive. static bool DexFilesContains(const std::vector<const DexFile*>& dex_files, const std::string& location) { + std::string canonical_location(DexFile::GetDexCanonicalLocation(location.c_str())); for (size_t i = 0; i < dex_files.size(); ++i) { - if (dex_files[i]->GetLocation() == location) { + if (DexFile::GetDexCanonicalLocation(dex_files[i]->GetLocation().c_str()) == + canonical_location) { return true; } } diff --git a/disassembler/Android.mk b/disassembler/Android.mk index 1cfd45acdc..691c43f7a6 100644 --- a/disassembler/Android.mk +++ b/disassembler/Android.mk @@ -23,7 +23,6 @@ LIBART_DISASSEMBLER_SRC_FILES := \ disassembler_arm.cc \ disassembler_arm64.cc \ disassembler_mips.cc \ - disassembler_mips64.cc \ disassembler_x86.cc # $(1): target or host diff --git a/disassembler/disassembler.cc b/disassembler/disassembler.cc index fbc8dbb404..c05c3ed8ec 100644 --- a/disassembler/disassembler.cc +++ b/disassembler/disassembler.cc @@ -23,7 +23,6 @@ #include "disassembler_arm.h" #include "disassembler_arm64.h" #include "disassembler_mips.h" -#include "disassembler_mips64.h" #include "disassembler_x86.h" namespace art { @@ -34,9 +33,9 @@ Disassembler* Disassembler::Create(InstructionSet instruction_set, DisassemblerO } else if (instruction_set == kArm64) { return new arm64::DisassemblerArm64(options); } else if (instruction_set == kMips) { - return new mips::DisassemblerMips(options); + return new mips::DisassemblerMips(options, false); } else if (instruction_set == kMips64) { - return new mips64::DisassemblerMips64(options); + return new mips::DisassemblerMips(options, true); } else if (instruction_set == kX86) { return new x86::DisassemblerX86(options, false); } else if (instruction_set == kX86_64) { diff --git a/disassembler/disassembler_mips.cc b/disassembler/disassembler_mips.cc index e2b7341c9a..ac8173773d 100644 --- a/disassembler/disassembler_mips.cc +++ b/disassembler/disassembler_mips.cc @@ -44,6 +44,7 @@ static const uint32_t kCop1 = (17 << kOpcodeShift); static const uint32_t kITypeMask = (0x3f << kOpcodeShift); static const uint32_t kJTypeMask = (0x3f << kOpcodeShift); static const uint32_t kRTypeMask = ((0x3f << kOpcodeShift) | (0x3f)); +static const uint32_t kSpecial0Mask = (0x3f << kOpcodeShift); static const uint32_t kSpecial2Mask = (0x3f << kOpcodeShift); static const uint32_t kFpMask = kRTypeMask; @@ -61,6 +62,7 @@ static const MipsInstruction gMipsInstructions[] = { { kRTypeMask, 7, "srav", "DTS", }, { kRTypeMask, 8, "jr", "S", }, { kRTypeMask | (0x1f << 11), 9 | (31 << 11), "jalr", "S", }, // rd = 31 is implicit. + { kRTypeMask | (0x1f << 11), 9, "jr", "S", }, // rd = 0 is implicit. { kRTypeMask, 9, "jalr", "DS", }, // General case.
{ kRTypeMask | (0x1f << 6), 10, "movz", "DST", }, { kRTypeMask | (0x1f << 6), 11, "movn", "DST", }, @@ -71,6 +73,9 @@ static const MipsInstruction gMipsInstructions[] = { { kRTypeMask, 17, "mthi", "S", }, { kRTypeMask, 18, "mflo", "D", }, { kRTypeMask, 19, "mtlo", "S", }, + { kRTypeMask, 20, "dsllv", "DTS", }, + { kRTypeMask, 22, "dsrlv", "DTS", }, + { kRTypeMask, 23, "dsrav", "DTS", }, { kRTypeMask | (0x1f << 6), 24, "mult", "ST", }, { kRTypeMask | (0x1f << 6), 25, "multu", "ST", }, { kRTypeMask | (0x1f << 6), 26, "div", "ST", }, @@ -89,12 +94,38 @@ static const MipsInstruction gMipsInstructions[] = { { kRTypeMask, 39, "nor", "DST", }, { kRTypeMask, 42, "slt", "DST", }, { kRTypeMask, 43, "sltu", "DST", }, - // 0, 48, tge - // 0, 49, tgeu - // 0, 50, tlt - // 0, 51, tltu - // 0, 52, teq - // 0, 54, tne + { kRTypeMask, 45, "daddu", "DST", }, + { kRTypeMask, 46, "dsub", "DST", }, + { kRTypeMask, 47, "dsubu", "DST", }, + // TODO: tge[u], tlt[u], teq, tne + // TODO: seleqz, selnez + { kRTypeMask, 56, "dsll", "DTA", }, + { kRTypeMask, 58, "dsrl", "DTA", }, + { kRTypeMask, 59, "dsra", "DTA", }, + { kRTypeMask, 60, "dsll32", "DTA", }, + { kRTypeMask | (0x1f << 21), 62 | (1 << 21), "drotr32", "DTA", }, + { kRTypeMask, 62, "dsrl32", "DTA", }, + { kRTypeMask, 63, "dsra32", "DTA", }, + + // SPECIAL0 + { kSpecial0Mask | 0x7ff, (2 << 6) | 24, "mul", "DST" }, + { kSpecial0Mask | 0x7ff, (3 << 6) | 24, "muh", "DST" }, + { kSpecial0Mask | 0x7ff, (2 << 6) | 25, "mulu", "DST" }, + { kSpecial0Mask | 0x7ff, (3 << 6) | 25, "muhu", "DST" }, + { kSpecial0Mask | 0x7ff, (2 << 6) | 26, "div", "DST" }, + { kSpecial0Mask | 0x7ff, (3 << 6) | 26, "mod", "DST" }, + { kSpecial0Mask | 0x7ff, (2 << 6) | 27, "divu", "DST" }, + { kSpecial0Mask | 0x7ff, (3 << 6) | 27, "modu", "DST" }, + { kSpecial0Mask | 0x7ff, (2 << 6) | 28, "dmul", "DST" }, + { kSpecial0Mask | 0x7ff, (3 << 6) | 28, "dmuh", "DST" }, + { kSpecial0Mask | 0x7ff, (2 << 6) | 29, "dmulu", "DST" }, + { kSpecial0Mask | 0x7ff, (3 << 6) | 29, "dmuhu", "DST" }, + { kSpecial0Mask | 0x7ff, (2 << 6) | 30, "ddiv", "DST" }, + { kSpecial0Mask | 0x7ff, (3 << 6) | 30, "dmod", "DST" }, + { kSpecial0Mask | 0x7ff, (2 << 6) | 31, "ddivu", "DST" }, + { kSpecial0Mask | 0x7ff, (3 << 6) | 31, "dmodu", "DST" }, + // TODO: [d]clz, [d]clo + // TODO: sdbbp // SPECIAL2 { kSpecial2Mask | 0x7ff, (28 << kOpcodeShift) | 2, "mul", "DST" }, @@ -120,6 +151,8 @@ static const MipsInstruction gMipsInstructions[] = { { kITypeMask | (0x1f << 16), 1 << kOpcodeShift | (18 << 16), "bltzall", "SB" }, { kITypeMask | (0x1f << 16), 6 << kOpcodeShift | (0 << 16), "blez", "SB" }, { kITypeMask | (0x1f << 16), 7 << kOpcodeShift | (0 << 16), "bgtz", "SB" }, + { kITypeMask | (0x1f << 16), 1 << kOpcodeShift | (6 << 16), "dahi", "Si", }, + { kITypeMask | (0x1f << 16), 1 << kOpcodeShift | (30 << 16), "dati", "Si", }, { 0xffff0000, (4 << kOpcodeShift), "b", "B" }, { 0xffff0000, (1 << kOpcodeShift) | (17 << 16), "bal", "B" }, @@ -130,27 +163,35 @@ { kITypeMask, 11 << kOpcodeShift, "sltiu", "TSi", }, { kITypeMask, 12 << kOpcodeShift, "andi", "TSi", }, { kITypeMask, 13 << kOpcodeShift, "ori", "TSi", }, - { kITypeMask, 14 << kOpcodeShift, "ori", "TSi", }, - { kITypeMask, 15 << kOpcodeShift, "lui", "TI", }, + { kITypeMask, 14 << kOpcodeShift, "xori", "TSi", }, + { kITypeMask | (0x1f << 21), 15 << kOpcodeShift, "lui", "TI", }, + { kITypeMask, 15 << kOpcodeShift, "aui", "TSI", }, + { kITypeMask, 25 << kOpcodeShift, "daddiu", "TSi", }, + { kITypeMask, 29 << kOpcodeShift,
"daui", "TSi", }, { kITypeMask, 32u << kOpcodeShift, "lb", "TO", }, { kITypeMask, 33u << kOpcodeShift, "lh", "TO", }, { kITypeMask, 35u << kOpcodeShift, "lw", "TO", }, { kITypeMask, 36u << kOpcodeShift, "lbu", "TO", }, { kITypeMask, 37u << kOpcodeShift, "lhu", "TO", }, + { kITypeMask, 39u << kOpcodeShift, "lwu", "TO", }, { kITypeMask, 40u << kOpcodeShift, "sb", "TO", }, { kITypeMask, 41u << kOpcodeShift, "sh", "TO", }, { kITypeMask, 43u << kOpcodeShift, "sw", "TO", }, { kITypeMask, 49u << kOpcodeShift, "lwc1", "tO", }, { kITypeMask, 53u << kOpcodeShift, "ldc1", "tO", }, + { kITypeMask, 55u << kOpcodeShift, "ld", "TO", }, { kITypeMask, 57u << kOpcodeShift, "swc1", "tO", }, { kITypeMask, 61u << kOpcodeShift, "sdc1", "tO", }, + { kITypeMask, 63u << kOpcodeShift, "sd", "TO", }, // Floating point. - { kFpMask | (0x1f << 21), kCop1 | (0x00 << 21) | 0, "mfc1", "Td" }, - { kFpMask | (0x1f << 21), kCop1 | (0x03 << 21) | 0, "mfhc1", "Td" }, - { kFpMask | (0x1f << 21), kCop1 | (0x04 << 21) | 0, "mtc1", "Td" }, - { kFpMask | (0x1f << 21), kCop1 | (0x07 << 21) | 0, "mthc1", "Td" }, + { kFpMask | (0x1f << 21), kCop1 | (0x00 << 21), "mfc1", "Td" }, + { kFpMask | (0x1f << 21), kCop1 | (0x01 << 21), "dmfc1", "Td" }, + { kFpMask | (0x1f << 21), kCop1 | (0x03 << 21), "mfhc1", "Td" }, + { kFpMask | (0x1f << 21), kCop1 | (0x04 << 21), "mtc1", "Td" }, + { kFpMask | (0x1f << 21), kCop1 | (0x05 << 21), "dmtc1", "Td" }, + { kFpMask | (0x1f << 21), kCop1 | (0x07 << 21), "mthc1", "Td" }, { kFpMask | (0x10 << 21), kCop1 | (0x10 << 21) | 0, "add", "fadt" }, { kFpMask | (0x10 << 21), kCop1 | (0x10 << 21) | 1, "sub", "fadt" }, { kFpMask | (0x10 << 21), kCop1 | (0x10 << 21) | 2, "mul", "fadt" }, @@ -249,7 +290,11 @@ size_t DisassemblerMips::Dump(std::ostream& os, const uint8_t* instr_ptr) { args << StringPrintf("%+d(r%d)", offset, rs); if (rs == 17) { args << " ; "; - Thread::DumpThreadOffset<4>(args, offset); + if (is64bit_) { + Thread::DumpThreadOffset<8>(args, offset); + } else { + Thread::DumpThreadOffset<4>(args, offset); + } } } break; diff --git a/disassembler/disassembler_mips.h b/disassembler/disassembler_mips.h index 00b2f8dbb4..67c3fcb71f 100644 --- a/disassembler/disassembler_mips.h +++ b/disassembler/disassembler_mips.h @@ -26,12 +26,15 @@ namespace mips { class DisassemblerMips FINAL : public Disassembler { public: - explicit DisassemblerMips(DisassemblerOptions* options) : Disassembler(options) {} + explicit DisassemblerMips(DisassemblerOptions* options, bool is64bit) : Disassembler(options), + is64bit_(is64bit) {} size_t Dump(std::ostream& os, const uint8_t* begin) OVERRIDE; void Dump(std::ostream& os, const uint8_t* begin, const uint8_t* end) OVERRIDE; private: + const bool is64bit_; + DISALLOW_COPY_AND_ASSIGN(DisassemblerMips); }; diff --git a/disassembler/disassembler_mips64.cc b/disassembler/disassembler_mips64.cc deleted file mode 100644 index f1c7d8e0df..0000000000 --- a/disassembler/disassembler_mips64.cc +++ /dev/null @@ -1,289 +0,0 @@ -/* - * Copyright (C) 2014 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "disassembler_mips64.h" - -#include <ostream> -#include <sstream> - -#include "base/logging.h" -#include "base/stringprintf.h" -#include "thread.h" - -namespace art { -namespace mips64 { - - -struct Mips64Instruction { - uint32_t mask; - uint32_t value; - const char* name; - const char* args_fmt; - - bool Matches(uint32_t instruction) const { - return (instruction & mask) == value; - } -}; - -static const uint32_t kOpcodeShift = 26; -static const uint32_t kCop1 = (17 << kOpcodeShift); -static const uint32_t kITypeMask = (0x3f << kOpcodeShift); -static const uint32_t kJTypeMask = (0x3f << kOpcodeShift); -static const uint32_t kRTypeMask = ((0x3f << kOpcodeShift) | (0x3f)); -static const uint32_t kSpecial0Mask = (0x3f << kOpcodeShift); -static const uint32_t kFpMask = kRTypeMask; - -static const Mips64Instruction gMips64Instructions[] = { - // "sll r0, r0, 0" is the canonical "nop", used in delay slots. - { 0xffffffff, 0, "nop", "" }, - - // R-type instructions. - { kRTypeMask, 0, "sll", "DTA", }, - // 0, 1, movci - { kRTypeMask, 2, "srl", "DTA", }, - { kRTypeMask, 3, "sra", "DTA", }, - { kRTypeMask, 4, "sllv", "DTS", }, - { kRTypeMask, 6, "srlv", "DTS", }, - { kRTypeMask, 7, "srav", "DTS", }, - { kRTypeMask | (0x1f << 11), 9 | (31 << 11), "jalr", "S", }, // rd = 31 is implicit. - { kRTypeMask | (0x1f << 11), 9, "jr", "S", }, // rd = 0 is implicit. - { kRTypeMask, 9, "jalr", "DS", }, // General case. - { kRTypeMask, 12, "syscall", "", }, // TODO: code - { kRTypeMask, 13, "break", "", }, // TODO: code - { kRTypeMask, 15, "sync", "", }, // TODO: type - { kRTypeMask, 20, "dsllv", "DTS", }, - { kRTypeMask, 22, "dsrlv", "DTS", }, - { kRTypeMask, 23, "dsrav", "DTS", }, - { kRTypeMask, 33, "addu", "DST", }, - { kRTypeMask, 34, "sub", "DST", }, - { kRTypeMask, 35, "subu", "DST", }, - { kRTypeMask, 36, "and", "DST", }, - { kRTypeMask, 37, "or", "DST", }, - { kRTypeMask, 38, "xor", "DST", }, - { kRTypeMask, 39, "nor", "DST", }, - { kRTypeMask, 42, "slt", "DST", }, - { kRTypeMask, 43, "sltu", "DST", }, - { kRTypeMask, 45, "daddu", "DST", }, - { kRTypeMask, 46, "dsub", "DST", }, - { kRTypeMask, 47, "dsubu", "DST", }, - // TODO: seleqz, selnez - { kRTypeMask, 56, "dsll", "DTA", }, - { kRTypeMask, 58, "dsrl", "DTA", }, - { kRTypeMask, 59, "dsra", "DTA", }, - { kRTypeMask, 60, "dsll32", "DTA", }, - { kRTypeMask | (0x1f << 21), 62 | (1 << 21), "drotr32", "DTA", }, - { kRTypeMask, 62, "dsrl32", "DTA", }, - { kRTypeMask, 63, "dsra32", "DTA", }, - - // SPECIAL0 - { kSpecial0Mask | 0x7ff, (2 << 6) | 24, "mul", "DST" }, - { kSpecial0Mask | 0x7ff, (3 << 6) | 24, "muh", "DST" }, - { kSpecial0Mask | 0x7ff, (2 << 6) | 25, "mulu", "DST" }, - { kSpecial0Mask | 0x7ff, (3 << 6) | 25, "muhu", "DST" }, - { kSpecial0Mask | 0x7ff, (2 << 6) | 26, "div", "DST" }, - { kSpecial0Mask | 0x7ff, (3 << 6) | 26, "mod", "DST" }, - { kSpecial0Mask | 0x7ff, (2 << 6) | 27, "divu", "DST" }, - { kSpecial0Mask | 0x7ff, (3 << 6) | 27, "modu", "DST" }, - { kSpecial0Mask | 0x7ff, (2 << 6) | 28, "dmul", "DST" }, - { kSpecial0Mask | 0x7ff, (3 << 6) | 28, "dmuh", "DST" }, - { kSpecial0Mask | 0x7ff, (2 << 6) | 29, "dmulu", "DST" }, - { kSpecial0Mask | 0x7ff, (3 << 6) | 29, "dmuhu", "DST" }, - { kSpecial0Mask | 0x7ff, (2 << 6) | 30, "ddiv", "DST" }, - { kSpecial0Mask | 0x7ff, (3 << 6) | 30, "dmod", "DST" }, - { kSpecial0Mask | 0x7ff, (2 << 6) | 31, "ddivu", "DST" }, - { kSpecial0Mask | 0x7ff, (3 << 6) | 31, "dmodu", "DST" }, - 
// TODO: [d]clz, [d]clo - // TODO: sdbbp - - // J-type instructions. - { kJTypeMask, 2 << kOpcodeShift, "j", "L" }, - { kJTypeMask, 3 << kOpcodeShift, "jal", "L" }, - - // I-type instructions. - { kITypeMask, 4 << kOpcodeShift, "beq", "STB" }, - { kITypeMask, 5 << kOpcodeShift, "bne", "STB" }, - { kITypeMask | (0x1f << 16), 1 << kOpcodeShift | (1 << 16), "bgez", "SB" }, - { kITypeMask | (0x1f << 16), 1 << kOpcodeShift | (0 << 16), "bltz", "SB" }, - { kITypeMask | (0x1f << 16), 6 << kOpcodeShift | (0 << 16), "blez", "SB" }, - { kITypeMask | (0x1f << 16), 7 << kOpcodeShift | (0 << 16), "bgtz", "SB" }, - { kITypeMask | (0x1f << 16), 1 << kOpcodeShift | (6 << 16), "dahi", "Si", }, - { kITypeMask | (0x1f << 16), 1 << kOpcodeShift | (30 << 16), "dati", "Si", }, - - { 0xffff0000, (4 << kOpcodeShift), "b", "B" }, - { 0xffff0000, (1 << kOpcodeShift) | (17 << 16), "bal", "B" }, - - { kITypeMask, 9 << kOpcodeShift, "addiu", "TSi", }, - { kITypeMask, 10 << kOpcodeShift, "slti", "TSi", }, - { kITypeMask, 11 << kOpcodeShift, "sltiu", "TSi", }, - { kITypeMask, 12 << kOpcodeShift, "andi", "TSi", }, - { kITypeMask, 13 << kOpcodeShift, "ori", "TSi", }, - { kITypeMask, 14 << kOpcodeShift, "xori", "TSi", }, - { kITypeMask | (0x1f << 21), 15 << kOpcodeShift, "lui", "TI", }, - { kITypeMask, 15 << kOpcodeShift, "aui", "TSI", }, - { kITypeMask, 25 << kOpcodeShift, "daddiu", "TSi", }, - { kITypeMask, 29 << kOpcodeShift, "daui", "TSi", }, - - { kITypeMask, 32u << kOpcodeShift, "lb", "TO", }, - { kITypeMask, 33u << kOpcodeShift, "lh", "TO", }, - { kITypeMask, 35u << kOpcodeShift, "lw", "TO", }, - { kITypeMask, 36u << kOpcodeShift, "lbu", "TO", }, - { kITypeMask, 37u << kOpcodeShift, "lhu", "TO", }, - { kITypeMask, 39u << kOpcodeShift, "lwu", "TO", }, - { kITypeMask, 40u << kOpcodeShift, "sb", "TO", }, - { kITypeMask, 41u << kOpcodeShift, "sh", "TO", }, - { kITypeMask, 43u << kOpcodeShift, "sw", "TO", }, - { kITypeMask, 49u << kOpcodeShift, "lwc1", "tO", }, - { kITypeMask, 53u << kOpcodeShift, "ldc1", "tO", }, - { kITypeMask, 55u << kOpcodeShift, "ld", "TO", }, - { kITypeMask, 57u << kOpcodeShift, "swc1", "tO", }, - { kITypeMask, 61u << kOpcodeShift, "sdc1", "tO", }, - { kITypeMask, 63u << kOpcodeShift, "sd", "TO", }, - - // Floating point. 
- { kFpMask | (0x1f << 21), kCop1 | (0x00 << 21), "mfc1", "Td" }, - { kFpMask | (0x1f << 21), kCop1 | (0x01 << 21), "dmfc1", "Td" }, - { kFpMask | (0x1f << 21), kCop1 | (0x04 << 21), "mtc1", "Td" }, - { kFpMask | (0x1f << 21), kCop1 | (0x05 << 21), "dmtc1", "Td" }, - { kFpMask | (0x10 << 21), kCop1 | (0x10 << 21) | 0, "add", "fadt" }, - { kFpMask | (0x10 << 21), kCop1 | (0x10 << 21) | 1, "sub", "fadt" }, - { kFpMask | (0x10 << 21), kCop1 | (0x10 << 21) | 2, "mul", "fadt" }, - { kFpMask | (0x10 << 21), kCop1 | (0x10 << 21) | 3, "div", "fadt" }, - { kFpMask | (0x10 << 21), kCop1 | (0x10 << 21) | 4, "sqrt", "fad" }, - { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 5, "abs", "fad" }, - { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 6, "mov", "fad" }, - { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 7, "neg", "fad" }, - { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 8, "round.l", "fad" }, - { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 9, "trunc.l", "fad" }, - { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 10, "ceil.l", "fad" }, - { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 11, "floor.l", "fad" }, - { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 12, "round.w", "fad" }, - { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 13, "trunc.w", "fad" }, - { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 14, "ceil.w", "fad" }, - { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 15, "floor.w", "fad" }, - { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 32, "cvt.s", "fad" }, - { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 33, "cvt.d", "fad" }, - { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 36, "cvt.w", "fad" }, - { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 37, "cvt.l", "fad" }, - { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 38, "cvt.ps", "fad" }, -}; - -static uint32_t ReadU32(const uint8_t* ptr) { - // We only support little-endian MIPS64. - return ptr[0] | (ptr[1] << 8) | (ptr[2] << 16) | (ptr[3] << 24); -} - -size_t DisassemblerMips64::Dump(std::ostream& os, const uint8_t* instr_ptr) { - uint32_t instruction = ReadU32(instr_ptr); - - uint32_t rs = (instruction >> 21) & 0x1f; // I-type, R-type. - uint32_t rt = (instruction >> 16) & 0x1f; // I-type, R-type. - uint32_t rd = (instruction >> 11) & 0x1f; // R-type. - uint32_t sa = (instruction >> 6) & 0x1f; // R-type. - - std::string opcode; - std::ostringstream args; - - // TODO: remove this! - uint32_t op = (instruction >> 26) & 0x3f; - uint32_t function = (instruction & 0x3f); // R-type. - opcode = StringPrintf("op=%d fn=%d", op, function); - - for (size_t i = 0; i < arraysize(gMips64Instructions); ++i) { - if (gMips64Instructions[i].Matches(instruction)) { - opcode = gMips64Instructions[i].name; - for (const char* args_fmt = gMips64Instructions[i].args_fmt; *args_fmt; ++args_fmt) { - switch (*args_fmt) { - case 'A': // sa (shift amount). - args << sa; - break; - case 'B': // Branch offset. - { - int32_t offset = static_cast<int16_t>(instruction & 0xffff); - offset <<= 2; - offset += 4; // Delay slot. - args << StringPrintf("%p ; %+d", instr_ptr + offset, offset); - } - break; - case 'D': args << 'r' << rd; break; - case 'd': args << 'f' << rd; break; - case 'a': args << 'f' << sa; break; - case 'f': // Floating point "fmt". - { - size_t fmt = (instruction >> 21) & 0x7; // TODO: other fmts? 
- switch (fmt) { - case 0: opcode += ".s"; break; - case 1: opcode += ".d"; break; - case 4: opcode += ".w"; break; - case 5: opcode += ".l"; break; - case 6: opcode += ".ps"; break; - default: opcode += ".?"; break; - } - continue; // No ", ". - } - case 'I': // Upper 16-bit immediate. - args << reinterpret_cast<void*>((instruction & 0xffff) << 16); - break; - case 'i': // Sign-extended lower 16-bit immediate. - args << static_cast<int16_t>(instruction & 0xffff); - break; - case 'L': // Jump label. - { - // TODO: is this right? - uint32_t instr_index = (instruction & 0x1ffffff); - uint32_t target = (instr_index << 2); - target |= (reinterpret_cast<uintptr_t>(instr_ptr + 4) - & 0xf0000000); - args << reinterpret_cast<void*>(target); - } - break; - case 'O': // +x(rs) - { - int32_t offset = static_cast<int16_t>(instruction & 0xffff); - args << StringPrintf("%+d(r%d)", offset, rs); - if (rs == 17) { - args << " ; "; - Thread::DumpThreadOffset<8>(args, offset); - } - } - break; - case 'S': args << 'r' << rs; break; - case 's': args << 'f' << rs; break; - case 'T': args << 'r' << rt; break; - case 't': args << 'f' << rt; break; - } - if (*(args_fmt + 1)) { - args << ", "; - } - } - break; - } - } - - os << FormatInstructionPointer(instr_ptr) - << StringPrintf(": %08x\t%-7s ", instruction, opcode.c_str()) - << args.str() << '\n'; - return 4; -} - -void DisassemblerMips64::Dump(std::ostream& os, const uint8_t* begin, - const uint8_t* end) { - for (const uint8_t* cur = begin; cur < end; cur += 4) { - Dump(os, cur); - } -} - -} // namespace mips64 -} // namespace art diff --git a/disassembler/disassembler_mips64.h b/disassembler/disassembler_mips64.h deleted file mode 100644 index 06efdc86fc..0000000000 --- a/disassembler/disassembler_mips64.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (C) 2014 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef ART_DISASSEMBLER_DISASSEMBLER_MIPS64_H_ -#define ART_DISASSEMBLER_DISASSEMBLER_MIPS64_H_ - -#include <vector> - -#include "disassembler.h" - -namespace art { -namespace mips64 { - -class DisassemblerMips64 FINAL : public Disassembler { - public: - explicit DisassemblerMips64(DisassemblerOptions* options) : Disassembler(options) {} - - size_t Dump(std::ostream& os, const uint8_t* begin) OVERRIDE; - void Dump(std::ostream& os, const uint8_t* begin, const uint8_t* end) OVERRIDE; - - private: - DISALLOW_COPY_AND_ASSIGN(DisassemblerMips64); -}; - -} // namespace mips64 -} // namespace art - -#endif // ART_DISASSEMBLER_DISASSEMBLER_MIPS64_H_ diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc index a1834e1e9a..ba0c0bdebd 100644 --- a/disassembler/disassembler_x86.cc +++ b/disassembler/disassembler_x86.cc @@ -942,7 +942,7 @@ DISASSEMBLER_ENTRY(cmp, opcode1 = "pextrw"; prefix[2] = 0; has_modrm = true; - store = true; + load = true; src_reg_file = SSE; immediate_bytes = 1; } else { diff --git a/runtime/indirect_reference_table.cc b/runtime/indirect_reference_table.cc index cd59365d0c..5012965eab 100644 --- a/runtime/indirect_reference_table.cc +++ b/runtime/indirect_reference_table.cc @@ -64,7 +64,8 @@ void IndirectReferenceTable::AbortIfNoCheckJNI() { } IndirectReferenceTable::IndirectReferenceTable(size_t initialCount, - size_t maxCount, IndirectRefKind desiredKind) + size_t maxCount, IndirectRefKind desiredKind, + bool abort_on_error) : kind_(desiredKind), max_entries_(maxCount) { CHECK_GT(initialCount, 0U); @@ -75,16 +76,28 @@ IndirectReferenceTable::IndirectReferenceTable(size_t initialCount, const size_t table_bytes = maxCount * sizeof(IrtEntry); table_mem_map_.reset(MemMap::MapAnonymous("indirect ref table", nullptr, table_bytes, PROT_READ | PROT_WRITE, false, false, &error_str)); - CHECK(table_mem_map_.get() != nullptr) << error_str; - CHECK_EQ(table_mem_map_->Size(), table_bytes); + if (abort_on_error) { + CHECK(table_mem_map_.get() != nullptr) << error_str; + CHECK_EQ(table_mem_map_->Size(), table_bytes); + CHECK(table_mem_map_->Begin() != nullptr); + } else if (table_mem_map_.get() == nullptr || + table_mem_map_->Size() != table_bytes || + table_mem_map_->Begin() == nullptr) { + table_mem_map_.reset(); + LOG(ERROR) << error_str; + return; + } table_ = reinterpret_cast<IrtEntry*>(table_mem_map_->Begin()); - CHECK(table_ != nullptr); segment_state_.all = IRT_FIRST_SEGMENT; } IndirectReferenceTable::~IndirectReferenceTable() { } +bool IndirectReferenceTable::IsValid() const { + return table_mem_map_.get() != nullptr; +} + IndirectRef IndirectReferenceTable::Add(uint32_t cookie, mirror::Object* obj) { IRTSegmentState prevState; prevState.all = cookie; diff --git a/runtime/indirect_reference_table.h b/runtime/indirect_reference_table.h index 25b0281767..0072184f62 100644 --- a/runtime/indirect_reference_table.h +++ b/runtime/indirect_reference_table.h @@ -258,10 +258,15 @@ bool inline operator!=(const IrtIterator& lhs, const IrtIterator& rhs) { class IndirectReferenceTable { public: - IndirectReferenceTable(size_t initialCount, size_t maxCount, IndirectRefKind kind); + // WARNING: When using with abort_on_error = false, the object may be in a partially + // initialized state. Use IsValid() to check. + IndirectReferenceTable(size_t initialCount, size_t maxCount, IndirectRefKind kind, + bool abort_on_error = true); ~IndirectReferenceTable(); + bool IsValid() const; + /* * Add a new entry. "obj" must be a valid non-NULL object reference. 
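 * (Illustrative sketch, not part of the patch - the intended use of the non-aborting
 * constructor above, with names taken from the hunks in this diff:
 *
 *   IndirectReferenceTable locals(kLocalsInitial, kLocalsMax, kLocal, false);
 *   if (!locals.IsValid()) {
 *     // MemMap allocation failed; the error was already logged. Report failure to
 *     // the caller instead of aborting - compare JNIEnvExt::Create below, which
 *     // returns nullptr so that Thread::Init can bail out cleanly.
 *   }
 * )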
* diff --git a/runtime/jni_env_ext.cc b/runtime/jni_env_ext.cc index b2d3835405..84fc404b46 100644 --- a/runtime/jni_env_ext.cc +++ b/runtime/jni_env_ext.cc @@ -28,11 +28,29 @@ static constexpr size_t kMonitorsMax = 4096; // Arbitrary sanity check. static constexpr size_t kLocalsInitial = 64; // Arbitrary. +// Checking "locals" requires the mutator lock, but at creation time we're really only interested +// in validity, which isn't changing. To avoid grabbing the mutator lock, this check is factored +// out and tagged with NO_THREAD_SAFETY_ANALYSIS. +static bool CheckLocalsValid(JNIEnvExt* in) NO_THREAD_SAFETY_ANALYSIS { + if (in == nullptr) { + return false; + } + return in->locals.IsValid(); +} + +JNIEnvExt* JNIEnvExt::Create(Thread* self_in, JavaVMExt* vm_in) { + std::unique_ptr<JNIEnvExt> ret(new JNIEnvExt(self_in, vm_in)); + if (CheckLocalsValid(ret.get())) { + return ret.release(); + } + return nullptr; +} + JNIEnvExt::JNIEnvExt(Thread* self_in, JavaVMExt* vm_in) : self(self_in), vm(vm_in), local_ref_cookie(IRT_FIRST_SEGMENT), - locals(kLocalsInitial, kLocalsMax, kLocal), + locals(kLocalsInitial, kLocalsMax, kLocal, false), check_jni(false), critical(0), monitors("monitors", kMonitorsInitial, kMonitorsMax) { diff --git a/runtime/jni_env_ext.h b/runtime/jni_env_ext.h index af87cb4226..29d912cb01 100644 --- a/runtime/jni_env_ext.h +++ b/runtime/jni_env_ext.h @@ -34,7 +34,8 @@ class JavaVMExt; static constexpr size_t kLocalsMax = 512; struct JNIEnvExt : public JNIEnv { - JNIEnvExt(Thread* self, JavaVMExt* vm); + static JNIEnvExt* Create(Thread* self, JavaVMExt* vm); + ~JNIEnvExt(); void DumpReferenceTables(std::ostream& os) @@ -87,6 +88,11 @@ struct JNIEnvExt : public JNIEnv { // Used by -Xcheck:jni. const JNINativeInterface* unchecked_functions; + + private: + // The constructor should not be called directly. It may leave the object in an erroneous state, + // and the result needs to be checked. + JNIEnvExt(Thread* self, JavaVMExt* vm); }; // Used to save and restore the JNIEnvExt state when not going through code created by the JNI diff --git a/runtime/leb128.h b/runtime/leb128.h index d36b690aa1..2e27b8ea3d 100644 --- a/runtime/leb128.h +++ b/runtime/leb128.h @@ -136,6 +136,19 @@ static inline void EncodeUnsignedLeb128(std::vector<uint8_t, Allocator>* dest, u dest->push_back(out); } +// Overwrite encoded Leb128 with a new value. The new value must be less than +// or equal to the old value to ensure that it fits the allocated space. +static inline void UpdateUnsignedLeb128(uint8_t* dest, uint32_t value) { + const uint8_t* old_end = dest; + uint32_t old_value = DecodeUnsignedLeb128(&old_end); + DCHECK_LE(value, old_value); + for (uint8_t* end = EncodeUnsignedLeb128(dest, value); end < old_end; end++) { + // Use longer encoding than necessary to fill the allocated space.
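// (Worked example, not in the patch: {0x80, 0x01} encodes 128 in two bytes. Updating it
// to the value 1 first writes {0x01}; this padding loop then rewrites the buffer to
// {0x81, 0x00}, which still decodes to 1 but keeps the original two-byte width.)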
+ end[-1] |= 0x80; + end[0] = 0; + } +} + static inline uint8_t* EncodeSignedLeb128(uint8_t* dest, int32_t value) { uint32_t extra_bits = static_cast<uint32_t>(value ^ (value >> 31)) >> 6; uint8_t out = value & 0x7f; diff --git a/runtime/leb128_test.cc b/runtime/leb128_test.cc index 5d157dc9e5..87e13ff693 100644 --- a/runtime/leb128_test.cc +++ b/runtime/leb128_test.cc @@ -252,6 +252,25 @@ TEST(Leb128Test, SignedStream) { EXPECT_EQ(data_size, static_cast<size_t>(encoded_data_ptr - encoded_data)); } +TEST(Leb128Test, UnsignedUpdate) { + for (size_t i = 0; i < arraysize(uleb128_tests); ++i) { + for (size_t j = 0; j < arraysize(uleb128_tests); ++j) { + uint32_t old_value = uleb128_tests[i].decoded; + uint32_t new_value = uleb128_tests[j].decoded; + // We can only make the encoded value smaller. + if (new_value <= old_value) { + uint8_t encoded_data[5]; + uint8_t* old_end = EncodeUnsignedLeb128(encoded_data, old_value); + UpdateUnsignedLeb128(encoded_data, new_value); + const uint8_t* new_end = encoded_data; + EXPECT_EQ(DecodeUnsignedLeb128(&new_end), new_value); + // Even if the new value needs fewer bytes, we should fill the space. + EXPECT_EQ(new_end, old_end); + } + } + } +} + TEST(Leb128Test, Speed) { std::unique_ptr<Histogram<uint64_t>> enc_hist(new Histogram<uint64_t>("Leb128EncodeSpeedTest", 5)); std::unique_ptr<Histogram<uint64_t>> dec_hist(new Histogram<uint64_t>("Leb128DecodeSpeedTest", 5)); diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc index c182a4d9ad..87ae64d1d4 100644 --- a/runtime/native/dalvik_system_DexFile.cc +++ b/runtime/native/dalvik_system_DexFile.cc @@ -297,22 +297,15 @@ static jobjectArray DexFile_getClassNameList(JNIEnv* env, jclass, jobject cookie return result; } -// Java: dalvik.system.DexFile.UP_TO_DATE -static const jbyte kUpToDate = 0; -// Java: dalvik.system.DexFile.DEXOPT_NEEDED -static const jbyte kPatchoatNeeded = 1; -// Java: dalvik.system.DexFile.PATCHOAT_NEEDED -static const jbyte kDexoptNeeded = 2; - -static jbyte IsDexOptNeededInternal(JNIEnv* env, const char* filename, +static jint GetDexOptNeeded(JNIEnv* env, const char* filename, const char* pkgname, const char* instruction_set, const jboolean defer) { if ((filename == nullptr) || !OS::FileExists(filename)) { - LOG(ERROR) << "DexFile_isDexOptNeeded file '" << filename << "' does not exist"; + LOG(ERROR) << "DexFile_getDexOptNeeded file '" << filename << "' does not exist"; ScopedLocalRef<jclass> fnfe(env, env->FindClass("java/io/FileNotFoundException")); const char* message = (filename == nullptr) ? "<empty file name>" : filename; env->ThrowNew(fnfe.get(), message); - return kUpToDate; + return OatFileAssistant::kNoDexOptNeeded; } const InstructionSet target_instruction_set = GetInstructionSetFromString(instruction_set); @@ -330,7 +323,7 @@ static jbyte IsDexOptNeededInternal(JNIEnv* env, const char* filename, // Always treat elements of the bootclasspath as up-to-date. 
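// (Presumably because boot classpath dex files are compiled ahead of time into the boot
// image, so there is nothing for dexopt to do for them - an inference from the
// surrounding code, not stated in the patch.)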
if (oat_file_assistant.IsInBootClassPath()) { - return kUpToDate; + return OatFileAssistant::kNoDexOptNeeded; } // TODO: Checking the profile should probably be done in the GetStatus() @@ -343,7 +336,7 @@ static jbyte IsDexOptNeededInternal(JNIEnv* env, const char* filename, if (!defer) { oat_file_assistant.CopyProfileFile(); } - return kDexoptNeeded; + return OatFileAssistant::kDex2OatNeeded; } else if (oat_file_assistant.ProfileExists() && !oat_file_assistant.OldProfileExists()) { if (!defer) { @@ -353,16 +346,10 @@ static jbyte IsDexOptNeededInternal(JNIEnv* env, const char* filename, } } - OatFileAssistant::Status status = oat_file_assistant.GetStatus(); - switch (status) { - case OatFileAssistant::kUpToDate: return kUpToDate; - case OatFileAssistant::kNeedsRelocation: return kPatchoatNeeded; - case OatFileAssistant::kOutOfDate: return kDexoptNeeded; - } - UNREACHABLE(); + return oat_file_assistant.GetDexOptNeeded(); } -static jbyte DexFile_isDexOptNeededInternal(JNIEnv* env, jclass, jstring javaFilename, +static jint DexFile_getDexOptNeeded(JNIEnv* env, jclass, jstring javaFilename, jstring javaPkgname, jstring javaInstructionSet, jboolean defer) { ScopedUtfChars filename(env, javaFilename); if (env->ExceptionCheck()) { @@ -376,25 +363,25 @@ static jbyte DexFile_isDexOptNeededInternal(JNIEnv* env, jclass, jstring javaFil return 0; } - return IsDexOptNeededInternal(env, filename.c_str(), pkgname.c_str(), - instruction_set.c_str(), defer); + return GetDexOptNeeded(env, filename.c_str(), pkgname.c_str(), + instruction_set.c_str(), defer); } // public API, NULL pkgname static jboolean DexFile_isDexOptNeeded(JNIEnv* env, jclass, jstring javaFilename) { const char* instruction_set = GetInstructionSetString(kRuntimeISA); ScopedUtfChars filename(env, javaFilename); - return kUpToDate != IsDexOptNeededInternal(env, filename.c_str(), nullptr /* pkgname */, - instruction_set, false /* defer */); + jint status = GetDexOptNeeded(env, filename.c_str(), nullptr /* pkgname */, + instruction_set, false /* defer */); + return (status != OatFileAssistant::kNoDexOptNeeded) ? JNI_TRUE : JNI_FALSE; } - static JNINativeMethod gMethods[] = { NATIVE_METHOD(DexFile, closeDexFile, "(Ljava/lang/Object;)V"), NATIVE_METHOD(DexFile, defineClassNative, "(Ljava/lang/String;Ljava/lang/ClassLoader;Ljava/lang/Object;)Ljava/lang/Class;"), NATIVE_METHOD(DexFile, getClassNameList, "(Ljava/lang/Object;)[Ljava/lang/String;"), NATIVE_METHOD(DexFile, isDexOptNeeded, "(Ljava/lang/String;)Z"), - NATIVE_METHOD(DexFile, isDexOptNeededInternal, "(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Z)B"), + NATIVE_METHOD(DexFile, getDexOptNeeded, "(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Z)I"), NATIVE_METHOD(DexFile, openDexFileNative, "(Ljava/lang/String;Ljava/lang/String;I)Ljava/lang/Object;"), }; diff --git a/runtime/oat_file_assistant.cc b/runtime/oat_file_assistant.cc index d92f59bde6..e5c27b2430 100644 --- a/runtime/oat_file_assistant.cc +++ b/runtime/oat_file_assistant.cc @@ -142,31 +142,31 @@ bool OatFileAssistant::Lock(std::string* error_msg) { return true; } -OatFileAssistant::Status OatFileAssistant::GetStatus() { +OatFileAssistant::DexOptNeeded OatFileAssistant::GetDexOptNeeded() { // TODO: If the profiling code is ever restored, it's worth considering // whether we should check to see if the profile is out of date here. - if (OdexFileIsOutOfDate()) { - // The DEX file is not pre-compiled. - // TODO: What if the oat file is not out of date? Could we relocate it - // from itself? 
- return OatFileIsUpToDate() ? kUpToDate : kOutOfDate; - } else { - // The DEX file is pre-compiled. If the oat file isn't up to date, we can - // patch the pre-compiled version rather than recompiling. - if (OatFileIsUpToDate() || OdexFileIsUpToDate()) { - return kUpToDate; - } else { - return kNeedsRelocation; - } + if (OatFileIsUpToDate() || OdexFileIsUpToDate()) { + return kNoDexOptNeeded; } + + if (OdexFileNeedsRelocation()) { + return kPatchOatNeeded; + } + + if (OatFileNeedsRelocation()) { + return kSelfPatchOatNeeded; + } + + return kDex2OatNeeded; } bool OatFileAssistant::MakeUpToDate(std::string* error_msg) { - switch (GetStatus()) { - case kUpToDate: return true; - case kNeedsRelocation: return RelocateOatFile(error_msg); - case kOutOfDate: return GenerateOatFile(error_msg); + switch (GetDexOptNeeded()) { + case kNoDexOptNeeded: return true; + case kDex2OatNeeded: return GenerateOatFile(error_msg); + case kPatchOatNeeded: return RelocateOatFile(OdexFileName(), error_msg); + case kSelfPatchOatNeeded: return RelocateOatFile(OatFileName(), error_msg); } UNREACHABLE(); } @@ -269,14 +269,14 @@ bool OatFileAssistant::OdexFileExists() { return GetOdexFile() != nullptr; } -OatFileAssistant::Status OatFileAssistant::OdexFileStatus() { +OatFileAssistant::OatStatus OatFileAssistant::OdexFileStatus() { if (OdexFileIsOutOfDate()) { - return kOutOfDate; + return kOatOutOfDate; } if (OdexFileIsUpToDate()) { - return kUpToDate; + return kOatUpToDate; } - return kNeedsRelocation; + return kOatNeedsRelocation; } bool OatFileAssistant::OdexFileIsOutOfDate() { @@ -293,7 +293,7 @@ bool OatFileAssistant::OdexFileIsOutOfDate() { } bool OatFileAssistant::OdexFileNeedsRelocation() { - return OdexFileStatus() == kNeedsRelocation; + return OdexFileStatus() == kOatNeedsRelocation; } bool OatFileAssistant::OdexFileIsUpToDate() { @@ -338,14 +338,14 @@ bool OatFileAssistant::OatFileExists() { return GetOatFile() != nullptr; } -OatFileAssistant::Status OatFileAssistant::OatFileStatus() { +OatFileAssistant::OatStatus OatFileAssistant::OatFileStatus() { if (OatFileIsOutOfDate()) { - return kOutOfDate; + return kOatOutOfDate; } if (OatFileIsUpToDate()) { - return kUpToDate; + return kOatUpToDate; } - return kNeedsRelocation; + return kOatNeedsRelocation; } bool OatFileAssistant::OatFileIsOutOfDate() { @@ -362,7 +362,7 @@ bool OatFileAssistant::OatFileIsOutOfDate() { } bool OatFileAssistant::OatFileNeedsRelocation() { - return OatFileStatus() == kNeedsRelocation; + return OatFileStatus() == kOatNeedsRelocation; } bool OatFileAssistant::OatFileIsUpToDate() { @@ -378,17 +378,17 @@ bool OatFileAssistant::OatFileIsUpToDate() { return cached_oat_file_is_up_to_date_; } -OatFileAssistant::Status OatFileAssistant::GivenOatFileStatus(const OatFile& file) { +OatFileAssistant::OatStatus OatFileAssistant::GivenOatFileStatus(const OatFile& file) { // TODO: This could cause GivenOatFileIsOutOfDate to be called twice, which // is more work than we need to do. If performance becomes a concern, and // this method is actually called, this should be fixed. 
if (GivenOatFileIsOutOfDate(file)) { - return kOutOfDate; + return kOatOutOfDate; } if (GivenOatFileIsUpToDate(file)) { - return kUpToDate; + return kOatUpToDate; } - return kNeedsRelocation; + return kOatNeedsRelocation; } bool OatFileAssistant::GivenOatFileIsOutOfDate(const OatFile& file) { @@ -451,7 +451,7 @@ bool OatFileAssistant::GivenOatFileIsOutOfDate(const OatFile& file) { } bool OatFileAssistant::GivenOatFileNeedsRelocation(const OatFile& file) { - return GivenOatFileStatus(file) == kNeedsRelocation; + return GivenOatFileStatus(file) == kOatNeedsRelocation; } bool OatFileAssistant::GivenOatFileIsUpToDate(const OatFile& file) { @@ -592,16 +592,17 @@ void OatFileAssistant::CopyProfileFile() { } } -bool OatFileAssistant::RelocateOatFile(std::string* error_msg) { +bool OatFileAssistant::RelocateOatFile(const std::string* input_file, + std::string* error_msg) { CHECK(error_msg != nullptr); - if (OdexFileName() == nullptr) { + if (input_file == nullptr) { *error_msg = "Patching of oat file for dex location " + std::string(dex_location_) - + " not attempted because the odex file name could not be determined."; + + " not attempted because the input file name could not be determined."; return false; } - const std::string& odex_file_name = *OdexFileName(); + const std::string& input_file_name = *input_file; if (OatFileName() == nullptr) { *error_msg = "Patching of oat file for dex location " @@ -628,7 +629,7 @@ bool OatFileAssistant::RelocateOatFile(std::string* error_msg) { std::vector<std::string> argv; argv.push_back(runtime->GetPatchoatExecutable()); argv.push_back("--instruction-set=" + std::string(GetInstructionSetString(isa_))); - argv.push_back("--input-oat-file=" + odex_file_name); + argv.push_back("--input-oat-file=" + input_file_name); argv.push_back("--output-oat-file=" + oat_file_name); argv.push_back("--patched-image-location=" + image_info->location); diff --git a/runtime/oat_file_assistant.h b/runtime/oat_file_assistant.h index f2abcf99d3..9e7c2efc45 100644 --- a/runtime/oat_file_assistant.h +++ b/runtime/oat_file_assistant.h @@ -43,20 +43,43 @@ namespace art { // be restored and tested, or removed. class OatFileAssistant { public: - enum Status { - // kOutOfDate - An oat file is said to be out of date if the file does not - // exist, or is out of date with respect to the dex file or boot image. - kOutOfDate, - - // kNeedsRelocation - An oat file is said to need relocation if the code - // is up to date, but not yet properly relocated for address space layout - // randomization (ASLR). In this case, the oat file is neither "out of - // date" nor "up to date". - kNeedsRelocation, - - // kUpToDate - An oat file is said to be up to date if it is not out of + enum DexOptNeeded { + // kNoDexOptNeeded - The code for this dex location is up to date and can + // be used as is. + // Matches Java: dalvik.system.DexFile.NO_DEXOPT_NEEDED = 0 + kNoDexOptNeeded = 0, + + // kDex2OatNeeded - In order to make the code for this dex location up to + // date, dex2oat must be run on the dex file. + // Matches Java: dalvik.system.DexFile.DEX2OAT_NEEDED = 1 + kDex2OatNeeded = 1, + + // kPatchOatNeeded - In order to make the code for this dex location up to + // date, patchoat must be run on the odex file. + // Matches Java: dalvik.system.DexFile.PATCHOAT_NEEDED = 2 + kPatchOatNeeded = 2, + + // kSelfPatchOatNeeded - In order to make the code for this dex location + // up to date, patchoat must be run on the oat file. 
+ // Matches Java: dalvik.system.DexFile.SELF_PATCHOAT_NEEDED = 3 + kSelfPatchOatNeeded = 3, + }; + + enum OatStatus { + // kOatOutOfDate - An oat file is said to be out of date if the file does + // not exist, or is out of date with respect to the dex file or boot + // image. + kOatOutOfDate, + + // kOatNeedsRelocation - An oat file is said to need relocation if the + // code is up to date, but not yet properly relocated for address space + // layout randomization (ASLR). In this case, the oat file is neither + // "out of date" nor "up to date". + kOatNeedsRelocation, + + // kOatUpToDate - An oat file is said to be up to date if it is not out of // date and has been properly relocated for the purposes of ASLR. - kUpToDate, + kOatUpToDate, }; // Constructs an OatFileAssistant object to assist the oat file @@ -67,7 +90,6 @@ class OatFileAssistant { // Typically the dex_location is the absolute path to the original, // un-optimized dex file. // - // // Note: Currently the dex_location must have an extension. // TODO: Relax this restriction? // @@ -121,8 +143,9 @@ class OatFileAssistant { // file. bool Lock(std::string* error_msg); - // Returns the overall compilation status for the given dex location. - Status GetStatus(); + // Return what action needs to be taken to produce up-to-date code for this + // dex location. + DexOptNeeded GetDexOptNeeded(); // Attempts to generate or relocate the oat file as needed to make it up to // date. @@ -164,7 +187,7 @@ class OatFileAssistant { // determined. const std::string* OdexFileName(); bool OdexFileExists(); - Status OdexFileStatus(); + OatStatus OdexFileStatus(); bool OdexFileIsOutOfDate(); bool OdexFileNeedsRelocation(); bool OdexFileIsUpToDate(); @@ -176,20 +199,18 @@ class OatFileAssistant { // the dex location. // // Notes: - // * To get the overall status of the compiled code for this dex_location, - // use the GetStatus() method, not the OatFileStatus() method. // * OatFileName may return null if the oat file name could not be // determined. const std::string* OatFileName(); bool OatFileExists(); - Status OatFileStatus(); + OatStatus OatFileStatus(); bool OatFileIsOutOfDate(); bool OatFileNeedsRelocation(); bool OatFileIsUpToDate(); // These methods return the status for a given opened oat file with respect // to the dex location. - Status GivenOatFileStatus(const OatFile& file); + OatStatus GivenOatFileStatus(const OatFile& file); bool GivenOatFileIsOutOfDate(const OatFile& file); bool GivenOatFileNeedsRelocation(const OatFile& file); bool GivenOatFileIsUpToDate(const OatFile& file); @@ -216,7 +237,7 @@ class OatFileAssistant { // Copy the current profile to the old profile location. void CopyProfileFile(); - // Generates the oat file by relocation from the odex file. + // Generates the oat file by relocation from the named input file. // This does not check the current status before attempting to relocate the // oat file. // Returns true on success. @@ -224,7 +245,7 @@ class OatFileAssistant { // // If there is a failure, the value of error_msg will be set to a string // describing why there was failure. error_msg must not be nullptr. - bool RelocateOatFile(std::string* error_msg); + bool RelocateOatFile(const std::string* input_file, std::string* error_msg); // Generate the oat file from the dex file. 
// This does not check the current status before attempting to generate the diff --git a/runtime/oat_file_assistant_test.cc b/runtime/oat_file_assistant_test.cc index a1988244c7..d2362a210e 100644 --- a/runtime/oat_file_assistant_test.cc +++ b/runtime/oat_file_assistant_test.cc @@ -29,7 +29,9 @@ #include "common_runtime_test.h" #include "compiler_callbacks.h" #include "mem_map.h" +#include "mirror/art_field-inl.h" #include "os.h" +#include "scoped_thread_state_change.h" #include "thread-inl.h" #include "utils.h" @@ -267,42 +269,42 @@ static void GenerateOatForTest(const char* dex_location) { } // Case: We have a DEX file, but no OAT file for it. -// Expect: The oat file status is kOutOfDate. +// Expect: The status is kDex2OatNeeded. TEST_F(OatFileAssistantTest, DexNoOat) { std::string dex_location = GetScratchDir() + "/DexNoOat.jar"; Copy(GetDexSrc1(), dex_location); OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); - EXPECT_EQ(OatFileAssistant::kOutOfDate, oat_file_assistant.GetStatus()); + EXPECT_EQ(OatFileAssistant::kDex2OatNeeded, oat_file_assistant.GetDexOptNeeded()); EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); EXPECT_FALSE(oat_file_assistant.OdexFileExists()); EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate()); EXPECT_FALSE(oat_file_assistant.OdexFileNeedsRelocation()); EXPECT_FALSE(oat_file_assistant.OdexFileIsUpToDate()); - EXPECT_EQ(OatFileAssistant::kOutOfDate, oat_file_assistant.OdexFileStatus()); + EXPECT_EQ(OatFileAssistant::kOatOutOfDate, oat_file_assistant.OdexFileStatus()); EXPECT_FALSE(oat_file_assistant.OatFileExists()); EXPECT_TRUE(oat_file_assistant.OatFileIsOutOfDate()); EXPECT_FALSE(oat_file_assistant.OatFileNeedsRelocation()); EXPECT_FALSE(oat_file_assistant.OatFileIsUpToDate()); - EXPECT_EQ(OatFileAssistant::kOutOfDate, oat_file_assistant.OatFileStatus()); + EXPECT_EQ(OatFileAssistant::kOatOutOfDate, oat_file_assistant.OatFileStatus()); } // Case: We have no DEX file and no OAT file. -// Expect: Status is out of date. Loading should fail, but not crash. +// Expect: Status is kDex2OatNeeded. Loading should fail, but not crash. TEST_F(OatFileAssistantTest, NoDexNoOat) { std::string dex_location = GetScratchDir() + "/NoDexNoOat.jar"; OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true); - EXPECT_EQ(OatFileAssistant::kOutOfDate, oat_file_assistant.GetStatus()); + EXPECT_EQ(OatFileAssistant::kDex2OatNeeded, oat_file_assistant.GetDexOptNeeded()); std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile(); EXPECT_EQ(nullptr, oat_file.get()); } // Case: We have a DEX file and up-to-date OAT file for it. -// Expect: The oat file status is kUpToDate. +// Expect: The status is kNoDexOptNeeded. 
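// (Reading aid, not part of the patch: the tests below exercise the new
// GetDexOptNeeded() ladder from runtime/oat_file_assistant.cc above, which checks in
// priority order:
//   either file up to date            -> kNoDexOptNeeded     (use as is)
//   odex present, needs relocation    -> kPatchOatNeeded     (patchoat the .odex)
//   oat present, needs relocation     -> kSelfPatchOatNeeded (patchoat the .oat in place)
//   anything else                     -> kDex2OatNeeded      (run dex2oat))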
TEST_F(OatFileAssistantTest, OatUpToDate) { std::string dex_location = GetScratchDir() + "/OatUpToDate.jar"; Copy(GetDexSrc1(), dex_location); @@ -310,7 +312,7 @@ TEST_F(OatFileAssistantTest, OatUpToDate) { OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); - EXPECT_EQ(OatFileAssistant::kUpToDate, oat_file_assistant.GetStatus()); + EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded()); EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); EXPECT_FALSE(oat_file_assistant.OdexFileExists()); EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate()); @@ -319,18 +321,20 @@ TEST_F(OatFileAssistantTest, OatUpToDate) { EXPECT_FALSE(oat_file_assistant.OatFileIsOutOfDate()); EXPECT_FALSE(oat_file_assistant.OatFileNeedsRelocation()); EXPECT_TRUE(oat_file_assistant.OatFileIsUpToDate()); - EXPECT_EQ(OatFileAssistant::kUpToDate, oat_file_assistant.OatFileStatus()); + EXPECT_EQ(OatFileAssistant::kOatUpToDate, oat_file_assistant.OatFileStatus()); } // Case: We have a MultiDEX file and up-to-date OAT file for it. -// Expect: The oat file status is kUpToDate. +// Expect: The status is kNoDexOptNeeded and we load all dex files. TEST_F(OatFileAssistantTest, MultiDexOatUpToDate) { std::string dex_location = GetScratchDir() + "/MultiDexOatUpToDate.jar"; Copy(GetMultiDexSrc1(), dex_location); GenerateOatForTest(dex_location.c_str()); - // Verify we can load both dex files. OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true); + EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded()); + + // Verify we can load both dex files. std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile(); ASSERT_TRUE(oat_file.get() != nullptr); EXPECT_TRUE(oat_file->IsExecutable()); @@ -341,7 +345,7 @@ TEST_F(OatFileAssistantTest, MultiDexOatUpToDate) { // Case: We have a MultiDEX file and up-to-date OAT file for it with relative // encoded dex locations. -// Expect: The oat file status is kUpToDate. +// Expect: The status is kNoDexOptNeeded. TEST_F(OatFileAssistantTest, RelativeEncodedDexLocation) { std::string dex_location = GetScratchDir() + "/RelativeEncodedDexLocation.jar"; std::string oat_location = GetOdexDir() + "/RelativeEncodedDexLocation.oat"; @@ -370,8 +374,8 @@ TEST_F(OatFileAssistantTest, RelativeEncodedDexLocation) { EXPECT_EQ(2u, dex_files.size()); } -// Case: We have a DEX file and out of date OAT file. -// Expect: The oat file status is kOutOfDate. +// Case: We have a DEX file and out-of-date OAT file. +// Expect: The status is kDex2OatNeeded. TEST_F(OatFileAssistantTest, OatOutOfDate) { std::string dex_location = GetScratchDir() + "/OatOutOfDate.jar"; @@ -382,7 +386,7 @@ TEST_F(OatFileAssistantTest, OatOutOfDate) { Copy(GetDexSrc2(), dex_location); OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); - EXPECT_EQ(OatFileAssistant::kOutOfDate, oat_file_assistant.GetStatus()); + EXPECT_EQ(OatFileAssistant::kDex2OatNeeded, oat_file_assistant.GetDexOptNeeded()); EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); EXPECT_FALSE(oat_file_assistant.OdexFileExists()); @@ -394,7 +398,7 @@ TEST_F(OatFileAssistantTest, OatOutOfDate) { } // Case: We have a DEX file and an ODEX file, but no OAT file. -// Expect: The oat file status is kNeedsRelocation. +// Expect: The status is kPatchOatNeeded.
TEST_F(OatFileAssistantTest, DexOdexNoOat) { std::string dex_location = GetScratchDir() + "/DexOdexNoOat.jar"; std::string odex_location = GetOdexDir() + "/DexOdexNoOat.odex"; @@ -406,21 +410,20 @@ TEST_F(OatFileAssistantTest, DexOdexNoOat) { // Verify the status. OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); - EXPECT_EQ(OatFileAssistant::kNeedsRelocation, oat_file_assistant.GetStatus()); + EXPECT_EQ(OatFileAssistant::kPatchOatNeeded, oat_file_assistant.GetDexOptNeeded()); EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); EXPECT_TRUE(oat_file_assistant.OdexFileExists()); EXPECT_FALSE(oat_file_assistant.OdexFileIsOutOfDate()); EXPECT_FALSE(oat_file_assistant.OdexFileIsUpToDate()); EXPECT_TRUE(oat_file_assistant.OdexFileNeedsRelocation()); - EXPECT_EQ(OatFileAssistant::kNeedsRelocation, oat_file_assistant.OdexFileNeedsRelocation()); EXPECT_FALSE(oat_file_assistant.OatFileExists()); EXPECT_TRUE(oat_file_assistant.OatFileIsOutOfDate()); EXPECT_FALSE(oat_file_assistant.OatFileIsUpToDate()); } // Case: We have a stripped DEX file and an ODEX file, but no OAT file. -// Expect: The oat file status is kNeedsRelocation. +// Expect: The status is kPatchOatNeeded. TEST_F(OatFileAssistantTest, StrippedDexOdexNoOat) { std::string dex_location = GetScratchDir() + "/StrippedDexOdexNoOat.jar"; std::string odex_location = GetOdexDir() + "/StrippedDexOdexNoOat.odex"; @@ -435,7 +438,7 @@ TEST_F(OatFileAssistantTest, StrippedDexOdexNoOat) { // Verify the status. OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true); - EXPECT_EQ(OatFileAssistant::kNeedsRelocation, oat_file_assistant.GetStatus()); + EXPECT_EQ(OatFileAssistant::kPatchOatNeeded, oat_file_assistant.GetDexOptNeeded()); EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); EXPECT_TRUE(oat_file_assistant.OdexFileExists()); @@ -449,7 +452,7 @@ TEST_F(OatFileAssistantTest, StrippedDexOdexNoOat) { std::string error_msg; ASSERT_TRUE(oat_file_assistant.MakeUpToDate(&error_msg)) << error_msg; - EXPECT_EQ(OatFileAssistant::kUpToDate, oat_file_assistant.GetStatus()); + EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded()); EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); EXPECT_TRUE(oat_file_assistant.OdexFileExists()); @@ -468,8 +471,8 @@ TEST_F(OatFileAssistantTest, StrippedDexOdexNoOat) { EXPECT_EQ(1u, dex_files.size()); } -// Case: We have a stripped DEX file, an ODEX file, and an out of date OAT file. -// Expect: The oat file status is kNeedsRelocation. +// Case: We have a stripped DEX file, an ODEX file, and an out-of-date OAT file. +// Expect: The status is kPatchOatNeeded. TEST_F(OatFileAssistantTest, StrippedDexOdexOat) { std::string dex_location = GetScratchDir() + "/StrippedDexOdexOat.jar"; std::string odex_location = GetOdexDir() + "/StrippedDexOdexOat.odex"; @@ -488,7 +491,7 @@ TEST_F(OatFileAssistantTest, StrippedDexOdexOat) { // Verify the status.
OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true); - EXPECT_EQ(OatFileAssistant::kNeedsRelocation, oat_file_assistant.GetStatus()); + EXPECT_EQ(OatFileAssistant::kPatchOatNeeded, oat_file_assistant.GetDexOptNeeded()); EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); EXPECT_TRUE(oat_file_assistant.OdexFileExists()); @@ -503,7 +506,7 @@ TEST_F(OatFileAssistantTest, StrippedDexOdexOat) { std::string error_msg; ASSERT_TRUE(oat_file_assistant.MakeUpToDate(&error_msg)) << error_msg; - EXPECT_EQ(OatFileAssistant::kUpToDate, oat_file_assistant.GetStatus()); + EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded()); EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); EXPECT_TRUE(oat_file_assistant.OdexFileExists()); @@ -524,9 +527,59 @@ TEST_F(OatFileAssistantTest, StrippedDexOdexOat) { EXPECT_EQ(1u, dex_files.size()); } +// Case: We have a DEX file, no ODEX file and an OAT file that needs +// relocation. +// Expect: The status is kSelfPatchOatNeeded. +TEST_F(OatFileAssistantTest, SelfRelocation) { + std::string dex_location = GetScratchDir() + "/SelfRelocation.jar"; + std::string oat_location = GetOdexDir() + "/SelfRelocation.oat"; + + // Create the dex and odex files + Copy(GetDexSrc1(), dex_location); + GenerateOdexForTest(dex_location, oat_location); + + OatFileAssistant oat_file_assistant(dex_location.c_str(), + oat_location.c_str(), kRuntimeISA, true); + + EXPECT_EQ(OatFileAssistant::kSelfPatchOatNeeded, oat_file_assistant.GetDexOptNeeded()); + + EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); + EXPECT_FALSE(oat_file_assistant.OdexFileExists()); + EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate()); + EXPECT_FALSE(oat_file_assistant.OdexFileNeedsRelocation()); + EXPECT_FALSE(oat_file_assistant.OdexFileIsUpToDate()); + EXPECT_TRUE(oat_file_assistant.OatFileExists()); + EXPECT_TRUE(oat_file_assistant.OatFileNeedsRelocation()); + EXPECT_FALSE(oat_file_assistant.OatFileIsOutOfDate()); + EXPECT_FALSE(oat_file_assistant.OatFileIsUpToDate()); + + // Make the oat file up to date. + std::string error_msg; + ASSERT_TRUE(oat_file_assistant.MakeUpToDate(&error_msg)) << error_msg; + + EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded()); + + EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); + EXPECT_FALSE(oat_file_assistant.OdexFileExists()); + EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate()); + EXPECT_FALSE(oat_file_assistant.OdexFileNeedsRelocation()); + EXPECT_FALSE(oat_file_assistant.OdexFileIsUpToDate()); + EXPECT_TRUE(oat_file_assistant.OatFileExists()); + EXPECT_FALSE(oat_file_assistant.OatFileIsOutOfDate()); + EXPECT_FALSE(oat_file_assistant.OatFileNeedsRelocation()); + EXPECT_TRUE(oat_file_assistant.OatFileIsUpToDate()); + + std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile(); + ASSERT_TRUE(oat_file.get() != nullptr); + EXPECT_TRUE(oat_file->IsExecutable()); + std::vector<std::unique_ptr<const DexFile>> dex_files; + dex_files = oat_file_assistant.LoadDexFiles(*oat_file, dex_location.c_str()); + EXPECT_EQ(1u, dex_files.size()); +} + // Case: We have a DEX file, an ODEX file and an OAT file, where the ODEX and // OAT files both have patch delta of 0. -// Expect: It shouldn't crash. +// Expect: It shouldn't crash, and status is kPatchOatNeeded. 
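// (Reading note: GetDexOptNeeded() tests OdexFileNeedsRelocation() before
// OatFileNeedsRelocation(), so when both files merely need relocation the odex is
// preferred as the patchoat input - hence kPatchOatNeeded rather than
// kSelfPatchOatNeeded in the case below.)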
TEST_F(OatFileAssistantTest, OdexOatOverlap) { std::string dex_location = GetScratchDir() + "/OdexOatOverlap.jar"; std::string odex_location = GetOdexDir() + "/OdexOatOverlap.odex"; @@ -544,7 +597,7 @@ TEST_F(OatFileAssistantTest, OdexOatOverlap) { OatFileAssistant oat_file_assistant(dex_location.c_str(), oat_location.c_str(), kRuntimeISA, true); - EXPECT_EQ(OatFileAssistant::kNeedsRelocation, oat_file_assistant.GetStatus()); + EXPECT_EQ(OatFileAssistant::kPatchOatNeeded, oat_file_assistant.GetDexOptNeeded()); EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); EXPECT_TRUE(oat_file_assistant.OdexFileExists()); @@ -564,7 +617,7 @@ TEST_F(OatFileAssistantTest, OdexOatOverlap) { } // Case: We have a DEX file and a PIC ODEX file, but no OAT file. -// Expect: The oat file status is kUpToDate, because PIC needs no relocation. +// Expect: The status is kNoDexOptNeeded, because PIC needs no relocation. TEST_F(OatFileAssistantTest, DexPicOdexNoOat) { std::string dex_location = GetScratchDir() + "/DexPicOdexNoOat.jar"; std::string odex_location = GetOdexDir() + "/DexPicOdexNoOat.odex"; @@ -576,7 +629,7 @@ TEST_F(OatFileAssistantTest, DexPicOdexNoOat) { // Verify the status. OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); - EXPECT_EQ(OatFileAssistant::kUpToDate, oat_file_assistant.GetStatus()); + EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded()); EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); EXPECT_TRUE(oat_file_assistant.OdexFileExists()); @@ -661,7 +714,7 @@ TEST_F(OatFileAssistantTest, NonExsistentDexLocation) { OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true); EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); - EXPECT_EQ(OatFileAssistant::kOutOfDate, oat_file_assistant.GetStatus()); + EXPECT_EQ(OatFileAssistant::kDex2OatNeeded, oat_file_assistant.GetDexOptNeeded()); EXPECT_FALSE(oat_file_assistant.OdexFileExists()); EXPECT_FALSE(oat_file_assistant.OatFileExists()); EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate()); @@ -720,7 +773,7 @@ TEST_F(OatFileAssistantTest, NonAbsoluteDexLocation) { OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true); EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); - EXPECT_EQ(OatFileAssistant::kOutOfDate, oat_file_assistant.GetStatus()); + EXPECT_EQ(OatFileAssistant::kDex2OatNeeded, oat_file_assistant.GetDexOptNeeded()); EXPECT_FALSE(oat_file_assistant.OdexFileExists()); EXPECT_FALSE(oat_file_assistant.OatFileExists()); EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate()); @@ -737,7 +790,7 @@ TEST_F(OatFileAssistantTest, ShortDexLocation) { OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true); EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); - EXPECT_EQ(OatFileAssistant::kOutOfDate, oat_file_assistant.GetStatus()); + EXPECT_EQ(OatFileAssistant::kDex2OatNeeded, oat_file_assistant.GetDexOptNeeded()); EXPECT_FALSE(oat_file_assistant.OdexFileExists()); EXPECT_FALSE(oat_file_assistant.OatFileExists()); EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate()); @@ -751,14 +804,14 @@ TEST_F(OatFileAssistantTest, ShortDexLocation) { } // Case: Non-standard extension for dex file. -// Expect: The oat file status is kOutOfDate. +// Expect: The status is kDex2OatNeeded. 
TEST_F(OatFileAssistantTest, LongDexExtension) { std::string dex_location = GetScratchDir() + "/LongDexExtension.jarx"; Copy(GetDexSrc1(), dex_location); OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); - EXPECT_EQ(OatFileAssistant::kOutOfDate, oat_file_assistant.GetStatus()); + EXPECT_EQ(OatFileAssistant::kDex2OatNeeded, oat_file_assistant.GetDexOptNeeded()); EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); EXPECT_FALSE(oat_file_assistant.OdexFileExists()); @@ -895,6 +948,41 @@ TEST(OatFileAssistantUtilsTest, DexFilenameToOdexFilename) { "/foo/bar/baz_noext", kArm, &odex_file, &error_msg)); } +// Verify the dexopt status values from dalvik.system.DexFile +// match the OatFileAssistant::DexOptNeeded values. +TEST_F(OatFileAssistantTest, DexOptStatusValues) { + ScopedObjectAccess soa(Thread::Current()); + StackHandleScope<1> hs(soa.Self()); + ClassLinker* linker = Runtime::Current()->GetClassLinker(); + Handle<mirror::Class> dexfile( + hs.NewHandle(linker->FindSystemClass(soa.Self(), "Ldalvik/system/DexFile;"))); + ASSERT_FALSE(dexfile.Get() == nullptr); + linker->EnsureInitialized(soa.Self(), dexfile, true, true); + + mirror::ArtField* no_dexopt_needed = mirror::Class::FindStaticField( + soa.Self(), dexfile, "NO_DEXOPT_NEEDED", "I"); + ASSERT_FALSE(no_dexopt_needed == nullptr); + EXPECT_EQ(no_dexopt_needed->GetTypeAsPrimitiveType(), Primitive::kPrimInt); + EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, no_dexopt_needed->GetInt(dexfile.Get())); + + mirror::ArtField* dex2oat_needed = mirror::Class::FindStaticField( + soa.Self(), dexfile, "DEX2OAT_NEEDED", "I"); + ASSERT_FALSE(dex2oat_needed == nullptr); + EXPECT_EQ(dex2oat_needed->GetTypeAsPrimitiveType(), Primitive::kPrimInt); + EXPECT_EQ(OatFileAssistant::kDex2OatNeeded, dex2oat_needed->GetInt(dexfile.Get())); + + mirror::ArtField* patchoat_needed = mirror::Class::FindStaticField( + soa.Self(), dexfile, "PATCHOAT_NEEDED", "I"); + ASSERT_FALSE(patchoat_needed == nullptr); + EXPECT_EQ(patchoat_needed->GetTypeAsPrimitiveType(), Primitive::kPrimInt); + EXPECT_EQ(OatFileAssistant::kPatchOatNeeded, patchoat_needed->GetInt(dexfile.Get())); + + mirror::ArtField* self_patchoat_needed = mirror::Class::FindStaticField( + soa.Self(), dexfile, "SELF_PATCHOAT_NEEDED", "I"); + ASSERT_FALSE(self_patchoat_needed == nullptr); + EXPECT_EQ(self_patchoat_needed->GetTypeAsPrimitiveType(), Primitive::kPrimInt); + EXPECT_EQ(OatFileAssistant::kSelfPatchOatNeeded, self_patchoat_needed->GetInt(dexfile.Get())); +} // TODO: More Tests: // * Test class linker falls back to unquickened dex for DexNoOat diff --git a/runtime/thread.cc b/runtime/thread.cc index d1b0464906..af11f73d89 100644 --- a/runtime/thread.cc +++ b/runtime/thread.cc @@ -377,7 +377,11 @@ bool Thread::Init(ThreadList* thread_list, JavaVMExt* java_vm) { tls32_.thin_lock_thread_id = thread_list->AllocThreadId(this); - tlsPtr_.jni_env = new JNIEnvExt(this, java_vm); + tlsPtr_.jni_env = JNIEnvExt::Create(this, java_vm); + if (tlsPtr_.jni_env == nullptr) { + return false; + } + thread_list->Register(this); return true; } @@ -2304,8 +2308,8 @@ void Thread::VisitRoots(RootVisitor* visitor) { mapper.VisitShadowFrame(shadow_frame); } } - if (tlsPtr_.method_verifier != nullptr) { - tlsPtr_.method_verifier->VisitRoots(visitor, RootInfo(kRootNativeStack, thread_id)); + for (auto* verifier = tlsPtr_.method_verifier; verifier != nullptr; verifier = verifier->link_) { + verifier->VisitRoots(visitor, RootInfo(kRootNativeStack, thread_id)); } // Visit roots on this thread's stack
Context* context = GetLongJumpContext(); @@ -2429,14 +2433,14 @@ void Thread::ClearDebugInvokeReq() { tlsPtr_.debug_invoke_req = nullptr; } -void Thread::SetVerifier(verifier::MethodVerifier* verifier) { - CHECK(tlsPtr_.method_verifier == nullptr); +void Thread::PushVerifier(verifier::MethodVerifier* verifier) { + verifier->link_ = tlsPtr_.method_verifier; tlsPtr_.method_verifier = verifier; } -void Thread::ClearVerifier(verifier::MethodVerifier* verifier) { +void Thread::PopVerifier(verifier::MethodVerifier* verifier) { CHECK_EQ(tlsPtr_.method_verifier, verifier); - tlsPtr_.method_verifier = nullptr; + tlsPtr_.method_verifier = verifier->link_; } } // namespace art diff --git a/runtime/thread.h b/runtime/thread.h index f89e46bec7..b095e22163 100644 --- a/runtime/thread.h +++ b/runtime/thread.h @@ -895,8 +895,8 @@ class Thread { return tls32_.suspended_at_suspend_check; } - void SetVerifier(verifier::MethodVerifier* verifier); - void ClearVerifier(verifier::MethodVerifier* verifier); + void PushVerifier(verifier::MethodVerifier* verifier); + void PopVerifier(verifier::MethodVerifier* verifier); private: explicit Thread(bool daemon); diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc index d0f84686a5..9fc2658b19 100644 --- a/runtime/verifier/method_verifier.cc +++ b/runtime/verifier/method_verifier.cc @@ -395,12 +395,12 @@ MethodVerifier::MethodVerifier(Thread* self, has_virtual_or_interface_invokes_(false), verify_to_dump_(verify_to_dump), allow_thread_suspension_(allow_thread_suspension) { - self->SetVerifier(this); + self->PushVerifier(this); DCHECK(class_def != nullptr); } MethodVerifier::~MethodVerifier() { - Thread::Current()->ClearVerifier(this); + Thread::Current()->PopVerifier(this); STLDeleteElements(&failure_messages_); } diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h index c813634c6e..8c0321ee4a 100644 --- a/runtime/verifier/method_verifier.h +++ b/runtime/verifier/method_verifier.h @@ -31,6 +31,7 @@ namespace art { class Instruction; struct ReferenceMap2Visitor; +class Thread; namespace verifier { @@ -738,6 +739,10 @@ class MethodVerifier { // FindLocksAtDexPC, resulting in deadlocks. const bool allow_thread_suspension_; + // Link, for the method verifier root linked list. 
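// (Illustrative sketch, not part of the patch - how this intrusive list is used by the
// thread.cc hunk above: PushVerifier stores the old head in verifier->link_ and installs
// the verifier as the new head; PopVerifier CHECKs that the verifier is still the head
// and restores the old one. Thread::VisitRoots can then reach every nested verifier:
//
//   for (auto* v = tlsPtr_.method_verifier; v != nullptr; v = v->link_) {
//     v->VisitRoots(visitor, RootInfo(kRootNativeStack, thread_id));
//   }
// )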
+ MethodVerifier* link_; + + friend class art::Thread; DISALLOW_COPY_AND_ASSIGN(MethodVerifier); }; std::ostream& operator<<(std::ostream& os, const MethodVerifier::FailureKind& rhs); diff --git a/test/107-int-math2/src/Main.java b/test/107-int-math2/src/Main.java index f0fe934ae9..6a6227cee5 100644 --- a/test/107-int-math2/src/Main.java +++ b/test/107-int-math2/src/Main.java @@ -379,7 +379,7 @@ class Main extends IntMathBase { */ static int lit16Test(int x) { - int[] results = new int[8]; + int[] results = new int[10]; /* try to generate op-int/lit16" instructions */ results[0] = x + 1000; @@ -390,6 +390,9 @@ results[5] = x & 1000; results[6] = x | -1000; results[7] = x ^ -1000; + /* use a 16-bit constant that has its MSB (bit-15) set */ + results[8] = x / 32769; + results[9] = x / -32769; if (results[0] != 78777) { return 1; } if (results[1] != -76777) { return 2; } @@ -399,6 +402,8 @@ if (results[5] != 960) { return 6; } if (results[6] != -39) { return 7; } if (results[7] != -76855) { return 8; } + if (results[8] != 2) { return 9; } + if (results[9] != -2) { return 10; } return 0; } diff --git a/test/407-arrays/src/Main.java b/test/407-arrays/src/Main.java index d5c5604541..4b833bebdc 100644 --- a/test/407-arrays/src/Main.java +++ b/test/407-arrays/src/Main.java @@ -19,9 +19,9 @@ public class Main extends TestCase { public static void main(String[] args) { $opt$testReads(new boolean[1], new byte[1], new char[1], new short[1], - new int[1], new Object[1], new long[1], 0); + new int[1], new Object[1], new long[1], new float[1], new double[1], 0); $opt$testWrites(new boolean[2], new byte[2], new char[2], new short[2], - new int[2], new Object[2], new long[2], 1); + new int[2], new Object[2], new long[2], new float[2], new double[2], 1); ensureThrows(new boolean[2], 2); ensureThrows(new boolean[2], 4); ensureThrows(new boolean[2], -1); @@ -30,7 +30,8 @@ public class Main extends TestCase { } static void $opt$testReads(boolean[] bools, byte[] bytes, char[] chars, short[] shorts, - int[] ints, Object[] objects, long[] longs, int index) { + int[] ints, Object[] objects, long[] longs, float[] floats, + double[] doubles, int index) { assertEquals(false, bools[0]); assertEquals(false, bools[index]); @@ -51,10 +52,17 @@ public class Main extends TestCase { assertEquals(0, longs[0]); assertEquals(0, longs[index]); + + assertEquals(0, floats[0]); + assertEquals(0, floats[index]); + + assertEquals(0, doubles[0]); + assertEquals(0, doubles[index]); } static void $opt$testWrites(boolean[] bools, byte[] bytes, char[] chars, short[] shorts, - int[] ints, Object[] objects, long[] longs, int index) { + int[] ints, Object[] objects, long[] longs, float[] floats, + double[] doubles, int index) { bools[0] = true; assertEquals(true, bools[0]); bools[index] = true; @@ -99,6 +107,18 @@ public class Main extends TestCase { // on 32bits. So we call out a long helper to ensure this method gets // optimized.
$opt$testLongWrites(longs, index); + + float f = 1.0F; + floats[0] = f / (1 | 0); + assertEquals(f, floats[0]); + floats[index] = f / (1 | 0); + assertEquals(f, floats[index]); + + double d = 1.0F; + doubles[0] = d / (1 | 0); + assertEquals(d, doubles[0]); + doubles[index] = d / (1 | 0); + assertEquals(d, doubles[index]); } public static void $opt$testLongWrites(long[] longs, int index) { diff --git a/test/458-checker-instruction-simplification/src/Main.java b/test/458-checker-instruction-simplification/src/Main.java index ef6428ddd4..3cbcebbcb6 100644 --- a/test/458-checker-instruction-simplification/src/Main.java +++ b/test/458-checker-instruction-simplification/src/Main.java @@ -40,8 +40,10 @@ public class Main { // CHECK-START: long Main.Add0(long) instruction_simplifier (after) // CHECK-DAG: [[Arg:j\d+]] ParameterValue - // CHECK-NOT: Add // CHECK-DAG: Return [ [[Arg]] ] + // + // CHECK-START: long Main.Add0(long) instruction_simplifier (after) + // CHECK-NOT: Add public static long Add0(long arg) { return 0 + arg; @@ -55,9 +57,11 @@ public class Main { // CHECK-START: int Main.AndAllOnes(int) instruction_simplifier (after) // CHECK-DAG: [[Arg:i\d+]] ParameterValue - // CHECK-NOT: And // CHECK-DAG: Return [ [[Arg]] ] + // CHECK-START: int Main.AndAllOnes(int) instruction_simplifier (after) + // CHECK-NOT: And + public static int AndAllOnes(int arg) { return arg & -1; } @@ -70,9 +74,11 @@ public class Main { // CHECK-START: long Main.Div1(long) instruction_simplifier (after) // CHECK-DAG: [[Arg:j\d+]] ParameterValue - // CHECK-NOT: Div // CHECK-DAG: Return [ [[Arg]] ] + // CHECK-START: long Main.Div1(long) instruction_simplifier (after) + // CHECK-NOT: Div + public static long Div1(long arg) { return arg / 1; } @@ -86,9 +92,11 @@ public class Main { // CHECK-START: int Main.DivN1(int) instruction_simplifier (after) // CHECK-DAG: [[Arg:i\d+]] ParameterValue // CHECK-DAG: [[Neg:i\d+]] Neg [ [[Arg]] ] - // CHECK-NOT: Div // CHECK-DAG: Return [ [[Neg]] ] + // CHECK-START: int Main.DivN1(int) instruction_simplifier (after) + // CHECK-NOT: Div + public static int DivN1(int arg) { return arg / -1; } @@ -101,9 +109,11 @@ public class Main { // CHECK-START: long Main.Mul1(long) instruction_simplifier (after) // CHECK-DAG: [[Arg:j\d+]] ParameterValue - // CHECK-NOT: Mul // CHECK-DAG: Return [ [[Arg]] ] + // CHECK-START: long Main.Mul1(long) instruction_simplifier (after) + // CHECK-NOT: Mul + public static long Mul1(long arg) { return arg * 1; } @@ -117,9 +127,11 @@ public class Main { // CHECK-START: int Main.MulN1(int) instruction_simplifier (after) // CHECK-DAG: [[Arg:i\d+]] ParameterValue // CHECK-DAG: [[Neg:i\d+]] Neg [ [[Arg]] ] - // CHECK-NOT: Mul // CHECK-DAG: Return [ [[Neg]] ] + // CHECK-START: int Main.MulN1(int) instruction_simplifier (after) + // CHECK-NOT: Mul + public static int MulN1(int arg) { return arg * -1; } @@ -134,9 +146,11 @@ public class Main { // CHECK-DAG: [[Arg:j\d+]] ParameterValue // CHECK-DAG: [[Const7:i\d+]] IntConstant 7 // CHECK-DAG: [[Shl:j\d+]] Shl [ [[Arg]] [[Const7]] ] - // CHECK-NOT: Mul // CHECK-DAG: Return [ [[Shl]] ] + // CHECK-START: long Main.MulPowerOfTwo128(long) instruction_simplifier (after) + // CHECK-NOT: Mul + public static long MulPowerOfTwo128(long arg) { return arg * 128; } @@ -149,9 +163,11 @@ public class Main { // CHECK-START: int Main.Or0(int) instruction_simplifier (after) // CHECK-DAG: [[Arg:i\d+]] ParameterValue - // CHECK-NOT: Or // CHECK-DAG: Return [ [[Arg]] ] + // CHECK-START: int Main.Or0(int) instruction_simplifier (after) + // 
CHECK-NOT: Or + public static int Or0(int arg) { return arg | 0; } @@ -163,9 +179,11 @@ public class Main { // CHECK-START: long Main.OrSame(long) instruction_simplifier (after) // CHECK-DAG: [[Arg:j\d+]] ParameterValue - // CHECK-NOT: Or // CHECK-DAG: Return [ [[Arg]] ] + // CHECK-START: long Main.OrSame(long) instruction_simplifier (after) + // CHECK-NOT: Or + public static long OrSame(long arg) { return arg | arg; } @@ -178,9 +196,11 @@ public class Main { // CHECK-START: int Main.Shl0(int) instruction_simplifier (after) // CHECK-DAG: [[Arg:i\d+]] ParameterValue - // CHECK-NOT: Shl // CHECK-DAG: Return [ [[Arg]] ] + // CHECK-START: int Main.Shl0(int) instruction_simplifier (after) + // CHECK-NOT: Shl + public static int Shl0(int arg) { return arg << 0; } @@ -193,9 +213,11 @@ public class Main { // CHECK-START: long Main.Shr0(long) instruction_simplifier (after) // CHECK-DAG: [[Arg:j\d+]] ParameterValue - // CHECK-NOT: Shr // CHECK-DAG: Return [ [[Arg]] ] + // CHECK-START: long Main.Shr0(long) instruction_simplifier (after) + // CHECK-NOT: Shr + public static long Shr0(long arg) { return arg >> 0; } @@ -208,9 +230,11 @@ public class Main { // CHECK-START: long Main.Sub0(long) instruction_simplifier (after) // CHECK-DAG: [[Arg:j\d+]] ParameterValue - // CHECK-NOT: Sub // CHECK-DAG: Return [ [[Arg]] ] + // CHECK-START: long Main.Sub0(long) instruction_simplifier (after) + // CHECK-NOT: Sub + public static long Sub0(long arg) { return arg - 0; } @@ -224,9 +248,11 @@ public class Main { // CHECK-START: int Main.SubAliasNeg(int) instruction_simplifier (after) // CHECK-DAG: [[Arg:i\d+]] ParameterValue // CHECK-DAG: [[Neg:i\d+]] Neg [ [[Arg]] ] - // CHECK-NOT: Sub // CHECK-DAG: Return [ [[Neg]] ] + // CHECK-START: int Main.SubAliasNeg(int) instruction_simplifier (after) + // CHECK-NOT: Sub + public static int SubAliasNeg(int arg) { return 0 - arg; } @@ -239,9 +265,11 @@ public class Main { // CHECK-START: long Main.UShr0(long) instruction_simplifier (after) // CHECK-DAG: [[Arg:j\d+]] ParameterValue - // CHECK-NOT: UShr // CHECK-DAG: Return [ [[Arg]] ] + // CHECK-START: long Main.UShr0(long) instruction_simplifier (after) + // CHECK-NOT: UShr + public static long UShr0(long arg) { return arg >>> 0; } @@ -254,9 +282,11 @@ public class Main { // CHECK-START: int Main.Xor0(int) instruction_simplifier (after) // CHECK-DAG: [[Arg:i\d+]] ParameterValue - // CHECK-NOT: Xor // CHECK-DAG: Return [ [[Arg]] ] + // CHECK-START: int Main.Xor0(int) instruction_simplifier (after) + // CHECK-NOT: Xor + public static int Xor0(int arg) { return arg ^ 0; } @@ -270,13 +300,466 @@ public class Main { // CHECK-START: int Main.XorAllOnes(int) instruction_simplifier (after) // CHECK-DAG: [[Arg:i\d+]] ParameterValue // CHECK-DAG: [[Not:i\d+]] Not [ [[Arg]] ] - // CHECK-NOT: Xor // CHECK-DAG: Return [ [[Not]] ] + // CHECK-START: int Main.XorAllOnes(int) instruction_simplifier (after) + // CHECK-NOT: Xor + public static int XorAllOnes(int arg) { return arg ^ -1; } + /** + * Test that addition or subtraction operations with both inputs negated are + * optimized to use a single negation after the operation. + * The transformation tested is implemented in + * `InstructionSimplifierVisitor::TryMoveNegOnInputsAfterBinop`.
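 * (Illustration, not in the patch: -arg1 + -arg2 is rewritten to -(arg1 + arg2),
 * replacing two Neg instructions with a single Neg after the Add, exactly as the
 * CHECK-DAG lines below verify.)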
+ */ + + // CHECK-START: int Main.AddNegs1(int, int) instruction_simplifier (before) + // CHECK-DAG: [[Arg1:i\d+]] ParameterValue + // CHECK-DAG: [[Arg2:i\d+]] ParameterValue + // CHECK-DAG: [[Neg1:i\d+]] Neg [ [[Arg1]] ] + // CHECK-DAG: [[Neg2:i\d+]] Neg [ [[Arg2]] ] + // CHECK-DAG: [[Add:i\d+]] Add [ [[Neg1]] [[Neg2]] ] + // CHECK-DAG: Return [ [[Add]] ] + + // CHECK-START: int Main.AddNegs1(int, int) instruction_simplifier (after) + // CHECK-DAG: [[Arg1:i\d+]] ParameterValue + // CHECK-DAG: [[Arg2:i\d+]] ParameterValue + // CHECK-NOT: Neg + // CHECK-DAG: [[Add:i\d+]] Add [ [[Arg1]] [[Arg2]] ] + // CHECK-DAG: [[Neg:i\d+]] Neg [ [[Add]] ] + // CHECK-DAG: Return [ [[Neg]] ] + + public static int AddNegs1(int arg1, int arg2) { + return -arg1 + -arg2; + } + + /** + * This is similar to the test-case AddNegs1, but the negations have + * multiple uses. + * The transformation tested is implemented in + * `InstructionSimplifierVisitor::TryMoveNegOnInputsAfterBinop`. + * The current code won't perform the previous optimization. The + * transformations do not look at other uses of their inputs. As they don't + * know what will happen with other uses, they do not take the risk of + * increasing the register pressure by creating or extending live ranges. + */ + + // CHECK-START: int Main.AddNegs2(int, int) instruction_simplifier (before) + // CHECK-DAG: [[Arg1:i\d+]] ParameterValue + // CHECK-DAG: [[Arg2:i\d+]] ParameterValue + // CHECK-DAG: [[Neg1:i\d+]] Neg [ [[Arg1]] ] + // CHECK-DAG: [[Neg2:i\d+]] Neg [ [[Arg2]] ] + // CHECK-DAG: [[Add1:i\d+]] Add [ [[Neg1]] [[Neg2]] ] + // CHECK-DAG: [[Add2:i\d+]] Add [ [[Neg1]] [[Neg2]] ] + // CHECK-DAG: [[Or:i\d+]] Or [ [[Add1]] [[Add2]] ] + // CHECK-DAG: Return [ [[Or]] ] + + // CHECK-START: int Main.AddNegs2(int, int) instruction_simplifier (after) + // CHECK-DAG: [[Arg1:i\d+]] ParameterValue + // CHECK-DAG: [[Arg2:i\d+]] ParameterValue + // CHECK-DAG: [[Neg1:i\d+]] Neg [ [[Arg1]] ] + // CHECK-DAG: [[Neg2:i\d+]] Neg [ [[Arg2]] ] + // CHECK-DAG: [[Add1:i\d+]] Add [ [[Neg1]] [[Neg2]] ] + // CHECK-DAG: [[Add2:i\d+]] Add [ [[Neg1]] [[Neg2]] ] + // CHECK-NOT: Neg + // CHECK-DAG: [[Or:i\d+]] Or [ [[Add1]] [[Add2]] ] + // CHECK-DAG: Return [ [[Or]] ] + + public static int AddNegs2(int arg1, int arg2) { + int temp1 = -arg1; + int temp2 = -arg2; + return (temp1 + temp2) | (temp1 + temp2); + } + + /** + * This follows test-cases AddNegs1 and AddNegs2. + * The transformation tested is implemented in + * `InstructionSimplifierVisitor::TryMoveNegOnInputsAfterBinop`. + * The optimization should not happen if it moves an additional instruction in + * the loop. + */ + + // CHECK-START: long Main.AddNegs3(long, long) instruction_simplifier (before) + // -------------- Arguments and initial negation operations. + // CHECK-DAG: [[Arg1:j\d+]] ParameterValue + // CHECK-DAG: [[Arg2:j\d+]] ParameterValue + // CHECK-DAG: [[Neg1:j\d+]] Neg [ [[Arg1]] ] + // CHECK-DAG: [[Neg2:j\d+]] Neg [ [[Arg2]] ] + // CHECK: Goto + // -------------- Loop + // CHECK: SuspendCheck + // CHECK: [[Add:j\d+]] Add [ [[Neg1]] [[Neg2]] ] + // CHECK: Goto + + // CHECK-START: long Main.AddNegs3(long, long) instruction_simplifier (after) + // -------------- Arguments and initial negation operations. 
+  // CHECK-DAG: [[Arg1:j\d+]] ParameterValue
+  // CHECK-DAG: [[Arg2:j\d+]] ParameterValue
+  // CHECK-DAG: [[Neg1:j\d+]] Neg [ [[Arg1]] ]
+  // CHECK-DAG: [[Neg2:j\d+]] Neg [ [[Arg2]] ]
+  // CHECK: Goto
+  // -------------- Loop
+  // CHECK: SuspendCheck
+  // CHECK: [[Add:j\d+]] Add [ [[Neg1]] [[Neg2]] ]
+  // CHECK-NOT: Neg
+  // CHECK: Goto
+
+  public static long AddNegs3(long arg1, long arg2) {
+    long res = 0;
+    long n_arg1 = -arg1;
+    long n_arg2 = -arg2;
+    for (long i = 0; i < 1; i++) {
+      res += n_arg1 + n_arg2 + i;
+    }
+    return res;
+  }
+
+  /**
+   * Test the simplification of an addition with a negated argument into a
+   * subtraction.
+   * The transformation tested is implemented in `InstructionSimplifierVisitor::VisitAdd`.
+   */
+
+  // CHECK-START: long Main.AddNeg1(long, long) instruction_simplifier (before)
+  // CHECK-DAG: [[Arg1:j\d+]] ParameterValue
+  // CHECK-DAG: [[Arg2:j\d+]] ParameterValue
+  // CHECK-DAG: [[Neg:j\d+]] Neg [ [[Arg1]] ]
+  // CHECK-DAG: [[Add:j\d+]] Add [ [[Neg]] [[Arg2]] ]
+  // CHECK-DAG: Return [ [[Add]] ]
+
+  // CHECK-START: long Main.AddNeg1(long, long) instruction_simplifier (after)
+  // CHECK-DAG: [[Arg1:j\d+]] ParameterValue
+  // CHECK-DAG: [[Arg2:j\d+]] ParameterValue
+  // CHECK-DAG: [[Sub:j\d+]] Sub [ [[Arg2]] [[Arg1]] ]
+  // CHECK-DAG: Return [ [[Sub]] ]
+
+  // CHECK-START: long Main.AddNeg1(long, long) instruction_simplifier (after)
+  // CHECK-NOT: Neg
+  // CHECK-NOT: Add
+
+  public static long AddNeg1(long arg1, long arg2) {
+    return -arg1 + arg2;
+  }
+
+  /**
+   * This is similar to the test-case AddNeg1, but the negation has two uses.
+   * The transformation tested is implemented in `InstructionSimplifierVisitor::VisitAdd`.
+   * The current code won't perform the previous optimization. The
+   * transformations do not look at other uses of their inputs. As they don't
+   * know what will happen with other uses, they do not take the risk of
+   * increasing the register pressure by creating or extending live ranges.
+   */
+
+  // CHECK-START: long Main.AddNeg2(long, long) instruction_simplifier (before)
+  // CHECK-DAG: [[Arg1:j\d+]] ParameterValue
+  // CHECK-DAG: [[Arg2:j\d+]] ParameterValue
+  // CHECK-DAG: [[Neg:j\d+]] Neg [ [[Arg2]] ]
+  // CHECK-DAG: [[Add1:j\d+]] Add [ [[Arg1]] [[Neg]] ]
+  // CHECK-DAG: [[Add2:j\d+]] Add [ [[Arg1]] [[Neg]] ]
+  // CHECK-DAG: [[Res:j\d+]] Or [ [[Add1]] [[Add2]] ]
+  // CHECK-DAG: Return [ [[Res]] ]
+
+  // CHECK-START: long Main.AddNeg2(long, long) instruction_simplifier (after)
+  // CHECK-DAG: [[Arg1:j\d+]] ParameterValue
+  // CHECK-DAG: [[Arg2:j\d+]] ParameterValue
+  // CHECK-DAG: [[Neg:j\d+]] Neg [ [[Arg2]] ]
+  // CHECK-DAG: [[Add1:j\d+]] Add [ [[Arg1]] [[Neg]] ]
+  // CHECK-DAG: [[Add2:j\d+]] Add [ [[Arg1]] [[Neg]] ]
+  // CHECK-DAG: [[Res:j\d+]] Or [ [[Add1]] [[Add2]] ]
+  // CHECK-DAG: Return [ [[Res]] ]
+
+  // CHECK-START: long Main.AddNeg2(long, long) instruction_simplifier (after)
+  // CHECK-NOT: Sub
+
+  public static long AddNeg2(long arg1, long arg2) {
+    long temp = -arg2;
+    return (arg1 + temp) | (arg1 + temp);
+  }
+
+  /**
+   * Test simplification of the `-(-var)` pattern.
+   * The transformation tested is implemented in `InstructionSimplifierVisitor::VisitNeg`.
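The AddNegs and AddNeg cases above all hinge on a single-use check: the rewrite fires only when the Neg feeding the Add has no other consumer, and the loop variants additionally require that nothing new is sunk into the loop body. A behavioural sketch of that gating over an invented use-counted node type (ART's real pass walks C++ HInstructions with full def-use chains; every name below is hypothetical):

```java
// Toy model of the single-use gating for the Add/Neg rewrites.
final class AddNegSketch {
  static class Node {
    final String op;   // "Param", "Neg", "Add", "Sub"
    final Node[] in;
    int uses;          // number of consumers of this value
    Node(String op, Node... in) {
      this.op = op;
      this.in = in;
      for (Node n : in) {
        n.uses++;
      }
    }
  }

  // Rewrite only when a Neg's sole user is this Add, so no live range is
  // extended for another consumer. (Use counts of replaced nodes are not
  // decremented here; a real pass would update the def-use chains.)
  static Node simplifyAdd(Node add) {
    if (!add.op.equals("Add") || add.in.length != 2) {
      return add;
    }
    Node l = add.in[0], r = add.in[1];
    boolean lNeg = l.op.equals("Neg") && l.uses == 1;
    boolean rNeg = r.op.equals("Neg") && r.uses == 1;
    if (lNeg && rNeg) {
      // Cf. AddNegs1: -x + -y becomes -(x + y).
      return new Node("Neg", new Node("Add", l.in[0], r.in[0]));
    }
    if (lNeg) {
      return new Node("Sub", r, l.in[0]);  // cf. AddNeg1: -x + y -> y - x
    }
    if (rNeg) {
      return new Node("Sub", l, r.in[0]);  //              x + -y -> x - y
    }
    return add;  // cf. AddNegs2 / AddNeg2: multi-use Neg, leave the graph alone
  }
}
```

Feeding two single-use Negs into simplifyAdd yields the Neg-of-Add shape that the AddNegs1 after-graph checks for, while a shared Neg falls through untouched, matching the AddNegs2 and AddNeg2 expectations.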
+   */
+
+  // CHECK-START: long Main.NegNeg1(long) instruction_simplifier (before)
+  // CHECK-DAG: [[Arg:j\d+]] ParameterValue
+  // CHECK-DAG: [[Neg1:j\d+]] Neg [ [[Arg]] ]
+  // CHECK-DAG: [[Neg2:j\d+]] Neg [ [[Neg1]] ]
+  // CHECK-DAG: Return [ [[Neg2]] ]
+
+  // CHECK-START: long Main.NegNeg1(long) instruction_simplifier (after)
+  // CHECK-DAG: [[Arg:j\d+]] ParameterValue
+  // CHECK-DAG: Return [ [[Arg]] ]
+
+  // CHECK-START: long Main.NegNeg1(long) instruction_simplifier (after)
+  // CHECK-NOT: Neg
+
+  public static long NegNeg1(long arg) {
+    return -(-arg);
+  }
+
+  /**
+   * Test 'multi-step' simplification, where a first transformation yields a
+   * new simplification possibility for the current instruction.
+   * The transformations tested are implemented in `InstructionSimplifierVisitor::VisitNeg`
+   * and in `InstructionSimplifierVisitor::VisitAdd`.
+   */
+
+  // CHECK-START: int Main.NegNeg2(int) instruction_simplifier (before)
+  // CHECK-DAG: [[Arg:i\d+]] ParameterValue
+  // CHECK-DAG: [[Neg1:i\d+]] Neg [ [[Arg]] ]
+  // CHECK-DAG: [[Neg2:i\d+]] Neg [ [[Neg1]] ]
+  // CHECK-DAG: [[Add:i\d+]] Add [ [[Neg1]] [[Neg2]] ]
+  // CHECK-DAG: Return [ [[Add]] ]
+
+  // CHECK-START: int Main.NegNeg2(int) instruction_simplifier (after)
+  // CHECK-DAG: [[Arg:i\d+]] ParameterValue
+  // CHECK-DAG: [[Sub:i\d+]] Sub [ [[Arg]] [[Arg]] ]
+  // CHECK-DAG: Return [ [[Sub]] ]
+
+  // CHECK-START: int Main.NegNeg2(int) instruction_simplifier (after)
+  // CHECK-NOT: Neg
+  // CHECK-NOT: Add
+
+  public static int NegNeg2(int arg) {
+    int temp = -arg;
+    return temp + -temp;
+  }
+
+  /**
+   * Test another 'multi-step' simplification, where a first transformation
+   * yields a new simplification possibility for the current instruction.
+   * The transformations tested are implemented in `InstructionSimplifierVisitor::VisitNeg`
+   * and in `InstructionSimplifierVisitor::VisitSub`.
+   */
+
+  // CHECK-START: long Main.NegNeg3(long) instruction_simplifier (before)
+  // CHECK-DAG: [[Arg:j\d+]] ParameterValue
+  // CHECK-DAG: [[Const0:j\d+]] LongConstant 0
+  // CHECK-DAG: [[Neg:j\d+]] Neg [ [[Arg]] ]
+  // CHECK-DAG: [[Sub:j\d+]] Sub [ [[Const0]] [[Neg]] ]
+  // CHECK-DAG: Return [ [[Sub]] ]
+
+  // CHECK-START: long Main.NegNeg3(long) instruction_simplifier (after)
+  // CHECK-DAG: [[Arg:j\d+]] ParameterValue
+  // CHECK-DAG: Return [ [[Arg]] ]
+
+  // CHECK-START: long Main.NegNeg3(long) instruction_simplifier (after)
+  // CHECK-NOT: Neg
+  // CHECK-NOT: Sub
+
+  public static long NegNeg3(long arg) {
+    return 0 - -arg;
+  }
+
+  /**
+   * Test that a negated subtraction is simplified to a subtraction with its
+   * arguments reversed.
+   * The transformation tested is implemented in `InstructionSimplifierVisitor::VisitNeg`.
+   */
+
+  // CHECK-START: int Main.NegSub1(int, int) instruction_simplifier (before)
+  // CHECK-DAG: [[Arg1:i\d+]] ParameterValue
+  // CHECK-DAG: [[Arg2:i\d+]] ParameterValue
+  // CHECK-DAG: [[Sub:i\d+]] Sub [ [[Arg1]] [[Arg2]] ]
+  // CHECK-DAG: [[Neg:i\d+]] Neg [ [[Sub]] ]
+  // CHECK-DAG: Return [ [[Neg]] ]
+
+  // CHECK-START: int Main.NegSub1(int, int) instruction_simplifier (after)
+  // CHECK-DAG: [[Arg1:i\d+]] ParameterValue
+  // CHECK-DAG: [[Arg2:i\d+]] ParameterValue
+  // CHECK-DAG: [[Sub:i\d+]] Sub [ [[Arg2]] [[Arg1]] ]
+  // CHECK-DAG: Return [ [[Sub]] ]
+
+  // CHECK-START: int Main.NegSub1(int, int) instruction_simplifier (after)
+  // CHECK-NOT: Neg
+
+  public static int NegSub1(int arg1, int arg2) {
+    return -(arg1 - arg2);
+  }
+
+  /**
+   * This is similar to the test-case NegSub1, but the subtraction has
+   * multiple uses.
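These Neg rewrites are sound only because Java (and ART) integer arithmetic wraps modulo 2^32 or 2^64, so each identity holds even when negation overflows; note in particular that -Long.MIN_VALUE wraps back to Long.MIN_VALUE. A quick self-contained property check (class and method names invented here):

```java
import java.util.Random;

// Verifies the NegNeg/NegSub identities over edge values and random longs.
final class NegIdentityCheck {
  public static void main(String[] args) {
    Random rng = new Random(42);
    long[] edges = { 0, 1, -1, Long.MAX_VALUE, Long.MIN_VALUE };
    for (int i = 0; i < 100_000; i++) {
      long x = i < edges.length ? edges[i] : rng.nextLong();
      long y = rng.nextLong();
      check(-(-x) == x);         // NegNeg1: double negation cancels
      check(0 - (-x) == x);      // NegNeg3: same identity via Sub
      check(-(x - y) == y - x);  // NegSub1: negated Sub reverses operands
      long t = -x;
      check(t + -t == 0);        // NegNeg2: a value plus its negation
    }
    System.out.println("all neg identities hold");
  }

  private static void check(boolean cond) {
    if (!cond) throw new AssertionError("identity violated");
  }
}
```

This is the same wrap-around argument that lets the simplifier drop the Neg instructions without guarding against overflow.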
+   * The transformation tested is implemented in `InstructionSimplifierVisitor::VisitNeg`.
+   * The current code won't perform the previous optimization. The
+   * transformations do not look at other uses of their inputs. As they don't
+   * know what will happen with other uses, they do not take the risk of
+   * increasing the register pressure by creating or extending live ranges.
+   */
+
+  // CHECK-START: int Main.NegSub2(int, int) instruction_simplifier (before)
+  // CHECK-DAG: [[Arg1:i\d+]] ParameterValue
+  // CHECK-DAG: [[Arg2:i\d+]] ParameterValue
+  // CHECK-DAG: [[Sub:i\d+]] Sub [ [[Arg1]] [[Arg2]] ]
+  // CHECK-DAG: [[Neg1:i\d+]] Neg [ [[Sub]] ]
+  // CHECK-DAG: [[Neg2:i\d+]] Neg [ [[Sub]] ]
+  // CHECK-DAG: [[Or:i\d+]] Or [ [[Neg1]] [[Neg2]] ]
+  // CHECK-DAG: Return [ [[Or]] ]
+
+  // CHECK-START: int Main.NegSub2(int, int) instruction_simplifier (after)
+  // CHECK-DAG: [[Arg1:i\d+]] ParameterValue
+  // CHECK-DAG: [[Arg2:i\d+]] ParameterValue
+  // CHECK-DAG: [[Sub:i\d+]] Sub [ [[Arg1]] [[Arg2]] ]
+  // CHECK-DAG: [[Neg1:i\d+]] Neg [ [[Sub]] ]
+  // CHECK-DAG: [[Neg2:i\d+]] Neg [ [[Sub]] ]
+  // CHECK-DAG: [[Or:i\d+]] Or [ [[Neg1]] [[Neg2]] ]
+  // CHECK-DAG: Return [ [[Or]] ]
+
+  public static int NegSub2(int arg1, int arg2) {
+    int temp = arg1 - arg2;
+    return -temp | -temp;
+  }
+
+  /**
+   * Test simplification of the `~~var` pattern.
+   * The transformation tested is implemented in `InstructionSimplifierVisitor::VisitNot`.
+   */
+
+  // CHECK-START: long Main.NotNot1(long) instruction_simplifier (before)
+  // CHECK-DAG: [[Arg:j\d+]] ParameterValue
+  // CHECK-DAG: [[ConstF1:j\d+]] LongConstant -1
+  // CHECK-DAG: [[Xor1:j\d+]] Xor [ [[Arg]] [[ConstF1]] ]
+  // CHECK-DAG: [[Xor2:j\d+]] Xor [ [[Xor1]] [[ConstF1]] ]
+  // CHECK-DAG: Return [ [[Xor2]] ]
+
+  // CHECK-START: long Main.NotNot1(long) instruction_simplifier (after)
+  // CHECK-DAG: [[Arg:j\d+]] ParameterValue
+  // CHECK-DAG: Return [ [[Arg]] ]
+
+  // CHECK-START: long Main.NotNot1(long) instruction_simplifier (after)
+  // CHECK-NOT: Xor
+
+  public static long NotNot1(long arg) {
+    return ~~arg;
+  }
+
+  // CHECK-START: int Main.NotNot2(int) instruction_simplifier (before)
+  // CHECK-DAG: [[Arg:i\d+]] ParameterValue
+  // CHECK-DAG: [[ConstF1:i\d+]] IntConstant -1
+  // CHECK-DAG: [[Xor1:i\d+]] Xor [ [[Arg]] [[ConstF1]] ]
+  // CHECK-DAG: [[Xor2:i\d+]] Xor [ [[Xor1]] [[ConstF1]] ]
+  // CHECK-DAG: [[Add:i\d+]] Add [ [[Xor1]] [[Xor2]] ]
+  // CHECK-DAG: Return [ [[Add]] ]
+
+  // CHECK-START: int Main.NotNot2(int) instruction_simplifier (after)
+  // CHECK-DAG: [[Arg:i\d+]] ParameterValue
+  // CHECK-DAG: [[Not:i\d+]] Not [ [[Arg]] ]
+  // CHECK-DAG: [[Add:i\d+]] Add [ [[Not]] [[Arg]] ]
+  // CHECK-DAG: Return [ [[Add]] ]
+
+  // CHECK-START: int Main.NotNot2(int) instruction_simplifier (after)
+  // CHECK-NOT: Xor
+
+  public static int NotNot2(int arg) {
+    int temp = ~arg;
+    return temp + ~temp;
+  }
+
+  /**
+   * Test the simplification of a subtraction with a negated argument.
+   * The transformation tested is implemented in `InstructionSimplifierVisitor::VisitSub`.
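As the before-graphs above show, `~arg` reaches the optimizer as an Xor with the constant -1, which the simplifier can fold to a Not (cf. XorAllOnes) before the double complement cancels. A small self-contained check of the underlying bit identities (class name invented):

```java
// Verifies the Not/Xor identities the NotNot tests rely on.
final class NotIdentityCheck {
  public static void main(String[] args) {
    long[] samples = { 0, 1, -1, 123456, Long.MIN_VALUE, Long.MAX_VALUE };
    for (long x : samples) {
      check((x ^ -1L) == ~x);  // why an Xor with -1 can become a Not
      check(~~x == x);         // NotNot1: double complement cancels
      check(x + ~x == -1);     // NotNot2: a value plus its complement
    }
    System.out.println("all not identities hold");
  }

  private static void check(boolean c) {
    if (!c) throw new AssertionError("identity violated");
  }
}
```

The x + ~x == -1 identity is exact in two's complement (~x equals -x - 1), which is why main() below expects NotNot2 to return -1 for any argument.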
+   */
+
+  // CHECK-START: int Main.SubNeg1(int, int) instruction_simplifier (before)
+  // CHECK-DAG: [[Arg1:i\d+]] ParameterValue
+  // CHECK-DAG: [[Arg2:i\d+]] ParameterValue
+  // CHECK-DAG: [[Neg:i\d+]] Neg [ [[Arg1]] ]
+  // CHECK-DAG: [[Sub:i\d+]] Sub [ [[Neg]] [[Arg2]] ]
+  // CHECK-DAG: Return [ [[Sub]] ]
+
+  // CHECK-START: int Main.SubNeg1(int, int) instruction_simplifier (after)
+  // CHECK-DAG: [[Arg1:i\d+]] ParameterValue
+  // CHECK-DAG: [[Arg2:i\d+]] ParameterValue
+  // CHECK-DAG: [[Add:i\d+]] Add [ [[Arg1]] [[Arg2]] ]
+  // CHECK-DAG: [[Neg:i\d+]] Neg [ [[Add]] ]
+  // CHECK-DAG: Return [ [[Neg]] ]
+
+  // CHECK-START: int Main.SubNeg1(int, int) instruction_simplifier (after)
+  // CHECK-NOT: Sub
+
+  public static int SubNeg1(int arg1, int arg2) {
+    return -arg1 - arg2;
+  }
+
+  /**
+   * This is similar to the test-case SubNeg1, but the negation has
+   * multiple uses.
+   * The transformation tested is implemented in `InstructionSimplifierVisitor::VisitSub`.
+   * The current code won't perform the previous optimization. The
+   * transformations do not look at other uses of their inputs. As they don't
+   * know what will happen with other uses, they do not take the risk of
+   * increasing the register pressure by creating or extending live ranges.
+   */
+
+  // CHECK-START: int Main.SubNeg2(int, int) instruction_simplifier (before)
+  // CHECK-DAG: [[Arg1:i\d+]] ParameterValue
+  // CHECK-DAG: [[Arg2:i\d+]] ParameterValue
+  // CHECK-DAG: [[Neg:i\d+]] Neg [ [[Arg1]] ]
+  // CHECK-DAG: [[Sub1:i\d+]] Sub [ [[Neg]] [[Arg2]] ]
+  // CHECK-DAG: [[Sub2:i\d+]] Sub [ [[Neg]] [[Arg2]] ]
+  // CHECK-DAG: [[Or:i\d+]] Or [ [[Sub1]] [[Sub2]] ]
+  // CHECK-DAG: Return [ [[Or]] ]
+
+  // CHECK-START: int Main.SubNeg2(int, int) instruction_simplifier (after)
+  // CHECK-DAG: [[Arg1:i\d+]] ParameterValue
+  // CHECK-DAG: [[Arg2:i\d+]] ParameterValue
+  // CHECK-DAG: [[Neg:i\d+]] Neg [ [[Arg1]] ]
+  // CHECK-DAG: [[Sub1:i\d+]] Sub [ [[Neg]] [[Arg2]] ]
+  // CHECK-DAG: [[Sub2:i\d+]] Sub [ [[Neg]] [[Arg2]] ]
+  // CHECK-DAG: [[Or:i\d+]] Or [ [[Sub1]] [[Sub2]] ]
+  // CHECK-DAG: Return [ [[Or]] ]
+
+  // CHECK-START: int Main.SubNeg2(int, int) instruction_simplifier (after)
+  // CHECK-NOT: Add
+
+  public static int SubNeg2(int arg1, int arg2) {
+    int temp = -arg1;
+    return (temp - arg2) | (temp - arg2);
+  }
+
+  /**
+   * This follows test-cases SubNeg1 and SubNeg2.
+   * The transformation tested is implemented in `InstructionSimplifierVisitor::VisitSub`.
+   * The optimization should not happen if it moves an additional instruction
+   * into the loop.
+   */
+
+  // CHECK-START: long Main.SubNeg3(long, long) instruction_simplifier (before)
+  // -------------- Arguments and initial negation operation.
+  // CHECK-DAG: [[Arg1:j\d+]] ParameterValue
+  // CHECK-DAG: [[Arg2:j\d+]] ParameterValue
+  // CHECK-DAG: [[Neg:j\d+]] Neg [ [[Arg1]] ]
+  // CHECK: Goto
+  // -------------- Loop
+  // CHECK: SuspendCheck
+  // CHECK: [[Sub:j\d+]] Sub [ [[Neg]] [[Arg2]] ]
+  // CHECK: Goto
+
+  // CHECK-START: long Main.SubNeg3(long, long) instruction_simplifier (after)
+  // -------------- Arguments and initial negation operation.
+  // CHECK-DAG: [[Arg1:j\d+]] ParameterValue
+  // CHECK-DAG: [[Arg2:j\d+]] ParameterValue
+  // CHECK-DAG: [[Neg:j\d+]] Neg [ [[Arg1]] ]
+  // CHECK-DAG: Goto
+  // -------------- Loop
+  // CHECK: SuspendCheck
+  // CHECK: [[Sub:j\d+]] Sub [ [[Neg]] [[Arg2]] ]
+  // CHECK-NOT: Neg
+  // CHECK: Goto
+
+  public static long SubNeg3(long arg1, long arg2) {
+    long res = 0;
+    long temp = -arg1;
+    for (long i = 0; i < 1; i++) {
+      res += temp - arg2 - i;
+    }
+    return res;
+  }
+
  public static void main(String[] args) {
    int arg = 123456;
@@ -296,5 +779,20 @@ public class Main {
    assertLongEquals(UShr0(arg), arg);
    assertIntEquals(Xor0(arg), arg);
    assertIntEquals(XorAllOnes(arg), ~arg);
+    assertIntEquals(AddNegs1(arg, arg + 1), -(arg + arg + 1));
+    assertIntEquals(AddNegs2(arg, arg + 1), -(arg + arg + 1));
+    assertLongEquals(AddNegs3(arg, arg + 1), -(2 * arg + 1));
+    assertLongEquals(AddNeg1(arg, arg + 1), 1);
+    assertLongEquals(AddNeg2(arg, arg + 1), -1);
+    assertLongEquals(NegNeg1(arg), arg);
+    assertIntEquals(NegNeg2(arg), 0);
+    assertLongEquals(NegNeg3(arg), arg);
+    assertIntEquals(NegSub1(arg, arg + 1), 1);
+    assertIntEquals(NegSub2(arg, arg + 1), 1);
+    assertLongEquals(NotNot1(arg), arg);
+    assertIntEquals(NotNot2(arg), -1);
+    assertIntEquals(SubNeg1(arg, arg + 1), -(arg + arg + 1));
+    assertIntEquals(SubNeg2(arg, arg + 1), -(arg + arg + 1));
+    assertLongEquals(SubNeg3(arg, arg + 1), -(2 * arg + 1));
  }
}
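For completeness, the SubNeg expectations used in main() above rest on the same wrap-around argument as the earlier identities: -x - y and -(x + y) agree for every int, including overflow cases such as Integer.MIN_VALUE. A compact cross-check (class name invented):

```java
// Exhaustively checks the SubNeg1 rewrite identity over edge-value pairs.
final class SubNegCheck {
  public static void main(String[] args) {
    int[] samples = { 0, 1, -1, 123456, Integer.MIN_VALUE, Integer.MAX_VALUE };
    for (int x : samples) {
      for (int y : samples) {
        if (-x - y != -(x + y)) {  // the shape SubNeg1's after-graph expects
          throw new AssertionError(x + ", " + y);
        }
      }
    }
    System.out.println("SubNeg identity holds");
  }
}
```

The loop variants (AddNegs3, SubNeg3) then only constrain where the rewritten Neg may live: the assertions in main() confirm the value is unchanged, while the CHECK-NOT lines confirm no Neg was sunk into the loop body.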