90 files changed, 3419 insertions, 1270 deletions
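Most of this change threads call frame information (CFI) through the Quick backend: every prologue/epilogue instruction that moves SP or spills/restores a callee-save register is now paired with a cfi_ bookkeeping call (AdjustCFAOffset, RelOffset, Restore, RememberState/RestoreState), and the per-method result is emitted through the new dex/quick/lazy_debug_frame_opcode_writer.cc instead of the removed utils/dwarf_cfi.cc. As a mental model only — hypothetical class and member names, not the actual implementation — the state those calls maintain behaves roughly like this:

// Minimal sketch of CFA-offset bookkeeping, assuming a downward-growing stack.
// AdjustCFAOffset(+n) mirrors a push/sub-sp of n bytes; RememberState()/RestoreState()
// bracket an epilogue so that code emitted after the exit block still carries the
// frame's CFI, matching the GenExitSequence() changes below.
class CfaTracker {  // hypothetical name
 public:
  void AdjustCFAOffset(int delta) { offset_ += delta; }
  int GetCurrentCFAOffset() const { return offset_; }
  void RememberState() { saved_offset_ = offset_; }
  void RestoreState() { offset_ = saved_offset_; }
 private:
  int offset_ = 0;  // CFA = SP + offset_; 0 means an empty frame.
  int saved_offset_ = 0;
};

This invariant is what the new DCHECK_EQ(cfi_.GetCurrentCFAOffset(), 0) assertions at method entry and after the final pop are checking.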
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk index 0e2dad9355..10bb90b5ad 100644 --- a/build/Android.gtest.mk +++ b/build/Android.gtest.mk @@ -188,6 +188,7 @@ COMPILER_GTEST_COMMON_SRC_FILES := \ compiler/dex/local_value_numbering_test.cc \ compiler/dex/mir_graph_test.cc \ compiler/dex/mir_optimization_test.cc \ + compiler/dex/quick/quick_cfi_test.cc \ compiler/dwarf/dwarf_test.cc \ compiler/driver/compiler_driver_test.cc \ compiler/elf_writer_test.cc \ @@ -405,7 +406,7 @@ define define-art-gtest LOCAL_CPP_EXTENSION := $$(ART_CPP_EXTENSION) LOCAL_SRC_FILES := $$(art_gtest_filename) LOCAL_C_INCLUDES += $$(ART_C_INCLUDES) art/runtime $$(art_gtest_extra_c_includes) - LOCAL_SHARED_LIBRARIES += libartd $$(art_gtest_extra_shared_libraries) libart-gtest + LOCAL_SHARED_LIBRARIES += libartd $$(art_gtest_extra_shared_libraries) libart-gtest libart-disassembler LOCAL_WHOLE_STATIC_LIBRARIES += libsigchain LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common_build.mk diff --git a/compiler/Android.mk b/compiler/Android.mk index eaea031b62..94322a8315 100644 --- a/compiler/Android.mk +++ b/compiler/Android.mk @@ -41,6 +41,7 @@ LIBART_COMPILER_SRC_FILES := \ dex/quick/gen_common.cc \ dex/quick/gen_invoke.cc \ dex/quick/gen_loadstore.cc \ + dex/quick/lazy_debug_frame_opcode_writer.cc \ dex/quick/local_optimizations.cc \ dex/quick/mips/assemble_mips.cc \ dex/quick/mips/call_mips.cc \ @@ -103,6 +104,7 @@ LIBART_COMPILER_SRC_FILES := \ optimizing/code_generator_arm64.cc \ optimizing/code_generator_x86.cc \ optimizing/code_generator_x86_64.cc \ + optimizing/code_generator_utils.cc \ optimizing/constant_folding.cc \ optimizing/dead_code_elimination.cc \ optimizing/graph_checker.cc \ @@ -138,7 +140,6 @@ LIBART_COMPILER_SRC_FILES := \ utils/arm64/assembler_arm64.cc \ utils/arm64/managed_register_arm64.cc \ utils/assembler.cc \ - utils/dwarf_cfi.cc \ utils/mips/assembler_mips.cc \ utils/mips/managed_register_mips.cc \ utils/mips64/assembler_mips64.cc \ diff --git a/compiler/cfi_test.h b/compiler/cfi_test.h new file mode 100644 index 0000000000..f550395dad --- /dev/null +++ b/compiler/cfi_test.h @@ -0,0 +1,138 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_CFI_TEST_H_ +#define ART_COMPILER_CFI_TEST_H_ + +#include <vector> +#include <memory> +#include <sstream> + +#include "arch/instruction_set.h" +#include "dwarf/debug_frame_writer.h" +#include "dwarf/dwarf_test.h" +#include "disassembler/disassembler.h" +#include "gtest/gtest.h" + +namespace art { + +class CFITest : public dwarf::DwarfTest { + public: + void GenerateExpected(FILE* f, InstructionSet isa, const char* isa_str, + const std::vector<uint8_t>& actual_asm, + const std::vector<uint8_t>& actual_cfi) { + std::vector<std::string> lines; + // Print the raw bytes. 
+ fprintf(f, "static constexpr uint8_t expected_asm_%s[] = {", isa_str); + HexDump(f, actual_asm); + fprintf(f, "\n};\n"); + fprintf(f, "static constexpr uint8_t expected_cfi_%s[] = {", isa_str); + HexDump(f, actual_cfi); + fprintf(f, "\n};\n"); + // Pretty-print CFI opcodes. + dwarf::DebugFrameWriter<> eh_frame(&eh_frame_data_, false); + eh_frame.WriteCIE(dwarf::Reg(8), {}); + eh_frame.WriteFDE(0, actual_asm.size(), actual_cfi.data(), actual_cfi.size()); + ReformatCfi(Objdump(false, "-W"), &lines); + // Pretty-print assembly. + auto* opts = new DisassemblerOptions(false, actual_asm.data(), true); + std::unique_ptr<Disassembler> disasm(Disassembler::Create(isa, opts)); + std::stringstream stream; + const uint8_t* base = actual_asm.data() + (isa == kThumb2 ? 1 : 0); + disasm->Dump(stream, base, base + actual_asm.size()); + ReformatAsm(&stream, &lines); + // Print CFI and assembly interleaved. + std::stable_sort(lines.begin(), lines.end(), CompareByAddress); + for (const std::string& line : lines) { + fprintf(f, "// %s\n", line.c_str()); + } + fprintf(f, "\n"); + } + + private: + // Helper - get offset just past the end of given string. + static size_t FindEndOf(const std::string& str, const char* substr) { + size_t pos = str.find(substr); + CHECK_NE(std::string::npos, pos); + return pos + strlen(substr); + } + + // Spit to lines and remove raw instruction bytes. + static void ReformatAsm(std::stringstream* stream, + std::vector<std::string>* output) { + std::string line; + while (std::getline(*stream, line)) { + line = line.substr(0, FindEndOf(line, ": ")) + + line.substr(FindEndOf(line, "\t")); + size_t pos; + while ((pos = line.find(" ")) != std::string::npos) { + line = line.replace(pos, 2, " "); + } + while (!line.empty() && line.back() == ' ') { + line.pop_back(); + } + output->push_back(line); + } + } + + // Find interesting parts of objdump output and prefix the lines with address. + static void ReformatCfi(const std::vector<std::string>& lines, + std::vector<std::string>* output) { + std::string address; + for (const std::string& line : lines) { + if (line.find("DW_CFA_nop") != std::string::npos) { + // Ignore. + } else if (line.find("DW_CFA_advance_loc") != std::string::npos) { + // The last 8 characters are the address. + address = "0x" + line.substr(line.size() - 8); + } else if (line.find("DW_CFA_") != std::string::npos) { + std::string new_line(line); + // "bad register" warning is caused by always using host (x86) objdump. + const char* bad_reg = "bad register: "; + size_t pos; + if ((pos = new_line.find(bad_reg)) != std::string::npos) { + new_line = new_line.replace(pos, strlen(bad_reg), ""); + } + // Remove register names in parentheses since they have x86 names. + if ((pos = new_line.find(" (")) != std::string::npos) { + new_line = new_line.replace(pos, FindEndOf(new_line, ")") - pos, ""); + } + // Use the .cfi_ prefix. + new_line = ".cfi_" + new_line.substr(FindEndOf(new_line, "DW_CFA_")); + output->push_back(address + ": " + new_line); + } + } + } + + // Compare strings by the address prefix. + static bool CompareByAddress(const std::string& lhs, const std::string& rhs) { + EXPECT_EQ(lhs[10], ':'); + EXPECT_EQ(rhs[10], ':'); + return strncmp(lhs.c_str(), rhs.c_str(), 10) < 0; + } + + // Pretty-print byte array. 12 bytes per line. + static void HexDump(FILE* f, const std::vector<uint8_t>& data) { + for (size_t i = 0; i < data.size(); i++) { + fprintf(f, i % 12 == 0 ? "\n " : " "); // Whitespace. 
+ fprintf(f, "0x%02X,", data[i]); + } + } +}; + +} // namespace art + +#endif // ART_COMPILER_CFI_TEST_H_ diff --git a/compiler/compiler.h b/compiler/compiler.h index 6ec39f9605..a04641e3fa 100644 --- a/compiler/compiler.h +++ b/compiler/compiler.h @@ -107,6 +107,9 @@ class Compiler { return driver_; } + // Whether to produce 64-bit ELF files for 64-bit targets. Leave this off for now. + static constexpr bool kProduce64BitELFFiles = false; + private: CompilerDriver* const driver_; const uint64_t maximum_compilation_time_before_warning_; diff --git a/compiler/dex/bb_optimizations.h b/compiler/dex/bb_optimizations.h index 93d83c6fd4..0850f42a9a 100644 --- a/compiler/dex/bb_optimizations.h +++ b/compiler/dex/bb_optimizations.h @@ -403,13 +403,6 @@ class SuspendCheckElimination : public PassME { DCHECK(bb != nullptr); return c_unit->mir_graph->EliminateSuspendChecks(bb); } - - void End(PassDataHolder* data) const { - DCHECK(data != nullptr); - CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; - DCHECK(c_unit != nullptr); - c_unit->mir_graph->EliminateSuspendChecksEnd(); - } }; } // namespace art diff --git a/compiler/dex/mir_dataflow.cc b/compiler/dex/mir_dataflow.cc index f638b0bf4d..2a920a4e29 100644 --- a/compiler/dex/mir_dataflow.cc +++ b/compiler/dex/mir_dataflow.cc @@ -1396,6 +1396,13 @@ void MIRGraph::CompilerInitializeSSAConversion() { InitializeBasicBlockDataFlow(); } +uint32_t MIRGraph::GetUseCountWeight(BasicBlock* bb) const { + // Each level of nesting adds *100 to count, up to 3 levels deep. + uint32_t depth = std::min(3U, static_cast<uint32_t>(bb->nesting_depth)); + uint32_t weight = std::max(1U, depth * 100); + return weight; +} + /* * Count uses, weighting by loop nesting depth. This code only * counts explicitly used s_regs. A later phase will add implicit @@ -1405,9 +1412,7 @@ void MIRGraph::CountUses(BasicBlock* bb) { if (bb->block_type != kDalvikByteCode) { return; } - // Each level of nesting adds *100 to count, up to 3 levels deep. - uint32_t depth = std::min(3U, static_cast<uint32_t>(bb->nesting_depth)); - uint32_t weight = std::max(1U, depth * 100); + uint32_t weight = GetUseCountWeight(bb); for (MIR* mir = bb->first_mir_insn; (mir != NULL); mir = mir->next) { if (mir->ssa_rep == NULL) { continue; @@ -1417,23 +1422,6 @@ void MIRGraph::CountUses(BasicBlock* bb) { raw_use_counts_[s_reg] += 1u; use_counts_[s_reg] += weight; } - if (!(cu_->disable_opt & (1 << kPromoteCompilerTemps))) { - uint64_t df_attributes = GetDataFlowAttributes(mir); - // Implicit use of Method* ? */ - if (df_attributes & DF_UMS) { - /* - * Some invokes will not use Method* - need to perform test similar - * to that found in GenInvoke() to decide whether to count refs - * for Method* on invoke-class opcodes. This is a relatively expensive - * operation, so should only be done once. - * TODO: refactor InvokeUsesMethodStar() to perform check at parse time, - * and save results for both here and GenInvoke. For now, go ahead - * and assume all invokes use method*. - */ - raw_use_counts_[method_sreg_] += 1u; - use_counts_[method_sreg_] += weight; - } - } } } diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc index 58f12c94e4..4d340387f2 100644 --- a/compiler/dex/mir_graph.cc +++ b/compiler/dex/mir_graph.cc @@ -1609,8 +1609,8 @@ void MIRGraph::ReplaceSpecialChars(std::string& str) { } std::string MIRGraph::GetSSAName(int ssa_reg) { - // TODO: This value is needed for LLVM and debugging. Currently, we compute this and then copy to - // the arena. 
We should be smarter and just place straight into the arena, or compute the + // TODO: This value is needed for debugging. Currently, we compute this and then copy to the + // arena. We should be smarter and just place straight into the arena, or compute the // value more lazily. int vreg = SRegToVReg(ssa_reg); if (vreg >= static_cast<int>(GetFirstTempVR())) { diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h index 3298af1162..85b13448da 100644 --- a/compiler/dex/mir_graph.h +++ b/compiler/dex/mir_graph.h @@ -960,6 +960,12 @@ class MIRGraph { */ CompilerTemp* GetNewCompilerTemp(CompilerTempType ct_type, bool wide); + /** + * @brief Used to remove last created compiler temporary when it's not needed. + * @param temp the temporary to remove. + */ + void RemoveLastCompilerTemp(CompilerTempType ct_type, bool wide, CompilerTemp* temp); + bool MethodIsLeaf() { return attributes_ & METHOD_IS_LEAF; } @@ -1079,7 +1085,6 @@ class MIRGraph { void EliminateDeadCodeEnd(); bool EliminateSuspendChecksGate(); bool EliminateSuspendChecks(BasicBlock* bb); - void EliminateSuspendChecksEnd(); uint16_t GetGvnIFieldId(MIR* mir) const { DCHECK(IsInstructionIGetOrIPut(mir->dalvikInsn.opcode)); @@ -1185,6 +1190,12 @@ class MIRGraph { void DoConstantPropagation(BasicBlock* bb); /** + * @brief Get use count weight for a given block. + * @param bb the BasicBlock. + */ + uint32_t GetUseCountWeight(BasicBlock* bb) const; + + /** * @brief Count the uses in the BasicBlock * @param bb the BasicBlock */ @@ -1396,10 +1407,6 @@ class MIRGraph { uint16_t* sfield_ids; // Ditto. GvnDeadCodeElimination* dce; } gvn; - // Suspend check elimination. - struct { - DexFileMethodInliner* inliner; - } sce; } temp_; static const int kInvalidEntry = -1; ArenaVector<BasicBlock*> block_list_; @@ -1451,6 +1458,7 @@ class MIRGraph { friend class GvnDeadCodeEliminationTest; friend class LocalValueNumberingTest; friend class TopologicalSortOrderTest; + friend class QuickCFITest; }; } // namespace art diff --git a/compiler/dex/mir_method_info.cc b/compiler/dex/mir_method_info.cc index 831ad42682..0c84b82edd 100644 --- a/compiler/dex/mir_method_info.cc +++ b/compiler/dex/mir_method_info.cc @@ -16,6 +16,8 @@ # include "mir_method_info.h" +#include "dex/quick/dex_file_method_inliner.h" +#include "dex/quick/dex_file_to_method_inliner_map.h" #include "dex/verified_method.h" #include "driver/compiler_driver.h" #include "driver/dex_compilation_unit.h" @@ -64,6 +66,9 @@ void MirMethodLoweringInfo::Resolve(CompilerDriver* compiler_driver, const DexFile* const dex_file = mUnit->GetDexFile(); const bool use_jit = runtime->UseJit(); const VerifiedMethod* const verified_method = mUnit->GetVerifiedMethod(); + DexFileToMethodInlinerMap* inliner_map = compiler_driver->GetMethodInlinerMap(); + DexFileMethodInliner* default_inliner = + (inliner_map != nullptr) ? inliner_map->GetMethodInliner(dex_file) : nullptr; for (auto it = method_infos, end = method_infos + count; it != end; ++it) { // For quickened invokes, the dex method idx is actually the mir offset. 
@@ -122,6 +127,7 @@ void MirMethodLoweringInfo::Resolve(CompilerDriver* compiler_driver, if (UNLIKELY(resolved_method == nullptr)) { continue; } + compiler_driver->GetResolvedMethodDexFileLocation(resolved_method, &it->declaring_dex_file_, &it->declaring_class_idx_, &it->declaring_method_idx_); if (!it->IsQuickened()) { @@ -133,6 +139,7 @@ void MirMethodLoweringInfo::Resolve(CompilerDriver* compiler_driver, it->vtable_idx_ = compiler_driver->GetResolvedMethodVTableIndex(resolved_method, invoke_type); } + MethodReference target_method(it->target_dex_file_, it->target_method_idx_); int fast_path_flags = compiler_driver->IsFastInvoke( soa, current_dex_cache, class_loader, mUnit, referrer_class.Get(), resolved_method, @@ -140,10 +147,23 @@ void MirMethodLoweringInfo::Resolve(CompilerDriver* compiler_driver, const bool is_referrers_class = referrer_class.Get() == resolved_method->GetDeclaringClass(); const bool is_class_initialized = compiler_driver->IsMethodsClassInitialized(referrer_class.Get(), resolved_method); + + // Check if the target method is intrinsic or special. + InlineMethodFlags is_intrinsic_or_special = kNoInlineMethodFlags; + if (inliner_map != nullptr) { + auto* inliner = (target_method.dex_file == dex_file) + ? default_inliner + : inliner_map->GetMethodInliner(target_method.dex_file); + is_intrinsic_or_special = inliner->IsIntrinsicOrSpecial(target_method.dex_method_index); + } + uint16_t other_flags = it->flags_ & - ~(kFlagFastPath | kFlagClassIsInitialized | (kInvokeTypeMask << kBitSharpTypeBegin)); + ~(kFlagFastPath | kFlagIsIntrinsic | kFlagIsSpecial | kFlagClassIsInitialized | + (kInvokeTypeMask << kBitSharpTypeBegin)); it->flags_ = other_flags | (fast_path_flags != 0 ? kFlagFastPath : 0u) | + ((is_intrinsic_or_special & kInlineIntrinsic) != 0 ? kFlagIsIntrinsic : 0u) | + ((is_intrinsic_or_special & kInlineSpecial) != 0 ? kFlagIsSpecial : 0u) | (static_cast<uint16_t>(invoke_type) << kBitSharpTypeBegin) | (is_referrers_class ? kFlagIsReferrersClass : 0u) | (is_class_initialized ? kFlagClassIsInitialized : 0u); diff --git a/compiler/dex/mir_method_info.h b/compiler/dex/mir_method_info.h index e131c96a81..7230c462cd 100644 --- a/compiler/dex/mir_method_info.h +++ b/compiler/dex/mir_method_info.h @@ -127,6 +127,14 @@ class MirMethodLoweringInfo : public MirMethodInfo { return (flags_ & kFlagFastPath) != 0u; } + bool IsIntrinsic() const { + return (flags_ & kFlagIsIntrinsic) != 0u; + } + + bool IsSpecial() const { + return (flags_ & kFlagIsSpecial) != 0u; + } + bool IsReferrersClass() const { return (flags_ & kFlagIsReferrersClass) != 0; } @@ -188,9 +196,11 @@ class MirMethodLoweringInfo : public MirMethodInfo { private: enum { kBitFastPath = kMethodInfoBitEnd, + kBitIsIntrinsic, + kBitIsSpecial, kBitInvokeTypeBegin, kBitInvokeTypeEnd = kBitInvokeTypeBegin + 3, // 3 bits for invoke type. - kBitSharpTypeBegin, + kBitSharpTypeBegin = kBitInvokeTypeEnd, kBitSharpTypeEnd = kBitSharpTypeBegin + 3, // 3 bits for sharp type. 
kBitIsReferrersClass = kBitSharpTypeEnd, kBitClassIsInitialized, @@ -199,6 +209,8 @@ class MirMethodLoweringInfo : public MirMethodInfo { }; static_assert(kMethodLoweringInfoBitEnd <= 16, "Too many flags"); static constexpr uint16_t kFlagFastPath = 1u << kBitFastPath; + static constexpr uint16_t kFlagIsIntrinsic = 1u << kBitIsIntrinsic; + static constexpr uint16_t kFlagIsSpecial = 1u << kBitIsSpecial; static constexpr uint16_t kFlagIsReferrersClass = 1u << kBitIsReferrersClass; static constexpr uint16_t kFlagClassIsInitialized = 1u << kBitClassIsInitialized; static constexpr uint16_t kFlagQuickened = 1u << kBitQuickened; diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc index c85c3b6f21..9d7b4b4dfd 100644 --- a/compiler/dex/mir_optimization.cc +++ b/compiler/dex/mir_optimization.cc @@ -318,9 +318,11 @@ CompilerTemp* MIRGraph::GetNewCompilerTemp(CompilerTempType ct_type, bool wide) // Since VR temps cannot be requested once the BE temps are requested, we // allow reservation of VR temps as well for BE. We size_t available_temps = reserved_temps_for_backend_ + GetNumAvailableVRTemps(); - if (available_temps <= 0 || (available_temps <= 1 && wide)) { + size_t needed_temps = wide ? 2u : 1u; + if (available_temps < needed_temps) { if (verbose) { - LOG(INFO) << "CompilerTemps: Not enough temp(s) of type " << ct_type_str << " are available."; + LOG(INFO) << "CompilerTemps: Not enough temp(s) of type " << ct_type_str + << " are available."; } return nullptr; } @@ -328,12 +330,8 @@ CompilerTemp* MIRGraph::GetNewCompilerTemp(CompilerTempType ct_type, bool wide) // Update the remaining reserved temps since we have now used them. // Note that the code below is actually subtracting to remove them from reserve // once they have been claimed. It is careful to not go below zero. - if (reserved_temps_for_backend_ >= 1) { - reserved_temps_for_backend_--; - } - if (wide && reserved_temps_for_backend_ >= 1) { - reserved_temps_for_backend_--; - } + reserved_temps_for_backend_ = + std::max(reserved_temps_for_backend_, needed_temps) - needed_temps; // The new non-special compiler temp must receive a unique v_reg. compiler_temp->v_reg = GetFirstNonSpecialTempVR() + num_non_special_compiler_temps_; @@ -407,6 +405,36 @@ CompilerTemp* MIRGraph::GetNewCompilerTemp(CompilerTempType ct_type, bool wide) return compiler_temp; } +void MIRGraph::RemoveLastCompilerTemp(CompilerTempType ct_type, bool wide, CompilerTemp* temp) { + // Once the compiler temps have been committed, it's too late for any modifications. + DCHECK_EQ(compiler_temps_committed_, false); + + size_t used_temps = wide ? 2u : 1u; + + if (ct_type == kCompilerTempBackend) { + DCHECK(requested_backend_temp_); + + // Make the temps available to backend again. + reserved_temps_for_backend_ += used_temps; + } else if (ct_type == kCompilerTempVR) { + DCHECK(!requested_backend_temp_); + } else { + UNIMPLEMENTED(FATAL) << "No handling for compiler temp type " << static_cast<int>(ct_type); + } + + // Reduce the number of non-special compiler temps. + DCHECK_LE(used_temps, num_non_special_compiler_temps_); + num_non_special_compiler_temps_ -= used_temps; + + // Check that this was really the last temp. 
+ DCHECK_EQ(static_cast<size_t>(temp->v_reg), + GetFirstNonSpecialTempVR() + num_non_special_compiler_temps_); + + if (cu_->verbose) { + LOG(INFO) << "Last temporary has been removed."; + } +} + static bool EvaluateBranch(Instruction::Code opcode, int32_t src1, int32_t src2) { bool is_taken; switch (opcode) { @@ -1489,7 +1517,7 @@ void MIRGraph::InlineSpecialMethods(BasicBlock* bb) { continue; } const MirMethodLoweringInfo& method_info = GetMethodLoweringInfo(mir); - if (!method_info.FastPath()) { + if (!method_info.FastPath() || !method_info.IsSpecial()) { continue; } @@ -1631,10 +1659,6 @@ bool MIRGraph::EliminateSuspendChecksGate() { !HasInvokes()) { // No invokes to actually eliminate any suspend checks. return false; } - if (cu_->compiler_driver != nullptr && cu_->compiler_driver->GetMethodInlinerMap() != nullptr) { - temp_.sce.inliner = - cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(cu_->dex_file); - } suspend_checks_in_loops_ = arena_->AllocArray<uint32_t>(GetNumBlocks(), kArenaAllocMisc); return true; } @@ -1652,9 +1676,9 @@ bool MIRGraph::EliminateSuspendChecks(BasicBlock* bb) { uint32_t suspend_checks_in_loops = (1u << bb->nesting_depth) - 1u; // Start with all loop heads. bool found_invoke = false; for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) { - if (IsInstructionInvoke(mir->dalvikInsn.opcode) && - (temp_.sce.inliner == nullptr || - !temp_.sce.inliner->IsIntrinsic(mir->dalvikInsn.vB, nullptr))) { + if ((IsInstructionInvoke(mir->dalvikInsn.opcode) || + IsInstructionQuickInvoke(mir->dalvikInsn.opcode)) && + !GetMethodLoweringInfo(mir).IsIntrinsic()) { // Non-intrinsic invoke, rely on a suspend point in the invoked method. found_invoke = true; break; @@ -1717,10 +1741,6 @@ bool MIRGraph::EliminateSuspendChecks(BasicBlock* bb) { return true; } -void MIRGraph::EliminateSuspendChecksEnd() { - temp_.sce.inliner = nullptr; -} - bool MIRGraph::CanThrow(MIR* mir) const { if ((mir->dalvikInsn.FlagsOf() & Instruction::kThrow) == 0) { return false; diff --git a/compiler/dex/mir_optimization_test.cc b/compiler/dex/mir_optimization_test.cc index 9ce5ebbc1b..10a4337cf5 100644 --- a/compiler/dex/mir_optimization_test.cc +++ b/compiler/dex/mir_optimization_test.cc @@ -474,7 +474,6 @@ class SuspendCheckEliminationTest : public MirOptimizationTest { for (BasicBlock* bb = iterator.Next(change); bb != nullptr; bb = iterator.Next(change)) { change = cu_.mir_graph->EliminateSuspendChecks(bb); } - cu_.mir_graph->EliminateSuspendChecksEnd(); } SuspendCheckEliminationTest() diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc index e6158c3200..3d18af6169 100644 --- a/compiler/dex/quick/arm/call_arm.cc +++ b/compiler/dex/quick/arm/call_arm.cc @@ -29,6 +29,7 @@ #include "mirror/object_array-inl.h" #include "entrypoints/quick/quick_entrypoints.h" #include "utils.h" +#include "utils/dex_cache_arrays_layout-inl.h" namespace art { @@ -354,7 +355,16 @@ void ArmMir2Lir::UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) { FreeTemp(reg_card_no); } +static dwarf::Reg DwarfCoreReg(int num) { + return dwarf::Reg::ArmCore(num); +} + +static dwarf::Reg DwarfFpReg(int num) { + return dwarf::Reg::ArmFp(num); +} + void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { + DCHECK_EQ(cfi_.GetCurrentCFAOffset(), 0); // empty stack. int spill_count = num_core_spills_ + num_fp_spills_; /* * On entry, r0, r1, r2 & r3 are live. 
Let the register allocation @@ -402,28 +412,32 @@ void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { } } /* Spill core callee saves */ - if (core_spill_mask_ == 0u) { - // Nothing to spill. - } else if ((core_spill_mask_ & ~(0xffu | (1u << rs_rARM_LR.GetRegNum()))) == 0u) { - // Spilling only low regs and/or LR, use 16-bit PUSH. - constexpr int lr_bit_shift = rs_rARM_LR.GetRegNum() - 8; - NewLIR1(kThumbPush, - (core_spill_mask_ & ~(1u << rs_rARM_LR.GetRegNum())) | - ((core_spill_mask_ & (1u << rs_rARM_LR.GetRegNum())) >> lr_bit_shift)); - } else if (IsPowerOfTwo(core_spill_mask_)) { - // kThumb2Push cannot be used to spill a single register. - NewLIR1(kThumb2Push1, CTZ(core_spill_mask_)); - } else { - NewLIR1(kThumb2Push, core_spill_mask_); + if (core_spill_mask_ != 0u) { + if ((core_spill_mask_ & ~(0xffu | (1u << rs_rARM_LR.GetRegNum()))) == 0u) { + // Spilling only low regs and/or LR, use 16-bit PUSH. + constexpr int lr_bit_shift = rs_rARM_LR.GetRegNum() - 8; + NewLIR1(kThumbPush, + (core_spill_mask_ & ~(1u << rs_rARM_LR.GetRegNum())) | + ((core_spill_mask_ & (1u << rs_rARM_LR.GetRegNum())) >> lr_bit_shift)); + } else if (IsPowerOfTwo(core_spill_mask_)) { + // kThumb2Push cannot be used to spill a single register. + NewLIR1(kThumb2Push1, CTZ(core_spill_mask_)); + } else { + NewLIR1(kThumb2Push, core_spill_mask_); + } + cfi_.AdjustCFAOffset(num_core_spills_ * kArmPointerSize); + cfi_.RelOffsetForMany(DwarfCoreReg(0), 0, core_spill_mask_, kArmPointerSize); } /* Need to spill any FP regs? */ - if (num_fp_spills_) { + if (num_fp_spills_ != 0u) { /* * NOTE: fp spills are a little different from core spills in that * they are pushed as a contiguous block. When promoting from * the fp set, we must allocate all singles from s16..highest-promoted */ NewLIR1(kThumb2VPushCS, num_fp_spills_); + cfi_.AdjustCFAOffset(num_fp_spills_ * kArmPointerSize); + cfi_.RelOffsetForMany(DwarfFpReg(0), 0, fp_spill_mask_, kArmPointerSize); } const int spill_size = spill_count * 4; @@ -444,12 +458,14 @@ void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { m2l_->LoadWordDisp(rs_rARM_SP, sp_displace_ - 4, rs_rARM_LR); } m2l_->OpRegImm(kOpAdd, rs_rARM_SP, sp_displace_); + m2l_->cfi().AdjustCFAOffset(-sp_displace_); m2l_->ClobberCallerSave(); ThreadOffset<4> func_offset = QUICK_ENTRYPOINT_OFFSET(4, pThrowStackOverflow); // Load the entrypoint directly into the pc instead of doing a load + branch. Assumes // codegen and target are in thumb2 mode. // NOTE: native pointer. m2l_->LoadWordDisp(rs_rARM_SELF, func_offset.Int32Value(), rs_rARM_PC); + m2l_->cfi().AdjustCFAOffset(sp_displace_); } private: @@ -464,6 +480,7 @@ void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { // Need to restore LR since we used it as a temp. 
AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, true, spill_size)); OpRegCopy(rs_rARM_SP, rs_rARM_LR); // Establish stack + cfi_.AdjustCFAOffset(frame_size_without_spills); } else { /* * If the frame is small enough we are guaranteed to have enough space that remains to @@ -474,6 +491,7 @@ void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { MarkTemp(rs_rARM_LR); FreeTemp(rs_rARM_LR); OpRegRegImm(kOpSub, rs_rARM_SP, rs_rARM_SP, frame_size_without_spills); + cfi_.AdjustCFAOffset(frame_size_without_spills); Clobber(rs_rARM_LR); UnmarkTemp(rs_rARM_LR); LIR* branch = OpCmpBranch(kCondUlt, rs_rARM_SP, rs_r12, nullptr); @@ -483,13 +501,23 @@ void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { // Implicit stack overflow check has already been done. Just make room on the // stack for the frame now. OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills); + cfi_.AdjustCFAOffset(frame_size_without_spills); } } else { OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills); + cfi_.AdjustCFAOffset(frame_size_without_spills); } FlushIns(ArgLocs, rl_method); + // We can promote a PC-relative reference to dex cache arrays to a register + // if it's used at least twice. Without investigating where we should lazily + // load the reference, we conveniently load it after flushing inputs. + if (dex_cache_arrays_base_reg_.Valid()) { + OpPcRelDexCacheArrayAddr(cu_->dex_file, dex_cache_arrays_min_offset_, + dex_cache_arrays_base_reg_); + } + FreeTemp(rs_r0); FreeTemp(rs_r1); FreeTemp(rs_r2); @@ -498,7 +526,9 @@ void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { } void ArmMir2Lir::GenExitSequence() { + cfi_.RememberState(); int spill_count = num_core_spills_ + num_fp_spills_; + /* * In the exit path, r0/r1 are live - make sure they aren't * allocated by the register utilities as temps. @@ -506,34 +536,47 @@ void ArmMir2Lir::GenExitSequence() { LockTemp(rs_r0); LockTemp(rs_r1); - OpRegImm(kOpAdd, rs_rARM_SP, frame_size_ - (spill_count * 4)); + int adjust = frame_size_ - (spill_count * kArmPointerSize); + OpRegImm(kOpAdd, rs_rARM_SP, adjust); + cfi_.AdjustCFAOffset(-adjust); /* Need to restore any FP callee saves? */ if (num_fp_spills_) { NewLIR1(kThumb2VPopCS, num_fp_spills_); + cfi_.AdjustCFAOffset(-num_fp_spills_ * kArmPointerSize); + cfi_.RestoreMany(DwarfFpReg(0), fp_spill_mask_); } - if ((core_spill_mask_ & (1 << rs_rARM_LR.GetRegNum())) != 0) { - /* Unspill rARM_LR to rARM_PC */ + bool unspill_LR_to_PC = (core_spill_mask_ & (1 << rs_rARM_LR.GetRegNum())) != 0; + if (unspill_LR_to_PC) { core_spill_mask_ &= ~(1 << rs_rARM_LR.GetRegNum()); core_spill_mask_ |= (1 << rs_rARM_PC.GetRegNum()); } - if (core_spill_mask_ == 0u) { - // Nothing to unspill. - } else if ((core_spill_mask_ & ~(0xffu | (1u << rs_rARM_PC.GetRegNum()))) == 0u) { - // Unspilling only low regs and/or PC, use 16-bit POP. - constexpr int pc_bit_shift = rs_rARM_PC.GetRegNum() - 8; - NewLIR1(kThumbPop, - (core_spill_mask_ & ~(1u << rs_rARM_PC.GetRegNum())) | - ((core_spill_mask_ & (1u << rs_rARM_PC.GetRegNum())) >> pc_bit_shift)); - } else if (IsPowerOfTwo(core_spill_mask_)) { - // kThumb2Pop cannot be used to unspill a single register. - NewLIR1(kThumb2Pop1, CTZ(core_spill_mask_)); - } else { - NewLIR1(kThumb2Pop, core_spill_mask_); + if (core_spill_mask_ != 0u) { + if ((core_spill_mask_ & ~(0xffu | (1u << rs_rARM_PC.GetRegNum()))) == 0u) { + // Unspilling only low regs and/or PC, use 16-bit POP. 
+ constexpr int pc_bit_shift = rs_rARM_PC.GetRegNum() - 8; + NewLIR1(kThumbPop, + (core_spill_mask_ & ~(1u << rs_rARM_PC.GetRegNum())) | + ((core_spill_mask_ & (1u << rs_rARM_PC.GetRegNum())) >> pc_bit_shift)); + } else if (IsPowerOfTwo(core_spill_mask_)) { + // kThumb2Pop cannot be used to unspill a single register. + NewLIR1(kThumb2Pop1, CTZ(core_spill_mask_)); + } else { + NewLIR1(kThumb2Pop, core_spill_mask_); + } + // If we pop to PC, there is no further epilogue code. + if (!unspill_LR_to_PC) { + cfi_.AdjustCFAOffset(-num_core_spills_ * kArmPointerSize); + cfi_.RestoreMany(DwarfCoreReg(0), core_spill_mask_); + DCHECK_EQ(cfi_.GetCurrentCFAOffset(), 0); // empty stack. + } } - if ((core_spill_mask_ & (1 << rs_rARM_PC.GetRegNum())) == 0) { + if (!unspill_LR_to_PC) { /* We didn't pop to rARM_PC, so must do a bx rARM_LR */ NewLIR1(kThumbBx, rs_rARM_LR.GetReg()); } + // The CFI should be restored for any code that follows the exit block. + cfi_.RestoreState(); + cfi_.DefCFAOffset(frame_size_); } void ArmMir2Lir::GenSpecialExitSequence() { @@ -555,11 +598,16 @@ void ArmMir2Lir::GenSpecialEntryForSuspend() { NewLIR1(kThumbPush, (1u << rs_r0.GetRegNum()) | // ArtMethod* (core_spill_mask_ & ~(1u << rs_rARM_LR.GetRegNum())) | // Spills other than LR. (1u << 8)); // LR encoded for 16-bit push. + cfi_.AdjustCFAOffset(frame_size_); + // Do not generate CFI for scratch register r0. + cfi_.RelOffsetForMany(DwarfCoreReg(0), 4, core_spill_mask_, kArmPointerSize); } void ArmMir2Lir::GenSpecialExitForSuspend() { // Pop the frame. (ArtMethod* no longer needed but restore it anyway.) NewLIR1(kThumb2Pop, (1u << rs_r0.GetRegNum()) | core_spill_mask_); // 32-bit because of LR. + cfi_.AdjustCFAOffset(-frame_size_); + cfi_.RestoreMany(DwarfCoreReg(0), core_spill_mask_); } static bool ArmUseRelativeCall(CompilationUnit* cu, const MethodReference& target_method) { @@ -571,12 +619,12 @@ static bool ArmUseRelativeCall(CompilationUnit* cu, const MethodReference& targe * Bit of a hack here - in the absence of a real scheduling pass, * emit the next instruction in static & direct invoke sequences. */ -static int ArmNextSDCallInsn(CompilationUnit* cu, CallInfo* info ATTRIBUTE_UNUSED, - int state, const MethodReference& target_method, - uint32_t unused_idx ATTRIBUTE_UNUSED, - uintptr_t direct_code, uintptr_t direct_method, - InvokeType type) { - Mir2Lir* cg = static_cast<Mir2Lir*>(cu->cg.get()); +int ArmMir2Lir::ArmNextSDCallInsn(CompilationUnit* cu, CallInfo* info ATTRIBUTE_UNUSED, + int state, const MethodReference& target_method, + uint32_t unused_idx ATTRIBUTE_UNUSED, + uintptr_t direct_code, uintptr_t direct_method, + InvokeType type) { + ArmMir2Lir* cg = static_cast<ArmMir2Lir*>(cu->cg.get()); if (direct_code != 0 && direct_method != 0) { switch (state) { case 0: // Get the current Method* [sets kArg0] @@ -597,17 +645,24 @@ static int ArmNextSDCallInsn(CompilationUnit* cu, CallInfo* info ATTRIBUTE_UNUSE return -1; } } else { + bool use_pc_rel = cg->CanUseOpPcRelDexCacheArrayLoad(); RegStorage arg0_ref = cg->TargetReg(kArg0, kRef); switch (state) { case 0: // Get the current Method* [sets kArg0] // TUNING: we can save a reg copy if Method* has been promoted.
- cg->LoadCurrMethodDirect(arg0_ref); - break; + if (!use_pc_rel) { + cg->LoadCurrMethodDirect(arg0_ref); + break; + } + ++state; + FALLTHROUGH_INTENDED; case 1: // Get method->dex_cache_resolved_methods_ - cg->LoadRefDisp(arg0_ref, - mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value(), - arg0_ref, - kNotVolatile); + if (!use_pc_rel) { + cg->LoadRefDisp(arg0_ref, + mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value(), + arg0_ref, + kNotVolatile); + } // Set up direct code if known. if (direct_code != 0) { if (direct_code != static_cast<uintptr_t>(-1)) { @@ -619,14 +674,23 @@ static int ArmNextSDCallInsn(CompilationUnit* cu, CallInfo* info ATTRIBUTE_UNUSE cg->LoadCodeAddress(target_method, type, kInvokeTgt); } } - break; + if (!use_pc_rel || direct_code != 0) { + break; + } + ++state; + FALLTHROUGH_INTENDED; case 2: // Grab target method* CHECK_EQ(cu->dex_file, target_method.dex_file); - cg->LoadRefDisp(arg0_ref, - mirror::ObjectArray<mirror::Object>::OffsetOfElement( - target_method.dex_method_index).Int32Value(), - arg0_ref, - kNotVolatile); + if (!use_pc_rel) { + cg->LoadRefDisp(arg0_ref, + mirror::ObjectArray<mirror::Object>::OffsetOfElement( + target_method.dex_method_index).Int32Value(), + arg0_ref, + kNotVolatile); + } else { + size_t offset = cg->dex_cache_arrays_layout_.MethodOffset(target_method.dex_method_index); + cg->OpPcRelDexCacheArrayLoad(cu->dex_file, offset, arg0_ref); + } break; case 3: // Grab the code from the method* if (direct_code == 0) { diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h index 4141bcfe98..83b27df939 100644 --- a/compiler/dex/quick/arm/codegen_arm.h +++ b/compiler/dex/quick/arm/codegen_arm.h @@ -82,6 +82,9 @@ class ArmMir2Lir FINAL : public Mir2Lir { /// @copydoc Mir2Lir::UnconditionallyMarkGCCard(RegStorage) void UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) OVERRIDE; + bool CanUseOpPcRelDexCacheArrayLoad() const OVERRIDE; + void OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset, RegStorage r_dest) OVERRIDE; + // Required for target - register utilities. RegStorage TargetReg(SpecialTargetRegister reg) OVERRIDE; RegStorage TargetReg(SpecialTargetRegister reg, WideKind wide_kind) OVERRIDE { @@ -257,6 +260,9 @@ class ArmMir2Lir FINAL : public Mir2Lir { */ LIR* GenCallInsn(const MirMethodLoweringInfo& method_info) OVERRIDE; + void CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs) OVERRIDE; + void DoPromotion() OVERRIDE; + /* * @brief Handle ARM specific literals. */ @@ -300,6 +306,13 @@ class ArmMir2Lir FINAL : public Mir2Lir { ArenaVector<LIR*> call_method_insns_; + // Instructions needing patching with PC relative code addresses. + ArenaVector<LIR*> dex_cache_access_insns_; + + // Register with a reference to the dex cache arrays at dex_cache_arrays_min_offset_, + // if promoted. + RegStorage dex_cache_arrays_base_reg_; + /** * @brief Given float register pair, returns Solo64 float register. * @param reg #RegStorage containing a float register pair (e.g. @c s2 and @c s3). 
@@ -329,6 +342,14 @@ class ArmMir2Lir FINAL : public Mir2Lir { } int GenDalvikArgsBulkCopy(CallInfo* info, int first, int count) OVERRIDE; + + static int ArmNextSDCallInsn(CompilationUnit* cu, CallInfo* info ATTRIBUTE_UNUSED, + int state, const MethodReference& target_method, + uint32_t unused_idx ATTRIBUTE_UNUSED, + uintptr_t direct_code, uintptr_t direct_method, + InvokeType type); + + void OpPcRelDexCacheArrayAddr(const DexFile* dex_file, int offset, RegStorage r_dest); }; } // namespace art diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc index 9193e1b23c..47669db979 100644 --- a/compiler/dex/quick/arm/int_arm.cc +++ b/compiler/dex/quick/arm/int_arm.cc @@ -1087,6 +1087,36 @@ void ArmMir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) { lir->target = target; } +bool ArmMir2Lir::CanUseOpPcRelDexCacheArrayLoad() const { + return dex_cache_arrays_layout_.Valid(); +} + +void ArmMir2Lir::OpPcRelDexCacheArrayAddr(const DexFile* dex_file, int offset, RegStorage r_dest) { + LIR* movw = NewLIR2(kThumb2MovImm16, r_dest.GetReg(), 0); + LIR* movt = NewLIR2(kThumb2MovImm16H, r_dest.GetReg(), 0); + ArmOpcode add_pc_opcode = (r_dest.GetRegNum() < 8) ? kThumbAddRRLH : kThumbAddRRHH; + LIR* add_pc = NewLIR2(add_pc_opcode, r_dest.GetReg(), rs_rARM_PC.GetReg()); + add_pc->flags.fixup = kFixupLabel; + movw->operands[2] = WrapPointer(dex_file); + movw->operands[3] = offset; + movw->operands[4] = WrapPointer(add_pc); + movt->operands[2] = movw->operands[2]; + movt->operands[3] = movw->operands[3]; + movt->operands[4] = movw->operands[4]; + dex_cache_access_insns_.push_back(movw); + dex_cache_access_insns_.push_back(movt); +} + +void ArmMir2Lir::OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset, RegStorage r_dest) { + if (dex_cache_arrays_base_reg_.Valid()) { + LoadRefDisp(dex_cache_arrays_base_reg_, offset - dex_cache_arrays_min_offset_, + r_dest, kNotVolatile); + } else { + OpPcRelDexCacheArrayAddr(dex_file, offset, r_dest); + LoadRefDisp(r_dest, 0, r_dest, kNotVolatile); + } +} + LIR* ArmMir2Lir::OpVldm(RegStorage r_base, int count) { return NewLIR3(kThumb2Vldms, r_base.GetReg(), rs_fr0.GetReg(), count); } diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc index 9812d9ff99..5f27338e6b 100644 --- a/compiler/dex/quick/arm/target_arm.cc +++ b/compiler/dex/quick/arm/target_arm.cc @@ -575,7 +575,9 @@ RegisterClass ArmMir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volatil ArmMir2Lir::ArmMir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena) : Mir2Lir(cu, mir_graph, arena), - call_method_insns_(arena->Adapter()) { + call_method_insns_(arena->Adapter()), + dex_cache_access_insns_(arena->Adapter()), + dex_cache_arrays_base_reg_(RegStorage::InvalidReg()) { call_method_insns_.reserve(100); // Sanity check - make sure encoding map lines up. for (int i = 0; i < kArmLast; i++) { @@ -901,14 +903,28 @@ RegStorage ArmMir2Lir::AllocPreservedSingle(int s_reg) { } void ArmMir2Lir::InstallLiteralPools() { + patches_.reserve(call_method_insns_.size() + dex_cache_access_insns_.size()); + // PC-relative calls to methods. 
- patches_.reserve(call_method_insns_.size()); for (LIR* p : call_method_insns_) { - DCHECK_EQ(p->opcode, kThumb2Bl); - uint32_t target_method_idx = p->operands[1]; - const DexFile* target_dex_file = UnwrapPointer<DexFile>(p->operands[2]); - patches_.push_back(LinkerPatch::RelativeCodePatch(p->offset, - target_dex_file, target_method_idx)); + DCHECK_EQ(p->opcode, kThumb2Bl); + uint32_t target_method_idx = p->operands[1]; + const DexFile* target_dex_file = UnwrapPointer<DexFile>(p->operands[2]); + patches_.push_back(LinkerPatch::RelativeCodePatch(p->offset, + target_dex_file, target_method_idx)); + } + + // PC-relative dex cache array accesses. + for (LIR* p : dex_cache_access_insns_) { + DCHECK(p->opcode == kThumb2MovImm16 || p->opcode == kThumb2MovImm16H); + const LIR* add_pc = UnwrapPointer<LIR>(p->operands[4]); + DCHECK(add_pc->opcode == kThumbAddRRLH || add_pc->opcode == kThumbAddRRHH); + const DexFile* dex_file = UnwrapPointer<DexFile>(p->operands[2]); + uint32_t offset = p->operands[3]; + DCHECK(!p->flags.is_nop); + DCHECK(!add_pc->flags.is_nop); + patches_.push_back(LinkerPatch::DexCacheArrayPatch(p->offset, + dex_file, add_pc->offset, offset)); } // And do the normal processing. diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc index e4bd2a33ae..c3371cf329 100644 --- a/compiler/dex/quick/arm/utility_arm.cc +++ b/compiler/dex/quick/arm/utility_arm.cc @@ -19,6 +19,7 @@ #include "arch/arm/instruction_set_features_arm.h" #include "arm_lir.h" #include "base/logging.h" +#include "dex/mir_graph.h" #include "dex/quick/mir_to_lir-inl.h" #include "dex/reg_storage_eq.h" #include "driver/compiler_driver.h" @@ -1266,4 +1267,38 @@ size_t ArmMir2Lir::GetInstructionOffset(LIR* lir) { return offset; } +void ArmMir2Lir::CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs) { + // Start with the default counts. + Mir2Lir::CountRefs(core_counts, fp_counts, num_regs); + + if (pc_rel_temp_ != nullptr) { + // Now, if the dex cache array base temp is used only once outside any loops (weight = 1), + // avoid the promotion, otherwise boost the weight by factor 4 because the full PC-relative + // load sequence is 4 instructions long. + int p_map_idx = SRegToPMap(pc_rel_temp_->s_reg_low); + if (core_counts[p_map_idx].count == 1) { + core_counts[p_map_idx].count = 0; + } else { + core_counts[p_map_idx].count *= 4; + } + } +} + +void ArmMir2Lir::DoPromotion() { + if (CanUseOpPcRelDexCacheArrayLoad()) { + pc_rel_temp_ = mir_graph_->GetNewCompilerTemp(kCompilerTempBackend, false); + } + + Mir2Lir::DoPromotion(); + + if (pc_rel_temp_ != nullptr) { + // Now, if the dex cache array base temp is promoted, remember the register but + // always remove the temp's stack location to avoid unnecessarily bloating the stack.
+ dex_cache_arrays_base_reg_ = mir_graph_->reg_location_[pc_rel_temp_->s_reg_low].reg; + DCHECK(!dex_cache_arrays_base_reg_.Valid() || !dex_cache_arrays_base_reg_.IsFloat()); + mir_graph_->RemoveLastCompilerTemp(kCompilerTempBackend, false, pc_rel_temp_); + pc_rel_temp_ = nullptr; + } +} + } // namespace art diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc index 6b47bba884..4abbd77d88 100644 --- a/compiler/dex/quick/arm64/call_arm64.cc +++ b/compiler/dex/quick/arm64/call_arm64.cc @@ -282,7 +282,13 @@ void Arm64Mir2Lir::UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) { FreeTemp(reg_card_no); } +static dwarf::Reg DwarfCoreReg(int num) { + return dwarf::Reg::Arm64Core(num); +} + void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { + DCHECK_EQ(cfi_.GetCurrentCFAOffset(), 0); // empty stack. + /* * On entry, x0 to x7 are live. Let the register allocation * mechanism know so it doesn't try to use any of them when @@ -345,6 +351,7 @@ void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) if (spilled_already != frame_size_) { OpRegImm(kOpSub, rs_sp, frame_size_without_spills); + cfi_.AdjustCFAOffset(frame_size_without_spills); } if (!skip_overflow_check) { @@ -361,12 +368,14 @@ void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) GenerateTargetLabel(kPseudoThrowTarget); // Unwinds stack. m2l_->OpRegImm(kOpAdd, rs_sp, sp_displace_); + m2l_->cfi().AdjustCFAOffset(-sp_displace_); m2l_->ClobberCallerSave(); ThreadOffset<8> func_offset = QUICK_ENTRYPOINT_OFFSET(8, pThrowStackOverflow); m2l_->LockTemp(rs_xIP0); m2l_->LoadWordDisp(rs_xSELF, func_offset.Int32Value(), rs_xIP0); m2l_->NewLIR1(kA64Br1x, rs_xIP0.GetReg()); m2l_->FreeTemp(rs_xIP0); + m2l_->cfi().AdjustCFAOffset(sp_displace_); } private: @@ -393,6 +402,7 @@ void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) } void Arm64Mir2Lir::GenExitSequence() { + cfi_.RememberState(); /* * In the exit path, r0/r1 are live - make sure they aren't * allocated by the register utilities as temps. @@ -403,6 +413,9 @@ void Arm64Mir2Lir::GenExitSequence() { // Finally return. NewLIR0(kA64Ret); + // The CFI should be restored for any code that follows the exit block. + cfi_.RestoreState(); + cfi_.DefCFAOffset(frame_size_); } void Arm64Mir2Lir::GenSpecialExitSequence() { @@ -419,11 +432,16 @@ void Arm64Mir2Lir::GenSpecialEntryForSuspend() { core_vmap_table_.clear(); fp_vmap_table_.clear(); NewLIR4(WIDE(kA64StpPre4rrXD), rs_x0.GetReg(), rs_xLR.GetReg(), rs_sp.GetReg(), -frame_size_ / 8); + cfi_.AdjustCFAOffset(frame_size_); + // Do not generate CFI for scratch register x0. + cfi_.RelOffset(DwarfCoreReg(rxLR), 8); } void Arm64Mir2Lir::GenSpecialExitForSuspend() { // Pop the frame. (ArtMethod* no longer needed but restore it anyway.) 
NewLIR4(WIDE(kA64LdpPost4rrXD), rs_x0.GetReg(), rs_xLR.GetReg(), rs_sp.GetReg(), frame_size_ / 8); + cfi_.AdjustCFAOffset(-frame_size_); + cfi_.Restore(DwarfCoreReg(rxLR)); } static bool Arm64UseRelativeCall(CompilationUnit* cu, const MethodReference& target_method) { diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc index a9d9f3d463..20f61f2261 100644 --- a/compiler/dex/quick/arm64/int_arm64.cc +++ b/compiler/dex/quick/arm64/int_arm64.cc @@ -1458,6 +1458,14 @@ static uint32_t GenPairWise(uint32_t reg_mask, int* reg1, int* reg2) { return reg_mask; } +static dwarf::Reg DwarfCoreReg(int num) { + return dwarf::Reg::Arm64Core(num); +} + +static dwarf::Reg DwarfFpReg(int num) { + return dwarf::Reg::Arm64Fp(num); +} + static void SpillCoreRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32_t reg_mask) { int reg1 = -1, reg2 = -1; const int reg_log2_size = 3; @@ -1466,9 +1474,12 @@ static void SpillCoreRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32 reg_mask = GenPairWise(reg_mask, & reg1, & reg2); if (UNLIKELY(reg2 < 0)) { m2l->NewLIR3(WIDE(kA64Str3rXD), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset); + m2l->cfi().RelOffset(DwarfCoreReg(reg1), offset << reg_log2_size); } else { m2l->NewLIR4(WIDE(kA64Stp4rrXD), RegStorage::Solo64(reg2).GetReg(), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset); + m2l->cfi().RelOffset(DwarfCoreReg(reg2), offset << reg_log2_size); + m2l->cfi().RelOffset(DwarfCoreReg(reg1), (offset + 1) << reg_log2_size); } } } @@ -1483,9 +1494,12 @@ static void SpillFPRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32_t if (UNLIKELY(reg2 < 0)) { m2l->NewLIR3(WIDE(kA64Str3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset); + m2l->cfi().RelOffset(DwarfFpReg(reg1), offset << reg_log2_size); } else { m2l->NewLIR4(WIDE(kA64Stp4ffXD), RegStorage::FloatSolo64(reg2).GetReg(), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset); + m2l->cfi().RelOffset(DwarfFpReg(reg2), offset << reg_log2_size); + m2l->cfi().RelOffset(DwarfFpReg(reg1), (offset + 1) << reg_log2_size); } } } @@ -1493,6 +1507,7 @@ static void SpillFPRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32_t static int SpillRegsPreSub(Arm64Mir2Lir* m2l, uint32_t core_reg_mask, uint32_t fp_reg_mask, int frame_size) { m2l->OpRegRegImm(kOpSub, rs_sp, rs_sp, frame_size); + m2l->cfi().AdjustCFAOffset(frame_size); int core_count = POPCOUNT(core_reg_mask); @@ -1552,11 +1567,15 @@ static int SpillRegsPreIndexed(Arm64Mir2Lir* m2l, RegStorage base, uint32_t core RegStorage::FloatSolo64(reg1).GetReg(), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), -all_offset); + m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize); + m2l->cfi().RelOffset(DwarfFpReg(reg1), kArm64PointerSize); } else { m2l->NewLIR4(WIDE(kA64StpPre4ffXD), RegStorage::FloatSolo64(reg1).GetReg(), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), -all_offset); + m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize); + m2l->cfi().RelOffset(DwarfFpReg(reg1), 0); cur_offset = 0; // That core reg needs to go into the upper half. 
} } else { @@ -1564,10 +1583,15 @@ static int SpillRegsPreIndexed(Arm64Mir2Lir* m2l, RegStorage base, uint32_t core fp_reg_mask = GenPairWise(fp_reg_mask, &reg1, &reg2); m2l->NewLIR4(WIDE(kA64StpPre4ffXD), RegStorage::FloatSolo64(reg2).GetReg(), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), -all_offset); + m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize); + m2l->cfi().RelOffset(DwarfFpReg(reg2), 0); + m2l->cfi().RelOffset(DwarfFpReg(reg1), kArm64PointerSize); } else { fp_reg_mask = ExtractReg(fp_reg_mask, &reg1); m2l->NewLIR4(WIDE(kA64StpPre4ffXD), rs_d0.GetReg(), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), -all_offset); + m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize); + m2l->cfi().RelOffset(DwarfFpReg(reg1), kArm64PointerSize); } } } else { @@ -1580,12 +1604,19 @@ static int SpillRegsPreIndexed(Arm64Mir2Lir* m2l, RegStorage base, uint32_t core core_reg_mask = ExtractReg(core_reg_mask, &reg1); m2l->NewLIR4(WIDE(kA64StpPre4rrXD), rs_xzr.GetReg(), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), -all_offset); + m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize); + m2l->cfi().RelOffset(DwarfCoreReg(reg1), kArm64PointerSize); } else { core_reg_mask = GenPairWise(core_reg_mask, &reg1, &reg2); m2l->NewLIR4(WIDE(kA64StpPre4rrXD), RegStorage::Solo64(reg2).GetReg(), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), -all_offset); + m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize); + m2l->cfi().RelOffset(DwarfCoreReg(reg2), 0); + m2l->cfi().RelOffset(DwarfCoreReg(reg1), kArm64PointerSize); } } + DCHECK_EQ(m2l->cfi().GetCurrentCFAOffset(), + static_cast<int>(all_offset * kArm64PointerSize)); if (fp_count != 0) { for (; fp_reg_mask != 0;) { fp_reg_mask = GenPairWise(fp_reg_mask, &reg1, &reg2); if (UNLIKELY(reg2 < 0)) { m2l->NewLIR3(WIDE(kA64Str3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), cur_offset); + m2l->cfi().RelOffset(DwarfFpReg(reg1), cur_offset * kArm64PointerSize); // Do not increment offset here, as the second half will be filled by a core reg. } else { m2l->NewLIR4(WIDE(kA64Stp4ffXD), RegStorage::FloatSolo64(reg2).GetReg(), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), cur_offset); + m2l->cfi().RelOffset(DwarfFpReg(reg2), cur_offset * kArm64PointerSize); + m2l->cfi().RelOffset(DwarfFpReg(reg1), (cur_offset + 1) * kArm64PointerSize); cur_offset += 2; } } @@ -1610,6 +1644,7 @@ static int SpillRegsPreIndexed(Arm64Mir2Lir* m2l, RegStorage base, uint32_t core core_reg_mask = ExtractReg(core_reg_mask, &reg1); m2l->NewLIR3(WIDE(kA64Str3rXD), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), cur_offset + 1); + m2l->cfi().RelOffset(DwarfCoreReg(reg1), (cur_offset + 1) * kArm64PointerSize); cur_offset += 2; // Half-slot filled now.
} } @@ -1620,6 +1655,8 @@ static int SpillRegsPreIndexed(Arm64Mir2Lir* m2l, RegStorage base, uint32_t core core_reg_mask = GenPairWise(core_reg_mask, &reg1, &reg2); m2l->NewLIR4(WIDE(kA64Stp4rrXD), RegStorage::Solo64(reg2).GetReg(), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), cur_offset); + m2l->cfi().RelOffset(DwarfCoreReg(reg2), cur_offset * kArm64PointerSize); + m2l->cfi().RelOffset(DwarfCoreReg(reg1), (cur_offset + 1) * kArm64PointerSize); } DCHECK_EQ(cur_offset, all_offset); @@ -1650,10 +1687,13 @@ static void UnSpillCoreRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint reg_mask = GenPairWise(reg_mask, & reg1, & reg2); if (UNLIKELY(reg2 < 0)) { m2l->NewLIR3(WIDE(kA64Ldr3rXD), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset); + m2l->cfi().Restore(DwarfCoreReg(reg1)); } else { DCHECK_LE(offset, 63); m2l->NewLIR4(WIDE(kA64Ldp4rrXD), RegStorage::Solo64(reg2).GetReg(), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset); + m2l->cfi().Restore(DwarfCoreReg(reg2)); + m2l->cfi().Restore(DwarfCoreReg(reg1)); } } } @@ -1667,9 +1707,12 @@ static void UnSpillFPRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32 if (UNLIKELY(reg2 < 0)) { m2l->NewLIR3(WIDE(kA64Ldr3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset); + m2l->cfi().Restore(DwarfFpReg(reg1)); } else { m2l->NewLIR4(WIDE(kA64Ldp4ffXD), RegStorage::FloatSolo64(reg2).GetReg(), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset); + m2l->cfi().Restore(DwarfFpReg(reg2)); + m2l->cfi().Restore(DwarfFpReg(reg1)); } } } @@ -1711,6 +1754,7 @@ void Arm64Mir2Lir::UnspillRegs(RegStorage base, uint32_t core_reg_mask, uint32_t early_drop = RoundDown(early_drop, 16); OpRegImm64(kOpAdd, rs_sp, early_drop); + cfi_.AdjustCFAOffset(-early_drop); } // Unspill. @@ -1724,7 +1768,9 @@ void Arm64Mir2Lir::UnspillRegs(RegStorage base, uint32_t core_reg_mask, uint32_t } // Drop the (rest of) the frame. - OpRegImm64(kOpAdd, rs_sp, frame_size - early_drop); + int adjust = frame_size - early_drop; + OpRegImm64(kOpAdd, rs_sp, adjust); + cfi_.AdjustCFAOffset(-adjust); } bool Arm64Mir2Lir::GenInlinedReverseBits(CallInfo* info, OpSize size) { diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc index f944c11931..ff5f735255 100644 --- a/compiler/dex/quick/codegen_util.cc +++ b/compiler/dex/quick/codegen_util.cc @@ -1070,6 +1070,11 @@ Mir2Lir::Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena mask_cache_(arena), safepoints_(arena->Adapter()), dex_cache_arrays_layout_(cu->compiler_driver->GetDexCacheArraysLayout(cu->dex_file)), + pc_rel_temp_(nullptr), + dex_cache_arrays_min_offset_(std::numeric_limits<uint32_t>::max()), + cfi_(&last_lir_insn_, + cu->compiler_driver->GetCompilerOptions().GetGenerateGDBInformation(), + arena), in_to_reg_storage_mapping_(arena) { switch_tables_.reserve(4); fill_array_data_.reserve(4); @@ -1154,14 +1159,6 @@ CompiledMethod* Mir2Lir::GetCompiledMethod() { return lhs.LiteralOffset() < rhs.LiteralOffset(); }); - std::unique_ptr<std::vector<uint8_t>> cfi_info( - cu_->compiler_driver->GetCompilerOptions().GetGenerateGDBInformation() ?
- ReturnFrameDescriptionEntry() : - nullptr); - ArrayRef<const uint8_t> cfi_ref; - if (cfi_info.get() != nullptr) { - cfi_ref = ArrayRef<const uint8_t>(*cfi_info); - } return CompiledMethod::SwapAllocCompiledMethod( cu_->compiler_driver, cu_->instruction_set, ArrayRef<const uint8_t>(code_buffer_), @@ -1170,7 +1167,7 @@ CompiledMethod* Mir2Lir::GetCompiledMethod() { ArrayRef<const uint8_t>(encoded_mapping_table_), ArrayRef<const uint8_t>(vmap_encoder.GetData()), ArrayRef<const uint8_t>(native_gc_map_), - cfi_ref, + ArrayRef<const uint8_t>(*cfi_.Patch(code_buffer_.size())), ArrayRef<const LinkerPatch>(patches_)); } @@ -1332,11 +1329,6 @@ void Mir2Lir::OpPcRelDexCacheArrayLoad(const DexFile* dex_file ATTRIBUTE_UNUSED, UNREACHABLE(); } -std::vector<uint8_t>* Mir2Lir::ReturnFrameDescriptionEntry() { - // Default case is to do nothing. - return nullptr; -} - RegLocation Mir2Lir::NarrowRegLoc(RegLocation loc) { if (loc.location == kLocPhysReg) { DCHECK(!loc.reg.Is32Bit()); diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc index 8e3f4ef726..4ac6c0c5b5 100644 --- a/compiler/dex/quick/dex_file_method_inliner.cc +++ b/compiler/dex/quick/dex_file_method_inliner.cc @@ -413,6 +413,17 @@ bool DexFileMethodInliner::AnalyseMethodCode(verifier::MethodVerifier* verifier) return success && AddInlineMethod(verifier->GetMethodReference().dex_method_index, method); } +InlineMethodFlags DexFileMethodInliner::IsIntrinsicOrSpecial(uint32_t method_index) { + ReaderMutexLock mu(Thread::Current(), lock_); + auto it = inline_methods_.find(method_index); + if (it != inline_methods_.end()) { + DCHECK_NE(it->second.flags & (kInlineIntrinsic | kInlineSpecial), 0); + return it->second.flags; + } else { + return kNoInlineMethodFlags; + } +} + bool DexFileMethodInliner::IsIntrinsic(uint32_t method_index, InlineMethod* intrinsic) { ReaderMutexLock mu(Thread::Current(), lock_); auto it = inline_methods_.find(method_index); diff --git a/compiler/dex/quick/dex_file_method_inliner.h b/compiler/dex/quick/dex_file_method_inliner.h index cb521da9df..d1e562119c 100644 --- a/compiler/dex/quick/dex_file_method_inliner.h +++ b/compiler/dex/quick/dex_file_method_inliner.h @@ -65,6 +65,11 @@ class DexFileMethodInliner { SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) LOCKS_EXCLUDED(lock_); /** + * Check whether a particular method index corresponds to an intrinsic or special function. + */ + InlineMethodFlags IsIntrinsicOrSpecial(uint32_t method_index) LOCKS_EXCLUDED(lock_); + + /** * Check whether a particular method index corresponds to an intrinsic function. */ bool IsIntrinsic(uint32_t method_index, InlineMethod* intrinsic) LOCKS_EXCLUDED(lock_); diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc index 1813e0939e..b132c4cc54 100644 --- a/compiler/dex/quick/gen_common.cc +++ b/compiler/dex/quick/gen_common.cc @@ -94,6 +94,97 @@ void Mir2Lir::GenIfNullUseHelperImmMethod( r_method, r_result)); } +RegStorage Mir2Lir::GenGetOtherTypeForSgetSput(const MirSFieldLoweringInfo& field_info, + int opt_flags) { + DCHECK_NE(field_info.StorageIndex(), DexFile::kDexNoIndex); + // May do runtime call so everything to home locations. + FlushAllRegs(); + RegStorage r_base = TargetReg(kArg0, kRef); + LockTemp(r_base); + RegStorage r_method = RegStorage::InvalidReg(); // Loaded lazily, maybe in the slow-path. 
+ if (CanUseOpPcRelDexCacheArrayLoad()) { + uint32_t offset = dex_cache_arrays_layout_.TypeOffset(field_info.StorageIndex()); + OpPcRelDexCacheArrayLoad(cu_->dex_file, offset, r_base); + } else { + // Using fixed register to sync with possible call to runtime support. + r_method = LoadCurrMethodWithHint(TargetReg(kArg1, kRef)); + LoadRefDisp(r_method, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), r_base, + kNotVolatile); + int32_t offset_of_field = ObjArray::OffsetOfElement(field_info.StorageIndex()).Int32Value(); + LoadRefDisp(r_base, offset_of_field, r_base, kNotVolatile); + } + // r_base now points at static storage (Class*) or nullptr if the type is not yet resolved. + LIR* unresolved_branch = nullptr; + if (!field_info.IsClassInDexCache() && (opt_flags & MIR_CLASS_IS_IN_DEX_CACHE) == 0) { + // Check if r_base is nullptr. + unresolved_branch = OpCmpImmBranch(kCondEq, r_base, 0, nullptr); + } + LIR* uninit_branch = nullptr; + if (!field_info.IsClassInitialized() && (opt_flags & MIR_CLASS_IS_INITIALIZED) == 0) { + // Check if r_base is not yet initialized class. + RegStorage r_tmp = TargetReg(kArg2, kNotWide); + LockTemp(r_tmp); + uninit_branch = OpCmpMemImmBranch(kCondLt, r_tmp, r_base, + mirror::Class::StatusOffset().Int32Value(), + mirror::Class::kStatusInitialized, nullptr, nullptr); + FreeTemp(r_tmp); + } + if (unresolved_branch != nullptr || uninit_branch != nullptr) { + // + // Slow path to ensure a class is initialized for sget/sput. + // + class StaticFieldSlowPath : public Mir2Lir::LIRSlowPath { + public: + // There are up to two branches to the static field slow path, the "unresolved" when the type + // entry in the dex cache is nullptr, and the "uninit" when the class is not yet initialized. + // At least one will be non-nullptr here, otherwise we wouldn't generate the slow path. + StaticFieldSlowPath(Mir2Lir* m2l, LIR* unresolved, LIR* uninit, LIR* cont, int storage_index, + RegStorage r_base_in, RegStorage r_method_in) + : LIRSlowPath(m2l, unresolved != nullptr ? unresolved : uninit, cont), + second_branch_(unresolved != nullptr ? uninit : nullptr), + storage_index_(storage_index), r_base_(r_base_in), r_method_(r_method_in) { + } + + void Compile() { + LIR* target = GenerateTargetLabel(); + if (second_branch_ != nullptr) { + second_branch_->target = target; + } + if (r_method_.Valid()) { + // ArtMethod* was loaded in normal path - use it. + m2l_->CallRuntimeHelperImmReg(kQuickInitializeStaticStorage, storage_index_, r_method_, + true); + } else { + // ArtMethod* wasn't loaded in normal path - use a helper that loads it. + m2l_->CallRuntimeHelperImmMethod(kQuickInitializeStaticStorage, storage_index_, true); + } + // Copy helper's result into r_base, a no-op on all but MIPS. + m2l_->OpRegCopy(r_base_, m2l_->TargetReg(kRet0, kRef)); + + m2l_->OpUnconditionalBranch(cont_); + } + + private: + // Second branch to the slow path, or nullptr if there's only one branch. + LIR* const second_branch_; + + const int storage_index_; + const RegStorage r_base_; + RegStorage r_method_; + }; + + // The slow path is invoked if the r_base is nullptr or the class pointed + // to by it is not initialized. + LIR* cont = NewLIR0(kPseudoTargetLabel); + AddSlowPath(new (arena_) StaticFieldSlowPath(this, unresolved_branch, uninit_branch, cont, + field_info.StorageIndex(), r_base, r_method)); + } + if (IsTemp(r_method)) { + FreeTemp(r_method); + } + return r_base; +} + /* * Generate a kPseudoBarrier marker to indicate the boundary of special * blocks. 
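The fast/slow structure that GenGetOtherTypeForSgetSput emits is easier to see stripped of the LIR plumbing. Below is a stand-alone C++ sketch of the same control-flow shape; Class, dex_cache_type_slot and ResolveAndInitializeSlow are hypothetical stand-ins for the dex-cache entry and the kQuickInitializeStaticStorage entrypoint, not ART APIs:

#include <cstdio>

struct Class { bool initialized; };

static Class resolved_class{false};
static Class* dex_cache_type_slot = nullptr;  // nullptr while the type is unresolved.

// Hypothetical stand-in for the kQuickInitializeStaticStorage entrypoint.
static Class* ResolveAndInitializeSlow() {
  resolved_class.initialized = true;
  dex_cache_type_slot = &resolved_class;
  return dex_cache_type_slot;
}

static Class* GetStorageBase(bool class_in_dex_cache, bool class_initialized) {
  Class* r_base = dex_cache_type_slot;        // Dex-cache or PC-relative load.
  if (!class_in_dex_cache && r_base == nullptr) {
    return ResolveAndInitializeSlow();        // The "unresolved" branch.
  }
  if (!class_initialized && !r_base->initialized) {
    return ResolveAndInitializeSlow();        // The "uninit" branch.
  }
  return r_base;                              // Fast path.
}

int main() {
  std::printf("%d\n", GetStorageBase(false, false)->initialized);  // Prints 1.
}

Either guard falls into the same slow path, just as the two branches above share a single GenerateTargetLabel() target.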
@@ -609,41 +700,6 @@ void Mir2Lir::GenFillArrayData(MIR* mir, DexOffset table_offset, RegLocation rl_ CallRuntimeHelperImmRegLocation(kQuickHandleFillArrayData, table_offset_from_start, rl_src, true); } -// -// Slow path to ensure a class is initialized for sget/sput. -// -class StaticFieldSlowPath : public Mir2Lir::LIRSlowPath { - public: - // There are up to two branches to the static field slow path, the "unresolved" when the type - // entry in the dex cache is null, and the "uninit" when the class is not yet initialized. - // At least one will be non-null here, otherwise we wouldn't generate the slow path. - StaticFieldSlowPath(Mir2Lir* m2l, LIR* unresolved, LIR* uninit, LIR* cont, int storage_index, - RegStorage r_base) - : LIRSlowPath(m2l, unresolved != nullptr ? unresolved : uninit, cont), - second_branch_(unresolved != nullptr ? uninit : nullptr), - storage_index_(storage_index), r_base_(r_base) { - } - - void Compile() { - LIR* target = GenerateTargetLabel(); - if (second_branch_ != nullptr) { - second_branch_->target = target; - } - m2l_->CallRuntimeHelperImm(kQuickInitializeStaticStorage, storage_index_, true); - // Copy helper's result into r_base, a no-op on all but MIPS. - m2l_->OpRegCopy(r_base_, m2l_->TargetReg(kRet0, kRef)); - - m2l_->OpUnconditionalBranch(cont_); - } - - private: - // Second branch to the slow path, or null if there's only one branch. - LIR* const second_branch_; - - const int storage_index_; - const RegStorage r_base_; -}; - void Mir2Lir::GenSput(MIR* mir, RegLocation rl_src, OpSize size) { const MirSFieldLoweringInfo& field_info = mir_graph_->GetSFieldLoweringInfo(mir); DCHECK_EQ(SPutMemAccessType(mir->dalvikInsn.opcode), field_info.MemAccessType()); @@ -653,65 +709,23 @@ void Mir2Lir::GenSput(MIR* mir, RegLocation rl_src, OpSize size) { RegStorage r_base; if (field_info.IsReferrersClass()) { // Fast path, static storage base is this method's class - RegLocation rl_method = LoadCurrMethod(); r_base = AllocTempRef(); - LoadRefDisp(rl_method.reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), r_base, + RegStorage r_method = LoadCurrMethodWithHint(r_base); + LoadRefDisp(r_method, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), r_base, kNotVolatile); - if (IsTemp(rl_method.reg)) { - FreeTemp(rl_method.reg); - } } else { // Medium path, static storage base in a different class which requires checks that the other // class is initialized. - // TODO: remove initialized check now that we are initializing classes in the compiler driver. - DCHECK_NE(field_info.StorageIndex(), DexFile::kDexNoIndex); - // May do runtime call so everything to home locations. - FlushAllRegs(); - // Using fixed register to sync with possible call to runtime support. - RegStorage r_method = TargetReg(kArg1, kRef); - LockTemp(r_method); - LoadCurrMethodDirect(r_method); - r_base = TargetReg(kArg0, kRef); - LockTemp(r_base); - LoadRefDisp(r_method, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), r_base, - kNotVolatile); - int32_t offset_of_field = ObjArray::OffsetOfElement(field_info.StorageIndex()).Int32Value(); - LoadRefDisp(r_base, offset_of_field, r_base, kNotVolatile); - // r_base now points at static storage (Class*) or NULL if the type is not yet resolved. - LIR* unresolved_branch = nullptr; - if (!field_info.IsClassInDexCache() && - (mir->optimization_flags & MIR_CLASS_IS_IN_DEX_CACHE) == 0) { - // Check if r_base is NULL. 
- unresolved_branch = OpCmpImmBranch(kCondEq, r_base, 0, NULL); - } - LIR* uninit_branch = nullptr; + r_base = GenGetOtherTypeForSgetSput(field_info, mir->optimization_flags); if (!field_info.IsClassInitialized() && (mir->optimization_flags & MIR_CLASS_IS_INITIALIZED) == 0) { - // Check if r_base is not yet initialized class. - RegStorage r_tmp = TargetReg(kArg2, kNotWide); - LockTemp(r_tmp); - uninit_branch = OpCmpMemImmBranch(kCondLt, r_tmp, r_base, - mirror::Class::StatusOffset().Int32Value(), - mirror::Class::kStatusInitialized, nullptr, nullptr); - FreeTemp(r_tmp); - } - if (unresolved_branch != nullptr || uninit_branch != nullptr) { - // The slow path is invoked if the r_base is NULL or the class pointed - // to by it is not initialized. - LIR* cont = NewLIR0(kPseudoTargetLabel); - AddSlowPath(new (arena_) StaticFieldSlowPath(this, unresolved_branch, uninit_branch, cont, - field_info.StorageIndex(), r_base)); - - if (uninit_branch != nullptr) { - // Ensure load of status and store of value don't re-order. - // TODO: Presumably the actual value store is control-dependent on the status load, - // and will thus not be reordered in any case, since stores are never speculated. - // Does later code "know" that the class is now initialized? If so, we still - // need the barrier to guard later static loads. - GenMemBarrier(kLoadAny); - } + // Ensure load of status and store of value don't re-order. + // TODO: Presumably the actual value store is control-dependent on the status load, + // and will thus not be reordered in any case, since stores are never speculated. + // Does later code "know" that the class is now initialized? If so, we still + // need the barrier to guard later static loads. + GenMemBarrier(kLoadAny); } - FreeTemp(r_method); } // rBase now holds static storage base RegisterClass reg_class = RegClassForFieldLoadStore(size, field_info.IsVolatile()); @@ -773,57 +787,19 @@ void Mir2Lir::GenSget(MIR* mir, RegLocation rl_dest, OpSize size, Primitive::Typ RegStorage r_base; if (field_info.IsReferrersClass()) { // Fast path, static storage base is this method's class - RegLocation rl_method = LoadCurrMethod(); r_base = AllocTempRef(); - LoadRefDisp(rl_method.reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), r_base, + RegStorage r_method = LoadCurrMethodWithHint(r_base); + LoadRefDisp(r_method, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), r_base, kNotVolatile); } else { // Medium path, static storage base in a different class which requires checks that the other // class is initialized - DCHECK_NE(field_info.StorageIndex(), DexFile::kDexNoIndex); - // May do runtime call so everything to home locations. - FlushAllRegs(); - // Using fixed register to sync with possible call to runtime support. - RegStorage r_method = TargetReg(kArg1, kRef); - LockTemp(r_method); - LoadCurrMethodDirect(r_method); - r_base = TargetReg(kArg0, kRef); - LockTemp(r_base); - LoadRefDisp(r_method, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), r_base, - kNotVolatile); - int32_t offset_of_field = ObjArray::OffsetOfElement(field_info.StorageIndex()).Int32Value(); - LoadRefDisp(r_base, offset_of_field, r_base, kNotVolatile); - // r_base now points at static storage (Class*) or NULL if the type is not yet resolved. - LIR* unresolved_branch = nullptr; - if (!field_info.IsClassInDexCache() && - (mir->optimization_flags & MIR_CLASS_IS_IN_DEX_CACHE) == 0) { - // Check if r_base is NULL. 
- unresolved_branch = OpCmpImmBranch(kCondEq, r_base, 0, NULL); - } - LIR* uninit_branch = nullptr; + r_base = GenGetOtherTypeForSgetSput(field_info, mir->optimization_flags); if (!field_info.IsClassInitialized() && (mir->optimization_flags & MIR_CLASS_IS_INITIALIZED) == 0) { - // Check if r_base is not yet initialized class. - RegStorage r_tmp = TargetReg(kArg2, kNotWide); - LockTemp(r_tmp); - uninit_branch = OpCmpMemImmBranch(kCondLt, r_tmp, r_base, - mirror::Class::StatusOffset().Int32Value(), - mirror::Class::kStatusInitialized, nullptr, nullptr); - FreeTemp(r_tmp); - } - if (unresolved_branch != nullptr || uninit_branch != nullptr) { - // The slow path is invoked if the r_base is NULL or the class pointed - // to by it is not initialized. - LIR* cont = NewLIR0(kPseudoTargetLabel); - AddSlowPath(new (arena_) StaticFieldSlowPath(this, unresolved_branch, uninit_branch, cont, - field_info.StorageIndex(), r_base)); - - if (uninit_branch != nullptr) { - // Ensure load of status and load of value don't re-order. - GenMemBarrier(kLoadAny); - } + // Ensure load of status and load of value don't re-order. + GenMemBarrier(kLoadAny); } - FreeTemp(r_method); } // r_base now holds static storage base RegisterClass reg_class = RegClassForFieldLoadStore(size, field_info.IsVolatile()); diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc index e747239894..db7095dafb 100755 --- a/compiler/dex/quick/gen_invoke.cc +++ b/compiler/dex/quick/gen_invoke.cc @@ -1435,10 +1435,12 @@ bool Mir2Lir::GenInlinedUnsafePut(CallInfo* info, bool is_long, void Mir2Lir::GenInvoke(CallInfo* info) { DCHECK(cu_->compiler_driver->GetMethodInlinerMap() != nullptr); - const DexFile* dex_file = info->method_ref.dex_file; - if (cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(dex_file) - ->GenIntrinsic(this, info)) { - return; + if (mir_graph_->GetMethodLoweringInfo(info->mir).IsIntrinsic()) { + const DexFile* dex_file = info->method_ref.dex_file; + auto* inliner = cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(dex_file); + if (inliner->GenIntrinsic(this, info)) { + return; + } } GenInvokeNoInline(info); } diff --git a/compiler/dex/quick/lazy_debug_frame_opcode_writer.cc b/compiler/dex/quick/lazy_debug_frame_opcode_writer.cc new file mode 100644 index 0000000000..03cf4bef8b --- /dev/null +++ b/compiler/dex/quick/lazy_debug_frame_opcode_writer.cc @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "lazy_debug_frame_opcode_writer.h" +#include "mir_to_lir.h" + +namespace art { +namespace dwarf { + +const ArenaVector<uint8_t>* LazyDebugFrameOpCodeWriter::Patch(size_t code_size) { + if (!enable_writes_) { + DCHECK(this->data()->empty()); + return this->data(); + } + if (!patched_) { + patched_ = true; + // Move our data buffer to temporary variable. 
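+ // (Swapping is O(1) and keeps the storage in the same arena; the vector is
+ // rebuilt below with the PC-advance opcodes spliced in at the recorded
+ // positions.)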
+ ArenaVector<uint8_t> old_opcodes(this->opcodes_.get_allocator()); + old_opcodes.swap(this->opcodes_); + // Refill our data buffer with patched opcodes. + this->opcodes_.reserve(old_opcodes.size() + advances_.size() + 4); + size_t pos = 0; + for (auto advance : advances_) { + DCHECK_GE(advance.pos, pos); + // Copy old data up to the point when advance was issued. + this->opcodes_.insert(this->opcodes_.end(), + old_opcodes.begin() + pos, + old_opcodes.begin() + advance.pos); + pos = advance.pos; + // This may be null if there is no slow-path code after return. + LIR* next_lir = NEXT_LIR(advance.last_lir_insn); + // Insert the advance command with its final offset. + Base::AdvancePC(next_lir != nullptr ? next_lir->offset : code_size); + } + // Copy the final segment. + this->opcodes_.insert(this->opcodes_.end(), + old_opcodes.begin() + pos, + old_opcodes.end()); + Base::AdvancePC(code_size); + } + return this->data(); +} + +} // namespace dwarf +} // namespace art diff --git a/compiler/dex/quick/lazy_debug_frame_opcode_writer.h b/compiler/dex/quick/lazy_debug_frame_opcode_writer.h new file mode 100644 index 0000000000..d71a87d567 --- /dev/null +++ b/compiler/dex/quick/lazy_debug_frame_opcode_writer.h @@ -0,0 +1,128 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_DEX_QUICK_LAZY_DEBUG_FRAME_OPCODE_WRITER_H_ +#define ART_COMPILER_DEX_QUICK_LAZY_DEBUG_FRAME_OPCODE_WRITER_H_ + +#include "base/arena_allocator.h" +#include "base/arena_containers.h" +#include "dwarf/debug_frame_opcode_writer.h" + +namespace art { +struct LIR; +namespace dwarf { + +// When we are generating the CFI code, we do not know the instruction offsets, +// so this class stores the LIR references and patches the instruction stream later. +class LazyDebugFrameOpCodeWriter FINAL + : private DebugFrameOpCodeWriter<ArenaAllocatorAdapter<uint8_t>> { + typedef DebugFrameOpCodeWriter<ArenaAllocatorAdapter<uint8_t>> Base; + public: + // This method is implicitly called by the opcode writers. + virtual void ImplicitlyAdvancePC() OVERRIDE { + DCHECK_EQ(patched_, false); + DCHECK_EQ(this->current_pc_, 0); + advances_.push_back({this->data()->size(), *last_lir_insn_}); + } + + // The register was unspilled. + void Restore(Reg reg) { + if (enable_writes_) { + Base::Restore(reg); + } + } + + // Custom alias - unspill many registers based on bitmask. + void RestoreMany(Reg reg_base, uint32_t reg_mask) { + if (enable_writes_) { + Base::RestoreMany(reg_base, reg_mask); + } + } + + // Remember the state of register spills. + void RememberState() { + if (enable_writes_) { + Base::RememberState(); + } + } + + // Restore the state of register spills. + void RestoreState() { + if (enable_writes_) { + Base::RestoreState(); + } + } + + // Set the frame pointer (CFA) to (stack_pointer + offset). 
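+ // For example, a prologue "sub sp, sp, #64" is followed by DefCFAOffset(64)
+ // (usually via AdjustCFAOffset(+64) below), and the matching epilogue add by
+ // AdjustCFAOffset(-64). The offset is tracked even when writes are disabled,
+ // so the DCHECKs against frame_size_ in Mir2Lir still hold.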
+ void DefCFAOffset(int offset) { + if (enable_writes_) { + Base::DefCFAOffset(offset); + } + this->current_cfa_offset_ = offset; + } + + // The stack size was increased by given delta. + void AdjustCFAOffset(int delta) { + DefCFAOffset(this->current_cfa_offset_ + delta); + } + + // The register was spilled to (stack_pointer + offset). + void RelOffset(Reg reg, int offset) { + if (enable_writes_) { + Base::RelOffset(reg, offset); + } + } + + // Custom alias - spill many registers based on bitmask. + void RelOffsetForMany(Reg reg_base, int offset, uint32_t reg_mask, int reg_size) { + if (enable_writes_) { + Base::RelOffsetForMany(reg_base, offset, reg_mask, reg_size); + } + } + + using Base::GetCurrentCFAOffset; + using Base::SetCurrentCFAOffset; + using Base::GetCurrentPC; + + const ArenaVector<uint8_t>* Patch(size_t code_size); + + explicit LazyDebugFrameOpCodeWriter(LIR** last_lir_insn, bool enable_writes, + ArenaAllocator* allocator) + : Base(allocator->Adapter()), + last_lir_insn_(last_lir_insn), + enable_writes_(enable_writes), + advances_(allocator->Adapter()), + patched_(false) { + } + + private: + typedef struct { + size_t pos; + LIR* last_lir_insn; + } Advance; + + LIR** last_lir_insn_; + bool enable_writes_; + ArenaVector<Advance> advances_; + bool patched_; + + DISALLOW_COPY_AND_ASSIGN(LazyDebugFrameOpCodeWriter); +}; + +} // namespace dwarf +} // namespace art + +#endif // ART_COMPILER_DEX_QUICK_LAZY_DEBUG_FRAME_OPCODE_WRITER_H_ diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc index c932df6dc9..7d4f20e335 100644 --- a/compiler/dex/quick/mips/call_mips.cc +++ b/compiler/dex/quick/mips/call_mips.cc @@ -238,7 +238,12 @@ void MipsMir2Lir::UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) { FreeTemp(reg_card_no); } +static dwarf::Reg DwarfCoreReg(int num) { + return dwarf::Reg::MipsCore(num); +} + void MipsMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { + DCHECK_EQ(cfi_.GetCurrentCFAOffset(), 0); int spill_count = num_core_spills_ + num_fp_spills_; /* * On entry, A0, A1, A2 & A3 are live. On Mips64, A4, A5, A6 & A7 are also live. @@ -304,10 +309,12 @@ void MipsMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) // RA is offset 0 since we push in reverse order. m2l_->LoadWordDisp(m2l_->TargetPtrReg(kSp), 0, m2l_->TargetPtrReg(kLr)); m2l_->OpRegImm(kOpAdd, m2l_->TargetPtrReg(kSp), sp_displace_); + m2l_->cfi().AdjustCFAOffset(-sp_displace_); m2l_->ClobberCallerSave(); RegStorage r_tgt = m2l_->CallHelperSetup(kQuickThrowStackOverflow); // Doesn't clobber LR. m2l_->CallHelper(r_tgt, kQuickThrowStackOverflow, false /* MarkSafepointPC */, false /* UseLink */); + m2l_->cfi().AdjustCFAOffset(sp_displace_); } private: @@ -318,8 +325,10 @@ void MipsMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, spill_count * ptr_size)); // TODO: avoid copy for small frame sizes. OpRegCopy(rs_sp, new_sp); // Establish stack. + cfi_.AdjustCFAOffset(frame_sub); } else { OpRegImm(kOpSub, rs_sp, frame_sub); + cfi_.AdjustCFAOffset(frame_sub); } FlushIns(ArgLocs, rl_method); @@ -337,6 +346,7 @@ void MipsMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) } void MipsMir2Lir::GenExitSequence() { + cfi_.RememberState(); /* * In the exit path, rMIPS_RET0/rMIPS_RET1 are live - make sure they aren't * allocated by the register utilities as temps. 
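Since the splice loop in LazyDebugFrameOpCodeWriter::Patch() is the heart of the lazy CFI plumbing, here is a minimal stand-alone model of it; this sketch uses plain std::vector instead of ArenaVector, carries the final PC directly in the Advance record, and assumes every delta fits the one-byte DW_CFA_advance_loc form:

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

// (pos, pc): at buffer position `pos`, the PC is known to have advanced to `pc`.
struct Advance { size_t pos; uint8_t pc; };

// Splice DW_CFA_advance_loc opcodes into an opcode stream whose PC advances
// were only recorded, not emitted, during code generation.
std::vector<uint8_t> Patch(const std::vector<uint8_t>& ops,
                           const std::vector<Advance>& advances) {
  std::vector<uint8_t> out;
  size_t pos = 0;
  uint8_t pc = 0;
  for (const Advance& a : advances) {
    assert(a.pos >= pos && a.pc >= pc);
    // Copy old data up to the point where the advance was recorded.
    out.insert(out.end(), ops.begin() + pos, ops.begin() + a.pos);
    pos = a.pos;
    if (a.pc > pc) {
      assert(a.pc - pc < 0x40);           // One-byte form only, for brevity.
      out.push_back(0x40 | (a.pc - pc));  // DW_CFA_advance_loc <delta>.
      pc = a.pc;
    }
  }
  out.insert(out.end(), ops.begin() + pos, ops.end());  // Final segment.
  return out;
}

int main() {
  // def_cfa_offset 64 recorded at PC 0; one more opcode after a 4-byte advance.
  std::vector<uint8_t> ops = {0x0E, 0x40, 0x0B};
  std::vector<uint8_t> patched = Patch(ops, {{2, 4}, {3, 8}});
  assert((patched == std::vector<uint8_t>{0x0E, 0x40, 0x44, 0x0B, 0x44}));
}

The real Patch() derives the final PC from NEXT_LIR(advance.last_lir_insn) and lets the base writer pick whichever advance encoding the delta needs.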
@@ -346,6 +356,9 @@ void MipsMir2Lir::GenExitSequence() { UnSpillCoreRegs(); OpReg(kOpBx, TargetPtrReg(kLr)); + // The CFI should be restored for any code that follows the exit block. + cfi_.RestoreState(); + cfi_.DefCFAOffset(frame_size_); } void MipsMir2Lir::GenSpecialExitSequence() { @@ -364,15 +377,20 @@ void MipsMir2Lir::GenSpecialEntryForSuspend() { fp_vmap_table_.clear(); const RegStorage rs_sp = TargetPtrReg(kSp); OpRegImm(kOpSub, rs_sp, frame_size_); + cfi_.AdjustCFAOffset(frame_size_); StoreWordDisp(rs_sp, frame_size_ - (cu_->target64 ? 8 : 4), TargetPtrReg(kLr)); + cfi_.RelOffset(DwarfCoreReg(rRA), frame_size_ - (cu_->target64 ? 8 : 4)); StoreWordDisp(rs_sp, 0, TargetPtrReg(kArg0)); + // Do not generate CFI for scratch register A0. } void MipsMir2Lir::GenSpecialExitForSuspend() { // Pop the frame. Don't pop ArtMethod*, it's no longer needed. const RegStorage rs_sp = TargetPtrReg(kSp); LoadWordDisp(rs_sp, frame_size_ - (cu_->target64 ? 8 : 4), TargetPtrReg(kLr)); + cfi_.Restore(DwarfCoreReg(rRA)); OpRegImm(kOpAdd, rs_sp, frame_size_); + cfi_.AdjustCFAOffset(-frame_size_); } /* diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc index a94fad7534..4c0bd8378b 100644 --- a/compiler/dex/quick/mips/target_mips.cc +++ b/compiler/dex/quick/mips/target_mips.cc @@ -830,6 +830,10 @@ LIR* MipsMir2Lir::GenAtomic64Store(RegStorage r_base, int displacement, RegStora return OpReg(kOpBlx, r_tgt); } +static dwarf::Reg DwarfCoreReg(int num) { + return dwarf::Reg::MipsCore(num); +} + void MipsMir2Lir::SpillCoreRegs() { if (num_core_spills_ == 0) { return; @@ -839,11 +843,13 @@ void MipsMir2Lir::SpillCoreRegs() { int offset = num_core_spills_ * ptr_size; const RegStorage rs_sp = TargetPtrReg(kSp); OpRegImm(kOpSub, rs_sp, offset); + cfi_.AdjustCFAOffset(offset); for (int reg = 0; mask; mask >>= 1, reg++) { if (mask & 0x1) { offset -= ptr_size; StoreWordDisp(rs_sp, offset, cu_->target64 ? RegStorage::Solo64(reg) : RegStorage::Solo32(reg)); + cfi_.RelOffset(DwarfCoreReg(reg), offset); } } } @@ -861,9 +867,11 @@ void MipsMir2Lir::UnSpillCoreRegs() { offset -= ptr_size; LoadWordDisp(rs_sp, offset, cu_->target64 ? 
RegStorage::Solo64(reg) : RegStorage::Solo32(reg)); + cfi_.Restore(DwarfCoreReg(reg)); } } OpRegImm(kOpAdd, rs_sp, frame_size_); + cfi_.AdjustCFAOffset(-frame_size_); } bool MipsMir2Lir::IsUnconditionalBranch(LIR* lir) { diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc index ed8e21e817..961cd4f06b 100644 --- a/compiler/dex/quick/mir_to_lir.cc +++ b/compiler/dex/quick/mir_to_lir.cc @@ -1253,11 +1253,14 @@ bool Mir2Lir::MethodBlockCodeGen(BasicBlock* bb) { AppendLIR(NewLIR0(kPseudoPrologueBegin)); GenEntrySequence(&mir_graph_->reg_location_[start_vreg], mir_graph_->GetMethodLoc()); AppendLIR(NewLIR0(kPseudoPrologueEnd)); + DCHECK_EQ(cfi_.GetCurrentCFAOffset(), frame_size_); } else if (bb->block_type == kExitBlock) { ResetRegPool(); + DCHECK_EQ(cfi_.GetCurrentCFAOffset(), frame_size_); AppendLIR(NewLIR0(kPseudoEpilogueBegin)); GenExitSequence(); AppendLIR(NewLIR0(kPseudoEpilogueEnd)); + DCHECK_EQ(cfi_.GetCurrentCFAOffset(), frame_size_); } for (mir = bb->first_mir_insn; mir != NULL; mir = mir->next) { diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h index bb8fbae8f6..5995f33e18 100644 --- a/compiler/dex/quick/mir_to_lir.h +++ b/compiler/dex/quick/mir_to_lir.h @@ -29,6 +29,7 @@ #include "dex/quick/resource_mask.h" #include "entrypoints/quick/quick_entrypoints_enum.h" #include "invoke_type.h" +#include "lazy_debug_frame_opcode_writer.h" #include "leb128.h" #include "safe_map.h" #include "utils/array_ref.h" @@ -135,6 +136,7 @@ class BasicBlock; class BitVector; struct CallInfo; struct CompilationUnit; +struct CompilerTemp; struct InlineMethod; class MIR; struct LIR; @@ -142,6 +144,7 @@ struct RegisterInfo; class DexFileMethodInliner; class MIRGraph; class MirMethodLoweringInfo; +class MirSFieldLoweringInfo; typedef int (*NextCallInsn)(CompilationUnit*, CallInfo*, int, const MethodReference& target_method, @@ -774,9 +777,10 @@ class Mir2Lir { */ virtual RegLocation EvalLoc(RegLocation loc, int reg_class, bool update); - void CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs); + void AnalyzeMIR(RefCounts* core_counts, MIR* mir, uint32_t weight); + virtual void CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs); void DumpCounts(const RefCounts* arr, int size, const char* msg); - void DoPromotion(); + virtual void DoPromotion(); int VRegOffset(int v_reg); int SRegOffset(int s_reg); RegLocation GetReturnWide(RegisterClass reg_class); @@ -1505,6 +1509,12 @@ class Mir2Lir { return 0; } + /** + * @brief Buffer of DWARF's Call Frame Information opcodes. + * @details It is used by debuggers and other tools to unwind the call stack. + */ + dwarf::LazyDebugFrameOpCodeWriter& cfi() { return cfi_; } + protected: Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena); @@ -1570,11 +1580,6 @@ class Mir2Lir { bool can_assume_type_is_in_dex_cache, uint32_t type_idx, RegLocation rl_dest, RegLocation rl_src); - /* - * @brief Generate the eh_frame FDE information if possible. - * @returns pointer to vector containg FDE information, or NULL. - */ - virtual std::vector<uint8_t>* ReturnFrameDescriptionEntry(); /** * @brief Used to insert marker that can be used to associate MIR with LIR. @@ -1692,6 +1697,13 @@ class Mir2Lir { void GenIfNullUseHelperImmMethod( RegStorage r_result, QuickEntrypointEnum trampoline, int imm, RegStorage r_method); + /** + * @brief Generate code to retrieve Class* for another type to be used by SGET/SPUT. + * @param field_info information about the field to be accessed. 
+ * @param opt_flags the optimization flags of the MIR. + */ + RegStorage GenGetOtherTypeForSgetSput(const MirSFieldLoweringInfo& field_info, int opt_flags); + void AddDivZeroCheckSlowPath(LIR* branch); // Copy arg0 and arg1 to kArg0 and kArg1 safely, possibly using @@ -1765,6 +1777,13 @@ class Mir2Lir { // Update references from prev_mir to mir. void UpdateReferenceVRegs(MIR* mir, MIR* prev_mir, BitVector* references); + /** + * Returns true if the frame spills the given core register. + */ + bool CoreSpillMaskContains(int reg) { + return (core_spill_mask_ & (1u << reg)) != 0; + } + public: // TODO: add accessors for these. LIR* literal_list_; // Constants. @@ -1841,6 +1860,20 @@ class Mir2Lir { // The layout of the cu_->dex_file's dex cache arrays for PC-relative addressing. const DexCacheArraysLayout dex_cache_arrays_layout_; + // For architectures that don't have true PC-relative addressing, we can promote + // a PC of an instruction (or another PC-relative address such as a pointer to + // the dex cache arrays if supported) to a register. This is indicated to the + // register promotion pass by allocating a backend temp. + CompilerTemp* pc_rel_temp_; + + // For architectures that don't have true PC-relative addressing (see pc_rel_temp_ + // above) and also have a limited range of offsets for loads, it's useful to + // know the minimum offset into the dex cache arrays, so we calculate that as well + // if pc_rel_temp_ isn't nullptr. + uint32_t dex_cache_arrays_min_offset_; + + dwarf::LazyDebugFrameOpCodeWriter cfi_; + // ABI support class ShortyArg { public: @@ -1900,6 +1933,8 @@ class Mir2Lir { private: static bool SizeMatchesTypeForEntrypoint(OpSize size, Primitive::Type type); + + friend class QuickCFITest; }; // Class Mir2Lir } // namespace art diff --git a/compiler/dex/quick/quick_cfi_test.cc b/compiler/dex/quick/quick_cfi_test.cc new file mode 100644 index 0000000000..0540a8c962 --- /dev/null +++ b/compiler/dex/quick/quick_cfi_test.cc @@ -0,0 +1,139 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vector> +#include <memory> + +#include "arch/instruction_set.h" +#include "arch/instruction_set_features.h" +#include "cfi_test.h" +#include "dex/compiler_ir.h" +#include "dex/mir_graph.h" +#include "dex/pass_manager.h" +#include "dex/quick/dex_file_to_method_inliner_map.h" +#include "dex/quick/quick_compiler.h" +#include "dex/quick/mir_to_lir.h" +#include "dex/verification_results.h" +#include "driver/compiler_driver.h" +#include "driver/compiler_options.h" +#include "gtest/gtest.h" + +#include "dex/quick/quick_cfi_test_expected.inc" + +namespace art { + +// Run the tests only on host. +#ifndef HAVE_ANDROID_OS + +class QuickCFITest : public CFITest { + public: + // Enable this flag to generate the expected outputs. 
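+ // (Run the test on the host with the flag set to true and paste the stdout
+ // dump into quick_cfi_test_expected.inc; GenerateExpected() in cfi_test.h
+ // prints the byte arrays together with the interleaved disassembly.)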
+ static constexpr bool kGenerateExpected = false; + + void TestImpl(InstructionSet isa, const char* isa_str, + const std::vector<uint8_t>& expected_asm, + const std::vector<uint8_t>& expected_cfi) { + // Setup simple compiler context. + ArenaPool pool; + ArenaAllocator arena(&pool); + CompilerOptions compiler_options( + CompilerOptions::kDefaultCompilerFilter, + CompilerOptions::kDefaultHugeMethodThreshold, + CompilerOptions::kDefaultLargeMethodThreshold, + CompilerOptions::kDefaultSmallMethodThreshold, + CompilerOptions::kDefaultTinyMethodThreshold, + CompilerOptions::kDefaultNumDexMethodsThreshold, + true, // generate_gdb_information. + false, + CompilerOptions::kDefaultTopKProfileThreshold, + false, + true, // include_debug_symbols. + false, + false, + false, + false, + nullptr, + new PassManagerOptions(), + nullptr, + false); + VerificationResults verification_results(&compiler_options); + DexFileToMethodInlinerMap method_inliner_map; + std::unique_ptr<const InstructionSetFeatures> isa_features; + std::string error; + isa_features.reset(InstructionSetFeatures::FromVariant(isa, "default", &error)); + CompilerDriver driver(&compiler_options, &verification_results, &method_inliner_map, + Compiler::kQuick, isa, isa_features.get(), + false, 0, 0, 0, false, false, "", 0, -1, ""); + ClassLinker* linker = nullptr; + CompilationUnit cu(&pool, isa, &driver, linker); + DexFile::CodeItem code_item { 0, 0, 0, 0, 0, 0, { 0 } }; // NOLINT + cu.mir_graph.reset(new MIRGraph(&cu, &arena)); + cu.mir_graph->current_code_item_ = &code_item; + + // Generate empty method with some spills. + Mir2Lir* m2l = QuickCompiler::GetCodeGenerator(&cu, NULL); + m2l->frame_size_ = 64u; + m2l->CompilerInitializeRegAlloc(); + for (const auto& info : m2l->reg_pool_->core_regs_) { + if (m2l->num_core_spills_ < 2 && !info->IsTemp() && !info->InUse()) { + m2l->core_spill_mask_ |= 1 << info->GetReg().GetReg(); + m2l->num_core_spills_++; + } + } + for (const auto& info : m2l->reg_pool_->sp_regs_) { + if (m2l->num_fp_spills_ < 2 && !info->IsTemp() && !info->InUse()) { + m2l->fp_spill_mask_ |= 1 << info->GetReg().GetReg(); + m2l->num_fp_spills_++; + } + } + m2l->AdjustSpillMask(); + m2l->GenEntrySequence(NULL, m2l->LocCReturnRef()); + m2l->GenExitSequence(); + m2l->HandleSlowPaths(); + m2l->AssembleLIR(); + std::vector<uint8_t> actual_asm(m2l->code_buffer_.begin(), m2l->code_buffer_.end()); + auto const& cfi_data = m2l->cfi().Patch(actual_asm.size()); + std::vector<uint8_t> actual_cfi(cfi_data->begin(), cfi_data->end()); + EXPECT_EQ(m2l->cfi().GetCurrentPC(), static_cast<int>(actual_asm.size())); + + if (kGenerateExpected) { + GenerateExpected(stdout, isa, isa_str, actual_asm, actual_cfi); + } else { + EXPECT_EQ(expected_asm, actual_asm); + EXPECT_EQ(expected_cfi, actual_cfi); + } + } +}; + +#define TEST_ISA(isa) \ + TEST_F(QuickCFITest, isa) { \ + std::vector<uint8_t> expected_asm(expected_asm_##isa, \ + expected_asm_##isa + arraysize(expected_asm_##isa)); \ + std::vector<uint8_t> expected_cfi(expected_cfi_##isa, \ + expected_cfi_##isa + arraysize(expected_cfi_##isa)); \ + TestImpl(isa, #isa, expected_asm, expected_cfi); \ + } + +TEST_ISA(kThumb2) +TEST_ISA(kArm64) +TEST_ISA(kX86) +TEST_ISA(kX86_64) +TEST_ISA(kMips) +TEST_ISA(kMips64) + +#endif // HAVE_ANDROID_OS + +} // namespace art diff --git a/compiler/dex/quick/quick_cfi_test_expected.inc b/compiler/dex/quick/quick_cfi_test_expected.inc new file mode 100644 index 0000000000..634fdeead0 --- /dev/null +++ b/compiler/dex/quick/quick_cfi_test_expected.inc @@ -0,0 +1,217 @@ 
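+// How to read the expected_cfi arrays below (DWARF call-frame opcodes):
+// 0x40|delta = DW_CFA_advance_loc (delta in code bytes here),
+// 0x80|reg <n> = DW_CFA_offset (register saved at CFA - 4*n),
+// 0xC0|reg = DW_CFA_restore, 0x0E <n> = DW_CFA_def_cfa_offset,
+// 0x05/0x06 = DW_CFA_offset_extended/restore_extended (used for FP registers),
+// 0x0A/0x0B = DW_CFA_remember_state/restore_state.
+// E.g. kThumb2 opens with 0x42 0x0E 0x0C: advance 2 bytes, then def_cfa_offset 12.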
+static constexpr uint8_t expected_asm_kThumb2[] = { + 0x60, 0xB5, 0x2D, 0xED, 0x02, 0x8A, 0x8B, 0xB0, 0x00, 0x90, 0x0B, 0xB0, + 0xBD, 0xEC, 0x02, 0x8A, 0x60, 0xBD, 0x00, 0x00, +}; +static constexpr uint8_t expected_cfi_kThumb2[] = { + 0x42, 0x0E, 0x0C, 0x85, 0x03, 0x86, 0x02, 0x8E, 0x01, 0x44, 0x0E, 0x14, + 0x05, 0x50, 0x05, 0x05, 0x51, 0x04, 0x42, 0x0E, 0x40, 0x42, 0x0A, 0x42, + 0x0E, 0x14, 0x44, 0x0E, 0x0C, 0x06, 0x50, 0x06, 0x51, 0x44, 0x0B, 0x0E, + 0x40, +}; +// 0x00000000: push {r5, r6, lr} +// 0x00000002: .cfi_def_cfa_offset: 12 +// 0x00000002: .cfi_offset: r5 at cfa-12 +// 0x00000002: .cfi_offset: r6 at cfa-8 +// 0x00000002: .cfi_offset: r14 at cfa-4 +// 0x00000002: vpush.f32 {s16-s17} +// 0x00000006: .cfi_def_cfa_offset: 20 +// 0x00000006: .cfi_offset_extended: r80 at cfa-20 +// 0x00000006: .cfi_offset_extended: r81 at cfa-16 +// 0x00000006: sub sp, sp, #44 +// 0x00000008: .cfi_def_cfa_offset: 64 +// 0x00000008: str r0, [sp, #0] +// 0x0000000a: .cfi_remember_state +// 0x0000000a: add sp, sp, #44 +// 0x0000000c: .cfi_def_cfa_offset: 20 +// 0x0000000c: vpop.f32 {s16-s17} +// 0x00000010: .cfi_def_cfa_offset: 12 +// 0x00000010: .cfi_restore_extended: r80 +// 0x00000010: .cfi_restore_extended: r81 +// 0x00000010: pop {r5, r6, pc} +// 0x00000012: lsls r0, r0, #0 +// 0x00000014: .cfi_restore_state +// 0x00000014: .cfi_def_cfa_offset: 64 + +static constexpr uint8_t expected_asm_kArm64[] = { + 0xFF, 0x03, 0x01, 0xD1, 0xE8, 0xA7, 0x01, 0x6D, 0xF4, 0xD7, 0x02, 0xA9, + 0xFE, 0x1F, 0x00, 0xF9, 0xE0, 0x03, 0x00, 0xB9, 0xE8, 0xA7, 0x41, 0x6D, + 0xF4, 0xD7, 0x42, 0xA9, 0xFE, 0x1F, 0x40, 0xF9, 0xFF, 0x03, 0x01, 0x91, + 0xC0, 0x03, 0x5F, 0xD6, +}; +static constexpr uint8_t expected_cfi_kArm64[] = { + 0x44, 0x0E, 0x40, 0x44, 0x05, 0x48, 0x0A, 0x05, 0x49, 0x08, 0x44, 0x94, + 0x06, 0x95, 0x04, 0x44, 0x9E, 0x02, 0x44, 0x0A, 0x44, 0x06, 0x48, 0x06, + 0x49, 0x44, 0xD4, 0xD5, 0x44, 0xDE, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, + 0x40, +}; +// 0x00000000: sub sp, sp, #0x40 (64) +// 0x00000004: .cfi_def_cfa_offset: 64 +// 0x00000004: stp d8, d9, [sp, #24] +// 0x00000008: .cfi_offset_extended: r72 at cfa-40 +// 0x00000008: .cfi_offset_extended: r73 at cfa-32 +// 0x00000008: stp x20, x21, [sp, #40] +// 0x0000000c: .cfi_offset: r20 at cfa-24 +// 0x0000000c: .cfi_offset: r21 at cfa-16 +// 0x0000000c: str lr, [sp, #56] +// 0x00000010: .cfi_offset: r30 at cfa-8 +// 0x00000010: str w0, [sp] +// 0x00000014: .cfi_remember_state +// 0x00000014: ldp d8, d9, [sp, #24] +// 0x00000018: .cfi_restore_extended: r72 +// 0x00000018: .cfi_restore_extended: r73 +// 0x00000018: ldp x20, x21, [sp, #40] +// 0x0000001c: .cfi_restore: r20 +// 0x0000001c: .cfi_restore: r21 +// 0x0000001c: ldr lr, [sp, #56] +// 0x00000020: .cfi_restore: r30 +// 0x00000020: add sp, sp, #0x40 (64) +// 0x00000024: .cfi_def_cfa_offset: 0 +// 0x00000024: ret +// 0x00000028: .cfi_restore_state +// 0x00000028: .cfi_def_cfa_offset: 64 + +static constexpr uint8_t expected_asm_kX86[] = { + 0x83, 0xEC, 0x3C, 0x89, 0x6C, 0x24, 0x34, 0x89, 0x74, 0x24, 0x38, 0x89, + 0x04, 0x24, 0x8B, 0x6C, 0x24, 0x34, 0x8B, 0x74, 0x24, 0x38, 0x83, 0xC4, + 0x3C, 0xC3, 0x00, 0x00, +}; +static constexpr uint8_t expected_cfi_kX86[] = { + 0x43, 0x0E, 0x40, 0x44, 0x85, 0x03, 0x44, 0x86, 0x02, 0x43, 0x0A, 0x44, + 0xC5, 0x44, 0xC6, 0x43, 0x0E, 0x04, 0x43, 0x0B, 0x0E, 0x40, +}; +// 0x00000000: sub esp, 60 +// 0x00000003: .cfi_def_cfa_offset: 64 +// 0x00000003: mov [esp + 52], ebp +// 0x00000007: .cfi_offset: r5 at cfa-12 +// 0x00000007: mov [esp + 56], esi +// 0x0000000b: .cfi_offset: r6 at 
cfa-8 +// 0x0000000b: mov [esp], eax +// 0x0000000e: .cfi_remember_state +// 0x0000000e: mov ebp, [esp + 52] +// 0x00000012: .cfi_restore: r5 +// 0x00000012: mov esi, [esp + 56] +// 0x00000016: .cfi_restore: r6 +// 0x00000016: add esp, 60 +// 0x00000019: .cfi_def_cfa_offset: 4 +// 0x00000019: ret +// 0x0000001a: addb [eax], al +// 0x0000001c: .cfi_restore_state +// 0x0000001c: .cfi_def_cfa_offset: 64 + +static constexpr uint8_t expected_asm_kX86_64[] = { + 0x48, 0x83, 0xEC, 0x38, 0x48, 0x89, 0x5C, 0x24, 0x28, 0x48, 0x89, 0x6C, + 0x24, 0x30, 0xF2, 0x44, 0x0F, 0x11, 0x64, 0x24, 0x18, 0xF2, 0x44, 0x0F, + 0x11, 0x6C, 0x24, 0x20, 0x48, 0x8B, 0xC7, 0x89, 0x3C, 0x24, 0x48, 0x8B, + 0x5C, 0x24, 0x28, 0x48, 0x8B, 0x6C, 0x24, 0x30, 0xF2, 0x44, 0x0F, 0x10, + 0x64, 0x24, 0x18, 0xF2, 0x44, 0x0F, 0x10, 0x6C, 0x24, 0x20, 0x48, 0x83, + 0xC4, 0x38, 0xC3, 0x00, +}; +static constexpr uint8_t expected_cfi_kX86_64[] = { + 0x44, 0x0E, 0x40, 0x45, 0x83, 0x06, 0x45, 0x86, 0x04, 0x47, 0x9D, 0x0A, + 0x47, 0x9E, 0x08, 0x46, 0x0A, 0x45, 0xC3, 0x45, 0xC6, 0x47, 0xDD, 0x47, + 0xDE, 0x44, 0x0E, 0x08, 0x42, 0x0B, 0x0E, 0x40, +}; +// 0x00000000: subq rsp, 56 +// 0x00000004: .cfi_def_cfa_offset: 64 +// 0x00000004: movq [rsp + 40], rbx +// 0x00000009: .cfi_offset: r3 at cfa-24 +// 0x00000009: movq [rsp + 48], rbp +// 0x0000000e: .cfi_offset: r6 at cfa-16 +// 0x0000000e: movsd [rsp + 24], xmm12 +// 0x00000015: .cfi_offset: r29 at cfa-40 +// 0x00000015: movsd [rsp + 32], xmm13 +// 0x0000001c: .cfi_offset: r30 at cfa-32 +// 0x0000001c: movq rax, rdi +// 0x0000001f: mov [rsp], edi +// 0x00000022: .cfi_remember_state +// 0x00000022: movq rbx, [rsp + 40] +// 0x00000027: .cfi_restore: r3 +// 0x00000027: movq rbp, [rsp + 48] +// 0x0000002c: .cfi_restore: r6 +// 0x0000002c: movsd xmm12, [rsp + 24] +// 0x00000033: .cfi_restore: r29 +// 0x00000033: movsd xmm13, [rsp + 32] +// 0x0000003a: .cfi_restore: r30 +// 0x0000003a: addq rsp, 56 +// 0x0000003e: .cfi_def_cfa_offset: 8 +// 0x0000003e: ret +// 0x0000003f: addb al, al +// 0x00000040: .cfi_restore_state +// 0x00000040: .cfi_def_cfa_offset: 64 + +static constexpr uint8_t expected_asm_kMips[] = { + 0xF4, 0xFF, 0xBD, 0x27, 0x08, 0x00, 0xB2, 0xAF, 0x04, 0x00, 0xB3, 0xAF, + 0x00, 0x00, 0xBF, 0xAF, 0xCC, 0xFF, 0xBD, 0x27, 0x25, 0x10, 0x80, 0x00, + 0x00, 0x00, 0xA4, 0xAF, 0x3C, 0x00, 0xB2, 0x8F, 0x38, 0x00, 0xB3, 0x8F, + 0x34, 0x00, 0xBF, 0x8F, 0x40, 0x00, 0xBD, 0x27, 0x09, 0x00, 0xE0, 0x03, + 0x00, 0x00, 0x00, 0x00, +}; +static constexpr uint8_t expected_cfi_kMips[] = { + 0x44, 0x0E, 0x0C, 0x44, 0x92, 0x01, 0x44, 0x93, 0x02, 0x44, 0x9F, 0x03, + 0x44, 0x0E, 0x40, 0x48, 0x0A, 0x44, 0xD2, 0x44, 0xD3, 0x44, 0xDF, 0x44, + 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40, +}; +// 0x00000000: addiu r29, r29, -12 +// 0x00000004: .cfi_def_cfa_offset: 12 +// 0x00000004: sw r18, +8(r29) +// 0x00000008: .cfi_offset: r18 at cfa-4 +// 0x00000008: sw r19, +4(r29) +// 0x0000000c: .cfi_offset: r19 at cfa-8 +// 0x0000000c: sw r31, +0(r29) +// 0x00000010: .cfi_offset: r31 at cfa-12 +// 0x00000010: addiu r29, r29, -52 +// 0x00000014: .cfi_def_cfa_offset: 64 +// 0x00000014: or r2, r4, r0 +// 0x00000018: sw r4, +0(r29) +// 0x0000001c: .cfi_remember_state +// 0x0000001c: lw r18, +60(r29) +// 0x00000020: .cfi_restore: r18 +// 0x00000020: lw r19, +56(r29) +// 0x00000024: .cfi_restore: r19 +// 0x00000024: lw r31, +52(r29) +// 0x00000028: .cfi_restore: r31 +// 0x00000028: addiu r29, r29, 64 +// 0x0000002c: .cfi_def_cfa_offset: 0 +// 0x0000002c: jalr r0, r31 +// 0x00000030: nop +// 0x00000034: .cfi_restore_state +// 0x00000034: 
.cfi_def_cfa_offset: 64 + +static constexpr uint8_t expected_asm_kMips64[] = { + 0xE8, 0xFF, 0xBD, 0x67, 0x10, 0x00, 0xB2, 0xFF, 0x08, 0x00, 0xB3, 0xFF, + 0x00, 0x00, 0xBF, 0xFF, 0xD8, 0xFF, 0xBD, 0x67, 0x25, 0x10, 0x80, 0x00, + 0x00, 0x00, 0xA4, 0xAF, 0x38, 0x00, 0xB2, 0xDF, 0x30, 0x00, 0xB3, 0xDF, + 0x28, 0x00, 0xBF, 0xDF, 0x40, 0x00, 0xBD, 0x67, 0x09, 0x00, 0xE0, 0x03, + 0x00, 0x00, 0x00, 0x00, +}; +static constexpr uint8_t expected_cfi_kMips64[] = { + 0x44, 0x0E, 0x18, 0x44, 0x92, 0x02, 0x44, 0x93, 0x04, 0x44, 0x9F, 0x06, + 0x44, 0x0E, 0x40, 0x48, 0x0A, 0x44, 0xD2, 0x44, 0xD3, 0x44, 0xDF, 0x44, + 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40, +}; +// 0x00000000: daddiu r29, r29, -24 +// 0x00000004: .cfi_def_cfa_offset: 24 +// 0x00000004: sd r18, +16(r29) +// 0x00000008: .cfi_offset: r18 at cfa-8 +// 0x00000008: sd r19, +8(r29) +// 0x0000000c: .cfi_offset: r19 at cfa-16 +// 0x0000000c: sd r31, +0(r29) +// 0x00000010: .cfi_offset: r31 at cfa-24 +// 0x00000010: daddiu r29, r29, -40 +// 0x00000014: .cfi_def_cfa_offset: 64 +// 0x00000014: or r2, r4, r0 +// 0x00000018: sw r4, +0(r29) +// 0x0000001c: .cfi_remember_state +// 0x0000001c: ld r18, +56(r29) +// 0x00000020: .cfi_restore: r18 +// 0x00000020: ld r19, +48(r29) +// 0x00000024: .cfi_restore: r19 +// 0x00000024: ld r31, +40(r29) +// 0x00000028: .cfi_restore: r31 +// 0x00000028: daddiu r29, r29, 64 +// 0x0000002c: .cfi_def_cfa_offset: 0 +// 0x0000002c: jr r31 +// 0x00000030: nop +// 0x00000034: .cfi_restore_state +// 0x00000034: .cfi_def_cfa_offset: 64 + diff --git a/compiler/dex/quick/quick_compiler.cc b/compiler/dex/quick/quick_compiler.cc index 8baafc7fd2..2c0bd47405 100644 --- a/compiler/dex/quick/quick_compiler.cc +++ b/compiler/dex/quick/quick_compiler.cc @@ -798,11 +798,16 @@ bool QuickCompiler::WriteElf(art::File* file, const std::vector<const art::DexFile*>& dex_files, const std::string& android_root, bool is_host) const { - return art::ElfWriterQuick32::Create(file, oat_writer, dex_files, android_root, is_host, - *GetCompilerDriver()); + if (kProduce64BitELFFiles && Is64BitInstructionSet(GetCompilerDriver()->GetInstructionSet())) { + return art::ElfWriterQuick64::Create(file, oat_writer, dex_files, android_root, is_host, + *GetCompilerDriver()); + } else { + return art::ElfWriterQuick32::Create(file, oat_writer, dex_files, android_root, is_host, + *GetCompilerDriver()); + } } -Mir2Lir* QuickCompiler::GetCodeGenerator(CompilationUnit* cu, void* compilation_unit) const { +Mir2Lir* QuickCompiler::GetCodeGenerator(CompilationUnit* cu, void* compilation_unit) { UNUSED(compilation_unit); Mir2Lir* mir_to_lir = nullptr; switch (cu->instruction_set) { diff --git a/compiler/dex/quick/quick_compiler.h b/compiler/dex/quick/quick_compiler.h index 5153a9e82e..09b08ace77 100644 --- a/compiler/dex/quick/quick_compiler.h +++ b/compiler/dex/quick/quick_compiler.h @@ -60,7 +60,7 @@ class QuickCompiler : public Compiler { OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - Mir2Lir* GetCodeGenerator(CompilationUnit* cu, void* compilation_unit) const; + static Mir2Lir* GetCodeGenerator(CompilationUnit* cu, void* compilation_unit); void InitCompilationUnit(CompilationUnit& cu) const OVERRIDE; diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc index 741657bc69..e779479780 100644 --- a/compiler/dex/quick/ralloc_util.cc +++ b/compiler/dex/quick/ralloc_util.cc @@ -19,9 +19,11 @@ #include "mir_to_lir-inl.h" #include "dex/compiler_ir.h" +#include "dex/dataflow_iterator-inl.h" #include "dex/mir_graph.h" #include 
"driver/compiler_driver.h" #include "driver/dex_compilation_unit.h" +#include "utils/dex_cache_arrays_layout-inl.h" namespace art { @@ -1128,6 +1130,152 @@ RegLocation Mir2Lir::EvalLoc(RegLocation loc, int reg_class, bool update) { return loc; } +void Mir2Lir::AnalyzeMIR(RefCounts* core_counts, MIR* mir, uint32_t weight) { + // NOTE: This should be in sync with functions that actually generate code for + // the opcodes below. However, if we get this wrong, the generated code will + // still be correct even if it may be sub-optimal. + int opcode = mir->dalvikInsn.opcode; + bool uses_method = false; + bool uses_pc_rel_load = false; + uint32_t dex_cache_array_offset = std::numeric_limits<uint32_t>::max(); + switch (opcode) { + case Instruction::CHECK_CAST: + case Instruction::INSTANCE_OF: { + if ((opcode == Instruction::CHECK_CAST) && + (mir->optimization_flags & MIR_IGNORE_CHECK_CAST) != 0) { + break; // No code generated. + } + uint32_t type_idx = + (opcode == Instruction::CHECK_CAST) ? mir->dalvikInsn.vB : mir->dalvikInsn.vC; + bool type_known_final, type_known_abstract, use_declaring_class; + bool needs_access_check = !cu_->compiler_driver->CanAccessTypeWithoutChecks( + cu_->method_idx, *cu_->dex_file, type_idx, + &type_known_final, &type_known_abstract, &use_declaring_class); + if (opcode == Instruction::CHECK_CAST && !needs_access_check && + cu_->compiler_driver->IsSafeCast( + mir_graph_->GetCurrentDexCompilationUnit(), mir->offset)) { + break; // No code generated. + } + if (!needs_access_check && !use_declaring_class && CanUseOpPcRelDexCacheArrayLoad()) { + uses_pc_rel_load = true; // And ignore method use in slow path. + dex_cache_array_offset = dex_cache_arrays_layout_.TypeOffset(type_idx); + } else { + uses_method = true; + } + break; + } + + case Instruction::CONST_CLASS: + if (CanUseOpPcRelDexCacheArrayLoad() && + cu_->compiler_driver->CanAccessTypeWithoutChecks(cu_->method_idx, *cu_->dex_file, + mir->dalvikInsn.vB)) { + uses_pc_rel_load = true; // And ignore method use in slow path. + dex_cache_array_offset = dex_cache_arrays_layout_.TypeOffset(mir->dalvikInsn.vB); + } else { + uses_method = true; + } + break; + + case Instruction::CONST_STRING: + case Instruction::CONST_STRING_JUMBO: + if (CanUseOpPcRelDexCacheArrayLoad()) { + uses_pc_rel_load = true; // And ignore method use in slow path. + dex_cache_array_offset = dex_cache_arrays_layout_.StringOffset(mir->dalvikInsn.vB); + } else { + uses_method = true; + } + break; + + case Instruction::INVOKE_VIRTUAL: + case Instruction::INVOKE_SUPER: + case Instruction::INVOKE_DIRECT: + case Instruction::INVOKE_STATIC: + case Instruction::INVOKE_INTERFACE: + case Instruction::INVOKE_VIRTUAL_RANGE: + case Instruction::INVOKE_SUPER_RANGE: + case Instruction::INVOKE_DIRECT_RANGE: + case Instruction::INVOKE_STATIC_RANGE: + case Instruction::INVOKE_INTERFACE_RANGE: + case Instruction::INVOKE_VIRTUAL_QUICK: + case Instruction::INVOKE_VIRTUAL_RANGE_QUICK: { + const MirMethodLoweringInfo& info = mir_graph_->GetMethodLoweringInfo(mir); + InvokeType sharp_type = info.GetSharpType(); + if (info.IsIntrinsic()) { + // Nothing to do, if an intrinsic uses ArtMethod* it's in the slow-path - don't count it. + } else if (!info.FastPath() || (sharp_type != kStatic && sharp_type != kDirect)) { + // Nothing to do, the generated code or entrypoint uses method from the stack. + } else if (info.DirectCode() != 0 && info.DirectMethod() != 0) { + // Nothing to do, the generated code uses method from the stack. 
+ } else if (CanUseOpPcRelDexCacheArrayLoad()) { + uses_pc_rel_load = true; + dex_cache_array_offset = dex_cache_arrays_layout_.MethodOffset(mir->dalvikInsn.vB); + } else { + uses_method = true; + } + break; + } + + case Instruction::NEW_INSTANCE: + case Instruction::NEW_ARRAY: + case Instruction::FILLED_NEW_ARRAY: + case Instruction::FILLED_NEW_ARRAY_RANGE: + uses_method = true; + break; + case Instruction::FILL_ARRAY_DATA: + // Nothing to do, the entrypoint uses method from the stack. + break; + case Instruction::THROW: + // Nothing to do, the entrypoint uses method from the stack. + break; + + case Instruction::SGET: + case Instruction::SGET_WIDE: + case Instruction::SGET_OBJECT: + case Instruction::SGET_BOOLEAN: + case Instruction::SGET_BYTE: + case Instruction::SGET_CHAR: + case Instruction::SGET_SHORT: + case Instruction::SPUT: + case Instruction::SPUT_WIDE: + case Instruction::SPUT_OBJECT: + case Instruction::SPUT_BOOLEAN: + case Instruction::SPUT_BYTE: + case Instruction::SPUT_CHAR: + case Instruction::SPUT_SHORT: { + const MirSFieldLoweringInfo& field_info = mir_graph_->GetSFieldLoweringInfo(mir); + bool fast = IsInstructionSGet(static_cast<Instruction::Code>(opcode)) + ? field_info.FastGet() + : field_info.FastPut(); + if (fast && (cu_->enable_debug & (1 << kDebugSlowFieldPath)) == 0) { + if (!field_info.IsReferrersClass() && CanUseOpPcRelDexCacheArrayLoad()) { + uses_pc_rel_load = true; // And ignore method use in slow path. + dex_cache_array_offset = dex_cache_arrays_layout_.TypeOffset(field_info.StorageIndex()); + } else { + uses_method = true; + } + } else { + // Nothing to do, the entrypoint uses method from the stack. + } + break; + } + + default: + break; + } + if (uses_method) { + core_counts[SRegToPMap(mir_graph_->GetMethodLoc().s_reg_low)].count += weight; + } + if (uses_pc_rel_load) { + if (pc_rel_temp_ != nullptr) { + core_counts[SRegToPMap(pc_rel_temp_->s_reg_low)].count += weight; + DCHECK_NE(dex_cache_array_offset, std::numeric_limits<uint32_t>::max()); + dex_cache_arrays_min_offset_ = std::min(dex_cache_arrays_min_offset_, dex_cache_array_offset); + } else { + // Nothing to do, using PC-relative addressing without promoting base PC to register. + } + } +} + /* USE SSA names to count references of base Dalvik v_regs. */ void Mir2Lir::CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs) { for (int i = 0; i < mir_graph_->GetNumSSARegs(); i++) { @@ -1157,6 +1305,22 @@ void Mir2Lir::CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num } } } + + // Now analyze the ArtMethod* and pc_rel_temp_ uses. 
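+ // These are synthetic uses: every MIR that needs ArtMethod* or the promoted
+ // PC base contributes the block's use-count weight, so the promotion
+ // heuristic can weigh them against ordinary Dalvik vregs.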
+ DCHECK_EQ(core_counts[SRegToPMap(mir_graph_->GetMethodLoc().s_reg_low)].count, 0); + if (pc_rel_temp_ != nullptr) { + DCHECK_EQ(core_counts[SRegToPMap(pc_rel_temp_->s_reg_low)].count, 0); + } + PreOrderDfsIterator iter(mir_graph_); + for (BasicBlock* bb = iter.Next(); bb != nullptr; bb = iter.Next()) { + if (bb->block_type == kDead) { + continue; + } + uint32_t weight = mir_graph_->GetUseCountWeight(bb); + for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) { + AnalyzeMIR(core_counts, mir, weight); + } + } } /* qsort callback function, sort descending */ diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc index fd23692d24..7f42536c35 100644 --- a/compiler/dex/quick/x86/call_x86.cc +++ b/compiler/dex/quick/x86/call_x86.cc @@ -150,6 +150,10 @@ void X86Mir2Lir::UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) { FreeTemp(reg_card_no); } +static dwarf::Reg DwarfCoreReg(bool is_x86_64, int num) { + return is_x86_64 ? dwarf::Reg::X86_64Core(num) : dwarf::Reg::X86Core(num); +} + void X86Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { /* * On entry, rX86_ARG0, rX86_ARG1, rX86_ARG2 are live. Let the register @@ -184,8 +188,9 @@ void X86Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { } /* Build frame, return address already on stack */ - stack_decrement_ = OpRegImm(kOpSub, rs_rSP, frame_size_ - - GetInstructionSetPointerSize(cu_->instruction_set)); + cfi_.SetCurrentCFAOffset(GetInstructionSetPointerSize(cu_->instruction_set)); + OpRegImm(kOpSub, rs_rSP, frame_size_ - GetInstructionSetPointerSize(cu_->instruction_set)); + cfi_.DefCFAOffset(frame_size_); /* Spill core callee saves */ SpillCoreRegs(); @@ -202,10 +207,12 @@ void X86Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { GenerateTargetLabel(kPseudoThrowTarget); const RegStorage local_rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32; m2l_->OpRegImm(kOpAdd, local_rs_rSP, sp_displace_); + m2l_->cfi().AdjustCFAOffset(-sp_displace_); m2l_->ClobberCallerSave(); // Assumes codegen and target are in thumb2 mode. m2l_->CallHelper(RegStorage::InvalidReg(), kQuickThrowStackOverflow, false /* MarkSafepointPC */, false /* UseLink */); + m2l_->cfi().AdjustCFAOffset(sp_displace_); } private: @@ -252,6 +259,7 @@ void X86Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { } void X86Mir2Lir::GenExitSequence() { + cfi_.RememberState(); /* * In the exit path, rX86_RET0/rX86_RET1 are live - make sure they aren't * allocated by the register utilities as temps. @@ -263,9 +271,14 @@ void X86Mir2Lir::GenExitSequence() { UnSpillFPRegs(); /* Remove frame except for return address */ const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32; - stack_increment_ = OpRegImm(kOpAdd, rs_rSP, - frame_size_ - GetInstructionSetPointerSize(cu_->instruction_set)); + int adjust = frame_size_ - GetInstructionSetPointerSize(cu_->instruction_set); + OpRegImm(kOpAdd, rs_rSP, adjust); + cfi_.AdjustCFAOffset(-adjust); + // There is only the return PC on the stack now. NewLIR0(kX86Ret); + // The CFI should be restored for any code that follows the exit block. + cfi_.RestoreState(); + cfi_.DefCFAOffset(frame_size_); } void X86Mir2Lir::GenSpecialExitSequence() { @@ -276,6 +289,8 @@ void X86Mir2Lir::GenSpecialEntryForSuspend() { // Keep 16-byte stack alignment, there's already the return address, so // - for 32-bit push EAX, i.e. ArtMethod*, ESI, EDI, // - for 64-bit push RAX, i.e. ArtMethod*. 
+ const int kRegSize = cu_->target64 ? 8 : 4; + cfi_.SetCurrentCFAOffset(kRegSize); // Return address. if (!cu_->target64) { DCHECK(!IsTemp(rs_rSI)); DCHECK(!IsTemp(rs_rDI)); @@ -293,17 +308,29 @@ void X86Mir2Lir::GenSpecialEntryForSuspend() { fp_vmap_table_.clear(); if (!cu_->target64) { NewLIR1(kX86Push32R, rs_rDI.GetReg()); + cfi_.AdjustCFAOffset(kRegSize); + cfi_.RelOffset(DwarfCoreReg(cu_->target64, rs_rDI.GetRegNum()), 0); NewLIR1(kX86Push32R, rs_rSI.GetReg()); + cfi_.AdjustCFAOffset(kRegSize); + cfi_.RelOffset(DwarfCoreReg(cu_->target64, rs_rSI.GetRegNum()), 0); } NewLIR1(kX86Push32R, TargetReg(kArg0, kRef).GetReg()); // ArtMethod* + cfi_.AdjustCFAOffset(kRegSize); + // Do not generate CFI for scratch register. } void X86Mir2Lir::GenSpecialExitForSuspend() { + const int kRegSize = cu_->target64 ? 8 : 4; // Pop the frame. (ArtMethod* no longer needed but restore it anyway.) NewLIR1(kX86Pop32R, TargetReg(kArg0, kRef).GetReg()); // ArtMethod* + cfi_.AdjustCFAOffset(-kRegSize); if (!cu_->target64) { NewLIR1(kX86Pop32R, rs_rSI.GetReg()); + cfi_.AdjustCFAOffset(-kRegSize); + cfi_.Restore(DwarfCoreReg(cu_->target64, rs_rSI.GetRegNum())); NewLIR1(kX86Pop32R, rs_rDI.GetReg()); + cfi_.AdjustCFAOffset(-kRegSize); + cfi_.Restore(DwarfCoreReg(cu_->target64, rs_rDI.GetRegNum())); } } diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h index 758684e835..a98a99ec4e 100644 --- a/compiler/dex/quick/x86/codegen_x86.h +++ b/compiler/dex/quick/x86/codegen_x86.h @@ -380,12 +380,6 @@ class X86Mir2Lir : public Mir2Lir { */ void InstallLiteralPools() OVERRIDE; - /* - * @brief Generate the debug_frame FDE information. - * @returns pointer to vector containing CFE information - */ - std::vector<uint8_t>* ReturnFrameDescriptionEntry() OVERRIDE; - LIR* InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) OVERRIDE; protected: @@ -958,12 +952,6 @@ class X86Mir2Lir : public Mir2Lir { // Instructions needing patching with PC relative code addresses. ArenaVector<LIR*> dex_cache_access_insns_; - // Prologue decrement of stack pointer. - LIR* stack_decrement_; - - // Epilogue increment of stack pointer. - LIR* stack_increment_; - // The list of const vector literals. LIR* const_vectors_; diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc index 5def5c8bb0..931294e2ff 100755 --- a/compiler/dex/quick/x86/int_x86.cc +++ b/compiler/dex/quick/x86/int_x86.cc @@ -830,6 +830,10 @@ RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1, return rl_result; } +static dwarf::Reg DwarfCoreReg(bool is_x86_64, int num) { + return is_x86_64 ? dwarf::Reg::X86_64Core(num) : dwarf::Reg::X86Core(num); +} + bool X86Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) { DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64); @@ -928,6 +932,7 @@ bool X86Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) { // Do we have a free register for intermediate calculations? RegStorage tmp = AllocTemp(false); + const int kRegSize = cu_->target64 ? 8 : 4; if (tmp == RegStorage::InvalidReg()) { /* * No, will use 'edi'. @@ -946,6 +951,11 @@ bool X86Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) { IsTemp(rl_result.reg.GetHigh())); tmp = rs_rDI; NewLIR1(kX86Push32R, tmp.GetReg()); + cfi_.AdjustCFAOffset(kRegSize); + // Record cfi only if it is not already spilled. 
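+ // (A register already in core_spill_mask_ has a CFI save slot from the
+ // prologue that stays valid across this push/pop; re-recording it here would
+ // point the unwinder at a slot that disappears on the pop.)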
+ if (!CoreSpillMaskContains(tmp.GetReg())) { + cfi_.RelOffset(DwarfCoreReg(cu_->target64, tmp.GetReg()), 0); + } } // Now we are ready to do calculations. @@ -957,6 +967,10 @@ bool X86Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) { // Let's put pop 'edi' here to break a bit the dependency chain. if (tmp == rs_rDI) { NewLIR1(kX86Pop32R, tmp.GetReg()); + cfi_.AdjustCFAOffset(-kRegSize); + if (!CoreSpillMaskContains(tmp.GetReg())) { + cfi_.Restore(DwarfCoreReg(cu_->target64, tmp.GetReg())); + } } else { FreeTemp(tmp); } @@ -1104,6 +1118,7 @@ bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { // If is_long, high half is in info->args[5] RegLocation rl_src_new_value = info->args[is_long ? 6 : 5]; // int, long or Object // If is_long, high half is in info->args[7] + const int kRegSize = cu_->target64 ? 8 : 4; if (is_long && cu_->target64) { // RAX must hold expected for CMPXCHG. Neither rl_new_value, nor r_ptr may be in RAX. @@ -1125,7 +1140,6 @@ bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { FreeTemp(rs_r0q); } else if (is_long) { // TODO: avoid unnecessary loads of SI and DI when the values are in registers. - // TODO: CFI support. FlushAllRegs(); LockCallTemps(); RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_rAX, rs_rDX); @@ -1148,11 +1162,21 @@ bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { NewLIR1(kX86Push32R, rs_rDI.GetReg()); MarkTemp(rs_rDI); LockTemp(rs_rDI); + cfi_.AdjustCFAOffset(kRegSize); + // Record cfi only if it is not already spilled. + if (!CoreSpillMaskContains(rs_rDI.GetReg())) { + cfi_.RelOffset(DwarfCoreReg(cu_->target64, rs_rDI.GetReg()), 0); + } } if (push_si) { NewLIR1(kX86Push32R, rs_rSI.GetReg()); MarkTemp(rs_rSI); LockTemp(rs_rSI); + cfi_.AdjustCFAOffset(kRegSize); + // Record cfi only if it is not already spilled. + if (!CoreSpillMaskContains(rs_rSI.GetReg())) { + cfi_.RelOffset(DwarfCoreReg(cu_->target64, rs_rSI.GetReg()), 0); + } } ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); const size_t push_offset = (push_si ? 4u : 0u) + (push_di ? 4u : 0u); @@ -1183,11 +1207,19 @@ bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { FreeTemp(rs_rSI); UnmarkTemp(rs_rSI); NewLIR1(kX86Pop32R, rs_rSI.GetReg()); + cfi_.AdjustCFAOffset(-kRegSize); + if (!CoreSpillMaskContains(rs_rSI.GetReg())) { + cfi_.Restore(DwarfCoreReg(cu_->target64, rs_rSI.GetRegNum())); + } } if (push_di) { FreeTemp(rs_rDI); UnmarkTemp(rs_rDI); NewLIR1(kX86Pop32R, rs_rDI.GetReg()); + cfi_.AdjustCFAOffset(-kRegSize); + if (!CoreSpillMaskContains(rs_rDI.GetReg())) { + cfi_.Restore(DwarfCoreReg(cu_->target64, rs_rDI.GetRegNum())); + } } FreeCallTemps(); } else { diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc index cad82a183e..926b75e35f 100755 --- a/compiler/dex/quick/x86/target_x86.cc +++ b/compiler/dex/quick/x86/target_x86.cc @@ -32,7 +32,6 @@ #include "mirror/string.h" #include "oat.h" #include "x86_lir.h" -#include "utils/dwarf_cfi.h" namespace art { @@ -725,6 +724,14 @@ int X86Mir2Lir::NumReservableVectorRegisters(bool long_or_fp) { return long_or_fp ? num_vector_temps - 2 : num_vector_temps - 1; } +static dwarf::Reg DwarfCoreReg(bool is_x86_64, int num) { + return is_x86_64 ? dwarf::Reg::X86_64Core(num) : dwarf::Reg::X86Core(num); +} + +static dwarf::Reg DwarfFpReg(bool is_x86_64, int num) { + return is_x86_64 ? 
dwarf::Reg::X86_64Fp(num) : dwarf::Reg::X86Fp(num); +} + void X86Mir2Lir::SpillCoreRegs() { if (num_core_spills_ == 0) { return; @@ -735,11 +742,11 @@ void X86Mir2Lir::SpillCoreRegs() { frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_); OpSize size = cu_->target64 ? k64 : k32; const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32; - for (int reg = 0; mask; mask >>= 1, reg++) { - if (mask & 0x1) { - StoreBaseDisp(rs_rSP, offset, - cu_->target64 ? RegStorage::Solo64(reg) : RegStorage::Solo32(reg), - size, kNotVolatile); + for (int reg = 0; mask != 0u; mask >>= 1, reg++) { + if ((mask & 0x1) != 0u) { + RegStorage r_src = cu_->target64 ? RegStorage::Solo64(reg) : RegStorage::Solo32(reg); + StoreBaseDisp(rs_rSP, offset, r_src, size, kNotVolatile); + cfi_.RelOffset(DwarfCoreReg(cu_->target64, reg), offset); offset += GetInstructionSetPointerSize(cu_->instruction_set); } } @@ -754,10 +761,11 @@ void X86Mir2Lir::UnSpillCoreRegs() { int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_); OpSize size = cu_->target64 ? k64 : k32; const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32; - for (int reg = 0; mask; mask >>= 1, reg++) { - if (mask & 0x1) { - LoadBaseDisp(rs_rSP, offset, cu_->target64 ? RegStorage::Solo64(reg) : RegStorage::Solo32(reg), - size, kNotVolatile); + for (int reg = 0; mask != 0u; mask >>= 1, reg++) { + if ((mask & 0x1) != 0u) { + RegStorage r_dest = cu_->target64 ? RegStorage::Solo64(reg) : RegStorage::Solo32(reg); + LoadBaseDisp(rs_rSP, offset, r_dest, size, kNotVolatile); + cfi_.Restore(DwarfCoreReg(cu_->target64, reg)); offset += GetInstructionSetPointerSize(cu_->instruction_set); } } @@ -771,9 +779,10 @@ void X86Mir2Lir::SpillFPRegs() { int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * (num_fp_spills_ + num_core_spills_)); const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32; - for (int reg = 0; mask; mask >>= 1, reg++) { - if (mask & 0x1) { + for (int reg = 0; mask != 0u; mask >>= 1, reg++) { + if ((mask & 0x1) != 0u) { StoreBaseDisp(rs_rSP, offset, RegStorage::FloatSolo64(reg), k64, kNotVolatile); + cfi_.RelOffset(DwarfFpReg(cu_->target64, reg), offset); offset += sizeof(double); } } @@ -786,10 +795,11 @@ void X86Mir2Lir::UnSpillFPRegs() { int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * (num_fp_spills_ + num_core_spills_)); const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32; - for (int reg = 0; mask; mask >>= 1, reg++) { - if (mask & 0x1) { + for (int reg = 0; mask != 0u; mask >>= 1, reg++) { + if ((mask & 0x1) != 0u) { LoadBaseDisp(rs_rSP, offset, RegStorage::FloatSolo64(reg), k64, kNotVolatile); + cfi_.Restore(DwarfFpReg(cu_->target64, reg)); offset += sizeof(double); } } @@ -830,7 +840,6 @@ X86Mir2Lir::X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* class_type_address_insns_(arena->Adapter()), call_method_insns_(arena->Adapter()), dex_cache_access_insns_(arena->Adapter()), - stack_decrement_(nullptr), stack_increment_(nullptr), const_vectors_(nullptr) { method_address_insns_.reserve(100); class_type_address_insns_.reserve(100); @@ -1317,6 +1326,11 @@ bool X86Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) { if (!cu_->target64) { // EDI is promotable in 32-bit mode. NewLIR1(kX86Push32R, rs_rDI.GetReg()); + cfi_.AdjustCFAOffset(4); + // Record cfi only if it is not already spilled. 
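The recurring "only if it is not already spilled" guard deserves a note: when the pushed register is in the method's core spill mask, the prologue has already emitted a save-slot rule for it, and that rule stays correct while the register is reused as a scratch (the prologue slot still holds the caller's value). Emitting a second rule here, and a .cfi_restore on the pop, would clobber the valid rule. A one-line sketch of the predicate (illustrative, not the ART API):

```cpp
#include <cstdint>

// Track a scratch push in the CFI only when the prologue did not already
// record a save slot for this register (mirrors !CoreSpillMaskContains(reg)).
bool ShouldTrackScratchPush(uint32_t core_spill_mask, int reg_num) {
  return (core_spill_mask & (1u << reg_num)) == 0u;
}
```

This is exactly what the CoreSpillMaskContains checks below test before recording the offset.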
+ if (!CoreSpillMaskContains(rs_rDI.GetReg())) { + cfi_.RelOffset(DwarfCoreReg(cu_->target64, rs_rDI.GetReg()), 0); + } } if (zero_based) { @@ -1412,8 +1426,13 @@ bool X86Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) { // And join up at the end. all_done->target = NewLIR0(kPseudoTargetLabel); - if (!cu_->target64) + if (!cu_->target64) { NewLIR1(kX86Pop32R, rs_rDI.GetReg()); + cfi_.AdjustCFAOffset(-4); + if (!CoreSpillMaskContains(rs_rDI.GetReg())) { + cfi_.Restore(DwarfCoreReg(cu_->target64, rs_rDI.GetReg())); + } + } // Out of line code returns here. if (slowpath_branch != nullptr) { @@ -1426,100 +1445,6 @@ bool X86Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) { return true; } -static bool ARTRegIDToDWARFRegID(bool is_x86_64, int art_reg_id, int* dwarf_reg_id) { - if (is_x86_64) { - switch (art_reg_id) { - case 3 : *dwarf_reg_id = 3; return true; // %rbx - // This is the only discrepancy between ART & DWARF register numbering. - case 5 : *dwarf_reg_id = 6; return true; // %rbp - case 12: *dwarf_reg_id = 12; return true; // %r12 - case 13: *dwarf_reg_id = 13; return true; // %r13 - case 14: *dwarf_reg_id = 14; return true; // %r14 - case 15: *dwarf_reg_id = 15; return true; // %r15 - default: return false; // Should not get here - } - } else { - switch (art_reg_id) { - case 5: *dwarf_reg_id = 5; return true; // %ebp - case 6: *dwarf_reg_id = 6; return true; // %esi - case 7: *dwarf_reg_id = 7; return true; // %edi - default: return false; // Should not get here - } - } -} - -std::vector<uint8_t>* X86Mir2Lir::ReturnFrameDescriptionEntry() { - std::vector<uint8_t>* cfi_info = new std::vector<uint8_t>; - - // Generate the FDE for the method. - DCHECK_NE(data_offset_, 0U); - - WriteFDEHeader(cfi_info, cu_->target64); - WriteFDEAddressRange(cfi_info, data_offset_, cu_->target64); - - // The instructions in the FDE. - if (stack_decrement_ != nullptr) { - // Advance LOC to just past the stack decrement. - uint32_t pc = NEXT_LIR(stack_decrement_)->offset; - DW_CFA_advance_loc(cfi_info, pc); - - // Now update the offset to the call frame: DW_CFA_def_cfa_offset frame_size. - DW_CFA_def_cfa_offset(cfi_info, frame_size_); - - // Handle register spills - const uint32_t kSpillInstLen = (cu_->target64) ? 5 : 4; - const int kDataAlignmentFactor = (cu_->target64) ? -8 : -4; - uint32_t mask = core_spill_mask_ & ~(1 << rs_rRET.GetRegNum()); - int offset = -(GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_); - for (int reg = 0; mask; mask >>= 1, reg++) { - if (mask & 0x1) { - pc += kSpillInstLen; - - // Advance LOC to pass this instruction - DW_CFA_advance_loc(cfi_info, kSpillInstLen); - - int dwarf_reg_id; - if (ARTRegIDToDWARFRegID(cu_->target64, reg, &dwarf_reg_id)) { - // DW_CFA_offset_extended_sf reg offset - DW_CFA_offset_extended_sf(cfi_info, dwarf_reg_id, offset / kDataAlignmentFactor); - } - - offset += GetInstructionSetPointerSize(cu_->instruction_set); - } - } - - // We continue with that stack until the epilogue. - if (stack_increment_ != nullptr) { - uint32_t new_pc = NEXT_LIR(stack_increment_)->offset; - DW_CFA_advance_loc(cfi_info, new_pc - pc); - - // We probably have code snippets after the epilogue, so save the - // current state: DW_CFA_remember_state. - DW_CFA_remember_state(cfi_info); - - // We have now popped the stack: DW_CFA_def_cfa_offset 4/8. - // There is only the return PC on the stack now. 
- DW_CFA_def_cfa_offset(cfi_info, GetInstructionSetPointerSize(cu_->instruction_set)); - - // Everything after that is the same as before the epilogue. - // Stack bump was followed by RET instruction. - LIR *post_ret_insn = NEXT_LIR(NEXT_LIR(stack_increment_)); - if (post_ret_insn != nullptr) { - pc = new_pc; - new_pc = post_ret_insn->offset; - DW_CFA_advance_loc(cfi_info, new_pc - pc); - // Restore the state: DW_CFA_restore_state. - DW_CFA_restore_state(cfi_info); - } - } - } - - PadCFI(cfi_info); - WriteCFILength(cfi_info, cu_->target64); - - return cfi_info; -} - void X86Mir2Lir::GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir) { switch (static_cast<ExtendedMIROpcode>(mir->dalvikInsn.opcode)) { case kMirOpReserveVectorRegisters: diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index f6b217a635..c2b837512c 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -2370,44 +2370,6 @@ bool CompilerDriver::WriteElf(const std::string& android_root, SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { return compiler_->WriteElf(file, oat_writer, dex_files, android_root, is_host); } -void CompilerDriver::InstructionSetToLLVMTarget(InstructionSet instruction_set, - std::string* target_triple, - std::string* target_cpu, - std::string* target_attr) { - switch (instruction_set) { - case kThumb2: - *target_triple = "thumb-none-linux-gnueabi"; - *target_cpu = "cortex-a9"; - *target_attr = "+thumb2,+neon,+neonfp,+vfp3,+db"; - break; - - case kArm: - *target_triple = "armv7-none-linux-gnueabi"; - // TODO: Fix for Nexus S. - *target_cpu = "cortex-a9"; - // TODO: Fix for Xoom. - *target_attr = "+v7,+neon,+neonfp,+vfp3,+db"; - break; - - case kX86: - *target_triple = "i386-pc-linux-gnu"; - *target_attr = ""; - break; - - case kX86_64: - *target_triple = "x86_64-pc-linux-gnu"; - *target_attr = ""; - break; - - case kMips: - *target_triple = "mipsel-unknown-linux"; - *target_attr = "mips32r2"; - break; - - default: - LOG(FATAL) << "Unknown instruction set: " << instruction_set; - } - } bool CompilerDriver::SkipCompilation(const std::string& method_name) { if (!profile_present_) { diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h index edd1bd263f..a6ed5590dc 100644 --- a/compiler/driver/compiler_driver.h +++ b/compiler/driver/compiler_driver.h @@ -385,12 +385,6 @@ class CompilerDriver { OatWriter* oat_writer, File* file); - // TODO: move to a common home for llvm helpers once quick/portable are merged. - static void InstructionSetToLLVMTarget(InstructionSet instruction_set, - std::string* target_triple, - std::string* target_cpu, - std::string* target_attr); - void SetCompilerContext(void* compiler_context) { compiler_context_ = compiler_context; } diff --git a/compiler/dwarf/debug_frame_opcode_writer.h b/compiler/dwarf/debug_frame_opcode_writer.h index cc4ef8fde1..85186bbc22 100644 --- a/compiler/dwarf/debug_frame_opcode_writer.h +++ b/compiler/dwarf/debug_frame_opcode_writer.h @@ -150,7 +150,7 @@ class DebugFrameOpCodeWriter : private Writer<Allocator> { } void RememberState() { - // Note that we do not need to advance the PC. 
+ ImplicitlyAdvancePC(); this->PushUint8(DW_CFA_remember_state); } @@ -236,6 +236,10 @@ class DebugFrameOpCodeWriter : private Writer<Allocator> { this->PushData(expr, expr_size); } + int GetCurrentPC() const { + return current_pc_; + } + int GetCurrentCFAOffset() const { return current_cfa_offset_; } diff --git a/compiler/dwarf/debug_frame_writer.h b/compiler/dwarf/debug_frame_writer.h index 6de45f5526..b104cc9408 100644 --- a/compiler/dwarf/debug_frame_writer.h +++ b/compiler/dwarf/debug_frame_writer.h @@ -33,8 +33,15 @@ class DebugFrameWriter FINAL : private Writer<Allocator> { int initial_opcodes_size) { DCHECK(cie_header_start_ == ~0u); cie_header_start_ = this->data()->size(); - this->PushUint32(0); // Length placeholder. - this->PushUint32(0); // CIE id. + if (use_64bit_address_) { + // TODO: This is not related to being 64bit. + this->PushUint32(0xffffffff); + this->PushUint64(0); // Length placeholder. + this->PushUint64(0); // CIE id. + } else { + this->PushUint32(0); // Length placeholder. + this->PushUint32(0); // CIE id. + } this->PushUint8(1); // Version. this->PushString("zR"); this->PushUleb128(DebugFrameOpCodeWriter<Allocator>::kCodeAlignmentFactor); @@ -48,7 +55,11 @@ class DebugFrameWriter FINAL : private Writer<Allocator> { } this->PushData(initial_opcodes, initial_opcodes_size); this->Pad(use_64bit_address_ ? 8 : 4); - this->UpdateUint32(cie_header_start_, this->data()->size() - cie_header_start_ - 4); + if (use_64bit_address_) { + this->UpdateUint64(cie_header_start_ + 4, this->data()->size() - cie_header_start_ - 12); + } else { + this->UpdateUint32(cie_header_start_, this->data()->size() - cie_header_start_ - 4); + } } void WriteCIE(Reg return_address_register, @@ -62,8 +73,15 @@ class DebugFrameWriter FINAL : private Writer<Allocator> { int unwind_opcodes_size) { DCHECK(cie_header_start_ != ~0u); size_t fde_header_start = this->data()->size(); - this->PushUint32(0); // Length placeholder. - this->PushUint32(this->data()->size() - cie_header_start_); // 'CIE_pointer' + if (use_64bit_address_) { + // TODO: This is not related to being 64bit. + this->PushUint32(0xffffffff); + this->PushUint64(0); // Length placeholder. + this->PushUint64(this->data()->size() - cie_header_start_); // 'CIE_pointer' + } else { + this->PushUint32(0); // Length placeholder. + this->PushUint32(this->data()->size() - cie_header_start_); // 'CIE_pointer' + } if (use_64bit_address_) { this->PushUint64(initial_address); this->PushUint64(address_range); @@ -74,7 +92,11 @@ class DebugFrameWriter FINAL : private Writer<Allocator> { this->PushUleb128(0); // Augmentation data size. this->PushData(unwind_opcodes, unwind_opcodes_size); this->Pad(use_64bit_address_ ? 8 : 4); - this->UpdateUint32(fde_header_start, this->data()->size() - fde_header_start - 4); + if (use_64bit_address_) { + this->UpdateUint64(fde_header_start + 4, this->data()->size() - fde_header_start - 12); + } else { + this->UpdateUint32(fde_header_start, this->data()->size() - fde_header_start - 4); + } } DebugFrameWriter(std::vector<uint8_t, Allocator>* buffer, bool use_64bit_address) diff --git a/compiler/dwarf/dwarf_test.cc b/compiler/dwarf/dwarf_test.cc index f3553bcc99..2b051c9e12 100644 --- a/compiler/dwarf/dwarf_test.cc +++ b/compiler/dwarf/dwarf_test.cc @@ -127,7 +127,8 @@ TEST_F(DwarfTest, DebugFrame) { CheckObjdumpOutput(is64bit, "-W"); } -TEST_F(DwarfTest, DebugFrame64) { +// TODO: objdump seems to have trouble with 64bit CIE length. 
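For context on the DebugFrameWriter hunks above: in the DWARF 64-bit format the 4-byte initial length is set to 0xffffffff as an escape marker, a genuine 64-bit length follows, and the CIE id / CIE_pointer fields widen to 8 bytes as well. That is why the patch-back writes at cie_header_start_ + 4 and subtracts 12 (the 4-byte escape plus the 8-byte length field, both excluded from the length). A sketch of the encoding as the DWARF spec defines it (illustrative helper, not the ART writer):

```cpp
#include <cstdint>
#include <vector>

// DWARF 64-bit "initial length": 0xffffffff escape, then a little-endian
// 64-bit length that does not count the escape or the length field itself.
void PushInitialLength64(std::vector<uint8_t>* out, uint64_t length) {
  for (int i = 0; i < 4; i++) {
    out->push_back(0xff);  // Escape marker.
  }
  for (int i = 0; i < 8; i++) {
    out->push_back(static_cast<uint8_t>(length >> (8 * i)));
  }
}
```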
+TEST_F(DwarfTest, DISABLED_DebugFrame64) { const bool is64bit = true; DebugFrameWriter<> eh_frame(&eh_frame_data_, is64bit); DebugFrameOpCodeWriter<> no_opcodes; diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc index 24cb364d08..354c71ec12 100644 --- a/compiler/elf_writer_quick.cc +++ b/compiler/elf_writer_quick.cc @@ -89,114 +89,126 @@ bool ElfWriterQuick<Elf_Word, Elf_Sword, Elf_Addr, Elf_Dyn, return elf_writer.Write(oat_writer, dex_files, android_root, is_host); } -std::vector<uint8_t>* ConstructCIEFrameX86(bool is_x86_64) { - std::vector<uint8_t>* cfi_info = new std::vector<uint8_t>; - - // Length (will be filled in later in this routine). - if (is_x86_64) { - Push32(cfi_info, 0xffffffff); // Indicates 64bit - Push32(cfi_info, 0); - Push32(cfi_info, 0); - } else { - Push32(cfi_info, 0); - } - - // CIE id: always 0. - if (is_x86_64) { - Push32(cfi_info, 0); - Push32(cfi_info, 0); - } else { - Push32(cfi_info, 0); - } - - // Version: always 1. - cfi_info->push_back(0x01); - - // Augmentation: 'zR\0' - cfi_info->push_back(0x7a); - cfi_info->push_back(0x52); - cfi_info->push_back(0x0); - - // Code alignment: 1. - EncodeUnsignedLeb128(1, cfi_info); - - // Data alignment. - if (is_x86_64) { - EncodeSignedLeb128(-8, cfi_info); - } else { - EncodeSignedLeb128(-4, cfi_info); - } - - // Return address register. - if (is_x86_64) { - // R16(RIP) - cfi_info->push_back(0x10); - } else { - // R8(EIP) - cfi_info->push_back(0x08); - } - - // Augmentation length: 1. - cfi_info->push_back(1); - - // Augmentation data. - if (is_x86_64) { - // 0x04 ((DW_EH_PE_absptr << 4) | DW_EH_PE_udata8). - cfi_info->push_back(0x04); - } else { - // 0x03 ((DW_EH_PE_absptr << 4) | DW_EH_PE_udata4). - cfi_info->push_back(0x03); - } - - // Initial instructions. - if (is_x86_64) { - // DW_CFA_def_cfa R7(RSP) 8. - cfi_info->push_back(0x0c); - cfi_info->push_back(0x07); - cfi_info->push_back(0x08); - - // DW_CFA_offset R16(RIP) 1 (* -8). - cfi_info->push_back(0x90); - cfi_info->push_back(0x01); - } else { - // DW_CFA_def_cfa R4(ESP) 4. - cfi_info->push_back(0x0c); - cfi_info->push_back(0x04); - cfi_info->push_back(0x04); - - // DW_CFA_offset R8(EIP) 1 (* -4). - cfi_info->push_back(0x88); - cfi_info->push_back(0x01); - } - - // Padding to a multiple of 4 - while ((cfi_info->size() & 3) != 0) { - // DW_CFA_nop is encoded as 0. - cfi_info->push_back(0); - } - - // Set the length of the CIE inside the generated bytes. - if (is_x86_64) { - uint32_t length = cfi_info->size() - 12; - UpdateWord(cfi_info, 4, length); - } else { - uint32_t length = cfi_info->size() - 4; - UpdateWord(cfi_info, 0, length); - } - return cfi_info; -} - -std::vector<uint8_t>* ConstructCIEFrame(InstructionSet isa) { +void WriteCIE(dwarf::DebugFrameWriter<>* cfi, InstructionSet isa) { + // Scratch registers should be marked as undefined. This tells the + // debugger that its value in the previous frame is not recoverable. switch (isa) { - case kX86: - return ConstructCIEFrameX86(false); - case kX86_64: - return ConstructCIEFrameX86(true); - - default: - // Not implemented. - return nullptr; + case kArm: + case kThumb2: { + dwarf::DebugFrameOpCodeWriter<> opcodes; + opcodes.DefCFA(dwarf::Reg::ArmCore(13), 0); // R13(SP). + // core registers. + for (int reg = 0; reg < 13; reg++) { + if (reg < 4 || reg == 12) { + opcodes.Undefined(dwarf::Reg::ArmCore(reg)); + } else { + opcodes.SameValue(dwarf::Reg::ArmCore(reg)); + } + } + // fp registers. 
+ for (int reg = 0; reg < 32; reg++) { + if (reg < 16) { + opcodes.Undefined(dwarf::Reg::ArmFp(reg)); + } else { + opcodes.SameValue(dwarf::Reg::ArmFp(reg)); + } + } + auto return_address_reg = dwarf::Reg::ArmCore(14); // R14(LR). + cfi->WriteCIE(return_address_reg, opcodes); + return; + } + case kArm64: { + dwarf::DebugFrameOpCodeWriter<> opcodes; + opcodes.DefCFA(dwarf::Reg::Arm64Core(31), 0); // R31(SP). + // core registers. + for (int reg = 0; reg < 30; reg++) { + if (reg < 8 || reg == 16 || reg == 17) { + opcodes.Undefined(dwarf::Reg::Arm64Core(reg)); + } else { + opcodes.SameValue(dwarf::Reg::Arm64Core(reg)); + } + } + // fp registers. + for (int reg = 0; reg < 32; reg++) { + if (reg < 8 || reg >= 16) { + opcodes.Undefined(dwarf::Reg::Arm64Fp(reg)); + } else { + opcodes.SameValue(dwarf::Reg::Arm64Fp(reg)); + } + } + auto return_address_reg = dwarf::Reg::Arm64Core(30); // R30(LR). + cfi->WriteCIE(return_address_reg, opcodes); + return; + } + case kMips: + case kMips64: { + dwarf::DebugFrameOpCodeWriter<> opcodes; + opcodes.DefCFA(dwarf::Reg::MipsCore(29), 0); // R29(SP). + // core registers. + for (int reg = 1; reg < 26; reg++) { + if (reg < 16 || reg == 24 || reg == 25) { // AT, V*, A*, T*. + opcodes.Undefined(dwarf::Reg::MipsCore(reg)); + } else { + opcodes.SameValue(dwarf::Reg::MipsCore(reg)); + } + } + auto return_address_reg = dwarf::Reg::MipsCore(31); // R31(RA). + cfi->WriteCIE(return_address_reg, opcodes); + return; + } + case kX86: { + dwarf::DebugFrameOpCodeWriter<> opcodes; + opcodes.DefCFA(dwarf::Reg::X86Core(4), 4); // R4(ESP). + opcodes.Offset(dwarf::Reg::X86Core(8), -4); // R8(EIP). + // core registers. + for (int reg = 0; reg < 8; reg++) { + if (reg <= 3) { + opcodes.Undefined(dwarf::Reg::X86Core(reg)); + } else if (reg == 4) { + // Stack pointer. + } else { + opcodes.SameValue(dwarf::Reg::X86Core(reg)); + } + } + // fp registers. + for (int reg = 0; reg < 8; reg++) { + opcodes.Undefined(dwarf::Reg::X86Fp(reg)); + } + auto return_address_reg = dwarf::Reg::X86Core(8); // R8(EIP). + cfi->WriteCIE(return_address_reg, opcodes); + return; + } + case kX86_64: { + dwarf::DebugFrameOpCodeWriter<> opcodes; + opcodes.DefCFA(dwarf::Reg::X86_64Core(4), 8); // R4(RSP). + opcodes.Offset(dwarf::Reg::X86_64Core(16), -8); // R16(RIP). + // core registers. + for (int reg = 0; reg < 16; reg++) { + if (reg == 4) { + // Stack pointer. + } else if (reg < 12 && reg != 3 && reg != 5) { // except EBX and EBP. + opcodes.Undefined(dwarf::Reg::X86_64Core(reg)); + } else { + opcodes.SameValue(dwarf::Reg::X86_64Core(reg)); + } + } + // fp registers. + for (int reg = 0; reg < 16; reg++) { + if (reg < 12) { + opcodes.Undefined(dwarf::Reg::X86_64Fp(reg)); + } else { + opcodes.SameValue(dwarf::Reg::X86_64Fp(reg)); + } + } + auto return_address_reg = dwarf::Reg::X86_64Core(16); // R16(RIP). 
+ cfi->WriteCIE(return_address_reg, opcodes); + return; + } + case kNone: + break; } + LOG(FATAL) << "Can not write CIE frame for ISA " << isa; + UNREACHABLE(); } class OatWriterWrapper FINAL : public CodeOutput { @@ -621,8 +633,10 @@ static void WriteDebugSymbols(const CompilerDriver* compiler_driver, ElfBuilder<Elf_Word, Elf_Sword, Elf_Addr, Elf_Dyn, Elf_Sym, Elf_Ehdr, Elf_Phdr, Elf_Shdr>* builder, OatWriter* oat_writer) { - std::unique_ptr<std::vector<uint8_t>> cfi_info( - ConstructCIEFrame(compiler_driver->GetInstructionSet())); + std::vector<uint8_t> cfi_data; + bool is_64bit = Is64BitInstructionSet(compiler_driver->GetInstructionSet()); + dwarf::DebugFrameWriter<> cfi(&cfi_data, is_64bit); + WriteCIE(&cfi, compiler_driver->GetInstructionSet()); Elf_Addr text_section_address = builder->GetTextBuilder().GetSection()->sh_addr; @@ -644,62 +658,17 @@ static void WriteDebugSymbols(const CompilerDriver* compiler_driver, 0, STB_LOCAL, STT_NOTYPE); } - // Include CFI for compiled method, if possible. - if (cfi_info.get() != nullptr) { - DCHECK(it->compiled_method_ != nullptr); - - // Copy in the FDE, if present - const SwapVector<uint8_t>* fde = it->compiled_method_->GetCFIInfo(); - if (fde != nullptr) { - // Copy the information into cfi_info and then fix the address in the new copy. - int cur_offset = cfi_info->size(); - cfi_info->insert(cfi_info->end(), fde->begin(), fde->end()); - - bool is_64bit = *(reinterpret_cast<const uint32_t*>(fde->data())) == 0xffffffff; - - // Set the 'CIE_pointer' field. - uint64_t CIE_pointer = cur_offset + (is_64bit ? 12 : 4); - uint64_t offset_to_update = CIE_pointer; - if (is_64bit) { - (*cfi_info)[offset_to_update+0] = CIE_pointer; - (*cfi_info)[offset_to_update+1] = CIE_pointer >> 8; - (*cfi_info)[offset_to_update+2] = CIE_pointer >> 16; - (*cfi_info)[offset_to_update+3] = CIE_pointer >> 24; - (*cfi_info)[offset_to_update+4] = CIE_pointer >> 32; - (*cfi_info)[offset_to_update+5] = CIE_pointer >> 40; - (*cfi_info)[offset_to_update+6] = CIE_pointer >> 48; - (*cfi_info)[offset_to_update+7] = CIE_pointer >> 56; - } else { - (*cfi_info)[offset_to_update+0] = CIE_pointer; - (*cfi_info)[offset_to_update+1] = CIE_pointer >> 8; - (*cfi_info)[offset_to_update+2] = CIE_pointer >> 16; - (*cfi_info)[offset_to_update+3] = CIE_pointer >> 24; - } - - // Set the 'initial_location' field. - offset_to_update += is_64bit ? 8 : 4; - if (is_64bit) { - const uint64_t quick_code_start = it->low_pc_ + text_section_address; - (*cfi_info)[offset_to_update+0] = quick_code_start; - (*cfi_info)[offset_to_update+1] = quick_code_start >> 8; - (*cfi_info)[offset_to_update+2] = quick_code_start >> 16; - (*cfi_info)[offset_to_update+3] = quick_code_start >> 24; - (*cfi_info)[offset_to_update+4] = quick_code_start >> 32; - (*cfi_info)[offset_to_update+5] = quick_code_start >> 40; - (*cfi_info)[offset_to_update+6] = quick_code_start >> 48; - (*cfi_info)[offset_to_update+7] = quick_code_start >> 56; - } else { - const uint32_t quick_code_start = it->low_pc_ + text_section_address; - (*cfi_info)[offset_to_update+0] = quick_code_start; - (*cfi_info)[offset_to_update+1] = quick_code_start >> 8; - (*cfi_info)[offset_to_update+2] = quick_code_start >> 16; - (*cfi_info)[offset_to_update+3] = quick_code_start >> 24; - } - } + // Include FDE for compiled method, if possible. + DCHECK(it->compiled_method_ != nullptr); + const SwapVector<uint8_t>* unwind_opcodes = it->compiled_method_->GetCFIInfo(); + if (unwind_opcodes != nullptr) { + // TUNING: The headers take a lot of space. 
Can we have 1 FDE per file? + // TUNING: Some tools support compressed DWARF sections (.zdebug_*). + cfi.WriteFDE(text_section_address + it->low_pc_, it->high_pc_ - it->low_pc_, + unwind_opcodes->data(), unwind_opcodes->size()); } } - bool hasCFI = (cfi_info.get() != nullptr); bool hasLineInfo = false; for (auto& dbg_info : oat_writer->GetCFIMethodInfo()) { if (dbg_info.dbgstream_ != nullptr && @@ -709,7 +678,8 @@ static void WriteDebugSymbols(const CompilerDriver* compiler_driver, } } - if (hasLineInfo || hasCFI) { + if (!method_info.empty() && + compiler_driver->GetCompilerOptions().GetGenerateGDBInformation()) { ElfRawSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr> debug_info(".debug_info", SHT_PROGBITS, 0, nullptr, 0, 1, 0); @@ -731,14 +701,12 @@ static void WriteDebugSymbols(const CompilerDriver* compiler_driver, builder->RegisterRawSection(debug_info); builder->RegisterRawSection(debug_abbrev); - if (hasCFI) { - ElfRawSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr> eh_frame(".eh_frame", - SHT_PROGBITS, - SHF_ALLOC, - nullptr, 0, 4, 0); - eh_frame.SetBuffer(std::move(*cfi_info.get())); - builder->RegisterRawSection(eh_frame); - } + ElfRawSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr> eh_frame(".eh_frame", + SHT_PROGBITS, + SHF_ALLOC, + nullptr, 0, 4, 0); + eh_frame.SetBuffer(std::move(cfi_data)); + builder->RegisterRawSection(eh_frame); if (hasLineInfo) { builder->RegisterRawSection(debug_line); diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc index 2d9e03a718..45e2fd0ffe 100644 --- a/compiler/jni/quick/jni_compiler.cc +++ b/compiler/jni/quick/jni_compiler.cc @@ -93,7 +93,6 @@ CompiledMethod* ArtJniCompileMethodInternal(CompilerDriver* driver, // Assembler that holds generated instructions std::unique_ptr<Assembler> jni_asm(Assembler::Create(instruction_set)); - jni_asm->InitializeFrameDescriptionEntry(); // Offsets into data structures // TODO: if cross compiling these offsets are for the host not the target @@ -432,19 +431,14 @@ CompiledMethod* ArtJniCompileMethodInternal(CompilerDriver* driver, std::vector<uint8_t> managed_code(cs); MemoryRegion code(&managed_code[0], managed_code.size()); __ FinalizeInstructions(code); - jni_asm->FinalizeFrameDescriptionEntry(); - std::vector<uint8_t>* fde(jni_asm->GetFrameDescriptionEntry()); - ArrayRef<const uint8_t> cfi_ref; - if (fde != nullptr) { - cfi_ref = ArrayRef<const uint8_t>(*fde); - } + return CompiledMethod::SwapAllocCompiledMethodCFI(driver, instruction_set, ArrayRef<const uint8_t>(managed_code), frame_size, main_jni_conv->CoreSpillMask(), main_jni_conv->FpSpillMask(), - cfi_ref); + ArrayRef<const uint8_t>()); } // Copy a single parameter from the managed to the JNI calling convention diff --git a/compiler/linker/arm/relative_patcher_thumb2.cc b/compiler/linker/arm/relative_patcher_thumb2.cc index 4267743097..b17cbca2d2 100644 --- a/compiler/linker/arm/relative_patcher_thumb2.cc +++ b/compiler/linker/arm/relative_patcher_thumb2.cc @@ -48,22 +48,30 @@ void Thumb2RelativePatcher::PatchCall(std::vector<uint8_t>* code, uint32_t liter uint32_t value = (signbit << 26) | (j1 << 13) | (j2 << 11) | (imm10 << 16) | imm11; value |= 0xf000d000; // BL - uint8_t* addr = &(*code)[literal_offset]; // Check that we're just overwriting an existing BL. - DCHECK_EQ(addr[1] & 0xf8, 0xf0); - DCHECK_EQ(addr[3] & 0xd0, 0xd0); + DCHECK_EQ(GetInsn32(code, literal_offset) & 0xf800d000, 0xf000d000); // Write the new BL. 
- addr[0] = (value >> 16) & 0xff; - addr[1] = (value >> 24) & 0xff; - addr[2] = (value >> 0) & 0xff; - addr[3] = (value >> 8) & 0xff; + SetInsn32(code, literal_offset, value); } -void Thumb2RelativePatcher::PatchDexCacheReference(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, - const LinkerPatch& patch ATTRIBUTE_UNUSED, - uint32_t patch_offset ATTRIBUTE_UNUSED, - uint32_t target_offset ATTRIBUTE_UNUSED) { - LOG(FATAL) << "Unexpected relative dex cache array patch."; +void Thumb2RelativePatcher::PatchDexCacheReference(std::vector<uint8_t>* code, + const LinkerPatch& patch, + uint32_t patch_offset, + uint32_t target_offset) { + uint32_t literal_offset = patch.LiteralOffset(); + uint32_t pc_literal_offset = patch.PcInsnOffset(); + uint32_t pc_base = patch_offset + (pc_literal_offset - literal_offset) + 4u /* PC adjustment */; + uint32_t diff = target_offset - pc_base; + + uint32_t insn = GetInsn32(code, literal_offset); + DCHECK_EQ(insn & 0xff7ff0ffu, 0xf2400000u); // MOVW/MOVT, unpatched (imm16 == 0). + uint32_t diff16 = ((insn & 0x00800000u) != 0u) ? (diff >> 16) : (diff & 0xffffu); + uint32_t imm4 = (diff16 >> 12) & 0xfu; + uint32_t imm = (diff16 >> 11) & 0x1u; + uint32_t imm3 = (diff16 >> 8) & 0x7u; + uint32_t imm8 = diff16 & 0xffu; + insn = (insn & 0xfbf08f00u) | (imm << 26) | (imm4 << 16) | (imm3 << 12) | imm8; + SetInsn32(code, literal_offset, insn); } std::vector<uint8_t> Thumb2RelativePatcher::CompileThunkCode() { @@ -80,5 +88,31 @@ std::vector<uint8_t> Thumb2RelativePatcher::CompileThunkCode() { return thunk_code; } +void Thumb2RelativePatcher::SetInsn32(std::vector<uint8_t>* code, uint32_t offset, uint32_t value) { + DCHECK_LE(offset + 4u, code->size()); + DCHECK_EQ(offset & 1u, 0u); + uint8_t* addr = &(*code)[offset]; + addr[0] = (value >> 16) & 0xff; + addr[1] = (value >> 24) & 0xff; + addr[2] = (value >> 0) & 0xff; + addr[3] = (value >> 8) & 0xff; +} + +uint32_t Thumb2RelativePatcher::GetInsn32(ArrayRef<const uint8_t> code, uint32_t offset) { + DCHECK_LE(offset + 4u, code.size()); + DCHECK_EQ(offset & 1u, 0u); + const uint8_t* addr = &code[offset]; + return + (static_cast<uint32_t>(addr[0]) << 16) + + (static_cast<uint32_t>(addr[1]) << 24) + + (static_cast<uint32_t>(addr[2]) << 0)+ + (static_cast<uint32_t>(addr[3]) << 8); +} + +template <typename Alloc> +uint32_t Thumb2RelativePatcher::GetInsn32(std::vector<uint8_t, Alloc>* code, uint32_t offset) { + return GetInsn32(ArrayRef<const uint8_t>(*code), offset); +} + } // namespace linker } // namespace art diff --git a/compiler/linker/arm/relative_patcher_thumb2.h b/compiler/linker/arm/relative_patcher_thumb2.h index 561130305e..2d474c2db0 100644 --- a/compiler/linker/arm/relative_patcher_thumb2.h +++ b/compiler/linker/arm/relative_patcher_thumb2.h @@ -34,6 +34,12 @@ class Thumb2RelativePatcher FINAL : public ArmBaseRelativePatcher { private: static std::vector<uint8_t> CompileThunkCode(); + void SetInsn32(std::vector<uint8_t>* code, uint32_t offset, uint32_t value); + static uint32_t GetInsn32(ArrayRef<const uint8_t> code, uint32_t offset); + + template <typename Alloc> + static uint32_t GetInsn32(std::vector<uint8_t, Alloc>* code, uint32_t offset); + // PC displacement from patch location; Thumb2 PC is always at instruction address + 4. 
static constexpr int32_t kPcDisplacement = 4; diff --git a/compiler/linker/arm/relative_patcher_thumb2_test.cc b/compiler/linker/arm/relative_patcher_thumb2_test.cc index 3b397cc5fb..a057a4cf16 100644 --- a/compiler/linker/arm/relative_patcher_thumb2_test.cc +++ b/compiler/linker/arm/relative_patcher_thumb2_test.cc @@ -121,6 +121,48 @@ class Thumb2RelativePatcherTest : public RelativePatcherTest { result.push_back(static_cast<uint8_t>(bl >> 8)); return result; } + + void TestDexCachereference(uint32_t dex_cache_arrays_begin, uint32_t element_offset) { + dex_cache_arrays_begin_ = dex_cache_arrays_begin; + static const uint8_t raw_code[] = { + 0x40, 0xf2, 0x00, 0x00, // MOVW r0, #0 (placeholder) + 0xc0, 0xf2, 0x00, 0x00, // MOVT r0, #0 (placeholder) + 0x78, 0x44, // ADD r0, pc + }; + constexpr uint32_t pc_insn_offset = 8u; + const ArrayRef<const uint8_t> code(raw_code); + LinkerPatch patches[] = { + LinkerPatch::DexCacheArrayPatch(0u, nullptr, pc_insn_offset, element_offset), + LinkerPatch::DexCacheArrayPatch(4u, nullptr, pc_insn_offset, element_offset), + }; + AddCompiledMethod(MethodRef(1u), code, ArrayRef<const LinkerPatch>(patches)); + Link(); + + uint32_t method1_offset = GetMethodOffset(1u); + uint32_t pc_base_offset = method1_offset + pc_insn_offset + 4u /* PC adjustment */; + uint32_t diff = dex_cache_arrays_begin_ + element_offset - pc_base_offset; + // Distribute the bits of the diff between the MOVW and MOVT: + uint32_t diffw = diff & 0xffffu; + uint32_t difft = diff >> 16; + uint32_t movw = 0xf2400000u | // MOVW r0, #0 (placeholder), + ((diffw & 0xf000u) << (16 - 12)) | // move imm4 from bits 12-15 to bits 16-19, + ((diffw & 0x0800u) << (26 - 11)) | // move imm from bit 11 to bit 26, + ((diffw & 0x0700u) << (12 - 8)) | // move imm3 from bits 8-10 to bits 12-14, + ((diffw & 0x00ffu)); // keep imm8 at bits 0-7. + uint32_t movt = 0xf2c00000u | // MOVT r0, #0 (placeholder), + ((difft & 0xf000u) << (16 - 12)) | // move imm4 from bits 12-15 to bits 16-19, + ((difft & 0x0800u) << (26 - 11)) | // move imm from bit 11 to bit 26, + ((difft & 0x0700u) << (12 - 8)) | // move imm3 from bits 8-10 to bits 12-14, + ((difft & 0x00ffu)); // keep imm8 at bits 0-7. 
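The bit shuffling in the test above mirrors the patcher: a Thumb2 MOVW/MOVT spreads its 16-bit immediate across four fields of the 32-bit encoding, and the instruction itself is stored as two little-endian 16-bit half-words, which is why GetInsn32/SetInsn32 assemble bytes as addr[0]<<16, addr[1]<<24, addr[2]<<0, addr[3]<<8 instead of doing a single 32-bit load. A sketch of the packing under those assumptions (plain helper, not ART code):

```cpp
#include <cstdint>

// Scatter a 16-bit immediate into the imm4/i/imm3/imm8 fields of a Thumb2
// MOVW (base 0xf2400000) or MOVT (base 0xf2c00000) encoding.
uint32_t PackMovImm16(uint32_t base_opcode, uint32_t imm16) {
  return base_opcode |
         ((imm16 & 0xf000u) << (16 - 12)) |  // imm4 -> bits 16-19.
         ((imm16 & 0x0800u) << (26 - 11)) |  // i    -> bit 26.
         ((imm16 & 0x0700u) << (12 - 8)) |   // imm3 -> bits 12-14.
         (imm16 & 0x00ffu);                  // imm8 stays in bits 0-7.
}
// E.g. the MOVW above is PackMovImm16(0xf2400000u, diff & 0xffffu) and the
// MOVT is PackMovImm16(0xf2c00000u, diff >> 16).
```

The expected_code bytes that follow then store each 32-bit instruction as two little-endian half-words.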
+ const uint8_t expected_code[] = { + static_cast<uint8_t>(movw >> 16), static_cast<uint8_t>(movw >> 24), + static_cast<uint8_t>(movw >> 0), static_cast<uint8_t>(movw >> 8), + static_cast<uint8_t>(movt >> 16), static_cast<uint8_t>(movt >> 24), + static_cast<uint8_t>(movt >> 0), static_cast<uint8_t>(movt >> 8), + 0x78, 0x44, + }; + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); + } }; const uint8_t Thumb2RelativePatcherTest::kCallRawCode[] = { @@ -285,5 +327,25 @@ TEST_F(Thumb2RelativePatcherTest, CallOtherJustTooFarBefore) { EXPECT_TRUE(CheckThunk(thunk_offset)); } +TEST_F(Thumb2RelativePatcherTest, DexCacheReferenceImm8) { + TestDexCachereference(0x00ff0000u, 0x00fcu); + ASSERT_LT(GetMethodOffset(1u), 0xfcu); +} + +TEST_F(Thumb2RelativePatcherTest, DexCacheReferenceImm3) { + TestDexCachereference(0x02ff0000u, 0x05fcu); + ASSERT_LT(GetMethodOffset(1u), 0xfcu); +} + +TEST_F(Thumb2RelativePatcherTest, DexCacheReferenceImm) { + TestDexCachereference(0x08ff0000u, 0x08fcu); + ASSERT_LT(GetMethodOffset(1u), 0xfcu); +} + +TEST_F(Thumb2RelativePatcherTest, DexCacheReferenceimm4) { + TestDexCachereference(0xd0ff0000u, 0x60fcu); + ASSERT_LT(GetMethodOffset(1u), 0xfcu); +} + } // namespace linker } // namespace art diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 439e85ca6c..5cb02172d2 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -1659,11 +1659,26 @@ void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruct Register lhs = InputRegisterAt(condition, 0); Operand rhs = InputOperandAt(condition, 1); Condition arm64_cond = ARM64Condition(condition->GetCondition()); - if ((arm64_cond == eq || arm64_cond == ne) && rhs.IsImmediate() && (rhs.immediate() == 0)) { - if (arm64_cond == eq) { - __ Cbz(lhs, true_target); - } else { - __ Cbnz(lhs, true_target); + if ((arm64_cond != gt && arm64_cond != le) && rhs.IsImmediate() && (rhs.immediate() == 0)) { + switch (arm64_cond) { + case eq: + __ Cbz(lhs, true_target); + break; + case ne: + __ Cbnz(lhs, true_target); + break; + case lt: + // Test the sign bit and branch accordingly. + __ Tbnz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, true_target); + break; + case ge: + // Test the sign bit and branch accordingly. + __ Tbz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, true_target); + break; + default: + // Without the `static_cast` the compiler throws an error for + // `-Werror=sign-promo`. + LOG(FATAL) << "Unexpected condition: " << static_cast<int>(arm64_cond); } } else { __ Cmp(lhs, rhs); diff --git a/compiler/optimizing/code_generator_utils.cc b/compiler/optimizing/code_generator_utils.cc new file mode 100644 index 0000000000..26cab2ff09 --- /dev/null +++ b/compiler/optimizing/code_generator_utils.cc @@ -0,0 +1,93 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "code_generator_utils.h" + +#include "base/logging.h" + +void CalculateMagicAndShiftForDivRem(int64_t divisor, bool is_long, + int64_t* magic, int* shift) { + // It does not make sense to calculate magic and shift for a zero divisor. + DCHECK_NE(divisor, 0); + + /* According to the implementation from H. S. Warren's "Hacker's Delight" (Addison Wesley, 2002) + * Chapter 10 and T. Granlund and P. L. Montgomery's "Division by Invariant Integers Using + * Multiplication" (PLDI 1994). + * The magic number M and shift S can be calculated in the following way: + * Let nc be the most positive value of numerator(n) such that nc = kd - 1, + * where divisor(d) >= 2. + * Let nc be the most negative value of numerator(n) such that nc = kd + 1, + * where divisor(d) <= -2. + * Thus nc can be calculated like: + * nc = exp + exp % d - 1, where d >= 2 and exp = 2^31 for int or 2^63 for long + * nc = -exp + (exp + 1) % d, where d <= -2 and exp = 2^31 for int or 2^63 for long + * + * So the shift p is the smallest p satisfying + * 2^p > nc * (d - 2^p % d), where d >= 2 + * 2^p > nc * (d + 2^p % d), where d <= -2. + * + * The magic number M is calculated by + * M = (2^p + d - 2^p % d) / d, where d >= 2 + * M = (2^p - d - 2^p % d) / d, where d <= -2. + * + * Notice that p is always greater than or equal to 32 (resp. 64), so we just return p - 32 + * (resp. p - 64) as the shift number S. + */ + + int64_t p = is_long ? 63 : 31; + const uint64_t exp = is_long ? (UINT64_C(1) << 63) : (UINT32_C(1) << 31); + + // Initialize the computations. + uint64_t abs_d = (divisor >= 0) ? divisor : -divisor; + uint64_t tmp = exp + (is_long ? static_cast<uint64_t>(divisor) >> 63 : + static_cast<uint32_t>(divisor) >> 31); + uint64_t abs_nc = tmp - 1 - tmp % abs_d; + uint64_t quotient1 = exp / abs_nc; + uint64_t remainder1 = exp % abs_nc; + uint64_t quotient2 = exp / abs_d; + uint64_t remainder2 = exp % abs_d; + + /* + * To avoid handling positive and negative divisors separately, + * "Hacker's Delight" introduces a method to handle these 2 cases together. + */ + uint64_t delta; + do { + p++; + quotient1 = 2 * quotient1; + remainder1 = 2 * remainder1; + if (remainder1 >= abs_nc) { + quotient1++; + remainder1 = remainder1 - abs_nc; + } + quotient2 = 2 * quotient2; + remainder2 = 2 * remainder2; + if (remainder2 >= abs_d) { + quotient2++; + remainder2 = remainder2 - abs_d; + } + delta = abs_d - remainder2; + } while (quotient1 < delta || (quotient1 == delta && remainder1 == 0)); + + *magic = (divisor > 0) ? (quotient2 + 1) : (-quotient2 - 1); + + if (!is_long) { + *magic = static_cast<int>(*magic); + } + + *shift = is_long ? p - 64 : p - 32; +} + diff --git a/compiler/optimizing/code_generator_utils.h b/compiler/optimizing/code_generator_utils.h new file mode 100644 index 0000000000..742d67565a --- /dev/null +++ b/compiler/optimizing/code_generator_utils.h @@ -0,0 +1,25 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_UTILS_H_ +#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_UTILS_H_ + +#include <cstdint> + +// Computes the magic number and the shift needed in the div/rem by constant algorithm +void CalculateMagicAndShiftForDivRem(int64_t divisor, bool is_long, int64_t* magic, int* shift); + +#endif // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_UTILS_H_ diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 92b62e2c84..c4fbc1d51c 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -16,6 +16,7 @@ #include "code_generator_x86.h" +#include "code_generator_utils.h" #include "entrypoints/quick/quick_entrypoints.h" #include "entrypoints/quick/quick_entrypoints_enum.h" #include "gc/accounting/card_table.h" @@ -2278,6 +2279,134 @@ void InstructionCodeGeneratorX86::GenerateRemFP(HRem *rem) { __ addl(ESP, Immediate(2 * elem_size)); } + +void InstructionCodeGeneratorX86::DivRemOneOrMinusOne(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(1).IsConstant()); + + Register out_register = locations->Out().AsRegister<Register>(); + Register input_register = locations->InAt(0).AsRegister<Register>(); + int imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); + + DCHECK(imm == 1 || imm == -1); + + if (instruction->IsRem()) { + __ xorl(out_register, out_register); + } else { + __ movl(out_register, input_register); + if (imm == -1) { + __ negl(out_register); + } + } +} + + +void InstructionCodeGeneratorX86::DivByPowerOfTwo(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv()); + + LocationSummary* locations = instruction->GetLocations(); + + Register out_register = locations->Out().AsRegister<Register>(); + Register input_register = locations->InAt(0).AsRegister<Register>(); + int imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); + + DCHECK(instruction->IsDiv() && IsPowerOfTwo(std::abs(imm))); + Register num = locations->GetTemp(0).AsRegister<Register>(); + + __ leal(num, Address(input_register, std::abs(imm) - 1)); + __ testl(input_register, input_register); + __ cmovl(kGreaterEqual, num, input_register); + int shift = CTZ(imm); + __ sarl(num, Immediate(shift)); + + if (imm < 0) { + __ negl(num); + } + + __ movl(out_register, num); +} + +void InstructionCodeGeneratorX86::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + + LocationSummary* locations = instruction->GetLocations(); + int imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); + + Register eax = locations->InAt(0).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); + Register num; + Register edx; + + if (instruction->IsDiv()) { + edx = locations->GetTemp(0).AsRegister<Register>(); + num = locations->GetTemp(1).AsRegister<Register>(); + } else { + edx = locations->Out().AsRegister<Register>(); + num = locations->GetTemp(0).AsRegister<Register>(); + } + + DCHECK_EQ(EAX, eax); + DCHECK_EQ(EDX, edx); + if (instruction->IsDiv()) { + DCHECK_EQ(EAX, out); + } else { + DCHECK_EQ(EDX, out); + } + + int64_t magic; + int shift; + CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift); + + Label ndiv; + Label end; + // If numerator is 0, the result is 0, no computation needed. 
+ __ testl(eax, eax); + __ j(kNotEqual, &ndiv); + + __ xorl(out, out); + __ jmp(&end); + + __ Bind(&ndiv); + + // Save the numerator. + __ movl(num, eax); + + // EAX = magic + __ movl(eax, Immediate(magic)); + + // EDX:EAX = magic * numerator + __ imull(num); + + if (imm > 0 && magic < 0) { + // EDX += num + __ addl(edx, num); + } else if (imm < 0 && magic > 0) { + __ subl(edx, num); + } + + // Shift if needed. + if (shift != 0) { + __ sarl(edx, Immediate(shift)); + } + + // EDX += 1 if EDX < 0 + __ movl(eax, edx); + __ shrl(edx, Immediate(31)); + __ addl(edx, eax); + + if (instruction->IsRem()) { + __ movl(eax, num); + __ imull(edx, Immediate(imm)); + __ subl(eax, edx); + __ movl(edx, eax); + } else { + __ movl(eax, edx); + } + __ Bind(&end); +} + void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instruction) { DCHECK(instruction->IsDiv() || instruction->IsRem()); @@ -2289,28 +2418,42 @@ void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instr switch (instruction->GetResultType()) { case Primitive::kPrimInt: { - Register second_reg = second.AsRegister<Register>(); DCHECK_EQ(EAX, first.AsRegister<Register>()); DCHECK_EQ(is_div ? EAX : EDX, out.AsRegister<Register>()); - SlowPathCodeX86* slow_path = + if (second.IsConstant()) { + int imm = second.GetConstant()->AsIntConstant()->GetValue(); + + if (imm == 0) { + // Do not generate anything for 0. DivZeroCheck would forbid any generated code. + } else if (imm == 1 || imm == -1) { + DivRemOneOrMinusOne(instruction); + } else if (is_div && IsPowerOfTwo(std::abs(imm))) { + DivByPowerOfTwo(instruction); + } else { + DCHECK(imm <= -2 || imm >= 2); + GenerateDivRemWithAnyConstant(instruction); + } + } else { + SlowPathCodeX86* slow_path = new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86(out.AsRegister<Register>(), - is_div); - codegen_->AddSlowPath(slow_path); + is_div); + codegen_->AddSlowPath(slow_path); - // 0x80000000/-1 triggers an arithmetic exception! - // Dividing by -1 is actually negation and -0x800000000 = 0x80000000 so - // it's safe to just use negl instead of more complex comparisons. - - __ cmpl(second_reg, Immediate(-1)); - __ j(kEqual, slow_path->GetEntryLabel()); + Register second_reg = second.AsRegister<Register>(); + // 0x80000000/-1 triggers an arithmetic exception! + // Dividing by -1 is actually negation and -0x80000000 = 0x80000000 so + // it's safe to just use negl instead of more complex comparisons. - // edx:eax <- sign-extended of eax - __ cdq(); - // eax = quotient, edx = remainder - __ idivl(second_reg); + __ cmpl(second_reg, Immediate(-1)); + __ j(kEqual, slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); + // edx:eax <- sign-extended of eax + __ cdq(); + // eax = quotient, edx = remainder + __ idivl(second_reg); + __ Bind(slow_path->GetExitLabel()); + } break; } @@ -2350,10 +2493,16 @@ void LocationsBuilderX86::VisitDiv(HDiv* div) { switch (div->GetResultType()) { case Primitive::kPrimInt: { locations->SetInAt(0, Location::RegisterLocation(EAX)); - locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1))); locations->SetOut(Location::SameAsFirstInput()); // Intel uses edx:eax as the dividend. locations->AddTemp(Location::RegisterLocation(EDX)); + // We need to save the numerator while we tweak eax and edx. As we are using imul in a way + // which enforces results to be in EAX and EDX, things are simpler if we use EAX also as + // output and request another temp.
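To make the sequence above concrete, here is the same computation in plain C++ for one divisor. The constants are the ones CalculateMagicAndShiftForDivRem produces for d = 7 on 32 bits (magic 0x92492493, shift 2, per the Hacker's Delight tables); an arithmetic right shift for signed values is assumed, as on the targets at hand:

```cpp
#include <cassert>
#include <cstdint>

// Worked example (not ART code) of the instruction sequence that
// GenerateDivRemWithAnyConstant emits, written out for division by 7.
int32_t DivBy7(int32_t n) {
  const int32_t magic = static_cast<int32_t>(0x92492493);  // Negative as signed.
  int64_t product = static_cast<int64_t>(n) * magic;       // imull: EDX:EAX.
  int32_t hi = static_cast<int32_t>(product >> 32);        // EDX after imull.
  hi += n;                                // imm > 0 && magic < 0: addl edx, num.
  hi >>= 2;                               // sarl edx, shift.
  hi += static_cast<uint32_t>(hi) >> 31;  // Add 1 if negative (shrl + addl).
  return hi;                              // Quotient; remainder would be n - 7 * hi.
}

int main() {
  assert(DivBy7(100) == 14);
  assert(DivBy7(-100) == -14);
  return 0;
}
```

The extra temp requested just below exists so the numerator survives while EAX and EDX are clobbered by imul.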
+ if (div->InputAt(1)->IsConstant()) { + locations->AddTemp(Location::RequiresRegister()); + } break; } case Primitive::kPrimLong: { @@ -2411,6 +2560,7 @@ void InstructionCodeGeneratorX86::VisitDiv(HDiv* div) { void LocationsBuilderX86::VisitRem(HRem* rem) { Primitive::Type type = rem->GetResultType(); + LocationSummary::CallKind call_kind = (rem->GetResultType() == Primitive::kPrimLong) ? LocationSummary::kCall : LocationSummary::kNoCall; @@ -2419,8 +2569,14 @@ void LocationsBuilderX86::VisitRem(HRem* rem) { switch (type) { case Primitive::kPrimInt: { locations->SetInAt(0, Location::RegisterLocation(EAX)); - locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1))); locations->SetOut(Location::RegisterLocation(EDX)); + // We need to save the numerator while we tweak eax and edx. As we are using imul in a way + // which enforces results to be in EAX and EDX, things are simpler if we use EDX also as + // output and request another temp. + if (rem->InputAt(1)->IsConstant()) { + locations->AddTemp(Location::RequiresRegister()); + } break; } case Primitive::kPrimLong: { diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 0cc3c6533a..20f14fb3f4 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -163,6 +163,9 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { void GenerateClassInitializationCheck(SlowPathCodeX86* slow_path, Register class_reg); void HandleBitwiseOperation(HBinaryOperation* instruction); void GenerateDivRemIntegral(HBinaryOperation* instruction); + void DivRemOneOrMinusOne(HBinaryOperation* instruction); + void DivByPowerOfTwo(HBinaryOperation* instruction); + void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); void GenerateRemFP(HRem *rem); void HandleShift(HBinaryOperation* instruction); void GenerateShlLong(const Location& loc, Register shifter); diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index cdbc7780a8..9e08558c8e 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -16,6 +16,7 @@ #include "code_generator_x86_64.h" +#include "code_generator_utils.h" #include "entrypoints/quick/quick_entrypoints.h" #include "gc/accounting/card_table.h" #include "intrinsics.h" @@ -2259,6 +2260,228 @@ void InstructionCodeGeneratorX86_64::GenerateRemFP(HRem *rem) { __ addq(CpuRegister(RSP), Immediate(2 * elem_size)); } +void InstructionCodeGeneratorX86_64::DivRemOneOrMinusOne(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + + LocationSummary* locations = instruction->GetLocations(); + Location second = locations->InAt(1); + DCHECK(second.IsConstant()); + + CpuRegister output_register = locations->Out().AsRegister<CpuRegister>(); + CpuRegister input_register = locations->InAt(0).AsRegister<CpuRegister>(); + int64_t imm; + if (second.GetConstant()->IsLongConstant()) { + imm = second.GetConstant()->AsLongConstant()->GetValue(); + } else { + imm = second.GetConstant()->AsIntConstant()->GetValue(); + } + + DCHECK(imm == 1 || imm == -1); + + switch (instruction->GetResultType()) { + case Primitive::kPrimInt: { + if (instruction->IsRem()) { + __ xorl(output_register, output_register); + } else { + __ movl(output_register, input_register); + if (imm == -1) { + __ negl(output_register); + } + } + break; + } + + case Primitive::kPrimLong: { + if 
(instruction->IsRem()) { + __ xorq(output_register, output_register); + } else { + __ movq(output_register, input_register); + if (imm == -1) { + __ negq(output_register); + } + } + break; + } + + default: + LOG(FATAL) << "Unreachable"; + } +} + +void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv()); + + LocationSummary* locations = instruction->GetLocations(); + Location second = locations->InAt(1); + + CpuRegister output_register = locations->Out().AsRegister<CpuRegister>(); + CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>(); + + int64_t imm; + if (instruction->GetResultType() == Primitive::kPrimLong) { + imm = second.GetConstant()->AsLongConstant()->GetValue(); + } else { + imm = second.GetConstant()->AsIntConstant()->GetValue(); + } + + DCHECK(IsPowerOfTwo(std::abs(imm))); + + CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>(); + + if (instruction->GetResultType() == Primitive::kPrimInt) { + __ leal(tmp, Address(numerator, std::abs(imm) - 1)); + __ testl(numerator, numerator); + __ cmov(kGreaterEqual, tmp, numerator); + int shift = CTZ(imm); + __ sarl(tmp, Immediate(shift)); + + if (imm < 0) { + __ negl(tmp); + } + + __ movl(output_register, tmp); + } else { + DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong); + CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>(); + + __ movq(rdx, Immediate(std::abs(imm) - 1)); + __ addq(rdx, numerator); + __ testq(numerator, numerator); + __ cmov(kGreaterEqual, rdx, numerator); + int shift = CTZ(imm); + __ sarq(rdx, Immediate(shift)); + + if (imm < 0) { + __ negq(rdx); + } + + __ movq(output_register, rdx); + } +} + +void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + + LocationSummary* locations = instruction->GetLocations(); + Location second = locations->InAt(1); + + CpuRegister numerator = instruction->IsDiv() ? locations->GetTemp(1).AsRegister<CpuRegister>() + : locations->GetTemp(0).AsRegister<CpuRegister>(); + CpuRegister eax = locations->InAt(0).AsRegister<CpuRegister>(); + CpuRegister edx = instruction->IsDiv() ? locations->GetTemp(0).AsRegister<CpuRegister>() + : locations->Out().AsRegister<CpuRegister>(); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + + DCHECK_EQ(RAX, eax.AsRegister()); + DCHECK_EQ(RDX, edx.AsRegister()); + if (instruction->IsDiv()) { + DCHECK_EQ(RAX, out.AsRegister()); + } else { + DCHECK_EQ(RDX, out.AsRegister()); + } + + int64_t magic; + int shift; + + // TODO: can these branches be written as one?
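Before the int and long branches that follow, it is worth spelling out what the lea/test/cmov/sar sequence in DivByPowerOfTwo above computes: signed division truncates toward zero, so a plain arithmetic shift would round negative numerators the wrong way, and biasing them by |d| - 1 first corrects that. In plain C++ (illustrative, with __builtin_ctz standing in for the CTZ helper and an arithmetic right shift assumed):

```cpp
#include <cassert>
#include <cstdint>
#include <cstdlib>

// Sketch of the int path of DivByPowerOfTwo, not ART code.
int32_t DivByPowerOfTwo(int32_t n, int32_t d) {
  int32_t abs_d = std::abs(d);
  int shift = __builtin_ctz(abs_d);      // CTZ(imm) in the hunk above.
  int32_t biased = n + (abs_d - 1);      // leal tmp, [n + |d| - 1]
  int32_t num = (n >= 0) ? n : biased;   // testl + cmov(kGreaterEqual)
  int32_t q = num >> shift;              // sarl
  return (d < 0) ? -q : q;               // negl when the divisor is negative.
}

int main() {
  assert(DivByPowerOfTwo(-7, 8) == 0);   // A plain -7 >> 3 would give -1.
  assert(DivByPowerOfTwo(7, -8) == 0);
  assert(DivByPowerOfTwo(-16, 8) == -2);
  return 0;
}
```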
+ if (instruction->GetResultType() == Primitive::kPrimInt) { + int imm = second.GetConstant()->AsIntConstant()->GetValue(); + + CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift); + + __ movl(numerator, eax); + + Label no_div; + Label end; + __ testl(eax, eax); + __ j(kNotEqual, &no_div); + + __ xorl(out, out); + __ jmp(&end); + + __ Bind(&no_div); + + __ movl(eax, Immediate(magic)); + __ imull(numerator); + + if (imm > 0 && magic < 0) { + __ addl(edx, numerator); + } else if (imm < 0 && magic > 0) { + __ subl(edx, numerator); + } + + if (shift != 0) { + __ sarl(edx, Immediate(shift)); + } + + __ movl(eax, edx); + __ shrl(edx, Immediate(31)); + __ addl(edx, eax); + + if (instruction->IsRem()) { + __ movl(eax, numerator); + __ imull(edx, Immediate(imm)); + __ subl(eax, edx); + __ movl(edx, eax); + } else { + __ movl(eax, edx); + } + __ Bind(&end); + } else { + int64_t imm = second.GetConstant()->AsLongConstant()->GetValue(); + + DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong); + + CpuRegister rax = eax; + CpuRegister rdx = edx; + + CalculateMagicAndShiftForDivRem(imm, true /* is_long */, &magic, &shift); + + // Save the numerator. + __ movq(numerator, rax); + + // RAX = magic + __ movq(rax, Immediate(magic)); + + // RDX:RAX = magic * numerator + __ imulq(numerator); + + if (imm > 0 && magic < 0) { + // RDX += numerator + __ addq(rdx, numerator); + } else if (imm < 0 && magic > 0) { + // RDX -= numerator + __ subq(rdx, numerator); + } + + // Shift if needed. + if (shift != 0) { + __ sarq(rdx, Immediate(shift)); + } + + // RDX += 1 if RDX < 0 + __ movq(rax, rdx); + __ shrq(rdx, Immediate(63)); + __ addq(rdx, rax); + + if (instruction->IsRem()) { + __ movq(rax, numerator); + + if (IsInt<32>(imm)) { + __ imulq(rdx, Immediate(static_cast<int32_t>(imm))); + } else { + __ movq(numerator, Immediate(imm)); + __ imulq(rdx, numerator); + } + + __ subq(rax, rdx); + __ movq(rdx, rax); + } else { + __ movq(rax, rdx); + } + } +} + void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) { DCHECK(instruction->IsDiv() || instruction->IsRem()); Primitive::Type type = instruction->GetResultType(); @@ -2267,37 +2490,57 @@ void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* in bool is_div = instruction->IsDiv(); LocationSummary* locations = instruction->GetLocations(); - CpuRegister out_reg = locations->Out().AsRegister<CpuRegister>(); - CpuRegister second_reg = locations->InAt(1).AsRegister<CpuRegister>(); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + Location second = locations->InAt(1); DCHECK_EQ(RAX, locations->InAt(0).AsRegister<CpuRegister>().AsRegister()); - DCHECK_EQ(is_div ? RAX : RDX, out_reg.AsRegister()); + DCHECK_EQ(is_div ? RAX : RDX, out.AsRegister()); - SlowPathCodeX86_64* slow_path = - new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86_64( - out_reg.AsRegister(), type, is_div); - codegen_->AddSlowPath(slow_path); + if (second.IsConstant()) { + int64_t imm; + if (second.GetConstant()->AsLongConstant()) { + imm = second.GetConstant()->AsLongConstant()->GetValue(); + } else { + imm = second.GetConstant()->AsIntConstant()->GetValue(); + } - // 0x80000000(00000000)/-1 triggers an arithmetic exception! - // Dividing by -1 is actually negation and -0x800000000(00000000) = 0x80000000(00000000) - // so it's safe to just use negl instead of more complex comparisons.
- if (type == Primitive::kPrimInt) { - __ cmpl(second_reg, Immediate(-1)); - __ j(kEqual, slow_path->GetEntryLabel()); - // edx:eax <- sign-extended of eax - __ cdq(); - // eax = quotient, edx = remainder - __ idivl(second_reg); + if (imm == 0) { + // Do not generate anything. DivZeroCheck would prevent any code to be executed. + } else if (imm == 1 || imm == -1) { + DivRemOneOrMinusOne(instruction); + } else if (instruction->IsDiv() && IsPowerOfTwo(std::abs(imm))) { + DivByPowerOfTwo(instruction); + } else { + DCHECK(imm <= -2 || imm >= 2); + GenerateDivRemWithAnyConstant(instruction); + } } else { - __ cmpq(second_reg, Immediate(-1)); - __ j(kEqual, slow_path->GetEntryLabel()); - // rdx:rax <- sign-extended of rax - __ cqo(); - // rax = quotient, rdx = remainder - __ idivq(second_reg); - } + SlowPathCodeX86_64* slow_path = + new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86_64( + out.AsRegister(), type, is_div); + codegen_->AddSlowPath(slow_path); - __ Bind(slow_path->GetExitLabel()); + CpuRegister second_reg = second.AsRegister<CpuRegister>(); + // 0x80000000(00000000)/-1 triggers an arithmetic exception! + // Dividing by -1 is actually negation and -0x800000000(00000000) = 0x80000000(00000000) + // so it's safe to just use negl instead of more complex comparisons. + if (type == Primitive::kPrimInt) { + __ cmpl(second_reg, Immediate(-1)); + __ j(kEqual, slow_path->GetEntryLabel()); + // edx:eax <- sign-extended of eax + __ cdq(); + // eax = quotient, edx = remainder + __ idivl(second_reg); + } else { + __ cmpq(second_reg, Immediate(-1)); + __ j(kEqual, slow_path->GetEntryLabel()); + // rdx:rax <- sign-extended of rax + __ cqo(); + // rax = quotient, rdx = remainder + __ idivq(second_reg); + } + __ Bind(slow_path->GetExitLabel()); + } } void LocationsBuilderX86_64::VisitDiv(HDiv* div) { @@ -2307,10 +2550,16 @@ void LocationsBuilderX86_64::VisitDiv(HDiv* div) { case Primitive::kPrimInt: case Primitive::kPrimLong: { locations->SetInAt(0, Location::RegisterLocation(RAX)); - locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1))); locations->SetOut(Location::SameAsFirstInput()); // Intel uses edx:eax as the dividend. locations->AddTemp(Location::RegisterLocation(RDX)); + // We need to save the numerator while we tweak rax and rdx. As we are using imul in a way + // which enforces results to be in RAX and RDX, things are simpler if we use RDX also as + // output and request another temp. + if (div->InputAt(1)->IsConstant()) { + locations->AddTemp(Location::RequiresRegister()); + } break; } @@ -2365,9 +2614,15 @@ void LocationsBuilderX86_64::VisitRem(HRem* rem) { case Primitive::kPrimInt: case Primitive::kPrimLong: { locations->SetInAt(0, Location::RegisterLocation(RAX)); - locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1))); // Intel uses rdx:rax as the dividend and puts the remainder in rdx locations->SetOut(Location::RegisterLocation(RDX)); + // We need to save the numerator while we tweak eax and edx. As we are using imul in a way + // which enforces results to be in RAX and RDX, things are simpler if we use EAX also as + // output and request another temp. 
+ if (rem->InputAt(1)->IsConstant()) { + locations->AddTemp(Location::RequiresRegister()); + } break; } diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 375c0b03b9..be2a79e55e 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -173,6 +173,9 @@ class InstructionCodeGeneratorX86_64 : public HGraphVisitor { void GenerateClassInitializationCheck(SlowPathCodeX86_64* slow_path, CpuRegister class_reg); void HandleBitwiseOperation(HBinaryOperation* operation); void GenerateRemFP(HRem *rem); + void DivRemOneOrMinusOne(HBinaryOperation* instruction); + void DivByPowerOfTwo(HBinaryOperation* instruction); + void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); void GenerateDivRemIntegral(HBinaryOperation* instruction); void HandleShift(HBinaryOperation* operation); void GenerateMemoryBarrier(MemBarrierKind kind); diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index b6e451057a..aec2d19b1d 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -320,6 +320,27 @@ void IntrinsicCodeGeneratorX86::VisitIntegerReverseBytes(HInvoke* invoke) { GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); } +void IntrinsicLocationsBuilderX86::VisitLongReverseBytes(HInvoke* invoke) { + CreateLongToLongLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitLongReverseBytes(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + Location input = locations->InAt(0); + Register input_lo = input.AsRegisterPairLow<Register>(); + Register input_hi = input.AsRegisterPairHigh<Register>(); + Location output = locations->Out(); + Register output_lo = output.AsRegisterPairLow<Register>(); + Register output_hi = output.AsRegisterPairHigh<Register>(); + + X86Assembler* assembler = GetAssembler(); + // Assign the inputs to the outputs, mixing low/high. + __ movl(output_lo, input_hi); + __ movl(output_hi, input_lo); + __ bswapl(output_lo); + __ bswapl(output_hi); +} + void IntrinsicLocationsBuilderX86::VisitShortReverseBytes(HInvoke* invoke) { CreateIntToIntLocations(arena_, invoke); } @@ -1330,6 +1351,181 @@ void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_); } +static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type, + HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::NoLocation()); // Unused receiver. + locations->SetInAt(1, Location::RequiresRegister()); + // Offset is a long, but in 32 bit mode, we only need the low word. + // Can we update the invoke here to remove a TypeConvert to Long? + locations->SetInAt(2, Location::RequiresRegister()); + // Expected value must be in EAX or EDX:EAX. + // For long, new value must be in ECX:EBX. + if (type == Primitive::kPrimLong) { + locations->SetInAt(3, Location::RegisterPairLocation(EAX, EDX)); + locations->SetInAt(4, Location::RegisterPairLocation(EBX, ECX)); + } else { + locations->SetInAt(3, Location::RegisterLocation(EAX)); + locations->SetInAt(4, Location::RequiresRegister()); + } + + // Force a byte register for the output. + locations->SetOut(Location::RegisterLocation(EAX)); + if (type == Primitive::kPrimNot) { + // Need temp registers for card-marking. 
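+    // (Only EAX, EBX, ECX and EDX have byte forms on x86, and EAX is already
+    // taken as the expected value and output, hence the explicit ECX below.)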
+ locations->AddTemp(Location::RequiresRegister()); + // Need a byte register for marking. + locations->AddTemp(Location::RegisterLocation(ECX)); + } +} + +void IntrinsicLocationsBuilderX86::VisitUnsafeCASInt(HInvoke* invoke) { + CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke); +} + +void IntrinsicLocationsBuilderX86::VisitUnsafeCASLong(HInvoke* invoke) { + CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke); +} + +void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) { + CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke); +} + +static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* codegen) { + X86Assembler* assembler = + reinterpret_cast<X86Assembler*>(codegen->GetAssembler()); + LocationSummary* locations = invoke->GetLocations(); + + Register base = locations->InAt(1).AsRegister<Register>(); + Register offset = locations->InAt(2).AsRegisterPairLow<Register>(); + Location out = locations->Out(); + DCHECK_EQ(out.AsRegister<Register>(), EAX); + + if (type == Primitive::kPrimLong) { + DCHECK_EQ(locations->InAt(3).AsRegisterPairLow<Register>(), EAX); + DCHECK_EQ(locations->InAt(3).AsRegisterPairHigh<Register>(), EDX); + DCHECK_EQ(locations->InAt(4).AsRegisterPairLow<Register>(), EBX); + DCHECK_EQ(locations->InAt(4).AsRegisterPairHigh<Register>(), ECX); + __ LockCmpxchg8b(Address(base, offset, TIMES_1, 0)); + } else { + // Integer or object. + DCHECK_EQ(locations->InAt(3).AsRegister<Register>(), EAX); + Register value = locations->InAt(4).AsRegister<Register>(); + if (type == Primitive::kPrimNot) { + // Mark card for object assuming new value is stored. + codegen->MarkGCCard(locations->GetTemp(0).AsRegister<Register>(), + locations->GetTemp(1).AsRegister<Register>(), + base, + value); + } + + __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value); + } + + // locked cmpxchg has full barrier semantics, and we don't need scheduling + // barriers at this time. + + // Convert ZF into the boolean result. 
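+  // (setb only writes the low byte of the output, so it is zero-extended into
+  // the full 32-bit register by the movzxb that follows.)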
+ __ setb(kZero, out.AsRegister<Register>()); + __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>()); +} + +void IntrinsicCodeGeneratorX86::VisitUnsafeCASInt(HInvoke* invoke) { + GenCAS(Primitive::kPrimInt, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86::VisitUnsafeCASLong(HInvoke* invoke) { + GenCAS(Primitive::kPrimLong, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) { + GenCAS(Primitive::kPrimNot, invoke, codegen_); +} + +void IntrinsicLocationsBuilderX86::VisitIntegerReverse(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresRegister()); +} + +static void SwapBits(Register reg, Register temp, int32_t shift, int32_t mask, + X86Assembler* assembler) { + Immediate imm_shift(shift); + Immediate imm_mask(mask); + __ movl(temp, reg); + __ shrl(reg, imm_shift); + __ andl(temp, imm_mask); + __ andl(reg, imm_mask); + __ shll(temp, imm_shift); + __ orl(reg, temp); +} + +void IntrinsicCodeGeneratorX86::VisitIntegerReverse(HInvoke* invoke) { + X86Assembler* assembler = + reinterpret_cast<X86Assembler*>(codegen_->GetAssembler()); + LocationSummary* locations = invoke->GetLocations(); + + Register reg = locations->InAt(0).AsRegister<Register>(); + Register temp = locations->GetTemp(0).AsRegister<Register>(); + + /* + * Use one bswap instruction to reverse byte order first and then use 3 rounds of + * swapping bits to reverse bits in a number x. Using bswap to save instructions + * compared to generic luni implementation which has 5 rounds of swapping bits. + * x = bswap x + * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555; + * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333; + * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F; + */ + __ bswapl(reg); + SwapBits(reg, temp, 1, 0x55555555, assembler); + SwapBits(reg, temp, 2, 0x33333333, assembler); + SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler); +} + +void IntrinsicLocationsBuilderX86::VisitLongReverse(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorX86::VisitLongReverse(HInvoke* invoke) { + X86Assembler* assembler = + reinterpret_cast<X86Assembler*>(codegen_->GetAssembler()); + LocationSummary* locations = invoke->GetLocations(); + + Register reg_low = locations->InAt(0).AsRegisterPairLow<Register>(); + Register reg_high = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register temp = locations->GetTemp(0).AsRegister<Register>(); + + // We want to swap high/low, then bswap each one, and then do the same + // as a 32 bit reverse. + // Exchange high and low. 
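+  // reverse(hi:lo) == reverse(lo):reverse(hi), so the two words are swapped
+  // first and each half is then bit-reversed in place.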
+ __ movl(temp, reg_low); + __ movl(reg_low, reg_high); + __ movl(reg_high, temp); + + // bit-reverse low + __ bswapl(reg_low); + SwapBits(reg_low, temp, 1, 0x55555555, assembler); + SwapBits(reg_low, temp, 2, 0x33333333, assembler); + SwapBits(reg_low, temp, 4, 0x0f0f0f0f, assembler); + + // bit-reverse high + __ bswapl(reg_high); + SwapBits(reg_high, temp, 1, 0x55555555, assembler); + SwapBits(reg_high, temp, 2, 0x33333333, assembler); + SwapBits(reg_high, temp, 4, 0x0f0f0f0f, assembler); +} + // Unimplemented intrinsics. #define UNIMPLEMENTED_INTRINSIC(Name) \ @@ -1338,16 +1534,10 @@ void IntrinsicLocationsBuilderX86::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSE void IntrinsicCodeGeneratorX86::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ } -UNIMPLEMENTED_INTRINSIC(IntegerReverse) -UNIMPLEMENTED_INTRINSIC(LongReverse) -UNIMPLEMENTED_INTRINSIC(LongReverseBytes) UNIMPLEMENTED_INTRINSIC(MathRoundDouble) UNIMPLEMENTED_INTRINSIC(StringIndexOf) UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter) UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) -UNIMPLEMENTED_INTRINSIC(UnsafeCASInt) -UNIMPLEMENTED_INTRINSIC(UnsafeCASLong) -UNIMPLEMENTED_INTRINSIC(UnsafeCASObject) UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) } // namespace x86 diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index f6fa013cc6..5122a00d92 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -1202,6 +1202,175 @@ void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_); } +static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type, + HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::NoLocation()); // Unused receiver. + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(2, Location::RequiresRegister()); + // expected value must be in EAX/RAX. + locations->SetInAt(3, Location::RegisterLocation(RAX)); + locations->SetInAt(4, Location::RequiresRegister()); + + locations->SetOut(Location::RequiresRegister()); + if (type == Primitive::kPrimNot) { + // Need temp registers for card-marking. 
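+    // (Presumably one temp serves as scratch and the other holds the card
+    // table address in MarkGCCard below.)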
+ locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + } +} + +void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASInt(HInvoke* invoke) { + CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke); +} + +void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) { + CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke); +} + +void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) { + CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke); +} + +static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* codegen) { + X86_64Assembler* assembler = + reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler()); + LocationSummary* locations = invoke->GetLocations(); + + CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>(); + CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>(); + CpuRegister expected = locations->InAt(3).AsRegister<CpuRegister>(); + DCHECK_EQ(expected.AsRegister(), RAX); + CpuRegister value = locations->InAt(4).AsRegister<CpuRegister>(); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + + if (type == Primitive::kPrimLong) { + __ LockCmpxchgq(Address(base, offset, TIMES_1, 0), value); + } else { + // Integer or object. + if (type == Primitive::kPrimNot) { + // Mark card for object assuming new value is stored. + codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(), + locations->GetTemp(1).AsRegister<CpuRegister>(), + base, + value); + } + + __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value); + } + + // locked cmpxchg has full barrier semantics, and we don't need scheduling + // barriers at this time. + + // Convert ZF into the boolean result. + __ setcc(kZero, out); + __ movzxb(out, out); +} + +void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASInt(HInvoke* invoke) { + GenCAS(Primitive::kPrimInt, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASLong(HInvoke* invoke) { + GenCAS(Primitive::kPrimLong, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) { + GenCAS(Primitive::kPrimNot, invoke, codegen_); +} + +void IntrinsicLocationsBuilderX86_64::VisitIntegerReverse(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresRegister()); +} + +static void SwapBits(CpuRegister reg, CpuRegister temp, int32_t shift, int32_t mask, + X86_64Assembler* assembler) { + Immediate imm_shift(shift); + Immediate imm_mask(mask); + __ movl(temp, reg); + __ shrl(reg, imm_shift); + __ andl(temp, imm_mask); + __ andl(reg, imm_mask); + __ shll(temp, imm_shift); + __ orl(reg, temp); +} + +void IntrinsicCodeGeneratorX86_64::VisitIntegerReverse(HInvoke* invoke) { + X86_64Assembler* assembler = + reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler()); + LocationSummary* locations = invoke->GetLocations(); + + CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>(); + CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); + + /* + * Use one bswap instruction to reverse byte order first and then use 3 rounds of + * swapping bits to reverse bits in a number x. Using bswap to save instructions + * compared to generic luni implementation which has 5 rounds of swapping bits. 
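+   * bswap performs the byte-level (8- and 16-bit) swaps; the three masked
+   * rounds then swap the remaining 1-, 2- and 4-bit groups: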
+ * x = bswap x + * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555; + * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333; + * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F; + */ + __ bswapl(reg); + SwapBits(reg, temp, 1, 0x55555555, assembler); + SwapBits(reg, temp, 2, 0x33333333, assembler); + SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler); +} + +void IntrinsicLocationsBuilderX86_64::VisitLongReverse(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); +} + +static void SwapBits64(CpuRegister reg, CpuRegister temp, CpuRegister temp_mask, + int32_t shift, int64_t mask, X86_64Assembler* assembler) { + Immediate imm_shift(shift); + __ movq(temp_mask, Immediate(mask)); + __ movq(temp, reg); + __ shrq(reg, imm_shift); + __ andq(temp, temp_mask); + __ andq(reg, temp_mask); + __ shlq(temp, imm_shift); + __ orq(reg, temp); +} + +void IntrinsicCodeGeneratorX86_64::VisitLongReverse(HInvoke* invoke) { + X86_64Assembler* assembler = + reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler()); + LocationSummary* locations = invoke->GetLocations(); + + CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>(); + CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>(); + CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>(); + + /* + * Use one bswap instruction to reverse byte order first and then use 3 rounds of + * swapping bits to reverse bits in a long number x. Using bswap to save instructions + * compared to generic luni implementation which has 5 rounds of swapping bits. + * x = bswap x + * x = (x & 0x5555555555555555) << 1 | (x >> 1) & 0x5555555555555555; + * x = (x & 0x3333333333333333) << 2 | (x >> 2) & 0x3333333333333333; + * x = (x & 0x0F0F0F0F0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F0F0F0F0F; + */ + __ bswapq(reg); + SwapBits64(reg, temp1, temp2, 1, INT64_C(0x5555555555555555), assembler); + SwapBits64(reg, temp1, temp2, 2, INT64_C(0x3333333333333333), assembler); + SwapBits64(reg, temp1, temp2, 4, INT64_C(0x0f0f0f0f0f0f0f0f), assembler); +} + // Unimplemented intrinsics. 
#define UNIMPLEMENTED_INTRINSIC(Name) \ @@ -1210,14 +1379,9 @@ void IntrinsicLocationsBuilderX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UN void IntrinsicCodeGeneratorX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ } -UNIMPLEMENTED_INTRINSIC(IntegerReverse) -UNIMPLEMENTED_INTRINSIC(LongReverse) UNIMPLEMENTED_INTRINSIC(StringIndexOf) UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter) UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) -UNIMPLEMENTED_INTRINSIC(UnsafeCASInt) -UNIMPLEMENTED_INTRINSIC(UnsafeCASLong) -UNIMPLEMENTED_INTRINSIC(UnsafeCASObject) UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) } // namespace x86_64 diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 90a530aa5e..12798edac5 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -204,8 +204,13 @@ class OptimizingCompiler FINAL : public Compiler { const std::vector<const art::DexFile*>& dex_files, const std::string& android_root, bool is_host) const OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - return art::ElfWriterQuick32::Create(file, oat_writer, dex_files, android_root, is_host, - *GetCompilerDriver()); + if (kProduce64BitELFFiles && Is64BitInstructionSet(GetCompilerDriver()->GetInstructionSet())) { + return art::ElfWriterQuick64::Create(file, oat_writer, dex_files, android_root, is_host, + *GetCompilerDriver()); + } else { + return art::ElfWriterQuick32::Create(file, oat_writer, dex_files, android_root, is_host, + *GetCompilerDriver()); + } } void InitCompilationUnit(CompilationUnit& cu) const OVERRIDE; diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc index 6286b106aa..3b42f63509 100644 --- a/compiler/utils/arm/assembler_thumb2.cc +++ b/compiler/utils/arm/assembler_thumb2.cc @@ -373,24 +373,34 @@ void Thumb2Assembler::ldrsh(Register rd, const Address& ad, Condition cond) { void Thumb2Assembler::ldrd(Register rd, const Address& ad, Condition cond) { + ldrd(rd, Register(rd + 1), ad, cond); +} + + +void Thumb2Assembler::ldrd(Register rd, Register rd2, const Address& ad, Condition cond) { CheckCondition(cond); - CHECK_EQ(rd % 2, 0); + // Encoding T1. // This is different from other loads. The encoding is like ARM. int32_t encoding = B31 | B30 | B29 | B27 | B22 | B20 | static_cast<int32_t>(rd) << 12 | - (static_cast<int32_t>(rd) + 1) << 8 | + static_cast<int32_t>(rd2) << 8 | ad.encodingThumbLdrdStrd(); Emit32(encoding); } void Thumb2Assembler::strd(Register rd, const Address& ad, Condition cond) { + strd(rd, Register(rd + 1), ad, cond); +} + + +void Thumb2Assembler::strd(Register rd, Register rd2, const Address& ad, Condition cond) { CheckCondition(cond); - CHECK_EQ(rd % 2, 0); + // Encoding T1. // This is different from other loads. The encoding is like ARM. int32_t encoding = B31 | B30 | B29 | B27 | B22 | static_cast<int32_t>(rd) << 12 | - (static_cast<int32_t>(rd) + 1) << 8 | + static_cast<int32_t>(rd2) << 8 | ad.encodingThumbLdrdStrd(); Emit32(encoding); } @@ -2613,14 +2623,16 @@ void Thumb2Assembler::StoreToOffset(StoreOperandType type, Register tmp_reg = kNoRegister; if (!Address::CanHoldStoreOffsetThumb(type, offset)) { CHECK_NE(base, IP); - if (reg != IP) { + if (reg != IP && + (type != kStoreWordPair || reg + 1 != IP)) { tmp_reg = IP; } else { - // Be careful not to use IP twice (for `reg` and to build the - // Address object used by the store instruction(s) below). 
-      // Instead, save R5 on the stack (or R6 if R5 is not available),
-      // use it as secondary temporary register, and restore it after
-      // the store instruction has been emitted.
+      // Be careful not to use IP twice (for `reg` (or `reg` + 1 in
+      // the case of a word-pair store) and to build the Address
+      // object used by the store instruction(s) below). Instead,
+      // save R5 on the stack (or R6 if R5 is not available), use it
+      // as secondary temporary register, and restore it after the
+      // store instruction has been emitted.
       tmp_reg = base != R5 ? R5 : R6;
       Push(tmp_reg);
       if (base == SP) {
diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h
index 81dd13894f..e33c240dbf 100644
--- a/compiler/utils/arm/assembler_thumb2.h
+++ b/compiler/utils/arm/assembler_thumb2.h
@@ -135,9 +135,17 @@ class Thumb2Assembler FINAL : public ArmAssembler {
   void ldrsb(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
   void ldrsh(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
 
+  // Load/store register dual instructions using registers `rd` and `rd` + 1.
   void ldrd(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
   void strd(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
+
+  // Load/store register dual instructions using registers `rd` and `rd2`.
+  // Note that contrary to the ARM A1 encoding, the Thumb-2 T1 encoding
+  // does not require `rd` to be even, nor `rd2` to be equal to `rd` + 1.
+  void ldrd(Register rd, Register rd2, const Address& ad, Condition cond);
+  void strd(Register rd, Register rd2, const Address& ad, Condition cond);
+
+
   void ldm(BlockAddressMode am,
            Register base,
            RegList regs,
            Condition cond = AL) OVERRIDE;
 
   void stm(BlockAddressMode am,
            Register base,
diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc
index 813996b0db..5f5561a499 100644
--- a/compiler/utils/arm/assembler_thumb2_test.cc
+++ b/compiler/utils/arm/assembler_thumb2_test.cc
@@ -247,4 +247,103 @@ TEST_F(AssemblerThumb2Test, add) {
   DriverStr(expected, "add");
 }
 
+TEST_F(AssemblerThumb2Test, StoreWordToThumbOffset) {
+  arm::StoreOperandType type = arm::kStoreWord;
+  int32_t offset = 4092;
+  ASSERT_TRUE(arm::Address::CanHoldStoreOffsetThumb(type, offset));
+
+  __ StoreToOffset(type, arm::R0, arm::SP, offset);
+  __ StoreToOffset(type, arm::IP, arm::SP, offset);
+  __ StoreToOffset(type, arm::IP, arm::R5, offset);
+
+  const char* expected =
+      "str r0, [sp, #4092]\n"
+      "str ip, [sp, #4092]\n"
+      "str ip, [r5, #4092]\n";
+  DriverStr(expected, "StoreWordToThumbOffset");
+}
+
+TEST_F(AssemblerThumb2Test, StoreWordToNonThumbOffset) {
+  arm::StoreOperandType type = arm::kStoreWord;
+  int32_t offset = 4096;
+  ASSERT_FALSE(arm::Address::CanHoldStoreOffsetThumb(type, offset));
+
+  __ StoreToOffset(type, arm::R0, arm::SP, offset);
+  __ StoreToOffset(type, arm::IP, arm::SP, offset);
+  __ StoreToOffset(type, arm::IP, arm::R5, offset);
+
+  const char* expected =
+      "mov ip, #4096\n"       // LoadImmediate(ip, 4096)
+      "add ip, ip, sp\n"
+      "str r0, [ip, #0]\n"
+
+      "str r5, [sp, #-4]!\n"  // Push(r5)
+      "movw r5, #4100\n"      // LoadImmediate(r5, 4096 + kRegisterSize)
+      "add r5, r5, sp\n"
+      "str ip, [r5, #0]\n"
+      "ldr r5, [sp], #4\n"    // Pop(r5)
+
+      "str r6, [sp, #-4]!\n"  // Push(r6)
+      "mov r6, #4096\n"       // LoadImmediate(r6, 4096)
+      "add r6, r6, r5\n"
+      "str ip, [r6, #0]\n"
+      "ldr r6, [sp], #4\n";   // Pop(r6)
+  DriverStr(expected, "StoreWordToNonThumbOffset");
+}
+
+TEST_F(AssemblerThumb2Test, StoreWordPairToThumbOffset) {
+  arm::StoreOperandType type = arm::kStoreWordPair;
+  int32_t offset = 1020;
+  ASSERT_TRUE(arm::Address::CanHoldStoreOffsetThumb(type, offset));
+
+  __ StoreToOffset(type, arm::R0, arm::SP, offset);
+  // We cannot use IP (i.e. R12) as first source register, as it would
+  // force us to use SP (i.e. R13) as second source register, which
+  // would have an "unpredictable" effect according to the ARMv7
+  // specification (the T1 encoding describes the result as
+  // UNPREDICTABLE when one of the source registers is R13).
+  //
+  // So we use (R11, IP) (i.e. (R11, R12)) as source registers in the
+  // following instructions.
+  __ StoreToOffset(type, arm::R11, arm::SP, offset);
+  __ StoreToOffset(type, arm::R11, arm::R5, offset);
+
+  const char* expected =
+      "strd r0, r1, [sp, #1020]\n"
+      "strd r11, ip, [sp, #1020]\n"
+      "strd r11, ip, [r5, #1020]\n";
+  DriverStr(expected, "StoreWordPairToThumbOffset");
+}
+
+TEST_F(AssemblerThumb2Test, StoreWordPairToNonThumbOffset) {
+  arm::StoreOperandType type = arm::kStoreWordPair;
+  int32_t offset = 1024;
+  ASSERT_FALSE(arm::Address::CanHoldStoreOffsetThumb(type, offset));
+
+  __ StoreToOffset(type, arm::R0, arm::SP, offset);
+  // Same comment as in AssemblerThumb2Test.StoreWordPairToThumbOffset
+  // regarding the use of (R11, IP) (i.e. (R11, R12)) as source
+  // registers in the following instructions.
+  __ StoreToOffset(type, arm::R11, arm::SP, offset);
+  __ StoreToOffset(type, arm::R11, arm::R5, offset);
+
+  const char* expected =
+      "mov ip, #1024\n"          // LoadImmediate(ip, 1024)
+      "add ip, ip, sp\n"
+      "strd r0, r1, [ip, #0]\n"
+
+      "str r5, [sp, #-4]!\n"     // Push(r5)
+      "movw r5, #1028\n"         // LoadImmediate(r5, 1024 + kRegisterSize)
+      "add r5, r5, sp\n"
+      "strd r11, ip, [r5, #0]\n"
+      "ldr r5, [sp], #4\n"       // Pop(r5)
+
+      "str r6, [sp, #-4]!\n"     // Push(r6)
+      "mov r6, #1024\n"          // LoadImmediate(r6, 1024)
+      "add r6, r6, r5\n"
+      "strd r11, ip, [r6, #0]\n"
+      "ldr r6, [sp], #4\n";      // Pop(r6)
+  DriverStr(expected, "StoreWordPairToNonThumbOffset");
+}
+
 }  // namespace art
diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h
index 923ecdbd9d..323f93cb42 100644
--- a/compiler/utils/assembler.h
+++ b/compiler/utils/assembler.h
@@ -504,12 +504,6 @@ class Assembler {
   // and branch to a ExceptionSlowPath if it is.
   virtual void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) = 0;
 
-  virtual void InitializeFrameDescriptionEntry() {}
-  virtual void FinalizeFrameDescriptionEntry() {}
-  // Give a vector containing FDE data, or null if not used. Note: the assembler must take care
-  // of handling the lifecycle.
-  virtual std::vector<uint8_t>* GetFrameDescriptionEntry() { return nullptr; }
-
   virtual ~Assembler() {}
 
  protected:
diff --git a/compiler/utils/dwarf_cfi.cc b/compiler/utils/dwarf_cfi.cc
deleted file mode 100644
index a7e09c6517..0000000000
--- a/compiler/utils/dwarf_cfi.cc
+++ /dev/null
@@ -1,156 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */ - -#include "leb128.h" -#include "utils.h" - -#include "dwarf_cfi.h" - -namespace art { - -void DW_CFA_advance_loc(std::vector<uint8_t>* buf, uint32_t increment) { - if (increment < 64) { - // Encoding in opcode. - buf->push_back(0x1 << 6 | increment); - } else if (increment < 256) { - // Single byte delta. - buf->push_back(0x02); - buf->push_back(increment); - } else if (increment < 256 * 256) { - // Two byte delta. - buf->push_back(0x03); - buf->push_back(increment & 0xff); - buf->push_back((increment >> 8) & 0xff); - } else { - // Four byte delta. - buf->push_back(0x04); - Push32(buf, increment); - } -} - -void DW_CFA_offset_extended_sf(std::vector<uint8_t>* buf, int reg, int32_t offset) { - buf->push_back(0x11); - EncodeUnsignedLeb128(reg, buf); - EncodeSignedLeb128(offset, buf); -} - -void DW_CFA_offset(std::vector<uint8_t>* buf, int reg, uint32_t offset) { - buf->push_back((0x2 << 6) | reg); - EncodeUnsignedLeb128(offset, buf); -} - -void DW_CFA_def_cfa_offset(std::vector<uint8_t>* buf, int32_t offset) { - buf->push_back(0x0e); - EncodeUnsignedLeb128(offset, buf); -} - -void DW_CFA_remember_state(std::vector<uint8_t>* buf) { - buf->push_back(0x0a); -} - -void DW_CFA_restore_state(std::vector<uint8_t>* buf) { - buf->push_back(0x0b); -} - -void WriteFDEHeader(std::vector<uint8_t>* buf, bool is_64bit) { - // 'length' (filled in by other functions). - if (is_64bit) { - Push32(buf, 0xffffffff); // Indicates 64bit - Push32(buf, 0); - Push32(buf, 0); - } else { - Push32(buf, 0); - } - - // 'CIE_pointer' (filled in by linker). - if (is_64bit) { - Push32(buf, 0); - Push32(buf, 0); - } else { - Push32(buf, 0); - } - - // 'initial_location' (filled in by linker). - if (is_64bit) { - Push32(buf, 0); - Push32(buf, 0); - } else { - Push32(buf, 0); - } - - // 'address_range' (filled in by other functions). - if (is_64bit) { - Push32(buf, 0); - Push32(buf, 0); - } else { - Push32(buf, 0); - } - - // Augmentation length: 0 - buf->push_back(0); -} - -void WriteFDEAddressRange(std::vector<uint8_t>* buf, uint64_t data, bool is_64bit) { - const size_t kOffsetOfAddressRange = is_64bit? 28 : 12; - CHECK(buf->size() >= kOffsetOfAddressRange + (is_64bit? 8 : 4)); - - uint8_t *p = buf->data() + kOffsetOfAddressRange; - if (is_64bit) { - p[0] = data; - p[1] = data >> 8; - p[2] = data >> 16; - p[3] = data >> 24; - p[4] = data >> 32; - p[5] = data >> 40; - p[6] = data >> 48; - p[7] = data >> 56; - } else { - p[0] = data; - p[1] = data >> 8; - p[2] = data >> 16; - p[3] = data >> 24; - } -} - -void WriteCFILength(std::vector<uint8_t>* buf, bool is_64bit) { - uint64_t length = is_64bit ? buf->size() - 12 : buf->size() - 4; - DCHECK_EQ((length & 0x3), 0U); - - uint8_t *p = is_64bit? buf->data() + 4 : buf->data(); - if (is_64bit) { - p[0] = length; - p[1] = length >> 8; - p[2] = length >> 16; - p[3] = length >> 24; - p[4] = length >> 32; - p[5] = length >> 40; - p[6] = length >> 48; - p[7] = length >> 56; - } else { - p[0] = length; - p[1] = length >> 8; - p[2] = length >> 16; - p[3] = length >> 24; - } -} - -void PadCFI(std::vector<uint8_t>* buf) { - while (buf->size() & 0x3) { - buf->push_back(0); - } -} - -} // namespace art diff --git a/compiler/utils/dwarf_cfi.h b/compiler/utils/dwarf_cfi.h deleted file mode 100644 index 0c8b1516dd..0000000000 --- a/compiler/utils/dwarf_cfi.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (C) 2014 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_UTILS_DWARF_CFI_H_ -#define ART_COMPILER_UTILS_DWARF_CFI_H_ - -#include <vector> - -namespace art { - -/** - * @brief Enter a 'DW_CFA_advance_loc' into an FDE buffer - * @param buf FDE buffer. - * @param increment Amount by which to increase the current location. - */ -void DW_CFA_advance_loc(std::vector<uint8_t>* buf, uint32_t increment); - -/** - * @brief Enter a 'DW_CFA_offset_extended_sf' into an FDE buffer - * @param buf FDE buffer. - * @param reg Register number. - * @param offset Offset of register address from CFA. - */ -void DW_CFA_offset_extended_sf(std::vector<uint8_t>* buf, int reg, int32_t offset); - -/** - * @brief Enter a 'DW_CFA_offset' into an FDE buffer - * @param buf FDE buffer. - * @param reg Register number. - * @param offset Offset of register address from CFA. - */ -void DW_CFA_offset(std::vector<uint8_t>* buf, int reg, uint32_t offset); - -/** - * @brief Enter a 'DW_CFA_def_cfa_offset' into an FDE buffer - * @param buf FDE buffer. - * @param offset New offset of CFA. - */ -void DW_CFA_def_cfa_offset(std::vector<uint8_t>* buf, int32_t offset); - -/** - * @brief Enter a 'DW_CFA_remember_state' into an FDE buffer - * @param buf FDE buffer. - */ -void DW_CFA_remember_state(std::vector<uint8_t>* buf); - -/** - * @brief Enter a 'DW_CFA_restore_state' into an FDE buffer - * @param buf FDE buffer. - */ -void DW_CFA_restore_state(std::vector<uint8_t>* buf); - -/** - * @brief Write FDE header into an FDE buffer - * @param buf FDE buffer. - * @param is_64bit If FDE is for 64bit application. - */ -void WriteFDEHeader(std::vector<uint8_t>* buf, bool is_64bit); - -/** - * @brief Set 'address_range' field of an FDE buffer - * @param buf FDE buffer. - * @param data Data value. - * @param is_64bit If FDE is for 64bit application. - */ -void WriteFDEAddressRange(std::vector<uint8_t>* buf, uint64_t data, bool is_64bit); - -/** - * @brief Set 'length' field of an FDE buffer - * @param buf FDE buffer. - * @param is_64bit If FDE is for 64bit application. - */ -void WriteCFILength(std::vector<uint8_t>* buf, bool is_64bit); - -/** - * @brief Pad an FDE buffer with 0 until its size is a multiple of 4 - * @param buf FDE buffer. 
- */ -void PadCFI(std::vector<uint8_t>* buf); -} // namespace art - -#endif // ART_COMPILER_UTILS_DWARF_CFI_H_ diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index b3a1376727..4cca529258 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -20,7 +20,6 @@ #include "entrypoints/quick/quick_entrypoints.h" #include "memory_region.h" #include "thread.h" -#include "utils/dwarf_cfi.h" namespace art { namespace x86 { @@ -1467,6 +1466,15 @@ void X86Assembler::cmpxchgl(const Address& address, Register reg) { EmitOperand(reg, address); } + +void X86Assembler::cmpxchg8b(const Address& address) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x0F); + EmitUint8(0xC7); + EmitOperand(1, address); +} + + void X86Assembler::mfence() { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x0F); @@ -1631,69 +1639,32 @@ void X86Assembler::EmitGenericShift(int reg_or_opcode, EmitOperand(reg_or_opcode, Operand(operand)); } -void X86Assembler::InitializeFrameDescriptionEntry() { - WriteFDEHeader(&cfi_info_, false /* is_64bit */); -} - -void X86Assembler::FinalizeFrameDescriptionEntry() { - WriteFDEAddressRange(&cfi_info_, buffer_.Size(), false /* is_64bit */); - PadCFI(&cfi_info_); - WriteCFILength(&cfi_info_, false /* is_64bit */); -} - constexpr size_t kFramePointerSize = 4; void X86Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, const std::vector<ManagedRegister>& spill_regs, const ManagedRegisterEntrySpills& entry_spills) { - cfi_cfa_offset_ = kFramePointerSize; // Only return address on stack - cfi_pc_ = buffer_.Size(); // Nothing emitted yet - DCHECK_EQ(cfi_pc_, 0U); - - uint32_t reg_offset = 1; + DCHECK_EQ(buffer_.Size(), 0U); // Nothing emitted yet. CHECK_ALIGNED(frame_size, kStackAlignment); int gpr_count = 0; for (int i = spill_regs.size() - 1; i >= 0; --i) { - x86::X86ManagedRegister spill = spill_regs.at(i).AsX86(); - DCHECK(spill.IsCpuRegister()); - pushl(spill.AsCpuRegister()); + Register spill = spill_regs.at(i).AsX86().AsCpuRegister(); + pushl(spill); gpr_count++; - - // DW_CFA_advance_loc - DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_); - cfi_pc_ = buffer_.Size(); - // DW_CFA_def_cfa_offset - cfi_cfa_offset_ += kFramePointerSize; - DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_); - // DW_CFA_offset reg offset - reg_offset++; - DW_CFA_offset(&cfi_info_, spill_regs.at(i).AsX86().DWARFRegId(), reg_offset); } - // return address then method on stack + // return address then method on stack. 
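+  // (The return address was pushed by the caller's call and the spills by the
+  // pushes above, so only the remainder of the frame is allocated here; the
+  // method pointer is pushed separately below.)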
int32_t adjust = frame_size - (gpr_count * kFramePointerSize) - sizeof(StackReference<mirror::ArtMethod>) /*method*/ - kFramePointerSize /*return address*/; addl(ESP, Immediate(-adjust)); - // DW_CFA_advance_loc - DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_); - cfi_pc_ = buffer_.Size(); - // DW_CFA_def_cfa_offset - cfi_cfa_offset_ += adjust; - DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_); - pushl(method_reg.AsX86().AsCpuRegister()); - // DW_CFA_advance_loc - DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_); - cfi_pc_ = buffer_.Size(); - // DW_CFA_def_cfa_offset - cfi_cfa_offset_ += kFramePointerSize; - DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_); for (size_t i = 0; i < entry_spills.size(); ++i) { ManagedRegisterSpill spill = entry_spills.at(i); if (spill.AsX86().IsCpuRegister()) { - movl(Address(ESP, frame_size + spill.getSpillOffset()), spill.AsX86().AsCpuRegister()); + int offset = frame_size + spill.getSpillOffset(); + movl(Address(ESP, offset), spill.AsX86().AsCpuRegister()); } else { DCHECK(spill.AsX86().IsXmmRegister()); if (spill.getSize() == 8) { @@ -1709,8 +1680,9 @@ void X86Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, void X86Assembler::RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& spill_regs) { CHECK_ALIGNED(frame_size, kStackAlignment); - addl(ESP, Immediate(frame_size - (spill_regs.size() * kFramePointerSize) - - sizeof(StackReference<mirror::ArtMethod>))); + int adjust = frame_size - (spill_regs.size() * kFramePointerSize) - + sizeof(StackReference<mirror::ArtMethod>); + addl(ESP, Immediate(adjust)); for (size_t i = 0; i < spill_regs.size(); ++i) { x86::X86ManagedRegister spill = spill_regs.at(i).AsX86(); DCHECK(spill.IsCpuRegister()); @@ -1722,12 +1694,6 @@ void X86Assembler::RemoveFrame(size_t frame_size, void X86Assembler::IncreaseFrameSize(size_t adjust) { CHECK_ALIGNED(adjust, kStackAlignment); addl(ESP, Immediate(-adjust)); - // DW_CFA_advance_loc - DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_); - cfi_pc_ = buffer_.Size(); - // DW_CFA_def_cfa_offset - cfi_cfa_offset_ += adjust; - DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_); } void X86Assembler::DecreaseFrameSize(size_t adjust) { diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h index bdf88435a4..f3675aeceb 100644 --- a/compiler/utils/x86/assembler_x86.h +++ b/compiler/utils/x86/assembler_x86.h @@ -205,7 +205,7 @@ class Address : public Operand { class X86Assembler FINAL : public Assembler { public: - explicit X86Assembler() : cfi_cfa_offset_(0), cfi_pc_(0) {} + explicit X86Assembler() {} virtual ~X86Assembler() {} /* @@ -457,6 +457,7 @@ class X86Assembler FINAL : public Assembler { X86Assembler* lock(); void cmpxchgl(const Address& address, Register reg); + void cmpxchg8b(const Address& address); void mfence(); @@ -476,6 +477,10 @@ class X86Assembler FINAL : public Assembler { lock()->cmpxchgl(address, reg); } + void LockCmpxchg8b(const Address& address) { + lock()->cmpxchg8b(address); + } + // // Misc. functionality // @@ -599,12 +604,6 @@ class X86Assembler FINAL : public Assembler { // and branch to a ExceptionSlowPath if it is. 
void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE; - void InitializeFrameDescriptionEntry() OVERRIDE; - void FinalizeFrameDescriptionEntry() OVERRIDE; - std::vector<uint8_t>* GetFrameDescriptionEntry() OVERRIDE { - return &cfi_info_; - } - private: inline void EmitUint8(uint8_t value); inline void EmitInt32(int32_t value); @@ -623,9 +622,6 @@ class X86Assembler FINAL : public Assembler { void EmitGenericShift(int rm, Register reg, const Immediate& imm); void EmitGenericShift(int rm, Register operand, Register shifter); - std::vector<uint8_t> cfi_info_; - uint32_t cfi_cfa_offset_, cfi_pc_; - DISALLOW_COPY_AND_ASSIGN(X86Assembler); }; diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc index fccb510afb..dba3b6ba67 100644 --- a/compiler/utils/x86/assembler_x86_test.cc +++ b/compiler/utils/x86/assembler_x86_test.cc @@ -127,4 +127,49 @@ TEST_F(AssemblerX86Test, LoadLongConstant) { DriverStr(expected, "LoadLongConstant"); } +TEST_F(AssemblerX86Test, LockCmpxchgl) { + GetAssembler()->LockCmpxchgl(x86::Address( + x86::Register(x86::EDI), x86::Register(x86::EBX), x86::TIMES_4, 12), + x86::Register(x86::ESI)); + GetAssembler()->LockCmpxchgl(x86::Address( + x86::Register(x86::EDI), x86::Register(x86::ESI), x86::TIMES_4, 12), + x86::Register(x86::ESI)); + GetAssembler()->LockCmpxchgl(x86::Address( + x86::Register(x86::EDI), x86::Register(x86::ESI), x86::TIMES_4, 12), + x86::Register(x86::EDI)); + GetAssembler()->LockCmpxchgl(x86::Address( + x86::Register(x86::EBP), 0), x86::Register(x86::ESI)); + GetAssembler()->LockCmpxchgl(x86::Address( + x86::Register(x86::EBP), x86::Register(x86::ESI), x86::TIMES_1, 0), + x86::Register(x86::ESI)); + const char* expected = + "lock cmpxchgl %ESI, 0xc(%EDI,%EBX,4)\n" + "lock cmpxchgl %ESI, 0xc(%EDI,%ESI,4)\n" + "lock cmpxchgl %EDI, 0xc(%EDI,%ESI,4)\n" + "lock cmpxchgl %ESI, (%EBP)\n" + "lock cmpxchgl %ESI, (%EBP,%ESI,1)\n"; + + DriverStr(expected, "lock_cmpxchgl"); +} + +TEST_F(AssemblerX86Test, LockCmpxchg8b) { + GetAssembler()->LockCmpxchg8b(x86::Address( + x86::Register(x86::EDI), x86::Register(x86::EBX), x86::TIMES_4, 12)); + GetAssembler()->LockCmpxchg8b(x86::Address( + x86::Register(x86::EDI), x86::Register(x86::ESI), x86::TIMES_4, 12)); + GetAssembler()->LockCmpxchg8b(x86::Address( + x86::Register(x86::EDI), x86::Register(x86::ESI), x86::TIMES_4, 12)); + GetAssembler()->LockCmpxchg8b(x86::Address(x86::Register(x86::EBP), 0)); + GetAssembler()->LockCmpxchg8b(x86::Address( + x86::Register(x86::EBP), x86::Register(x86::ESI), x86::TIMES_1, 0)); + const char* expected = + "lock cmpxchg8b 0xc(%EDI,%EBX,4)\n" + "lock cmpxchg8b 0xc(%EDI,%ESI,4)\n" + "lock cmpxchg8b 0xc(%EDI,%ESI,4)\n" + "lock cmpxchg8b (%EBP)\n" + "lock cmpxchg8b (%EBP,%ESI,1)\n"; + + DriverStr(expected, "lock_cmpxchg8b"); +} + } // namespace art diff --git a/compiler/utils/x86/managed_register_x86.h b/compiler/utils/x86/managed_register_x86.h index 5d46ee25cd..09d2b4919d 100644 --- a/compiler/utils/x86/managed_register_x86.h +++ b/compiler/utils/x86/managed_register_x86.h @@ -88,14 +88,6 @@ const int kNumberOfAllocIds = kNumberOfCpuAllocIds + kNumberOfXmmAllocIds + // There is a one-to-one mapping between ManagedRegister and register id. class X86ManagedRegister : public ManagedRegister { public: - int DWARFRegId() const { - CHECK(IsCpuRegister()); - // For all the X86 registers we care about: - // EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI, - // DWARF register id is the same as id_. 
- return static_cast<int>(id_); - } - ByteRegister AsByteRegister() const { CHECK(IsCpuRegister()); CHECK_LT(AsCpuRegister(), ESP); // ESP, EBP, ESI and EDI cannot be encoded as byte registers. diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index 9ad31c725c..3ba926236f 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -20,7 +20,6 @@ #include "entrypoints/quick/quick_entrypoints.h" #include "memory_region.h" #include "thread.h" -#include "utils/dwarf_cfi.h" namespace art { namespace x86_64 { @@ -1625,6 +1624,14 @@ void X86_64Assembler::imull(CpuRegister reg) { } +void X86_64Assembler::imulq(CpuRegister reg) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitRex64(reg); + EmitUint8(0xF7); + EmitOperand(5, Operand(reg)); +} + + void X86_64Assembler::imull(const Address& address) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitOptionalRex32(address); @@ -1858,11 +1865,22 @@ X86_64Assembler* X86_64Assembler::lock() { void X86_64Assembler::cmpxchgl(const Address& address, CpuRegister reg) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitOptionalRex32(reg, address); EmitUint8(0x0F); EmitUint8(0xB1); EmitOperand(reg.LowBits(), address); } + +void X86_64Assembler::cmpxchgq(const Address& address, CpuRegister reg) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitRex64(reg, address); + EmitUint8(0x0F); + EmitUint8(0xB1); + EmitOperand(reg.LowBits(), address); +} + + void X86_64Assembler::mfence() { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x0F); @@ -2179,26 +2197,12 @@ void X86_64Assembler::EmitOptionalByteRegNormalizingRex32(CpuRegister dst, const } } -void X86_64Assembler::InitializeFrameDescriptionEntry() { - WriteFDEHeader(&cfi_info_, true /* is_64bit */); -} - -void X86_64Assembler::FinalizeFrameDescriptionEntry() { - WriteFDEAddressRange(&cfi_info_, buffer_.Size(), true /* is_64bit */); - PadCFI(&cfi_info_); - WriteCFILength(&cfi_info_, true /* is_64bit */); -} - constexpr size_t kFramePointerSize = 8; void X86_64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, const std::vector<ManagedRegister>& spill_regs, const ManagedRegisterEntrySpills& entry_spills) { - cfi_cfa_offset_ = kFramePointerSize; // Only return address on stack - cfi_pc_ = buffer_.Size(); // Nothing emitted yet - DCHECK_EQ(cfi_pc_, 0U); - - uint32_t reg_offset = 1; + DCHECK_EQ(buffer_.Size(), 0U); // Nothing emitted yet. CHECK_ALIGNED(frame_size, kStackAlignment); int gpr_count = 0; for (int i = spill_regs.size() - 1; i >= 0; --i) { @@ -2206,29 +2210,13 @@ void X86_64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, if (spill.IsCpuRegister()) { pushq(spill.AsCpuRegister()); gpr_count++; - - // DW_CFA_advance_loc - DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_); - cfi_pc_ = buffer_.Size(); - // DW_CFA_def_cfa_offset - cfi_cfa_offset_ += kFramePointerSize; - DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_); - // DW_CFA_offset reg offset - reg_offset++; - DW_CFA_offset(&cfi_info_, spill.DWARFRegId(), reg_offset); } } - // return address then method on stack + // return address then method on stack. 
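+  // (Unlike the x86 version, the ArtMethod* slot is included in rest_of_frame
+  // rather than being pushed separately.)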
int64_t rest_of_frame = static_cast<int64_t>(frame_size) - (gpr_count * kFramePointerSize) - kFramePointerSize /*return address*/; subq(CpuRegister(RSP), Immediate(rest_of_frame)); - // DW_CFA_advance_loc - DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_); - cfi_pc_ = buffer_.Size(); - // DW_CFA_def_cfa_offset - cfi_cfa_offset_ += rest_of_frame; - DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_); // spill xmms int64_t offset = rest_of_frame; @@ -2293,12 +2281,6 @@ void X86_64Assembler::RemoveFrame(size_t frame_size, void X86_64Assembler::IncreaseFrameSize(size_t adjust) { CHECK_ALIGNED(adjust, kStackAlignment); addq(CpuRegister(RSP), Immediate(-static_cast<int64_t>(adjust))); - // DW_CFA_advance_loc - DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_); - cfi_pc_ = buffer_.Size(); - // DW_CFA_def_cfa_offset - cfi_cfa_offset_ += adjust; - DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_); } void X86_64Assembler::DecreaseFrameSize(size_t adjust) { diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index 39f781cb1c..d357a813e8 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -244,7 +244,7 @@ class Address : public Operand { class X86_64Assembler FINAL : public Assembler { public: - X86_64Assembler() : cfi_cfa_offset_(0), cfi_pc_(0) {} + X86_64Assembler() {} virtual ~X86_64Assembler() {} /* @@ -468,6 +468,7 @@ class X86_64Assembler FINAL : public Assembler { void imull(CpuRegister reg, const Immediate& imm); void imull(CpuRegister reg, const Address& address); + void imulq(CpuRegister src); void imulq(CpuRegister dst, CpuRegister src); void imulq(CpuRegister reg, const Immediate& imm); void imulq(CpuRegister reg, const Address& address); @@ -517,6 +518,7 @@ class X86_64Assembler FINAL : public Assembler { X86_64Assembler* lock(); void cmpxchgl(const Address& address, CpuRegister reg); + void cmpxchgq(const Address& address, CpuRegister reg); void mfence(); @@ -539,6 +541,10 @@ class X86_64Assembler FINAL : public Assembler { lock()->cmpxchgl(address, reg); } + void LockCmpxchgq(const Address& address, CpuRegister reg) { + lock()->cmpxchgq(address, reg); + } + // // Misc. functionality // @@ -663,12 +669,6 @@ class X86_64Assembler FINAL : public Assembler { // and branch to a ExceptionSlowPath if it is. 
void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE; - void InitializeFrameDescriptionEntry() OVERRIDE; - void FinalizeFrameDescriptionEntry() OVERRIDE; - std::vector<uint8_t>* GetFrameDescriptionEntry() OVERRIDE { - return &cfi_info_; - } - private: void EmitUint8(uint8_t value); void EmitInt32(int32_t value); @@ -714,9 +714,6 @@ class X86_64Assembler FINAL : public Assembler { void EmitOptionalByteRegNormalizingRex32(CpuRegister dst, CpuRegister src); void EmitOptionalByteRegNormalizingRex32(CpuRegister dst, const Operand& operand); - std::vector<uint8_t> cfi_info_; - uint32_t cfi_cfa_offset_, cfi_pc_; - DISALLOW_COPY_AND_ASSIGN(X86_64Assembler); }; diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc index b90c142132..116190a832 100644 --- a/compiler/utils/x86_64/assembler_x86_64_test.cc +++ b/compiler/utils/x86_64/assembler_x86_64_test.cc @@ -315,6 +315,10 @@ TEST_F(AssemblerX86_64Test, AddlImm) { DriverStr(Repeatri(&x86_64::X86_64Assembler::addl, 4U, "add ${imm}, %{reg}"), "addli"); } +TEST_F(AssemblerX86_64Test, ImulqReg1) { + DriverStr(RepeatR(&x86_64::X86_64Assembler::imulq, "imulq %{reg}"), "imulq"); +} + TEST_F(AssemblerX86_64Test, ImulqRegs) { DriverStr(RepeatRR(&x86_64::X86_64Assembler::imulq, "imulq %{reg2}, %{reg1}"), "imulq"); } @@ -585,6 +589,56 @@ TEST_F(AssemblerX86_64Test, Xchgl) { // DriverStr(Repeatrr(&x86_64::X86_64Assembler::xchgl, "xchgl %{reg2}, %{reg1}"), "xchgl"); } +TEST_F(AssemblerX86_64Test, LockCmpxchgl) { + GetAssembler()->LockCmpxchgl(x86_64::Address( + x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12), + x86_64::CpuRegister(x86_64::RSI)); + GetAssembler()->LockCmpxchgl(x86_64::Address( + x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12), + x86_64::CpuRegister(x86_64::RSI)); + GetAssembler()->LockCmpxchgl(x86_64::Address( + x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12), + x86_64::CpuRegister(x86_64::R8)); + GetAssembler()->LockCmpxchgl(x86_64::Address( + x86_64::CpuRegister(x86_64::R13), 0), x86_64::CpuRegister(x86_64::RSI)); + GetAssembler()->LockCmpxchgl(x86_64::Address( + x86_64::CpuRegister(x86_64::R13), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_1, 0), + x86_64::CpuRegister(x86_64::RSI)); + const char* expected = + "lock cmpxchgl %ESI, 0xc(%RDI,%RBX,4)\n" + "lock cmpxchgl %ESI, 0xc(%RDI,%R9,4)\n" + "lock cmpxchgl %R8d, 0xc(%RDI,%R9,4)\n" + "lock cmpxchgl %ESI, (%R13)\n" + "lock cmpxchgl %ESI, (%R13,%R9,1)\n"; + + DriverStr(expected, "lock_cmpxchgl"); +} + +TEST_F(AssemblerX86_64Test, LockCmpxchgq) { + GetAssembler()->LockCmpxchgq(x86_64::Address( + x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12), + x86_64::CpuRegister(x86_64::RSI)); + GetAssembler()->LockCmpxchgq(x86_64::Address( + x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12), + x86_64::CpuRegister(x86_64::RSI)); + GetAssembler()->LockCmpxchgq(x86_64::Address( + x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12), + x86_64::CpuRegister(x86_64::R8)); + GetAssembler()->LockCmpxchgq(x86_64::Address( + x86_64::CpuRegister(x86_64::R13), 0), x86_64::CpuRegister(x86_64::RSI)); + GetAssembler()->LockCmpxchgq(x86_64::Address( + x86_64::CpuRegister(x86_64::R13), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_1, 0), + x86_64::CpuRegister(x86_64::RSI)); + const char* expected = + "lock cmpxchg %RSI, 
0xc(%RDI,%RBX,4)\n" + "lock cmpxchg %RSI, 0xc(%RDI,%R9,4)\n" + "lock cmpxchg %R8, 0xc(%RDI,%R9,4)\n" + "lock cmpxchg %RSI, (%R13)\n" + "lock cmpxchg %RSI, (%R13,%R9,1)\n"; + + DriverStr(expected, "lock_cmpxchg"); +} + TEST_F(AssemblerX86_64Test, Movl) { GetAssembler()->movl(x86_64::CpuRegister(x86_64::RAX), x86_64::Address( x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12)); diff --git a/compiler/utils/x86_64/managed_register_x86_64.h b/compiler/utils/x86_64/managed_register_x86_64.h index 3a96ad0b51..822659fffc 100644 --- a/compiler/utils/x86_64/managed_register_x86_64.h +++ b/compiler/utils/x86_64/managed_register_x86_64.h @@ -87,21 +87,6 @@ const int kNumberOfAllocIds = kNumberOfCpuAllocIds + kNumberOfXmmAllocIds + // There is a one-to-one mapping between ManagedRegister and register id. class X86_64ManagedRegister : public ManagedRegister { public: - int DWARFRegId() const { - CHECK(IsCpuRegister()); - switch (id_) { - case RAX: return 0; - case RDX: return 1; - case RCX: return 2; - case RBX: return 3; - case RSI: return 4; - case RDI: return 5; - case RBP: return 6; - case RSP: return 7; - default: return static_cast<int>(id_); // R8 ~ R15 - } - } - CpuRegister AsCpuRegister() const { CHECK(IsCpuRegister()); return CpuRegister(static_cast<Register>(id_)); diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc index a1834e1e9a..ba0c0bdebd 100644 --- a/disassembler/disassembler_x86.cc +++ b/disassembler/disassembler_x86.cc @@ -942,7 +942,7 @@ DISASSEMBLER_ENTRY(cmp, opcode1 = "pextrw"; prefix[2] = 0; has_modrm = true; - store = true; + load = true; src_reg_file = SSE; immediate_bytes = 1; } else { diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h index 6e7b04fc93..af008347cd 100644 --- a/runtime/base/mutex.h +++ b/runtime/base/mutex.h @@ -97,6 +97,7 @@ enum LockLevel { kAllocTrackerLock, kDeoptimizationLock, kProfilerLock, + kJdwpShutdownLock, kJdwpEventListLock, kJdwpAttachLock, kJdwpStartLock, diff --git a/runtime/debugger.cc b/runtime/debugger.cc index 6759c4d9c3..a909a1afbe 100644 --- a/runtime/debugger.cc +++ b/runtime/debugger.cc @@ -307,7 +307,6 @@ static JDWP::JdwpOptions gJdwpOptions; // Runtime JDWP state. static JDWP::JdwpState* gJdwpState = nullptr; static bool gDebuggerConnected; // debugger or DDMS is connected. -static bool gDisposed; // debugger called VirtualMachine.Dispose, so we should drop the connection. static bool gDdmThreadNotification = false; @@ -319,6 +318,7 @@ static Dbg::HpsgWhen gDdmNhsgWhen = Dbg::HPSG_WHEN_NEVER; static Dbg::HpsgWhat gDdmNhsgWhat; bool Dbg::gDebuggerActive = false; +bool Dbg::gDisposed = false; ObjectRegistry* Dbg::gRegistry = nullptr; // Recent allocation tracking. @@ -551,7 +551,7 @@ void Dbg::StopJdwp() { gJdwpState->PostVMDeath(); } // Prevent the JDWP thread from processing JDWP incoming packets after we close the connection. - Disposed(); + Dispose(); delete gJdwpState; gJdwpState = nullptr; delete gRegistry; @@ -599,14 +599,6 @@ void Dbg::Connected() { gDisposed = false; } -void Dbg::Disposed() { - gDisposed = true; -} - -bool Dbg::IsDisposed() { - return gDisposed; -} - bool Dbg::RequiresDeoptimization() { // We don't need deoptimization if everything runs with interpreter after // enabling -Xint mode. 
diff --git a/runtime/debugger.h b/runtime/debugger.h index 5898784c43..dd7f9c56fa 100644 --- a/runtime/debugger.h +++ b/runtime/debugger.h @@ -239,7 +239,9 @@ class Dbg { static void GoActive() LOCKS_EXCLUDED(Locks::breakpoint_lock_, Locks::deoptimization_lock_, Locks::mutator_lock_); static void Disconnected() LOCKS_EXCLUDED(Locks::deoptimization_lock_, Locks::mutator_lock_); - static void Disposed(); + static void Dispose() { + gDisposed = true; + } // Returns true if we're actually debugging with a real debugger, false if it's // just DDMS (or nothing at all). @@ -255,9 +257,12 @@ class Dbg { // Returns true if a method has any breakpoints. static bool MethodHasAnyBreakpoints(mirror::ArtMethod* method) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) LOCKS_EXCLUDED(Locks::breakpoint_lock_); + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) + LOCKS_EXCLUDED(Locks::breakpoint_lock_); - static bool IsDisposed(); + static bool IsDisposed() { + return gDisposed; + } /* * Time, in milliseconds, since the last debugger activity. Does not @@ -756,6 +761,10 @@ class Dbg { // Indicates whether the debugger is making requests. static bool gDebuggerActive; + // Indicates whether we should drop the JDWP connection because the runtime is shutting down or the + // debugger called VirtualMachine.Dispose. + static bool gDisposed; + // The registry mapping objects to JDWP ids. static ObjectRegistry* gRegistry; diff --git a/runtime/elf_file.cc b/runtime/elf_file.cc index bc5cf9b1ff..411ec43aab 100644 --- a/runtime/elf_file.cc +++ b/runtime/elf_file.cc @@ -1630,8 +1630,10 @@ static bool IsFDE(FDE64* frame) { return frame->CIE_pointer != 0; } -static bool FixupEHFrame(off_t base_address_delta, - uint8_t* eh_frame, size_t eh_frame_size) { +template <typename Elf_SOff> +static bool FixupEHFrame(Elf_SOff base_address_delta, uint8_t* eh_frame, size_t eh_frame_size) { + // TODO: Check the spec whether this is really data-dependent, or whether it's clear from the + // ELF file whether we should expect 32-bit or 64-bit.
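On the TODO above: the 32-bit/64-bit split here is a property of the DWARF data, not of the ELF class, so inspecting the bytes is the right approach. DWARF (v3 and later, section 7.4) defines an initial-length field: the value 0xffffffff selects the 64-bit DWARF format, with the real length following as a 64-bit value, while values below 0xfffffff0 are plain 32-bit lengths. That is exactly the marker the check below tests for. A small sketch of a conforming reader (hypothetical helper, not part of the patch):

#include <cstddef>
#include <cstdint>
#include <cstring>

struct InitialLength {
  uint64_t length;    // Length of the unit that follows.
  size_t field_size;  // 4 bytes for 32-bit DWARF, 12 bytes for 64-bit DWARF.
  bool is_dwarf64;
};

static InitialLength ReadInitialLength(const uint8_t* p) {
  uint32_t word;
  std::memcpy(&word, p, sizeof(word));  // memcpy avoids unaligned/aliasing pitfalls.
  if (word == 0xffffffffu) {
    uint64_t length;
    std::memcpy(&length, p + 4, sizeof(length));
    return {length, 12, true};
  }
  return {word, 4, false};
}

int main() {
  const uint8_t dwarf32[4] = {0x10, 0x00, 0x00, 0x00};  // Length 16, 32-bit format.
  return ReadInitialLength(dwarf32).is_dwarf64 ? 1 : 0;
}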
if (*(reinterpret_cast<uint32_t*>(eh_frame)) == 0xffffffff) { FDE64* last_frame = reinterpret_cast<FDE64*>(eh_frame + eh_frame_size); FDE64* frame = NextFDE(reinterpret_cast<FDE64*>(eh_frame)); @@ -1643,6 +1645,7 @@ static bool FixupEHFrame(off_t base_address_delta, } return true; } else { + CHECK(IsInt<32>(base_address_delta)); FDE32* last_frame = reinterpret_cast<FDE32*>(eh_frame + eh_frame_size); FDE32* frame = NextFDE(reinterpret_cast<FDE32*>(eh_frame)); for (; frame < last_frame; frame = NextFDE(frame)) { @@ -1772,7 +1775,9 @@ class DebugLineInstructionIterator FINAL { uint8_t* current_instruction_; }; -static bool FixupDebugLine(off_t base_offset_delta, DebugLineInstructionIterator* iter) { +template <typename Elf_SOff> +static bool FixupDebugLine(Elf_SOff base_offset_delta, DebugLineInstructionIterator* iter) { + CHECK(IsInt<32>(base_offset_delta)); for (; iter->GetInstruction(); iter->Next()) { if (iter->IsExtendedOpcode() && iter->GetOpcode() == dwarf::DW_LNE_set_address) { *reinterpret_cast<uint32_t*>(iter->GetArguments()) += base_offset_delta; @@ -2044,7 +2049,9 @@ class DebugInfoIterator { DebugTag* current_tag_; }; -static bool FixupDebugInfo(off_t base_address_delta, DebugInfoIterator* iter) { +template <typename Elf_SOff> +static bool FixupDebugInfo(Elf_SOff base_address_delta, DebugInfoIterator* iter) { + CHECK(IsInt<32>(base_address_delta)); do { if (iter->GetCurrentTag()->GetAttrSize(dwarf::DW_AT_low_pc) != sizeof(int32_t) || iter->GetCurrentTag()->GetAttrSize(dwarf::DW_AT_high_pc) != sizeof(int32_t)) { @@ -2066,7 +2073,7 @@ template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_ typename Elf_Rela, typename Elf_Dyn, typename Elf_Off> bool ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word, Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off> - ::FixupDebugSections(off_t base_address_delta) { + ::FixupDebugSections(typename std::make_signed<Elf_Off>::type base_address_delta) { const Elf_Shdr* debug_info = FindSectionByName(".debug_info"); const Elf_Shdr* debug_abbrev = FindSectionByName(".debug_abbrev"); const Elf_Shdr* eh_frame = FindSectionByName(".eh_frame"); @@ -2280,7 +2287,7 @@ template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_ typename Elf_Rela, typename Elf_Dyn, typename Elf_Off> bool ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word, Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off> - ::Fixup(uintptr_t base_address) { + ::Fixup(Elf_Addr base_address) { if (!FixupDynamic(base_address)) { LOG(WARNING) << "Failed to fixup .dynamic in " << file_->GetPath(); return false; @@ -2305,7 +2312,8 @@ bool ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word, LOG(WARNING) << "Failed to fixup .rel.dyn in " << file_->GetPath(); return false; } - if (!FixupDebugSections(base_address)) { + static_assert(sizeof(Elf_Off) >= sizeof(base_address), "Potentially losing precision."); + if (!FixupDebugSections(static_cast<Elf_Off>(base_address))) { LOG(WARNING) << "Failed to fixup debug sections in " << file_->GetPath(); return false; } @@ -2317,7 +2325,7 @@ template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_ typename Elf_Rela, typename Elf_Dyn, typename Elf_Off> bool ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word, Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off> - ::FixupDynamic(uintptr_t base_address) { + ::FixupDynamic(Elf_Addr base_address) { for (Elf_Word i = 0; i < GetDynamicNum(); i++) { Elf_Dyn& elf_dyn = GetDynamic(i); Elf_Word d_tag = 
elf_dyn.d_tag; @@ -2341,7 +2349,7 @@ template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_ typename Elf_Rela, typename Elf_Dyn, typename Elf_Off> bool ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word, Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off> - ::FixupSectionHeaders(uintptr_t base_address) { + ::FixupSectionHeaders(Elf_Addr base_address) { for (Elf_Word i = 0; i < GetSectionHeaderNum(); i++) { Elf_Shdr* sh = GetSectionHeader(i); CHECK(sh != nullptr); @@ -2365,7 +2373,7 @@ template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_ typename Elf_Rela, typename Elf_Dyn, typename Elf_Off> bool ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word, Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off> - ::FixupProgramHeaders(uintptr_t base_address) { + ::FixupProgramHeaders(Elf_Addr base_address) { // TODO: ELFObjectFile doesn't give access to Elf_Phdr, so we do that ourselves for now. for (Elf_Word i = 0; i < GetProgramHeaderNum(); i++) { Elf_Phdr* ph = GetProgramHeader(i); @@ -2392,7 +2400,7 @@ template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_ typename Elf_Rela, typename Elf_Dyn, typename Elf_Off> bool ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word, Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off> - ::FixupSymbols(uintptr_t base_address, bool dynamic) { + ::FixupSymbols(Elf_Addr base_address, bool dynamic) { Elf_Word section_type = dynamic ? SHT_DYNSYM : SHT_SYMTAB; // TODO: Unfortunately, ELFObjectFile has protected symbol access, so use ElfFile Elf_Shdr* symbol_section = FindSectionByType(section_type); @@ -2422,7 +2430,7 @@ template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_ typename Elf_Rela, typename Elf_Dyn, typename Elf_Off> bool ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word, Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off> - ::FixupRelocations(uintptr_t base_address) { + ::FixupRelocations(Elf_Addr base_address) { for (Elf_Word i = 0; i < GetSectionHeaderNum(); i++) { Elf_Shdr* sh = GetSectionHeader(i); CHECK(sh != nullptr); @@ -2622,7 +2630,14 @@ bool ElfFile::Strip(File* file, std::string* error_msg) { return elf_file->elf32_->Strip(error_msg); } -bool ElfFile::Fixup(uintptr_t base_address) { +bool ElfFile::Fixup(uint64_t base_address) { + if (elf64_.get() != nullptr) { + return elf64_->Fixup(static_cast<Elf64_Addr>(base_address)); + } else { + DCHECK(elf32_.get() != nullptr); + CHECK(IsUint<32>(base_address)) << std::hex << base_address; + return elf32_->Fixup(static_cast<Elf32_Addr>(base_address)); + } - DELEGATE_TO_IMPL(Fixup, base_address); } diff --git a/runtime/elf_file.h b/runtime/elf_file.h index 41c54bce7b..286c2a638c 100644 --- a/runtime/elf_file.h +++ b/runtime/elf_file.h @@ -78,9 +78,9 @@ class ElfFile { // Fixup an ELF file so that the oat header will be loaded at oat_begin. // Returns true on success, false on failure. - static bool Fixup(File* file, uintptr_t oat_data_begin); + static bool Fixup(File* file, uint64_t oat_data_begin); - bool Fixup(uintptr_t base_address); + bool Fixup(uint64_t base_address); bool Is64Bit() const { return elf64_.get() != nullptr; diff --git a/runtime/elf_file_impl.h b/runtime/elf_file_impl.h index a70fa17868..16d3857086 100644 --- a/runtime/elf_file_impl.h +++ b/runtime/elf_file_impl.h @@ -19,6 +19,7 @@ #include <map> #include <memory> +#include <type_traits> #include <vector> // Explicitly include our own elf.h to avoid Linux and other dependencies.
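The new <type_traits> include above supports the signature change: FixupDebugSections now takes the signed counterpart of Elf_Off, so the 32-bit instantiation of ElfFileImpl receives an int32_t delta and the 64-bit one an int64_t, instead of the host-dependent off_t. A compile-time sketch of what the trait resolves to (illustrative local aliases, not the real elf.h typedefs):

#include <cstdint>
#include <type_traits>

// ELF offsets are unsigned: 32-bit files use a 32-bit Off, 64-bit files a
// 64-bit one (aliases local to this sketch).
using Sketch_Elf32_Off = uint32_t;
using Sketch_Elf64_Off = uint64_t;

// std::make_signed maps each to the matching signed delta type, which is what
// the new FixupDebugSections parameter type resolves to per instantiation.
static_assert(std::is_same<std::make_signed<Sketch_Elf32_Off>::type, int32_t>::value,
              "32-bit ELF gets an int32_t base-address delta");
static_assert(std::is_same<std::make_signed<Sketch_Elf64_Off>::type, int64_t>::value,
              "64-bit ELF gets an int64_t base-address delta");

int main() { return 0; }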
@@ -102,13 +103,13 @@ class ElfFileImpl { // executable is true at run time, false at compile time. bool Load(bool executable, std::string* error_msg); - bool Fixup(uintptr_t base_address); - bool FixupDynamic(uintptr_t base_address); - bool FixupSectionHeaders(uintptr_t base_address); - bool FixupProgramHeaders(uintptr_t base_address); - bool FixupSymbols(uintptr_t base_address, bool dynamic); - bool FixupRelocations(uintptr_t base_address); - bool FixupDebugSections(off_t base_address_delta); + bool Fixup(Elf_Addr base_address); + bool FixupDynamic(Elf_Addr base_address); + bool FixupSectionHeaders(Elf_Addr base_address); + bool FixupProgramHeaders(Elf_Addr base_address); + bool FixupSymbols(Elf_Addr base_address, bool dynamic); + bool FixupRelocations(Elf_Addr base_address); + bool FixupDebugSections(typename std::make_signed<Elf_Off>::type base_address_delta); bool Strip(std::string* error_msg); diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc index d6a6595127..cdb3e2af79 100644 --- a/runtime/hprof/hprof.cc +++ b/runtime/hprof/hprof.cc @@ -949,6 +949,10 @@ void Hprof::DumpHeapObject(mirror::Object* obj) { } void Hprof::DumpHeapClass(mirror::Class* klass) { + if (!klass->IsLoaded() && !klass->IsErroneous()) { + // Class is allocated but not yet loaded: we cannot access its fields or super class. + return; + } size_t sFieldCount = klass->NumStaticFields(); if (sFieldCount != 0) { int byteLength = sFieldCount * sizeof(JValue); // TODO bogus; fields are packed diff --git a/runtime/jdwp/jdwp.h b/runtime/jdwp/jdwp.h index e16221c69a..31c9a0bb4e 100644 --- a/runtime/jdwp/jdwp.h +++ b/runtime/jdwp/jdwp.h @@ -403,6 +403,14 @@ struct JdwpState { // Used for VirtualMachine.Exit command handling. bool should_exit_; int exit_status_; + + // Used to synchronize runtime shutdown with the JDWP command handler thread. + // When the runtime shuts down, it needs to stop the JDWP command handler thread by closing the + // JDWP connection. However, if the JDWP thread is processing a command, it needs to wait + // for the command to finish so we can send its reply before closing the connection.
+ Mutex shutdown_lock_ ACQUIRED_AFTER(event_list_lock_); + ConditionVariable shutdown_cond_ GUARDED_BY(shutdown_lock_); + bool processing_request_ GUARDED_BY(shutdown_lock_); }; std::string DescribeField(const FieldId& field_id) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); diff --git a/runtime/jdwp/jdwp_handler.cc b/runtime/jdwp/jdwp_handler.cc index 0d161bc100..d0ca214ee4 100644 --- a/runtime/jdwp/jdwp_handler.cc +++ b/runtime/jdwp/jdwp_handler.cc @@ -271,7 +271,7 @@ static JdwpError VM_IDSizes(JdwpState*, Request*, ExpandBuf* pReply) static JdwpError VM_Dispose(JdwpState*, Request*, ExpandBuf*) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - Dbg::Disposed(); + Dbg::Dispose(); return ERR_NONE; } diff --git a/runtime/jdwp/jdwp_main.cc b/runtime/jdwp/jdwp_main.cc index e2b88a5e79..5b30f0cd8c 100644 --- a/runtime/jdwp/jdwp_main.cc +++ b/runtime/jdwp/jdwp_main.cc @@ -126,6 +126,7 @@ void JdwpNetStateBase::Close() { */ ssize_t JdwpNetStateBase::WritePacket(ExpandBuf* pReply, size_t length) { MutexLock mu(Thread::Current(), socket_lock_); + DCHECK(IsConnected()) << "Connection with debugger is closed"; DCHECK_LE(length, expandBufGetLength(pReply)); return TEMP_FAILURE_RETRY(write(clientSock, expandBufGetBuffer(pReply), length)); } @@ -140,6 +141,7 @@ ssize_t JdwpNetStateBase::WriteBufferedPacket(const std::vector<iovec>& iov) { ssize_t JdwpNetStateBase::WriteBufferedPacketLocked(const std::vector<iovec>& iov) { socket_lock_.AssertHeld(Thread::Current()); + DCHECK(IsConnected()) << "Connection with debugger is closed"; return TEMP_FAILURE_RETRY(writev(clientSock, &iov[0], iov.size())); } @@ -225,7 +227,10 @@ JdwpState::JdwpState(const JdwpOptions* options) jdwp_token_owner_thread_id_(0), ddm_is_active_(false), should_exit_(false), - exit_status_(0) { + exit_status_(0), + shutdown_lock_("JDWP shutdown lock", kJdwpShutdownLock), + shutdown_cond_("JDWP shutdown condition variable", shutdown_lock_), + processing_request_(false) { } /* @@ -338,10 +343,20 @@ void JdwpState::ResetState() { JdwpState::~JdwpState() { if (netState != nullptr) { /* - * Close down the network to inspire the thread to halt. + * Close down the network to inspire the thread to halt. If a request is being processed, + * we need to wait for it to finish first. */ - VLOG(jdwp) << "JDWP shutting down net..."; - netState->Shutdown(); + { + Thread* self = Thread::Current(); + MutexLock mu(self, shutdown_lock_); + while (processing_request_) { + VLOG(jdwp) << "JDWP command in progress: wait for it to finish ..."; + shutdown_cond_.Wait(self); + } + + VLOG(jdwp) << "JDWP shutting down net..."; + netState->Shutdown(); + } if (debug_thread_started_) { run = false; @@ -369,7 +384,13 @@ bool JdwpState::IsActive() { // Returns "false" if we encounter a connection-fatal error. 
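The destructor change above, together with the HandlePacket() change that follows, forms a classic condition-variable handshake: the command handler raises processing_request_ for the duration of each command, and the shutdown path blocks on shutdown_cond_ until the flag drops before closing the socket, which guarantees the reply goes out first. A minimal standalone sketch of the same pattern (hypothetical class, using standard-library primitives instead of ART's Mutex/ConditionVariable):

#include <condition_variable>
#include <mutex>

class ShutdownGate {
 public:
  void BeginRequest() {  // Command thread: a request is now in flight.
    std::lock_guard<std::mutex> lock(mu_);
    busy_ = true;
  }
  void EndRequest() {  // Command thread: reply sent, wake any waiter.
    {
      std::lock_guard<std::mutex> lock(mu_);
      busy_ = false;
    }
    cv_.notify_all();
  }
  void WaitUntilIdle() {  // Shutdown path: block until no request is in flight.
    std::unique_lock<std::mutex> lock(mu_);
    cv_.wait(lock, [this] { return !busy_; });
  }

 private:
  std::mutex mu_;
  std::condition_variable cv_;
  bool busy_ = false;
};

int main() {
  ShutdownGate gate;
  gate.BeginRequest();
  gate.EndRequest();
  gate.WaitUntilIdle();  // Returns immediately here: the flag is clear.
  return 0;
}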
bool JdwpState::HandlePacket() { - JdwpNetStateBase* netStateBase = reinterpret_cast<JdwpNetStateBase*>(netState); + Thread* const self = Thread::Current(); + { + MutexLock mu(self, shutdown_lock_); + processing_request_ = true; + } + JdwpNetStateBase* netStateBase = netState; + CHECK(netStateBase != nullptr) << "Connection has been closed"; JDWP::Request request(netStateBase->input_buffer_, netStateBase->input_count_); ExpandBuf* pReply = expandBufAlloc(); @@ -388,6 +409,11 @@ bool JdwpState::HandlePacket() { } expandBufFree(pReply); netStateBase->ConsumeBytes(request.GetLength()); + { + MutexLock mu(self, shutdown_lock_); + processing_request_ = false; + shutdown_cond_.Broadcast(self); + } return true; } diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc index c182a4d9ad..87ae64d1d4 100644 --- a/runtime/native/dalvik_system_DexFile.cc +++ b/runtime/native/dalvik_system_DexFile.cc @@ -297,22 +297,15 @@ static jobjectArray DexFile_getClassNameList(JNIEnv* env, jclass, jobject cookie return result; } -// Java: dalvik.system.DexFile.UP_TO_DATE -static const jbyte kUpToDate = 0; -// Java: dalvik.system.DexFile.DEXOPT_NEEDED -static const jbyte kPatchoatNeeded = 1; -// Java: dalvik.system.DexFile.PATCHOAT_NEEDED -static const jbyte kDexoptNeeded = 2; - -static jbyte IsDexOptNeededInternal(JNIEnv* env, const char* filename, +static jint GetDexOptNeeded(JNIEnv* env, const char* filename, const char* pkgname, const char* instruction_set, const jboolean defer) { if ((filename == nullptr) || !OS::FileExists(filename)) { - LOG(ERROR) << "DexFile_isDexOptNeeded file '" << filename << "' does not exist"; + LOG(ERROR) << "DexFile_getDexOptNeeded file '" << filename << "' does not exist"; ScopedLocalRef<jclass> fnfe(env, env->FindClass("java/io/FileNotFoundException")); const char* message = (filename == nullptr) ? "<empty file name>" : filename; env->ThrowNew(fnfe.get(), message); - return kUpToDate; + return OatFileAssistant::kNoDexOptNeeded; } const InstructionSet target_instruction_set = GetInstructionSetFromString(instruction_set); @@ -330,7 +323,7 @@ static jbyte IsDexOptNeededInternal(JNIEnv* env, const char* filename, // Always treat elements of the bootclasspath as up-to-date. 
if (oat_file_assistant.IsInBootClassPath()) { - return kUpToDate; + return OatFileAssistant::kNoDexOptNeeded; } // TODO: Checking the profile should probably be done in the GetStatus() @@ -343,7 +336,7 @@ static jbyte IsDexOptNeededInternal(JNIEnv* env, const char* filename, if (!defer) { oat_file_assistant.CopyProfileFile(); } - return kDexoptNeeded; + return OatFileAssistant::kDex2OatNeeded; } else if (oat_file_assistant.ProfileExists() && !oat_file_assistant.OldProfileExists()) { if (!defer) { @@ -353,16 +346,10 @@ static jbyte IsDexOptNeededInternal(JNIEnv* env, const char* filename, } } - OatFileAssistant::Status status = oat_file_assistant.GetStatus(); - switch (status) { - case OatFileAssistant::kUpToDate: return kUpToDate; - case OatFileAssistant::kNeedsRelocation: return kPatchoatNeeded; - case OatFileAssistant::kOutOfDate: return kDexoptNeeded; - } - UNREACHABLE(); + return oat_file_assistant.GetDexOptNeeded(); } -static jbyte DexFile_isDexOptNeededInternal(JNIEnv* env, jclass, jstring javaFilename, +static jint DexFile_getDexOptNeeded(JNIEnv* env, jclass, jstring javaFilename, jstring javaPkgname, jstring javaInstructionSet, jboolean defer) { ScopedUtfChars filename(env, javaFilename); if (env->ExceptionCheck()) { @@ -376,25 +363,25 @@ static jbyte DexFile_isDexOptNeededInternal(JNIEnv* env, jclass, jstring javaFil return 0; } - return IsDexOptNeededInternal(env, filename.c_str(), pkgname.c_str(), - instruction_set.c_str(), defer); + return GetDexOptNeeded(env, filename.c_str(), pkgname.c_str(), + instruction_set.c_str(), defer); } // public API, NULL pkgname static jboolean DexFile_isDexOptNeeded(JNIEnv* env, jclass, jstring javaFilename) { const char* instruction_set = GetInstructionSetString(kRuntimeISA); ScopedUtfChars filename(env, javaFilename); - return kUpToDate != IsDexOptNeededInternal(env, filename.c_str(), nullptr /* pkgname */, - instruction_set, false /* defer */); + jint status = GetDexOptNeeded(env, filename.c_str(), nullptr /* pkgname */, + instruction_set, false /* defer */); + return (status != OatFileAssistant::kNoDexOptNeeded) ? JNI_TRUE : JNI_FALSE; } - static JNINativeMethod gMethods[] = { NATIVE_METHOD(DexFile, closeDexFile, "(Ljava/lang/Object;)V"), NATIVE_METHOD(DexFile, defineClassNative, "(Ljava/lang/String;Ljava/lang/ClassLoader;Ljava/lang/Object;)Ljava/lang/Class;"), NATIVE_METHOD(DexFile, getClassNameList, "(Ljava/lang/Object;)[Ljava/lang/String;"), NATIVE_METHOD(DexFile, isDexOptNeeded, "(Ljava/lang/String;)Z"), - NATIVE_METHOD(DexFile, isDexOptNeededInternal, "(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Z)B"), + NATIVE_METHOD(DexFile, getDexOptNeeded, "(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Z)I"), NATIVE_METHOD(DexFile, openDexFileNative, "(Ljava/lang/String;Ljava/lang/String;I)Ljava/lang/Object;"), }; diff --git a/runtime/oat_file_assistant.cc b/runtime/oat_file_assistant.cc index d92f59bde6..e5c27b2430 100644 --- a/runtime/oat_file_assistant.cc +++ b/runtime/oat_file_assistant.cc @@ -142,31 +142,31 @@ bool OatFileAssistant::Lock(std::string* error_msg) { return true; } -OatFileAssistant::Status OatFileAssistant::GetStatus() { +OatFileAssistant::DexOptNeeded OatFileAssistant::GetDexOptNeeded() { // TODO: If the profiling code is ever restored, it's worth considering // whether we should check to see if the profile is out of date here. - if (OdexFileIsOutOfDate()) { - // The DEX file is not pre-compiled. - // TODO: What if the oat file is not out of date? Could we relocate it - // from itself? 
- return OatFileIsUpToDate() ? kUpToDate : kOutOfDate; - } else { - // The DEX file is pre-compiled. If the oat file isn't up to date, we can - // patch the pre-compiled version rather than recompiling. - if (OatFileIsUpToDate() || OdexFileIsUpToDate()) { - return kUpToDate; - } else { - return kNeedsRelocation; - } + if (OatFileIsUpToDate() || OdexFileIsUpToDate()) { + return kNoDexOptNeeded; } + + if (OdexFileNeedsRelocation()) { + return kPatchOatNeeded; + } + + if (OatFileNeedsRelocation()) { + return kSelfPatchOatNeeded; + } + + return kDex2OatNeeded; } bool OatFileAssistant::MakeUpToDate(std::string* error_msg) { - switch (GetStatus()) { - case kUpToDate: return true; - case kNeedsRelocation: return RelocateOatFile(error_msg); - case kOutOfDate: return GenerateOatFile(error_msg); + switch (GetDexOptNeeded()) { + case kNoDexOptNeeded: return true; + case kDex2OatNeeded: return GenerateOatFile(error_msg); + case kPatchOatNeeded: return RelocateOatFile(OdexFileName(), error_msg); + case kSelfPatchOatNeeded: return RelocateOatFile(OatFileName(), error_msg); } UNREACHABLE(); } @@ -269,14 +269,14 @@ bool OatFileAssistant::OdexFileExists() { return GetOdexFile() != nullptr; } -OatFileAssistant::Status OatFileAssistant::OdexFileStatus() { +OatFileAssistant::OatStatus OatFileAssistant::OdexFileStatus() { if (OdexFileIsOutOfDate()) { - return kOutOfDate; + return kOatOutOfDate; } if (OdexFileIsUpToDate()) { - return kUpToDate; + return kOatUpToDate; } - return kNeedsRelocation; + return kOatNeedsRelocation; } bool OatFileAssistant::OdexFileIsOutOfDate() { @@ -293,7 +293,7 @@ bool OatFileAssistant::OdexFileIsOutOfDate() { } bool OatFileAssistant::OdexFileNeedsRelocation() { - return OdexFileStatus() == kNeedsRelocation; + return OdexFileStatus() == kOatNeedsRelocation; } bool OatFileAssistant::OdexFileIsUpToDate() { @@ -338,14 +338,14 @@ bool OatFileAssistant::OatFileExists() { return GetOatFile() != nullptr; } -OatFileAssistant::Status OatFileAssistant::OatFileStatus() { +OatFileAssistant::OatStatus OatFileAssistant::OatFileStatus() { if (OatFileIsOutOfDate()) { - return kOutOfDate; + return kOatOutOfDate; } if (OatFileIsUpToDate()) { - return kUpToDate; + return kOatUpToDate; } - return kNeedsRelocation; + return kOatNeedsRelocation; } bool OatFileAssistant::OatFileIsOutOfDate() { @@ -362,7 +362,7 @@ bool OatFileAssistant::OatFileIsOutOfDate() { } bool OatFileAssistant::OatFileNeedsRelocation() { - return OatFileStatus() == kNeedsRelocation; + return OatFileStatus() == kOatNeedsRelocation; } bool OatFileAssistant::OatFileIsUpToDate() { @@ -378,17 +378,17 @@ bool OatFileAssistant::OatFileIsUpToDate() { return cached_oat_file_is_up_to_date_; } -OatFileAssistant::Status OatFileAssistant::GivenOatFileStatus(const OatFile& file) { +OatFileAssistant::OatStatus OatFileAssistant::GivenOatFileStatus(const OatFile& file) { // TODO: This could cause GivenOatFileIsOutOfDate to be called twice, which // is more work than we need to do. If performance becomes a concern, and // this method is actually called, this should be fixed. 
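Net effect of the rework above: GetDexOptNeeded() now names the cheapest action that yields up-to-date code, and MakeUpToDate() simply dispatches on it. A hypothetical out-of-runtime caller could follow the same shape (only the enum values below come from OatFileAssistant; the Run* helpers are invented for illustration):

#include <string>
#include "oat_file_assistant.h"  // ART header providing OatFileAssistant.

// Invented helpers, declarations only.
bool RunDex2Oat(std::string* error_msg);         // Full compile of the dex file.
bool RunPatchoatOnOdex(std::string* error_msg);  // Relocate the odex file.
bool RunPatchoatOnOat(std::string* error_msg);   // Relocate the oat file in place.

bool EnsureUpToDate(art::OatFileAssistant& assistant, std::string* error_msg) {
  switch (assistant.GetDexOptNeeded()) {
    case art::OatFileAssistant::kNoDexOptNeeded:     return true;  // Nothing to do.
    case art::OatFileAssistant::kDex2OatNeeded:      return RunDex2Oat(error_msg);
    case art::OatFileAssistant::kPatchOatNeeded:     return RunPatchoatOnOdex(error_msg);
    case art::OatFileAssistant::kSelfPatchOatNeeded: return RunPatchoatOnOat(error_msg);
  }
  return false;  // Unreachable; keeps -Wreturn-type quiet.
}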
if (GivenOatFileIsOutOfDate(file)) { - return kOutOfDate; + return kOatOutOfDate; } if (GivenOatFileIsUpToDate(file)) { - return kUpToDate; + return kOatUpToDate; } - return kNeedsRelocation; + return kOatNeedsRelocation; } bool OatFileAssistant::GivenOatFileIsOutOfDate(const OatFile& file) { @@ -451,7 +451,7 @@ bool OatFileAssistant::GivenOatFileIsOutOfDate(const OatFile& file) { } bool OatFileAssistant::GivenOatFileNeedsRelocation(const OatFile& file) { - return GivenOatFileStatus(file) == kNeedsRelocation; + return GivenOatFileStatus(file) == kOatNeedsRelocation; } bool OatFileAssistant::GivenOatFileIsUpToDate(const OatFile& file) { @@ -592,16 +592,17 @@ void OatFileAssistant::CopyProfileFile() { } } -bool OatFileAssistant::RelocateOatFile(std::string* error_msg) { +bool OatFileAssistant::RelocateOatFile(const std::string* input_file, + std::string* error_msg) { CHECK(error_msg != nullptr); - if (OdexFileName() == nullptr) { + if (input_file == nullptr) { *error_msg = "Patching of oat file for dex location " + std::string(dex_location_) - + " not attempted because the odex file name could not be determined."; + + " not attempted because the input file name could not be determined."; return false; } - const std::string& odex_file_name = *OdexFileName(); + const std::string& input_file_name = *input_file; if (OatFileName() == nullptr) { *error_msg = "Patching of oat file for dex location " @@ -628,7 +629,7 @@ bool OatFileAssistant::RelocateOatFile(std::string* error_msg) { std::vector<std::string> argv; argv.push_back(runtime->GetPatchoatExecutable()); argv.push_back("--instruction-set=" + std::string(GetInstructionSetString(isa_))); - argv.push_back("--input-oat-file=" + odex_file_name); + argv.push_back("--input-oat-file=" + input_file_name); argv.push_back("--output-oat-file=" + oat_file_name); argv.push_back("--patched-image-location=" + image_info->location); diff --git a/runtime/oat_file_assistant.h b/runtime/oat_file_assistant.h index f2abcf99d3..9e7c2efc45 100644 --- a/runtime/oat_file_assistant.h +++ b/runtime/oat_file_assistant.h @@ -43,20 +43,43 @@ namespace art { // be restored and tested, or removed. class OatFileAssistant { public: - enum Status { - // kOutOfDate - An oat file is said to be out of date if the file does not - // exist, or is out of date with respect to the dex file or boot image. - kOutOfDate, - - // kNeedsRelocation - An oat file is said to need relocation if the code - // is up to date, but not yet properly relocated for address space layout - // randomization (ASLR). In this case, the oat file is neither "out of - // date" nor "up to date". - kNeedsRelocation, - - // kUpToDate - An oat file is said to be up to date if it is not out of + enum DexOptNeeded { + // kNoDexOptNeeded - The code for this dex location is up to date and can + // be used as is. + // Matches Java: dalvik.system.DexFile.NO_DEXOPT_NEEDED = 0 + kNoDexOptNeeded = 0, + + // kDex2OatNeeded - In order to make the code for this dex location up to + // date, dex2oat must be run on the dex file. + // Matches Java: dalvik.system.DexFile.DEX2OAT_NEEDED = 1 + kDex2OatNeeded = 1, + + // kPatchOatNeeded - In order to make the code for this dex location up to + // date, patchoat must be run on the odex file. + // Matches Java: dalvik.system.DexFile.PATCHOAT_NEEDED = 2 + kPatchOatNeeded = 2, + + // kSelfPatchOatNeeded - In order to make the code for this dex location + // up to date, patchoat must be run on the oat file. 
+ // Matches Java: dalvik.system.DexFile.SELF_PATCHOAT_NEEDED = 3 + kSelfPatchOatNeeded = 3, + }; + + enum OatStatus { + // kOatOutOfDate - An oat file is said to be out of date if the file does + // not exist, or is out of date with respect to the dex file or boot + // image. + kOatOutOfDate, + + // kOatNeedsRelocation - An oat file is said to need relocation if the + // code is up to date, but not yet properly relocated for address space + // layout randomization (ASLR). In this case, the oat file is neither + // "out of date" nor "up to date". + kOatNeedsRelocation, + + // kOatUpToDate - An oat file is said to be up to date if it is not out of // date and has been properly relocated for the purposes of ASLR. - kUpToDate, + kOatUpToDate, }; // Constructs an OatFileAssistant object to assist the oat file @@ -67,7 +90,6 @@ class OatFileAssistant { // Typically the dex_location is the absolute path to the original, // un-optimized dex file. // - // // Note: Currently the dex_location must have an extension. // TODO: Relax this restriction? // @@ -121,8 +143,9 @@ class OatFileAssistant { // file. bool Lock(std::string* error_msg); - // Returns the overall compilation status for the given dex location. - Status GetStatus(); + // Return what action needs to be taken to produce up-to-date code for this + // dex location. + DexOptNeeded GetDexOptNeeded(); // Attempts to generate or relocate the oat file as needed to make it up to // date. @@ -164,7 +187,7 @@ class OatFileAssistant { // determined. const std::string* OdexFileName(); bool OdexFileExists(); - Status OdexFileStatus(); + OatStatus OdexFileStatus(); bool OdexFileIsOutOfDate(); bool OdexFileNeedsRelocation(); bool OdexFileIsUpToDate(); @@ -176,20 +199,18 @@ class OatFileAssistant { // the dex location. // // Notes: - // * To get the overall status of the compiled code for this dex_location, - // use the GetStatus() method, not the OatFileStatus() method. // * OatFileName may return null if the oat file name could not be // determined. const std::string* OatFileName(); bool OatFileExists(); - Status OatFileStatus(); + OatStatus OatFileStatus(); bool OatFileIsOutOfDate(); bool OatFileNeedsRelocation(); bool OatFileIsUpToDate(); // These methods return the status for a given opened oat file with respect // to the dex location. - Status GivenOatFileStatus(const OatFile& file); + OatStatus GivenOatFileStatus(const OatFile& file); bool GivenOatFileIsOutOfDate(const OatFile& file); bool GivenOatFileNeedsRelocation(const OatFile& file); bool GivenOatFileIsUpToDate(const OatFile& file); @@ -216,7 +237,7 @@ class OatFileAssistant { // Copy the current profile to the old profile location. void CopyProfileFile(); - // Generates the oat file by relocation from the odex file. + // Generates the oat file by relocation from the named input file. // This does not check the current status before attempting to relocate the // oat file. // Returns true on success. @@ -224,7 +245,7 @@ class OatFileAssistant { // // If there is a failure, the value of error_msg will be set to a string // describing why there was failure. error_msg must not be nullptr. - bool RelocateOatFile(std::string* error_msg); + bool RelocateOatFile(const std::string* input_file, std::string* error_msg); // Generate the oat file from the dex file. 
// This does not check the current status before attempting to generate the diff --git a/runtime/oat_file_assistant_test.cc b/runtime/oat_file_assistant_test.cc index a1988244c7..d2362a210e 100644 --- a/runtime/oat_file_assistant_test.cc +++ b/runtime/oat_file_assistant_test.cc @@ -29,7 +29,9 @@ #include "common_runtime_test.h" #include "compiler_callbacks.h" #include "mem_map.h" +#include "mirror/art_field-inl.h" #include "os.h" +#include "scoped_thread_state_change.h" #include "thread-inl.h" #include "utils.h" @@ -267,42 +269,42 @@ static void GenerateOatForTest(const char* dex_location) { } // Case: We have a DEX file, but no OAT file for it. -// Expect: The oat file status is kOutOfDate. +// Expect: The status is kDex2OatNeeded. TEST_F(OatFileAssistantTest, DexNoOat) { std::string dex_location = GetScratchDir() + "/DexNoOat.jar"; Copy(GetDexSrc1(), dex_location); OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); - EXPECT_EQ(OatFileAssistant::kOutOfDate, oat_file_assistant.GetStatus()); + EXPECT_EQ(OatFileAssistant::kDex2OatNeeded, oat_file_assistant.GetDexOptNeeded()); EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); EXPECT_FALSE(oat_file_assistant.OdexFileExists()); EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate()); EXPECT_FALSE(oat_file_assistant.OdexFileNeedsRelocation()); EXPECT_FALSE(oat_file_assistant.OdexFileIsUpToDate()); - EXPECT_EQ(OatFileAssistant::kOutOfDate, oat_file_assistant.OdexFileStatus()); + EXPECT_EQ(OatFileAssistant::kOatOutOfDate, oat_file_assistant.OdexFileStatus()); EXPECT_FALSE(oat_file_assistant.OatFileExists()); EXPECT_TRUE(oat_file_assistant.OatFileIsOutOfDate()); EXPECT_FALSE(oat_file_assistant.OatFileNeedsRelocation()); EXPECT_FALSE(oat_file_assistant.OatFileIsUpToDate()); - EXPECT_EQ(OatFileAssistant::kOutOfDate, oat_file_assistant.OatFileStatus()); + EXPECT_EQ(OatFileAssistant::kOatOutOfDate, oat_file_assistant.OatFileStatus()); } // Case: We have no DEX file and no OAT file. -// Expect: Status is out of date. Loading should fail, but not crash. +// Expect: Status is kDex2OatNeeded. Loading should fail, but not crash. TEST_F(OatFileAssistantTest, NoDexNoOat) { std::string dex_location = GetScratchDir() + "/NoDexNoOat.jar"; OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true); - EXPECT_EQ(OatFileAssistant::kOutOfDate, oat_file_assistant.GetStatus()); + EXPECT_EQ(OatFileAssistant::kDex2OatNeeded, oat_file_assistant.GetDexOptNeeded()); std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile(); EXPECT_EQ(nullptr, oat_file.get()); } // Case: We have a DEX file and up-to-date OAT file for it. -// Expect: The oat file status is kUpToDate. +// Expect: The status is kNoDexOptNeeded. 
TEST_F(OatFileAssistantTest, OatUpToDate) { std::string dex_location = GetScratchDir() + "/OatUpToDate.jar"; Copy(GetDexSrc1(), dex_location); @@ -310,7 +312,7 @@ TEST_F(OatFileAssistantTest, OatUpToDate) { OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); - EXPECT_EQ(OatFileAssistant::kUpToDate, oat_file_assistant.GetStatus()); + EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded()); EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); EXPECT_FALSE(oat_file_assistant.OdexFileExists()); EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate()); @@ -319,18 +321,20 @@ TEST_F(OatFileAssistantTest, OatUpToDate) { EXPECT_FALSE(oat_file_assistant.OatFileIsOutOfDate()); EXPECT_FALSE(oat_file_assistant.OatFileNeedsRelocation()); EXPECT_TRUE(oat_file_assistant.OatFileIsUpToDate()); - EXPECT_EQ(OatFileAssistant::kUpToDate, oat_file_assistant.OatFileStatus()); + EXPECT_EQ(OatFileAssistant::kOatUpToDate, oat_file_assistant.OatFileStatus()); } // Case: We have a MultiDEX file and up-to-date OAT file for it. -// Expect: The oat file status is kUpToDate. +// Expect: The status is kNoDexOptNeeded and we load all dex files. TEST_F(OatFileAssistantTest, MultiDexOatUpToDate) { std::string dex_location = GetScratchDir() + "/MultiDexOatUpToDate.jar"; Copy(GetMultiDexSrc1(), dex_location); GenerateOatForTest(dex_location.c_str()); - // Verify we can load both dex files. OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true); + EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded()); + + // Verify we can load both dex files. std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile(); ASSERT_TRUE(oat_file.get() != nullptr); EXPECT_TRUE(oat_file->IsExecutable()); @@ -341,7 +345,7 @@ TEST_F(OatFileAssistantTest, MultiDexOatUpToDate) { // Case: We have a MultiDEX file and up-to-date OAT file for it with relative // encoded dex locations. -// Expect: The oat file status is kUpToDate. +// Expect: The oat file status is kNoDexOptNeeded. TEST_F(OatFileAssistantTest, RelativeEncodedDexLocation) { std::string dex_location = GetScratchDir() + "/RelativeEncodedDexLocation.jar"; std::string oat_location = GetOdexDir() + "/RelativeEncodedDexLocation.oat"; @@ -370,8 +374,8 @@ TEST_F(OatFileAssistantTest, RelativeEncodedDexLocation) { EXPECT_EQ(2u, dex_files.size()); } -// Case: We have a DEX file and out of date OAT file. -// Expect: The oat file status is kOutOfDate. +// Case: We have a DEX file and out-of-date OAT file. +// Expect: The status is kDex2OatNeeded. TEST_F(OatFileAssistantTest, OatOutOfDate) { std::string dex_location = GetScratchDir() + "/OatOutOfDate.jar"; @@ -382,7 +386,7 @@ TEST_F(OatFileAssistantTest, OatOutOfDate) { Copy(GetDexSrc2(), dex_location); OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); - EXPECT_EQ(OatFileAssistant::kOutOfDate, oat_file_assistant.GetStatus()); + EXPECT_EQ(OatFileAssistant::kDex2OatNeeded, oat_file_assistant.GetDexOptNeeded()); EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); EXPECT_FALSE(oat_file_assistant.OdexFileExists()); @@ -394,7 +398,7 @@ TEST_F(OatFileAssistantTest, OatOutOfDate) { } // Case: We have a DEX file and an ODEX file, but no OAT file. -// Expect: The oat file status is kNeedsRelocation. +// Expect: The status is kPatchOatNeeded. 
TEST_F(OatFileAssistantTest, DexOdexNoOat) { std::string dex_location = GetScratchDir() + "/DexOdexNoOat.jar"; std::string odex_location = GetOdexDir() + "/DexOdexNoOat.odex"; @@ -406,21 +410,20 @@ // Verify the status. OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); - EXPECT_EQ(OatFileAssistant::kNeedsRelocation, oat_file_assistant.GetStatus()); + EXPECT_EQ(OatFileAssistant::kPatchOatNeeded, oat_file_assistant.GetDexOptNeeded()); EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); EXPECT_TRUE(oat_file_assistant.OdexFileExists()); EXPECT_FALSE(oat_file_assistant.OdexFileIsOutOfDate()); EXPECT_FALSE(oat_file_assistant.OdexFileIsUpToDate()); EXPECT_TRUE(oat_file_assistant.OdexFileNeedsRelocation()); - EXPECT_EQ(OatFileAssistant::kNeedsRelocation, oat_file_assistant.OdexFileNeedsRelocation()); EXPECT_FALSE(oat_file_assistant.OatFileExists()); EXPECT_TRUE(oat_file_assistant.OatFileIsOutOfDate()); EXPECT_FALSE(oat_file_assistant.OatFileIsUpToDate()); } // Case: We have a stripped DEX file and an ODEX file, but no OAT file. -// Expect: The oat file status is kNeedsRelocation. +// Expect: The status is kPatchOatNeeded. TEST_F(OatFileAssistantTest, StrippedDexOdexNoOat) { std::string dex_location = GetScratchDir() + "/StrippedDexOdexNoOat.jar"; std::string odex_location = GetOdexDir() + "/StrippedDexOdexNoOat.odex"; @@ -435,7 +438,7 @@ // Verify the status. OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true); - EXPECT_EQ(OatFileAssistant::kNeedsRelocation, oat_file_assistant.GetStatus()); + EXPECT_EQ(OatFileAssistant::kPatchOatNeeded, oat_file_assistant.GetDexOptNeeded()); EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); EXPECT_TRUE(oat_file_assistant.OdexFileExists()); @@ -449,7 +452,7 @@ std::string error_msg; ASSERT_TRUE(oat_file_assistant.MakeUpToDate(&error_msg)) << error_msg; - EXPECT_EQ(OatFileAssistant::kUpToDate, oat_file_assistant.GetStatus()); + EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded()); EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); EXPECT_TRUE(oat_file_assistant.OdexFileExists()); @@ -468,8 +471,8 @@ EXPECT_EQ(1u, dex_files.size()); } -// Case: We have a stripped DEX file, an ODEX file, and an out of date OAT file. -// Expect: The oat file status is kNeedsRelocation. +// Case: We have a stripped DEX file, an ODEX file, and an out-of-date OAT file. +// Expect: The status is kPatchOatNeeded. TEST_F(OatFileAssistantTest, StrippedDexOdexOat) { std::string dex_location = GetScratchDir() + "/StrippedDexOdexOat.jar"; std::string odex_location = GetOdexDir() + "/StrippedDexOdexOat.odex"; @@ -488,7 +491,7 @@ // Verify the status.
OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true); - EXPECT_EQ(OatFileAssistant::kNeedsRelocation, oat_file_assistant.GetStatus()); + EXPECT_EQ(OatFileAssistant::kPatchOatNeeded, oat_file_assistant.GetDexOptNeeded()); EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); EXPECT_TRUE(oat_file_assistant.OdexFileExists()); @@ -503,7 +506,7 @@ TEST_F(OatFileAssistantTest, StrippedDexOdexOat) { std::string error_msg; ASSERT_TRUE(oat_file_assistant.MakeUpToDate(&error_msg)) << error_msg; - EXPECT_EQ(OatFileAssistant::kUpToDate, oat_file_assistant.GetStatus()); + EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded()); EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); EXPECT_TRUE(oat_file_assistant.OdexFileExists()); @@ -524,9 +527,59 @@ TEST_F(OatFileAssistantTest, StrippedDexOdexOat) { EXPECT_EQ(1u, dex_files.size()); } +// Case: We have a DEX file, no ODEX file and an OAT file that needs +// relocation. +// Expect: The status is kSelfPatchOatNeeded. +TEST_F(OatFileAssistantTest, SelfRelocation) { + std::string dex_location = GetScratchDir() + "/SelfRelocation.jar"; + std::string oat_location = GetOdexDir() + "/SelfRelocation.oat"; + + // Create the dex and odex files + Copy(GetDexSrc1(), dex_location); + GenerateOdexForTest(dex_location, oat_location); + + OatFileAssistant oat_file_assistant(dex_location.c_str(), + oat_location.c_str(), kRuntimeISA, true); + + EXPECT_EQ(OatFileAssistant::kSelfPatchOatNeeded, oat_file_assistant.GetDexOptNeeded()); + + EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); + EXPECT_FALSE(oat_file_assistant.OdexFileExists()); + EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate()); + EXPECT_FALSE(oat_file_assistant.OdexFileNeedsRelocation()); + EXPECT_FALSE(oat_file_assistant.OdexFileIsUpToDate()); + EXPECT_TRUE(oat_file_assistant.OatFileExists()); + EXPECT_TRUE(oat_file_assistant.OatFileNeedsRelocation()); + EXPECT_FALSE(oat_file_assistant.OatFileIsOutOfDate()); + EXPECT_FALSE(oat_file_assistant.OatFileIsUpToDate()); + + // Make the oat file up to date. + std::string error_msg; + ASSERT_TRUE(oat_file_assistant.MakeUpToDate(&error_msg)) << error_msg; + + EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded()); + + EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); + EXPECT_FALSE(oat_file_assistant.OdexFileExists()); + EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate()); + EXPECT_FALSE(oat_file_assistant.OdexFileNeedsRelocation()); + EXPECT_FALSE(oat_file_assistant.OdexFileIsUpToDate()); + EXPECT_TRUE(oat_file_assistant.OatFileExists()); + EXPECT_FALSE(oat_file_assistant.OatFileIsOutOfDate()); + EXPECT_FALSE(oat_file_assistant.OatFileNeedsRelocation()); + EXPECT_TRUE(oat_file_assistant.OatFileIsUpToDate()); + + std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile(); + ASSERT_TRUE(oat_file.get() != nullptr); + EXPECT_TRUE(oat_file->IsExecutable()); + std::vector<std::unique_ptr<const DexFile>> dex_files; + dex_files = oat_file_assistant.LoadDexFiles(*oat_file, dex_location.c_str()); + EXPECT_EQ(1u, dex_files.size()); +} + // Case: We have a DEX file, an ODEX file and an OAT file, where the ODEX and // OAT files both have patch delta of 0. -// Expect: It shouldn't crash. +// Expect: It shouldn't crash, and status is kPatchOatNeeded. 
TEST_F(OatFileAssistantTest, OdexOatOverlap) { std::string dex_location = GetScratchDir() + "/OdexOatOverlap.jar"; std::string odex_location = GetOdexDir() + "/OdexOatOverlap.odex"; @@ -544,7 +597,7 @@ TEST_F(OatFileAssistantTest, OdexOatOverlap) { OatFileAssistant oat_file_assistant(dex_location.c_str(), oat_location.c_str(), kRuntimeISA, true); - EXPECT_EQ(OatFileAssistant::kNeedsRelocation, oat_file_assistant.GetStatus()); + EXPECT_EQ(OatFileAssistant::kPatchOatNeeded, oat_file_assistant.GetDexOptNeeded()); EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); EXPECT_TRUE(oat_file_assistant.OdexFileExists()); @@ -564,7 +617,7 @@ TEST_F(OatFileAssistantTest, OdexOatOverlap) { } // Case: We have a DEX file and a PIC ODEX file, but no OAT file. -// Expect: The oat file status is kUpToDate, because PIC needs no relocation. +// Expect: The status is kNoDexOptNeeded, because PIC needs no relocation. TEST_F(OatFileAssistantTest, DexPicOdexNoOat) { std::string dex_location = GetScratchDir() + "/DexPicOdexNoOat.jar"; std::string odex_location = GetOdexDir() + "/DexPicOdexNoOat.odex"; @@ -576,7 +629,7 @@ TEST_F(OatFileAssistantTest, DexPicOdexNoOat) { // Verify the status. OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); - EXPECT_EQ(OatFileAssistant::kUpToDate, oat_file_assistant.GetStatus()); + EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded()); EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); EXPECT_TRUE(oat_file_assistant.OdexFileExists()); @@ -661,7 +714,7 @@ TEST_F(OatFileAssistantTest, NonExsistentDexLocation) { OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true); EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); - EXPECT_EQ(OatFileAssistant::kOutOfDate, oat_file_assistant.GetStatus()); + EXPECT_EQ(OatFileAssistant::kDex2OatNeeded, oat_file_assistant.GetDexOptNeeded()); EXPECT_FALSE(oat_file_assistant.OdexFileExists()); EXPECT_FALSE(oat_file_assistant.OatFileExists()); EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate()); @@ -720,7 +773,7 @@ TEST_F(OatFileAssistantTest, NonAbsoluteDexLocation) { OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true); EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); - EXPECT_EQ(OatFileAssistant::kOutOfDate, oat_file_assistant.GetStatus()); + EXPECT_EQ(OatFileAssistant::kDex2OatNeeded, oat_file_assistant.GetDexOptNeeded()); EXPECT_FALSE(oat_file_assistant.OdexFileExists()); EXPECT_FALSE(oat_file_assistant.OatFileExists()); EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate()); @@ -737,7 +790,7 @@ TEST_F(OatFileAssistantTest, ShortDexLocation) { OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true); EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); - EXPECT_EQ(OatFileAssistant::kOutOfDate, oat_file_assistant.GetStatus()); + EXPECT_EQ(OatFileAssistant::kDex2OatNeeded, oat_file_assistant.GetDexOptNeeded()); EXPECT_FALSE(oat_file_assistant.OdexFileExists()); EXPECT_FALSE(oat_file_assistant.OatFileExists()); EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate()); @@ -751,14 +804,14 @@ TEST_F(OatFileAssistantTest, ShortDexLocation) { } // Case: Non-standard extension for dex file. -// Expect: The oat file status is kOutOfDate. +// Expect: The status is kDex2OatNeeded. 
TEST_F(OatFileAssistantTest, LongDexExtension) { std::string dex_location = GetScratchDir() + "/LongDexExtension.jarx"; Copy(GetDexSrc1(), dex_location); OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); - EXPECT_EQ(OatFileAssistant::kOutOfDate, oat_file_assistant.GetStatus()); + EXPECT_EQ(OatFileAssistant::kDex2OatNeeded, oat_file_assistant.GetDexOptNeeded()); EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); EXPECT_FALSE(oat_file_assistant.OdexFileExists()); @@ -895,6 +948,41 @@ TEST(OatFileAssistantUtilsTest, DexFilenameToOdexFilename) { "/foo/bar/baz_noext", kArm, &odex_file, &error_msg)); } +// Verify the dexopt status values from dalvik.system.DexFile +// match the OatFileAssistant::DexOptNeeded values. +TEST_F(OatFileAssistantTest, DexOptStatusValues) { + ScopedObjectAccess soa(Thread::Current()); + StackHandleScope<1> hs(soa.Self()); + ClassLinker* linker = Runtime::Current()->GetClassLinker(); + Handle<mirror::Class> dexfile( + hs.NewHandle(linker->FindSystemClass(soa.Self(), "Ldalvik/system/DexFile;"))); + ASSERT_FALSE(dexfile.Get() == nullptr); + linker->EnsureInitialized(soa.Self(), dexfile, true, true); + + mirror::ArtField* no_dexopt_needed = mirror::Class::FindStaticField( + soa.Self(), dexfile, "NO_DEXOPT_NEEDED", "I"); + ASSERT_FALSE(no_dexopt_needed == nullptr); + EXPECT_EQ(no_dexopt_needed->GetTypeAsPrimitiveType(), Primitive::kPrimInt); + EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, no_dexopt_needed->GetInt(dexfile.Get())); + + mirror::ArtField* dex2oat_needed = mirror::Class::FindStaticField( + soa.Self(), dexfile, "DEX2OAT_NEEDED", "I"); + ASSERT_FALSE(dex2oat_needed == nullptr); + EXPECT_EQ(dex2oat_needed->GetTypeAsPrimitiveType(), Primitive::kPrimInt); + EXPECT_EQ(OatFileAssistant::kDex2OatNeeded, dex2oat_needed->GetInt(dexfile.Get())); + + mirror::ArtField* patchoat_needed = mirror::Class::FindStaticField( + soa.Self(), dexfile, "PATCHOAT_NEEDED", "I"); + ASSERT_FALSE(patchoat_needed == nullptr); + EXPECT_EQ(patchoat_needed->GetTypeAsPrimitiveType(), Primitive::kPrimInt); + EXPECT_EQ(OatFileAssistant::kPatchOatNeeded, patchoat_needed->GetInt(dexfile.Get())); + + mirror::ArtField* self_patchoat_needed = mirror::Class::FindStaticField( + soa.Self(), dexfile, "SELF_PATCHOAT_NEEDED", "I"); + ASSERT_FALSE(self_patchoat_needed == nullptr); + EXPECT_EQ(self_patchoat_needed->GetTypeAsPrimitiveType(), Primitive::kPrimInt); + EXPECT_EQ(OatFileAssistant::kSelfPatchOatNeeded, self_patchoat_needed->GetInt(dexfile.Get())); +} // TODO: More Tests: // * Test class linker falls back to unquickened dex for DexNoOat diff --git a/test/004-UnsafeTest/src/Main.java b/test/004-UnsafeTest/src/Main.java index 3d0f074f94..708f61f028 100644 --- a/test/004-UnsafeTest/src/Main.java +++ b/test/004-UnsafeTest/src/Main.java @@ -104,6 +104,16 @@ public class Main { if (!unsafe.compareAndSwapInt(t, intOffset, 0, 1)) { System.out.println("Unexpectedly not succeeding compareAndSwap..."); } + + if (unsafe.compareAndSwapLong(t, longOffset, 0, 1)) { + System.out.println("Unexpectedly succeeding compareAndSwapLong..."); + } + if (!unsafe.compareAndSwapLong(t, longOffset, longValue, 0)) { + System.out.println("Unexpectedly not succeeding compareAndSwapLong..."); + } + if (!unsafe.compareAndSwapLong(t, longOffset, 0, 1)) { + System.out.println("Unexpectedly not succeeding compareAndSwapLong..."); + } } private static class TestClass { diff --git a/test/107-int-math2/src/Main.java b/test/107-int-math2/src/Main.java index f0fe934ae9..6a6227cee5 100644 ---
a/test/107-int-math2/src/Main.java +++ b/test/107-int-math2/src/Main.java @@ -379,7 +379,7 @@ class Main extends IntMathBase { */ static int lit16Test(int x) { - int[] results = new int[8]; + int[] results = new int[10]; /* try to generate op-int/lit16" instructions */ results[0] = x + 1000; @@ -390,6 +390,9 @@ class Main extends IntMathBase { results[5] = x & 1000; results[6] = x | -1000; results[7] = x ^ -1000; + /* use a 16-bit constant that has its MSB (bit-15) set */ + results[8] = x / 32769; + results[9] = x / -32769; if (results[0] != 78777) { return 1; } if (results[1] != -76777) { return 2; } @@ -399,6 +402,8 @@ class Main extends IntMathBase { if (results[5] != 960) { return 6; } if (results[6] != -39) { return 7; } if (results[7] != -76855) { return 8; } + if (results[8] != 2) { return 9; } + if (results[9] != -2) { return 10; } return 0; }
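For the two new expected values: results[0] == 78777 implies the test calls lit16Test(77777), and integer division truncates toward zero, so 77777 / 32769 == 2 and 77777 / -32769 == -2. A quick C++ check of the same arithmetic (C++ and Java share the truncation rule for int division):

#include <cassert>

int main() {
  const int x = 77777;       // Implied by results[0] == x + 1000 == 78777.
  assert(x / 32769 == 2);    // 2 * 32769 == 65538 <= 77777 < 3 * 32769 == 98307.
  assert(x / -32769 == -2);  // Truncation toward zero, not flooring.
  return 0;
}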