Diffstat (limited to 'compiler'): 102 files changed, 4852 insertions, 1266 deletions
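The SrcMap rework in compiler/compiled_method.h (first hunks below) replaces the Arrange()/DeltaFormat() passes with a push_back() that requires entries in ascending native-PC order and drops consecutive entries mapping to the same dex PC, plus a lower_bound-based Find(). A minimal standalone sketch of those semantics, assuming a plain std::vector in place of the SwapSpace-backed allocator used by the real class:

// A minimal sketch of the new SrcMap semantics, assuming a plain std::vector
// in place of the SwapSpace-backed allocator used by the real class.
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>

struct SrcMapElem {
  uint32_t from_;  // native PC offset
  int32_t to_;     // dex PC
  // Lexicographical compare, as in the patch.
  bool operator<(const SrcMapElem& other) const {
    if (from_ != other.from_) {
      return from_ < other.from_;
    }
    return to_ < other.to_;
  }
};

struct SrcMap : std::vector<SrcMapElem> {
  void push_back(const SrcMapElem& elem) {
    if (!empty()) {
      assert(elem.from_ >= back().from_);  // entries must be appended in sorted order
      if (elem.to_ == back().to_) {
        return;  // consecutive entries mapping to the same dex PC are redundant
      }
    }
    std::vector<SrcMapElem>::push_back(elem);
  }

  // Returns {true, to} if a mapping covers 'from', {false, 0} otherwise.
  std::pair<bool, int32_t> Find(uint32_t from) const {
    auto lb = std::lower_bound(begin(), end(), SrcMapElem{from, INT32_MIN});
    if (lb != end() && lb->from_ == from) {
      return {true, lb->to_};      // exact match
    } else if (lb != begin()) {
      return {true, (--lb)->to_};  // previous mapping is still in effect
    }
    return {false, 0};             // 'from' precedes the first entry
  }
};

int main() {
  SrcMap map;
  map.push_back(SrcMapElem{0, 1});
  map.push_back(SrcMapElem{4, 1});  // dropped: [0, 8) already maps to 1
  map.push_back(SrcMapElem{8, 2});
  assert(map.size() == 2u);
  std::pair<bool, int32_t> hit = map.Find(6);
  assert(hit.first && hit.second == 1);
  assert(map.Find(8).second == 2);
  return 0;
}

Because entries arrive sorted and deduplicated, the old sort/unique pass is no longer needed, which is why the compiled_method.cc hunks below simply drop the Arrange() calls.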
diff --git a/compiler/Android.mk b/compiler/Android.mk index 904f117a5a..eaea031b62 100644 --- a/compiler/Android.mk +++ b/compiler/Android.mk @@ -79,6 +79,13 @@ LIBART_COMPILER_SRC_FILES := \ driver/compiler_driver.cc \ driver/compiler_options.cc \ driver/dex_compilation_unit.cc \ + linker/relative_patcher.cc \ + linker/arm/relative_patcher_arm_base.cc \ + linker/arm/relative_patcher_thumb2.cc \ + linker/arm64/relative_patcher_arm64.cc \ + linker/x86/relative_patcher_x86_base.cc \ + linker/x86/relative_patcher_x86.cc \ + linker/x86_64/relative_patcher_x86_64.cc \ jit/jit_compiler.cc \ jni/quick/arm/calling_convention_arm.cc \ jni/quick/arm64/calling_convention_arm64.cc \ diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc index 1d0aad5425..96d90bb443 100644 --- a/compiler/common_compiler_test.cc +++ b/compiler/common_compiler_test.cc @@ -24,6 +24,7 @@ #include "dex/quick/dex_file_to_method_inliner_map.h" #include "dex/verification_results.h" #include "driver/compiler_driver.h" +#include "driver/compiler_options.h" #include "interpreter/interpreter.h" #include "mirror/art_method.h" #include "mirror/dex_cache.h" diff --git a/compiler/compiled_method.cc b/compiler/compiled_method.cc index 1849e7ef64..03370db6c0 100644 --- a/compiler/compiled_method.cc +++ b/compiler/compiled_method.cc @@ -142,7 +142,6 @@ CompiledMethod::CompiledMethod(CompilerDriver* driver, if (src_mapping_table == nullptr) { src_mapping_table_ = new SwapSrcMap(driver->GetSwapSpaceAllocator()); } else { - src_mapping_table->Arrange(); src_mapping_table_ = new SwapSrcMap(src_mapping_table->begin(), src_mapping_table->end(), driver->GetSwapSpaceAllocator()); } @@ -159,7 +158,7 @@ CompiledMethod::CompiledMethod(CompilerDriver* driver, } else { src_mapping_table_ = src_mapping_table == nullptr ? driver->DeduplicateSrcMappingTable(ArrayRef<SrcMapElem>()) : - driver->DeduplicateSrcMappingTable(ArrayRef<SrcMapElem>(src_mapping_table->Arrange())); + driver->DeduplicateSrcMappingTable(ArrayRef<SrcMapElem>(*src_mapping_table)); mapping_table_ = mapping_table.empty() ? nullptr : driver->DeduplicateMappingTable(mapping_table); vmap_table_ = driver->DeduplicateVMapTable(vmap_table); diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h index d6a07f6226..7497b175fc 100644 --- a/compiler/compiled_method.h +++ b/compiler/compiled_method.h @@ -94,20 +94,12 @@ class SrcMapElem { uint32_t from_; int32_t to_; - explicit operator int64_t() const { - return (static_cast<int64_t>(to_) << 32) | from_; - } - - bool operator<(const SrcMapElem& sme) const { - return int64_t(*this) < int64_t(sme); - } - - bool operator==(const SrcMapElem& sme) const { - return int64_t(*this) == int64_t(sme); - } - - explicit operator uint8_t() const { - return static_cast<uint8_t>(from_ + to_); + // Lexicographical compare. 
+ bool operator<(const SrcMapElem& other) const { + if (from_ != other.from_) { + return from_ < other.from_; + } + return to_ < other.to_; } }; @@ -129,49 +121,33 @@ class SrcMap FINAL : public std::vector<SrcMapElem, Allocator> { SrcMap(InputIt first, InputIt last, const Allocator& alloc) : std::vector<SrcMapElem, Allocator>(first, last, alloc) {} - void SortByFrom() { - std::sort(begin(), end(), [] (const SrcMapElem& lhs, const SrcMapElem& rhs) -> bool { - return lhs.from_ < rhs.from_; - }); - } - - const_iterator FindByTo(int32_t to) const { - return std::lower_bound(begin(), end(), SrcMapElem({0, to})); - } - - SrcMap& Arrange() { + void push_back(const SrcMapElem& elem) { if (!empty()) { - std::sort(begin(), end()); - resize(std::unique(begin(), end()) - begin()); - shrink_to_fit(); + // Check that the addresses are inserted in sorted order. + DCHECK_GE(elem.from_, this->back().from_); + // If two consequitive entries map to the same value, ignore the later. + // E.g. for map {{0, 1}, {4, 1}, {8, 2}}, all values in [0,8) map to 1. + if (elem.to_ == this->back().to_) { + return; + } } - return *this; + std::vector<SrcMapElem, Allocator>::push_back(elem); } - void DeltaFormat(const SrcMapElem& start, uint32_t highest_pc) { - // Convert from abs values to deltas. - if (!empty()) { - SortByFrom(); - - // TODO: one PC can be mapped to several Java src lines. - // do we want such a one-to-many correspondence? - - // get rid of the highest values - size_t i = size() - 1; - for (; i > 0 ; i--) { - if ((*this)[i].from_ < highest_pc) { - break; - } - } - this->resize(i + 1); - - for (i = size(); --i >= 1; ) { - (*this)[i].from_ -= (*this)[i-1].from_; - (*this)[i].to_ -= (*this)[i-1].to_; - } - DCHECK((*this)[0].from_ >= start.from_); - (*this)[0].from_ -= start.from_; - (*this)[0].to_ -= start.to_; + // Returns true and the corresponding "to" value if the mapping is found. + // Oterwise returns false and 0. + std::pair<bool, int32_t> Find(uint32_t from) const { + // Finds first mapping such that lb.from_ >= from. + auto lb = std::lower_bound(begin(), end(), SrcMapElem {from, INT32_MIN}); + if (lb != end() && lb->from_ == from) { + // Found exact match. + return std::make_pair(true, lb->to_); + } else if (lb != begin()) { + // The previous mapping is still in effect. + return std::make_pair(true, (--lb)->to_); + } else { + // Not found because 'from' is smaller than first entry in the map. + return std::make_pair(false, 0); } } }; @@ -185,6 +161,7 @@ enum LinkerPatchType { kLinkerPatchCall, kLinkerPatchCallRelative, // NOTE: Actual patching is instruction_set-dependent. kLinkerPatchType, + kLinkerPatchDexCacheArray, // NOTE: Actual patching is instruction_set-dependent. 
}; class LinkerPatch { @@ -192,28 +169,44 @@ class LinkerPatch { static LinkerPatch MethodPatch(size_t literal_offset, const DexFile* target_dex_file, uint32_t target_method_idx) { - return LinkerPatch(literal_offset, kLinkerPatchMethod, - target_method_idx, target_dex_file); + LinkerPatch patch(literal_offset, kLinkerPatchMethod, target_dex_file); + patch.method_idx_ = target_method_idx; + return patch; } static LinkerPatch CodePatch(size_t literal_offset, const DexFile* target_dex_file, uint32_t target_method_idx) { - return LinkerPatch(literal_offset, kLinkerPatchCall, - target_method_idx, target_dex_file); + LinkerPatch patch(literal_offset, kLinkerPatchCall, target_dex_file); + patch.method_idx_ = target_method_idx; + return patch; } static LinkerPatch RelativeCodePatch(size_t literal_offset, const DexFile* target_dex_file, uint32_t target_method_idx) { - return LinkerPatch(literal_offset, kLinkerPatchCallRelative, - target_method_idx, target_dex_file); + LinkerPatch patch(literal_offset, kLinkerPatchCallRelative, target_dex_file); + patch.method_idx_ = target_method_idx; + return patch; } static LinkerPatch TypePatch(size_t literal_offset, const DexFile* target_dex_file, uint32_t target_type_idx) { - return LinkerPatch(literal_offset, kLinkerPatchType, target_type_idx, target_dex_file); + LinkerPatch patch(literal_offset, kLinkerPatchType, target_dex_file); + patch.type_idx_ = target_type_idx; + return patch; + } + + static LinkerPatch DexCacheArrayPatch(size_t literal_offset, + const DexFile* target_dex_file, + uint32_t pc_insn_offset, + size_t element_offset) { + DCHECK(IsUint<32>(element_offset)); + LinkerPatch patch(literal_offset, kLinkerPatchDexCacheArray, target_dex_file); + patch.pc_insn_offset_ = pc_insn_offset; + patch.element_offset_ = element_offset; + return patch; } LinkerPatch(const LinkerPatch& other) = default; @@ -227,10 +220,14 @@ class LinkerPatch { return patch_type_; } + bool IsPcRelative() const { + return Type() == kLinkerPatchCallRelative || Type() == kLinkerPatchDexCacheArray; + } + MethodReference TargetMethod() const { DCHECK(patch_type_ == kLinkerPatchMethod || patch_type_ == kLinkerPatchCall || patch_type_ == kLinkerPatchCallRelative); - return MethodReference(target_dex_file_, target_idx_); + return MethodReference(target_dex_file_, method_idx_); } const DexFile* TargetTypeDexFile() const { @@ -240,22 +237,52 @@ class LinkerPatch { uint32_t TargetTypeIndex() const { DCHECK(patch_type_ == kLinkerPatchType); - return target_idx_; + return type_idx_; + } + + const DexFile* TargetDexCacheDexFile() const { + DCHECK(patch_type_ == kLinkerPatchDexCacheArray); + return target_dex_file_; + } + + size_t TargetDexCacheElementOffset() const { + DCHECK(patch_type_ == kLinkerPatchDexCacheArray); + return element_offset_; + } + + uint32_t PcInsnOffset() const { + DCHECK(patch_type_ == kLinkerPatchDexCacheArray); + return pc_insn_offset_; } private: - LinkerPatch(size_t literal_offset, LinkerPatchType patch_type, - uint32_t target_idx, const DexFile* target_dex_file) - : literal_offset_(literal_offset), - patch_type_(patch_type), - target_idx_(target_idx), - target_dex_file_(target_dex_file) { + LinkerPatch(size_t literal_offset, LinkerPatchType patch_type, const DexFile* target_dex_file) + : target_dex_file_(target_dex_file), + literal_offset_(literal_offset), + patch_type_(patch_type) { + cmp1_ = 0u; + cmp2_ = 0u; + // The compiler rejects methods that are too big, so the compiled code + // of a single method really shouln't be anywhere close to 16MiB. 
+ DCHECK(IsUint<24>(literal_offset)); } - size_t literal_offset_; - LinkerPatchType patch_type_; - uint32_t target_idx_; // Method index (Call/Method patches) or type index (Type patches). const DexFile* target_dex_file_; + uint32_t literal_offset_ : 24; // Method code size up to 16MiB. + LinkerPatchType patch_type_ : 8; + union { + uint32_t cmp1_; // Used for relational operators. + uint32_t method_idx_; // Method index for Call/Method patches. + uint32_t type_idx_; // Type index for Type patches. + uint32_t element_offset_; // Element offset in the dex cache arrays. + }; + union { + uint32_t cmp2_; // Used for relational operators. + // Literal offset of the insn loading PC (same as literal_offset if it's the same insn, + // may be different if the PC-relative addressing needs multiple insns). + uint32_t pc_insn_offset_; + static_assert(sizeof(pc_insn_offset_) == sizeof(cmp2_), "needed by relational operators"); + }; friend bool operator==(const LinkerPatch& lhs, const LinkerPatch& rhs); friend bool operator<(const LinkerPatch& lhs, const LinkerPatch& rhs); @@ -264,15 +291,17 @@ class LinkerPatch { inline bool operator==(const LinkerPatch& lhs, const LinkerPatch& rhs) { return lhs.literal_offset_ == rhs.literal_offset_ && lhs.patch_type_ == rhs.patch_type_ && - lhs.target_idx_ == rhs.target_idx_ && - lhs.target_dex_file_ == rhs.target_dex_file_; + lhs.target_dex_file_ == rhs.target_dex_file_ && + lhs.cmp1_ == rhs.cmp1_ && + lhs.cmp2_ == rhs.cmp2_; } inline bool operator<(const LinkerPatch& lhs, const LinkerPatch& rhs) { return (lhs.literal_offset_ != rhs.literal_offset_) ? lhs.literal_offset_ < rhs.literal_offset_ : (lhs.patch_type_ != rhs.patch_type_) ? lhs.patch_type_ < rhs.patch_type_ - : (lhs.target_idx_ != rhs.target_idx_) ? lhs.target_idx_ < rhs.target_idx_ - : lhs.target_dex_file_ < rhs.target_dex_file_; + : (lhs.target_dex_file_ != rhs.target_dex_file_) ? lhs.target_dex_file_ < rhs.target_dex_file_ + : (lhs.cmp1_ != rhs.cmp1_) ? lhs.cmp1_ < rhs.cmp1_ + : lhs.cmp2_ < rhs.cmp2_; } class CompiledMethod FINAL : public CompiledCode { @@ -375,7 +404,7 @@ class CompiledMethod FINAL : public CompiledCode { const uint32_t core_spill_mask_; // For quick code, a bit mask describing spilled FPR callee-save registers. const uint32_t fp_spill_mask_; - // For quick code, a set of pairs (PC, Line) mapping from native PC offset to Java line + // For quick code, a set of pairs (PC, DEX) mapping from native PC offset to DEX offset. SwapSrcMap* src_mapping_table_; // For quick code, a uleb128 encoded map from native PC offset to dex PC aswell as dex PC to // native PC offset. Size prefixed. diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h index 39725dee38..0acdd422df 100644 --- a/compiler/dex/compiler_enums.h +++ b/compiler/dex/compiler_enums.h @@ -99,14 +99,16 @@ std::ostream& operator<<(std::ostream& os, const BBType& code); // Shared pseudo opcodes - must be < 0. 
enum LIRPseudoOpcode { - kPseudoExportedPC = -16, - kPseudoSafepointPC = -15, - kPseudoIntrinsicRetry = -14, - kPseudoSuspendTarget = -13, - kPseudoThrowTarget = -12, - kPseudoCaseLabel = -11, - kPseudoMethodEntry = -10, - kPseudoMethodExit = -9, + kPseudoPrologueBegin = -18, + kPseudoPrologueEnd = -17, + kPseudoEpilogueBegin = -16, + kPseudoEpilogueEnd = -15, + kPseudoExportedPC = -14, + kPseudoSafepointPC = -13, + kPseudoIntrinsicRetry = -12, + kPseudoSuspendTarget = -11, + kPseudoThrowTarget = -10, + kPseudoCaseLabel = -9, kPseudoBarrier = -8, kPseudoEntryBlock = -7, kPseudoExitBlock = -6, diff --git a/compiler/dex/mir_field_info.cc b/compiler/dex/mir_field_info.cc index d2079a254d..a9ab3bb0d4 100644 --- a/compiler/dex/mir_field_info.cc +++ b/compiler/dex/mir_field_info.cc @@ -19,6 +19,7 @@ #include <string.h> #include "base/logging.h" +#include "dex/verified_method.h" #include "driver/compiler_driver.h" #include "driver/compiler_driver-inl.h" #include "mirror/class_loader.h" // Only to allow casts in Handle<ClassLoader>. diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc index 3103f96e4e..58f12c94e4 100644 --- a/compiler/dex/mir_graph.cc +++ b/compiler/dex/mir_graph.cc @@ -688,7 +688,7 @@ BasicBlock* MIRGraph::ProcessCanThrow(BasicBlock* cur_block, MIR* insn, DexOffse /* Parse a Dex method and insert it into the MIRGraph at the current insert point. */ void MIRGraph::InlineMethod(const DexFile::CodeItem* code_item, uint32_t access_flags, - InvokeType invoke_type, uint16_t class_def_idx, + InvokeType invoke_type ATTRIBUTE_UNUSED, uint16_t class_def_idx, uint32_t method_idx, jobject class_loader, const DexFile& dex_file) { current_code_item_ = code_item; method_stack_.push_back(std::make_pair(current_method_, current_offset_)); @@ -726,13 +726,6 @@ void MIRGraph::InlineMethod(const DexFile::CodeItem* code_item, uint32_t access_ null_block->hidden = true; entry_block_ = CreateNewBB(kEntryBlock); exit_block_ = CreateNewBB(kExitBlock); - // TODO: deprecate all "cu->" fields; move what's left to wherever CompilationUnit is allocated. - cu_->dex_file = &dex_file; - cu_->class_def_idx = class_def_idx; - cu_->method_idx = method_idx; - cu_->access_flags = access_flags; - cu_->invoke_type = invoke_type; - cu_->shorty = dex_file.GetMethodShorty(dex_file.GetMethodId(method_idx)); } else { UNIMPLEMENTED(FATAL) << "Nested inlining not implemented."; /* diff --git a/compiler/dex/mir_method_info.cc b/compiler/dex/mir_method_info.cc index 34fb1bf0e0..831ad42682 100644 --- a/compiler/dex/mir_method_info.cc +++ b/compiler/dex/mir_method_info.cc @@ -16,9 +16,11 @@ # include "mir_method_info.h" +#include "dex/verified_method.h" #include "driver/compiler_driver.h" #include "driver/dex_compilation_unit.h" #include "driver/compiler_driver-inl.h" +#include "driver/compiler_options.h" #include "mirror/class_loader.h" // Only to allow casts in Handle<ClassLoader>. #include "mirror/dex_cache.h" // Only to allow casts in Handle<DexCache>. 
#include "scoped_thread_state_change.h" diff --git a/compiler/dex/quick/arm/assemble_arm.cc b/compiler/dex/quick/arm/assemble_arm.cc index 3e69878846..c5ac4c1508 100644 --- a/compiler/dex/quick/arm/assemble_arm.cc +++ b/compiler/dex/quick/arm/assemble_arm.cc @@ -1083,7 +1083,9 @@ void ArmMir2Lir::InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir) { #define PADDING_MOV_R5_R5 0x1C2D uint8_t* ArmMir2Lir::EncodeLIRs(uint8_t* write_pos, LIR* lir) { + uint8_t* const write_buffer = write_pos; for (; lir != NULL; lir = NEXT_LIR(lir)) { + lir->offset = (write_pos - write_buffer); if (!lir->flags.is_nop) { int opcode = lir->opcode; if (IsPseudoLirOp(opcode)) { diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc index d46c25a8da..e6158c3200 100644 --- a/compiler/dex/quick/arm/call_arm.cc +++ b/compiler/dex/quick/arm/call_arm.cc @@ -23,6 +23,7 @@ #include "dex/mir_graph.h" #include "dex/quick/mir_to_lir-inl.h" #include "driver/compiler_driver.h" +#include "driver/compiler_options.h" #include "gc/accounting/card_table.h" #include "mirror/art_method.h" #include "mirror/object_array-inl.h" @@ -371,7 +372,6 @@ void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { * a leaf *and* our frame size < fudge factor. */ bool skip_overflow_check = mir_graph_->MethodIsLeaf() && !FrameNeedsStackCheck(frame_size_, kArm); - NewLIR0(kPseudoMethodEntry); const size_t kStackOverflowReservedUsableBytes = GetStackOverflowReservedBytes(kArm); bool large_frame = (static_cast<size_t>(frame_size_) > kStackOverflowReservedUsableBytes); bool generate_explicit_stack_overflow_check = large_frame || @@ -506,7 +506,6 @@ void ArmMir2Lir::GenExitSequence() { LockTemp(rs_r0); LockTemp(rs_r1); - NewLIR0(kPseudoMethodExit); OpRegImm(kOpAdd, rs_rARM_SP, frame_size_ - (spill_count * 4)); /* Need to restore any FP callee saves? */ if (num_fp_spills_) { diff --git a/compiler/dex/quick/arm64/arm64_lir.h b/compiler/dex/quick/arm64/arm64_lir.h index d15412a1bd..f6fa9389d0 100644 --- a/compiler/dex/quick/arm64/arm64_lir.h +++ b/compiler/dex/quick/arm64/arm64_lir.h @@ -236,6 +236,7 @@ enum A64Opcode { kA64Add4rrro, // add [00001011000] rm[20-16] imm_6[15-10] rn[9-5] rd[4-0]. kA64Add4RRre, // add [00001011001] rm[20-16] option[15-13] imm_3[12-10] rn[9-5] rd[4-0]. kA64Adr2xd, // adr [0] immlo[30-29] [10000] immhi[23-5] rd[4-0]. + kA64Adrp2xd, // adrp [1] immlo[30-29] [10000] immhi[23-5] rd[4-0]. kA64And3Rrl, // and [00010010] N[22] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0]. kA64And4rrro, // and [00001010] shift[23-22] [N=0] rm[20-16] imm_6[15-10] rn[9-5] rd[4-0]. kA64Asr3rrd, // asr [0001001100] immr[21-16] imms[15-10] rn[9-5] rd[4-0]. 
diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc index 329bb1e770..2f1ae66bfc 100644 --- a/compiler/dex/quick/arm64/assemble_arm64.cc +++ b/compiler/dex/quick/arm64/assemble_arm64.cc @@ -131,6 +131,10 @@ const A64EncodingMap Arm64Mir2Lir::EncodingMap[kA64Last] = { kFmtRegX, 4, 0, kFmtImm21, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | NEEDS_FIXUP, "adr", "!0x, #!1d", kFixupAdr), + ENCODING_MAP(kA64Adrp2xd, NO_VARIANTS(0x90000000), + kFmtRegX, 4, 0, kFmtImm21, -1, -1, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | NEEDS_FIXUP, + "adrp", "!0x, #!1d", kFixupLabel), ENCODING_MAP(WIDE(kA64And3Rrl), SF_VARIANTS(0x12000000), kFmtRegROrSp, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 22, 10, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, @@ -682,7 +686,9 @@ void Arm64Mir2Lir::InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir) #define PADDING_NOP (UINT32_C(0xd503201f)) uint8_t* Arm64Mir2Lir::EncodeLIRs(uint8_t* write_pos, LIR* lir) { + uint8_t* const write_buffer = write_pos; for (; lir != nullptr; lir = NEXT_LIR(lir)) { + lir->offset = (write_pos - write_buffer); bool opcode_is_wide = IS_WIDE(lir->opcode); A64Opcode opcode = UNWIDE(lir->opcode); diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc index 823cb60d97..6b47bba884 100644 --- a/compiler/dex/quick/arm64/call_arm64.cc +++ b/compiler/dex/quick/arm64/call_arm64.cc @@ -23,10 +23,12 @@ #include "dex/mir_graph.h" #include "dex/quick/mir_to_lir-inl.h" #include "driver/compiler_driver.h" +#include "driver/compiler_options.h" #include "gc/accounting/card_table.h" #include "entrypoints/quick/quick_entrypoints.h" #include "mirror/art_method.h" #include "mirror/object_array-inl.h" +#include "utils/dex_cache_arrays_layout-inl.h" namespace art { @@ -310,8 +312,6 @@ void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) bool skip_overflow_check = mir_graph_->MethodIsLeaf() && !FrameNeedsStackCheck(frame_size_, kArm64); - NewLIR0(kPseudoMethodEntry); - const size_t kStackOverflowReservedUsableBytes = GetStackOverflowReservedBytes(kArm64); const bool large_frame = static_cast<size_t>(frame_size_) > kStackOverflowReservedUsableBytes; bool generate_explicit_stack_overflow_check = large_frame || @@ -399,9 +399,6 @@ void Arm64Mir2Lir::GenExitSequence() { */ LockTemp(rs_x0); LockTemp(rs_x1); - - NewLIR0(kPseudoMethodExit); - UnspillRegs(rs_sp, core_spill_mask_, fp_spill_mask_, frame_size_); // Finally return. @@ -438,13 +435,13 @@ static bool Arm64UseRelativeCall(CompilationUnit* cu, const MethodReference& tar * Bit of a hack here - in the absence of a real scheduling pass, * emit the next instruction in static & direct invoke sequences. 
*/ -static int Arm64NextSDCallInsn(CompilationUnit* cu, CallInfo* info, - int state, const MethodReference& target_method, - uint32_t unused_idx, - uintptr_t direct_code, uintptr_t direct_method, - InvokeType type) { +int Arm64Mir2Lir::Arm64NextSDCallInsn(CompilationUnit* cu, CallInfo* info, + int state, const MethodReference& target_method, + uint32_t unused_idx, + uintptr_t direct_code, uintptr_t direct_method, + InvokeType type) { UNUSED(info, unused_idx); - Mir2Lir* cg = static_cast<Mir2Lir*>(cu->cg.get()); + Arm64Mir2Lir* cg = static_cast<Arm64Mir2Lir*>(cu->cg.get()); if (direct_code != 0 && direct_method != 0) { switch (state) { case 0: // Get the current Method* [sets kArg0] @@ -465,17 +462,24 @@ static int Arm64NextSDCallInsn(CompilationUnit* cu, CallInfo* info, return -1; } } else { + bool use_pc_rel = cg->CanUseOpPcRelDexCacheArrayLoad(); RegStorage arg0_ref = cg->TargetReg(kArg0, kRef); switch (state) { case 0: // Get the current Method* [sets kArg0] // TUNING: we can save a reg copy if Method* has been promoted. - cg->LoadCurrMethodDirect(arg0_ref); - break; + if (!use_pc_rel) { + cg->LoadCurrMethodDirect(arg0_ref); + break; + } + ++state; + FALLTHROUGH_INTENDED; case 1: // Get method->dex_cache_resolved_methods_ - cg->LoadRefDisp(arg0_ref, - mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value(), - arg0_ref, - kNotVolatile); + if (!use_pc_rel) { + cg->LoadRefDisp(arg0_ref, + mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value(), + arg0_ref, + kNotVolatile); + } // Set up direct code if known. if (direct_code != 0) { if (direct_code != static_cast<uintptr_t>(-1)) { @@ -487,14 +491,23 @@ static int Arm64NextSDCallInsn(CompilationUnit* cu, CallInfo* info, cg->LoadCodeAddress(target_method, type, kInvokeTgt); } } - break; + if (!use_pc_rel || direct_code != 0) { + break; + } + ++state; + FALLTHROUGH_INTENDED; case 2: // Grab target method* CHECK_EQ(cu->dex_file, target_method.dex_file); - cg->LoadRefDisp(arg0_ref, - mirror::ObjectArray<mirror::Object>::OffsetOfElement( - target_method.dex_method_index).Int32Value(), - arg0_ref, - kNotVolatile); + if (!use_pc_rel) { + cg->LoadRefDisp(arg0_ref, + mirror::ObjectArray<mirror::Object>::OffsetOfElement( + target_method.dex_method_index).Int32Value(), + arg0_ref, + kNotVolatile); + } else { + size_t offset = cg->dex_cache_arrays_layout_.MethodOffset(target_method.dex_method_index); + cg->OpPcRelDexCacheArrayLoad(cu->dex_file, offset, arg0_ref); + } break; case 3: // Grab the code from the method* if (direct_code == 0) { diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h index 54fd46de0e..8184f02287 100644 --- a/compiler/dex/quick/arm64/codegen_arm64.h +++ b/compiler/dex/quick/arm64/codegen_arm64.h @@ -78,6 +78,9 @@ class Arm64Mir2Lir FINAL : public Mir2Lir { /// @copydoc Mir2Lir::UnconditionallyMarkGCCard(RegStorage) void UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) OVERRIDE; + bool CanUseOpPcRelDexCacheArrayLoad() const OVERRIDE; + void OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset, RegStorage r_dest) OVERRIDE; + LIR* OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg, RegStorage base_reg, int offset, int check_value, LIR* target, LIR** compare) OVERRIDE; @@ -393,9 +396,16 @@ class Arm64Mir2Lir FINAL : public Mir2Lir { void GenDivRemLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, bool is_div, int flags); + static int Arm64NextSDCallInsn(CompilationUnit* cu, CallInfo* info, + int state, const 
MethodReference& target_method, + uint32_t unused_idx, + uintptr_t direct_code, uintptr_t direct_method, + InvokeType type); + static const A64EncodingMap EncodingMap[kA64Last]; ArenaVector<LIR*> call_method_insns_; + ArenaVector<LIR*> dex_cache_access_insns_; int GenDalvikArgsBulkCopy(CallInfo* info, int first, int count) OVERRIDE; }; diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc index 2372ccc527..a9d9f3d463 100644 --- a/compiler/dex/quick/arm64/int_arm64.cc +++ b/compiler/dex/quick/arm64/int_arm64.cc @@ -943,6 +943,23 @@ void Arm64Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) { lir->target = target; } +bool Arm64Mir2Lir::CanUseOpPcRelDexCacheArrayLoad() const { + return dex_cache_arrays_layout_.Valid(); +} + +void Arm64Mir2Lir::OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset, + RegStorage r_dest) { + LIR* adrp = NewLIR2(kA64Adrp2xd, r_dest.GetReg(), 0); + adrp->operands[2] = WrapPointer(dex_file); + adrp->operands[3] = offset; + adrp->operands[4] = WrapPointer(adrp); + dex_cache_access_insns_.push_back(adrp); + LIR* ldr = LoadBaseDisp(r_dest, 0, r_dest, kReference, kNotVolatile); + ldr->operands[4] = adrp->operands[4]; + ldr->flags.fixup = kFixupLabel; + dex_cache_access_insns_.push_back(ldr); +} + LIR* Arm64Mir2Lir::OpVldm(RegStorage r_base, int count) { UNUSED(r_base, count); LOG(FATAL) << "Unexpected use of OpVldm for Arm64"; diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc index 09a34bf022..c5c0dc5447 100644 --- a/compiler/dex/quick/arm64/target_arm64.cc +++ b/compiler/dex/quick/arm64/target_arm64.cc @@ -606,7 +606,8 @@ RegisterClass Arm64Mir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volat Arm64Mir2Lir::Arm64Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena) : Mir2Lir(cu, mir_graph, arena), - call_method_insns_(arena->Adapter()) { + call_method_insns_(arena->Adapter()), + dex_cache_access_insns_(arena->Adapter()) { // Sanity check - make sure encoding map lines up. for (int i = 0; i < kA64Last; i++) { DCHECK_EQ(UNWIDE(Arm64Mir2Lir::EncodingMap[i].opcode), i) @@ -846,8 +847,9 @@ RegStorage Arm64Mir2Lir::InToRegStorageArm64Mapper::GetNextReg(ShortyArg arg) { } void Arm64Mir2Lir::InstallLiteralPools() { + patches_.reserve(call_method_insns_.size() + dex_cache_access_insns_.size()); + // PC-relative calls to methods. - patches_.reserve(call_method_insns_.size()); for (LIR* p : call_method_insns_) { DCHECK_EQ(p->opcode, kA64Bl1t); uint32_t target_method_idx = p->operands[1]; @@ -856,6 +858,18 @@ void Arm64Mir2Lir::InstallLiteralPools() { target_dex_file, target_method_idx)); } + // PC-relative references to dex cache arrays. + for (LIR* p : dex_cache_access_insns_) { + DCHECK(p->opcode == kA64Adrp2xd || p->opcode == kA64Ldr3rXD); + const LIR* adrp = UnwrapPointer<LIR>(p->operands[4]); + DCHECK_EQ(adrp->opcode, kA64Adrp2xd); + const DexFile* dex_file = UnwrapPointer<DexFile>(adrp->operands[2]); + uint32_t offset = adrp->operands[3]; + DCHECK(!p->flags.is_nop); + DCHECK(!adrp->flags.is_nop); + patches_.push_back(LinkerPatch::DexCacheArrayPatch(p->offset, dex_file, adrp->offset, offset)); + } + // And do the normal processing. 
Mir2Lir::InstallLiteralPools(); } diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc index df72830801..483a5d06cc 100644 --- a/compiler/dex/quick/codegen_util.cc +++ b/compiler/dex/quick/codegen_util.cc @@ -29,6 +29,7 @@ #include "dex/quick/dex_file_to_method_inliner_map.h" #include "dex/verification_results.h" #include "dex/verified_method.h" +#include "utils/dex_cache_arrays_layout-inl.h" #include "verifier/dex_gc_map.h" #include "verifier/method_verifier.h" #include "vmap_table.h" @@ -202,12 +203,17 @@ void Mir2Lir::DumpLIRInsn(LIR* lir, unsigned char* base_addr) { /* Handle pseudo-ops individually, and all regular insns as a group */ switch (lir->opcode) { - case kPseudoMethodEntry: - LOG(INFO) << "-------- method entry " - << PrettyMethod(cu_->method_idx, *cu_->dex_file); + case kPseudoPrologueBegin: + LOG(INFO) << "-------- PrologueBegin"; break; - case kPseudoMethodExit: - LOG(INFO) << "-------- Method_Exit"; + case kPseudoPrologueEnd: + LOG(INFO) << "-------- PrologueEnd"; + break; + case kPseudoEpilogueBegin: + LOG(INFO) << "-------- EpilogueBegin"; + break; + case kPseudoEpilogueEnd: + LOG(INFO) << "-------- EpilogueEnd"; break; case kPseudoBarrier: LOG(INFO) << "-------- BARRIER"; @@ -266,8 +272,9 @@ void Mir2Lir::DumpLIRInsn(LIR* lir, unsigned char* base_addr) { lir, base_addr)); std::string op_operands(BuildInsnString(GetTargetInstFmt(lir->opcode), lir, base_addr)); - LOG(INFO) << StringPrintf("%5p: %-9s%s%s", + LOG(INFO) << StringPrintf("%5p|0x%02x: %-9s%s%s", base_addr + offset, + lir->dalvik_offset, op_name.c_str(), op_operands.c_str(), lir->flags.is_nop ? "(nop)" : ""); } @@ -712,14 +719,17 @@ void Mir2Lir::CreateMappingTables() { DCHECK_EQ(static_cast<size_t>(write_pos - &encoded_mapping_table_[0]), hdr_data_size); uint8_t* write_pos2 = write_pos + pc2dex_data_size; + bool is_in_prologue_or_epilogue = false; pc2dex_offset = 0u; pc2dex_dalvik_offset = 0u; dex2pc_offset = 0u; dex2pc_dalvik_offset = 0u; for (LIR* tgt_lir = first_lir_insn_; tgt_lir != nullptr; tgt_lir = NEXT_LIR(tgt_lir)) { - if (generate_src_map && !tgt_lir->flags.is_nop) { - src_mapping_table_.push_back(SrcMapElem({tgt_lir->offset, - static_cast<int32_t>(tgt_lir->dalvik_offset)})); + if (generate_src_map && !tgt_lir->flags.is_nop && tgt_lir->opcode >= 0) { + if (!is_in_prologue_or_epilogue) { + src_mapping_table_.push_back(SrcMapElem({tgt_lir->offset, + static_cast<int32_t>(tgt_lir->dalvik_offset)})); + } } if (!tgt_lir->flags.is_nop && (tgt_lir->opcode == kPseudoSafepointPC)) { DCHECK(pc2dex_offset <= tgt_lir->offset); @@ -737,6 +747,12 @@ void Mir2Lir::CreateMappingTables() { dex2pc_offset = tgt_lir->offset; dex2pc_dalvik_offset = tgt_lir->dalvik_offset; } + if (tgt_lir->opcode == kPseudoPrologueBegin || tgt_lir->opcode == kPseudoEpilogueBegin) { + is_in_prologue_or_epilogue = true; + } + if (tgt_lir->opcode == kPseudoPrologueEnd || tgt_lir->opcode == kPseudoEpilogueEnd) { + is_in_prologue_or_epilogue = false; + } } DCHECK_EQ(static_cast<size_t>(write_pos - &encoded_mapping_table_[0]), hdr_data_size + pc2dex_data_size); @@ -1053,6 +1069,7 @@ Mir2Lir::Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena mem_ref_type_(ResourceMask::kHeapRef), mask_cache_(arena), safepoints_(arena->Adapter()), + dex_cache_arrays_layout_(cu->compiler_driver->GetDexCacheArraysLayout(cu->dex_file)), in_to_reg_storage_mapping_(arena) { switch_tables_.reserve(4); fill_array_data_.reserve(4); @@ -1304,6 +1321,17 @@ void Mir2Lir::LoadClassType(const DexFile& dex_file, uint32_t 
type_idx, OpPcRelLoad(TargetReg(symbolic_reg, kRef), data_target); } +bool Mir2Lir::CanUseOpPcRelDexCacheArrayLoad() const { + return false; +} + +void Mir2Lir::OpPcRelDexCacheArrayLoad(const DexFile* dex_file ATTRIBUTE_UNUSED, + int offset ATTRIBUTE_UNUSED, + RegStorage r_dest ATTRIBUTE_UNUSED) { + LOG(FATAL) << "No generic implementation."; + UNREACHABLE(); +} + std::vector<uint8_t>* Mir2Lir::ReturnFrameDescriptionEntry() { // Default case is to do nothing. return nullptr; diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc index 2bcaaca822..1813e0939e 100644 --- a/compiler/dex/quick/gen_common.cc +++ b/compiler/dex/quick/gen_common.cc @@ -24,12 +24,14 @@ #include "dex/mir_graph.h" #include "dex/quick/arm/arm_lir.h" #include "driver/compiler_driver.h" +#include "driver/compiler_options.h" #include "entrypoints/quick/quick_entrypoints.h" #include "mirror/array.h" #include "mirror/object_array-inl.h" #include "mirror/object-inl.h" #include "mirror/object_reference.h" #include "utils.h" +#include "utils/dex_cache_arrays_layout-inl.h" #include "verifier/method_verifier.h" namespace art { @@ -56,6 +58,42 @@ ALWAYS_INLINE static inline bool ForceSlowTypePath(CompilationUnit* cu) { return (cu->enable_debug & (1 << kDebugSlowTypePath)) != 0; } +void Mir2Lir::GenIfNullUseHelperImmMethod( + RegStorage r_result, QuickEntrypointEnum trampoline, int imm, RegStorage r_method) { + class CallHelperImmMethodSlowPath : public LIRSlowPath { + public: + CallHelperImmMethodSlowPath(Mir2Lir* m2l, LIR* fromfast, LIR* cont, + QuickEntrypointEnum trampoline_in, int imm_in, + RegStorage r_method_in, RegStorage r_result_in) + : LIRSlowPath(m2l, fromfast, cont), trampoline_(trampoline_in), + imm_(imm_in), r_method_(r_method_in), r_result_(r_result_in) { + } + + void Compile() { + GenerateTargetLabel(); + if (r_method_.Valid()) { + m2l_->CallRuntimeHelperImmReg(trampoline_, imm_, r_method_, true); + } else { + m2l_->CallRuntimeHelperImmMethod(trampoline_, imm_, true); + } + m2l_->OpRegCopy(r_result_, m2l_->TargetReg(kRet0, kRef)); + m2l_->OpUnconditionalBranch(cont_); + } + + private: + QuickEntrypointEnum trampoline_; + const int imm_; + const RegStorage r_method_; + const RegStorage r_result_; + }; + + LIR* branch = OpCmpImmBranch(kCondEq, r_result, 0, NULL); + LIR* cont = NewLIR0(kPseudoTargetLabel); + + AddSlowPath(new (arena_) CallHelperImmMethodSlowPath(this, branch, cont, trampoline, imm, + r_method, r_result)); +} + /* * Generate a kPseudoBarrier marker to indicate the boundary of special * blocks. @@ -1022,64 +1060,41 @@ void Mir2Lir::GenArrayObjPut(int opt_flags, RegLocation rl_array, RegLocation rl } void Mir2Lir::GenConstClass(uint32_t type_idx, RegLocation rl_dest) { - RegLocation rl_method = LoadCurrMethod(); - CheckRegLocation(rl_method); - RegStorage res_reg = AllocTempRef(); + RegLocation rl_result; if (!cu_->compiler_driver->CanAccessTypeWithoutChecks(cu_->method_idx, *cu_->dex_file, type_idx)) { // Call out to helper which resolves type and verifies access. // Resolved type returned in kRet0. 
- CallRuntimeHelperImmReg(kQuickInitializeTypeAndVerifyAccess, type_idx, rl_method.reg, true); - RegLocation rl_result = GetReturn(kRefReg); - StoreValue(rl_dest, rl_result); + CallRuntimeHelperImmMethod(kQuickInitializeTypeAndVerifyAccess, type_idx, true); + rl_result = GetReturn(kRefReg); } else { - RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true); - // We're don't need access checks, load type from dex cache - int32_t dex_cache_offset = - mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(); - LoadRefDisp(rl_method.reg, dex_cache_offset, res_reg, kNotVolatile); - int32_t offset_of_type = ClassArray::OffsetOfElement(type_idx).Int32Value(); - LoadRefDisp(res_reg, offset_of_type, rl_result.reg, kNotVolatile); + rl_result = EvalLoc(rl_dest, kRefReg, true); + // We don't need access checks, load type from dex cache + RegStorage r_method = RegStorage::InvalidReg(); + if (CanUseOpPcRelDexCacheArrayLoad()) { + size_t offset = dex_cache_arrays_layout_.TypeOffset(type_idx); + OpPcRelDexCacheArrayLoad(cu_->dex_file, offset, rl_result.reg); + } else { + RegLocation rl_method = LoadCurrMethod(); + CheckRegLocation(rl_method); + r_method = rl_method.reg; + int32_t dex_cache_offset = + mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(); + RegStorage res_reg = AllocTempRef(); + LoadRefDisp(r_method, dex_cache_offset, res_reg, kNotVolatile); + int32_t offset_of_type = ClassArray::OffsetOfElement(type_idx).Int32Value(); + LoadRefDisp(res_reg, offset_of_type, rl_result.reg, kNotVolatile); + FreeTemp(res_reg); + } if (!cu_->compiler_driver->CanAssumeTypeIsPresentInDexCache(*cu_->dex_file, type_idx) || ForceSlowTypePath(cu_)) { // Slow path, at runtime test if type is null and if so initialize FlushAllRegs(); - LIR* branch = OpCmpImmBranch(kCondEq, rl_result.reg, 0, NULL); - LIR* cont = NewLIR0(kPseudoTargetLabel); - - // Object to generate the slow path for class resolution. - class SlowPath : public LIRSlowPath { - public: - SlowPath(Mir2Lir* m2l, LIR* fromfast, LIR* cont_in, const int type_idx_in, - const RegLocation& rl_method_in, const RegLocation& rl_result_in) - : LIRSlowPath(m2l, fromfast, cont_in), - type_idx_(type_idx_in), rl_method_(rl_method_in), rl_result_(rl_result_in) { - } - - void Compile() { - GenerateTargetLabel(); - - m2l_->CallRuntimeHelperImmReg(kQuickInitializeType, type_idx_, rl_method_.reg, true); - m2l_->OpRegCopy(rl_result_.reg, m2l_->TargetReg(kRet0, kRef)); - m2l_->OpUnconditionalBranch(cont_); - } - - private: - const int type_idx_; - const RegLocation rl_method_; - const RegLocation rl_result_; - }; - - // Add to list for future. - AddSlowPath(new (arena_) SlowPath(this, branch, cont, type_idx, rl_method, rl_result)); - - StoreValue(rl_dest, rl_result); - } else { - // Fast path, we're done - just store result - StoreValue(rl_dest, rl_result); + GenIfNullUseHelperImmMethod(rl_result.reg, kQuickInitializeType, type_idx, r_method); } } + StoreValue(rl_dest, rl_result); } void Mir2Lir::GenConstString(uint32_t string_idx, RegLocation rl_dest) { @@ -1092,64 +1107,42 @@ void Mir2Lir::GenConstString(uint32_t string_idx, RegLocation rl_dest) { FlushAllRegs(); LockCallTemps(); // Using explicit registers - // If the Method* is already in a register, we can save a copy. - RegLocation rl_method = mir_graph_->GetMethodLoc(); - RegStorage r_method; - if (rl_method.location == kLocPhysReg) { - // A temp would conflict with register use below. 
- DCHECK(!IsTemp(rl_method.reg)); - r_method = rl_method.reg; - } else { - r_method = TargetReg(kArg2, kRef); - LoadCurrMethodDirect(r_method); - } - // Method to declaring class. - LoadRefDisp(r_method, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), - TargetReg(kArg0, kRef), kNotVolatile); - // Declaring class to dex cache strings. - LoadRefDisp(TargetReg(kArg0, kRef), mirror::Class::DexCacheStringsOffset().Int32Value(), - TargetReg(kArg0, kRef), kNotVolatile); - // Might call out to helper, which will return resolved string in kRet0 - LoadRefDisp(TargetReg(kArg0, kRef), offset_of_string, TargetReg(kRet0, kRef), kNotVolatile); - LIR* fromfast = OpCmpImmBranch(kCondEq, TargetReg(kRet0, kRef), 0, NULL); - LIR* cont = NewLIR0(kPseudoTargetLabel); - - { - // Object to generate the slow path for string resolution. - class SlowPath : public LIRSlowPath { - public: - SlowPath(Mir2Lir* m2l, LIR* fromfast_in, LIR* cont_in, RegStorage r_method_in, - int32_t string_idx_in) - : LIRSlowPath(m2l, fromfast_in, cont_in), - r_method_(r_method_in), string_idx_(string_idx_in) { - } - - void Compile() { - GenerateTargetLabel(); - m2l_->CallRuntimeHelperImmReg(kQuickResolveString, string_idx_, r_method_, true); - m2l_->OpUnconditionalBranch(cont_); - } - - private: - const RegStorage r_method_; - const int32_t string_idx_; - }; - - AddSlowPath(new (arena_) SlowPath(this, fromfast, cont, r_method, string_idx)); + RegStorage ret0 = TargetReg(kRet0, kRef); + RegStorage r_method = RegStorage::InvalidReg(); + if (CanUseOpPcRelDexCacheArrayLoad()) { + size_t offset = dex_cache_arrays_layout_.StringOffset(string_idx); + OpPcRelDexCacheArrayLoad(cu_->dex_file, offset, ret0); + } else { + r_method = LoadCurrMethodWithHint(TargetReg(kArg1, kRef)); + // Method to declaring class. + RegStorage arg0 = TargetReg(kArg0, kRef); + LoadRefDisp(r_method, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), + arg0, kNotVolatile); + // Declaring class to dex cache strings. 
+ LoadRefDisp(arg0, mirror::Class::DexCacheStringsOffset().Int32Value(), arg0, kNotVolatile); + + LoadRefDisp(arg0, offset_of_string, ret0, kNotVolatile); } + GenIfNullUseHelperImmMethod(ret0, kQuickResolveString, string_idx, r_method); GenBarrier(); StoreValue(rl_dest, GetReturn(kRefReg)); } else { - RegLocation rl_method = LoadCurrMethod(); - RegStorage res_reg = AllocTempRef(); RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true); - LoadRefDisp(rl_method.reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), res_reg, - kNotVolatile); - LoadRefDisp(res_reg, mirror::Class::DexCacheStringsOffset().Int32Value(), res_reg, - kNotVolatile); - LoadRefDisp(res_reg, offset_of_string, rl_result.reg, kNotVolatile); + if (CanUseOpPcRelDexCacheArrayLoad()) { + size_t offset = dex_cache_arrays_layout_.StringOffset(string_idx); + OpPcRelDexCacheArrayLoad(cu_->dex_file, offset, rl_result.reg); + } else { + RegLocation rl_method = LoadCurrMethod(); + RegStorage res_reg = AllocTempRef(); + LoadRefDisp(rl_method.reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), res_reg, + kNotVolatile); + LoadRefDisp(res_reg, mirror::Class::DexCacheStringsOffset().Int32Value(), res_reg, + kNotVolatile); + LoadRefDisp(res_reg, offset_of_string, rl_result.reg, kNotVolatile); + FreeTemp(res_reg); + } StoreValue(rl_dest, rl_result); } } @@ -1224,14 +1217,20 @@ void Mir2Lir::GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx, Re RegStorage check_class = AllocTypedTemp(false, kRefReg); RegStorage object_class = AllocTypedTemp(false, kRefReg); - LoadCurrMethodDirect(check_class); if (use_declaring_class) { - LoadRefDisp(check_class, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), check_class, + RegStorage r_method = LoadCurrMethodWithHint(check_class); + LoadRefDisp(r_method, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), check_class, + kNotVolatile); + LoadRefDisp(object.reg, mirror::Object::ClassOffset().Int32Value(), object_class, kNotVolatile); + } else if (CanUseOpPcRelDexCacheArrayLoad()) { + size_t offset = dex_cache_arrays_layout_.TypeOffset(type_idx); + OpPcRelDexCacheArrayLoad(cu_->dex_file, offset, check_class); LoadRefDisp(object.reg, mirror::Object::ClassOffset().Int32Value(), object_class, kNotVolatile); } else { - LoadRefDisp(check_class, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), + RegStorage r_method = LoadCurrMethodWithHint(check_class); + LoadRefDisp(r_method, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), check_class, kNotVolatile); LoadRefDisp(object.reg, mirror::Object::ClassOffset().Int32Value(), object_class, kNotVolatile); @@ -1267,20 +1266,19 @@ void Mir2Lir::GenInstanceofCallingHelper(bool needs_access_check, bool type_know FlushAllRegs(); // May generate a call - use explicit registers LockCallTemps(); - RegStorage method_reg = TargetReg(kArg1, kRef); - LoadCurrMethodDirect(method_reg); // kArg1 <= current Method* RegStorage class_reg = TargetReg(kArg2, kRef); // kArg2 will hold the Class* RegStorage ref_reg = TargetReg(kArg0, kRef); // kArg0 will hold the ref. 
RegStorage ret_reg = GetReturn(kRefReg).reg; if (needs_access_check) { // Check we have access to type_idx and if not throw IllegalAccessError, // returns Class* in kArg0 - CallRuntimeHelperImm(kQuickInitializeTypeAndVerifyAccess, type_idx, true); + CallRuntimeHelperImmMethod(kQuickInitializeTypeAndVerifyAccess, type_idx, true); OpRegCopy(class_reg, ret_reg); // Align usage with fast path LoadValueDirectFixed(rl_src, ref_reg); // kArg0 <= ref } else if (use_declaring_class) { + RegStorage r_method = LoadCurrMethodWithHint(TargetReg(kArg1, kRef)); LoadValueDirectFixed(rl_src, ref_reg); // kArg0 <= ref - LoadRefDisp(method_reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), + LoadRefDisp(r_method, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), class_reg, kNotVolatile); } else { if (can_assume_type_is_in_dex_cache) { @@ -1288,42 +1286,23 @@ void Mir2Lir::GenInstanceofCallingHelper(bool needs_access_check, bool type_know LoadValueDirectFixed(rl_src, ref_reg); // kArg0 <= ref } - // Load dex cache entry into class_reg (kArg2) - LoadRefDisp(method_reg, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), - class_reg, kNotVolatile); - int32_t offset_of_type = ClassArray::OffsetOfElement(type_idx).Int32Value(); - LoadRefDisp(class_reg, offset_of_type, class_reg, kNotVolatile); + RegStorage r_method = RegStorage::InvalidReg(); + if (CanUseOpPcRelDexCacheArrayLoad()) { + size_t offset = dex_cache_arrays_layout_.TypeOffset(type_idx); + OpPcRelDexCacheArrayLoad(cu_->dex_file, offset, class_reg); + } else { + r_method = LoadCurrMethodWithHint(TargetReg(kArg1, kRef)); + // Load dex cache entry into class_reg (kArg2) + LoadRefDisp(r_method, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), + class_reg, kNotVolatile); + int32_t offset_of_type = ClassArray::OffsetOfElement(type_idx).Int32Value(); + LoadRefDisp(class_reg, offset_of_type, class_reg, kNotVolatile); + } if (!can_assume_type_is_in_dex_cache) { - LIR* slow_path_branch = OpCmpImmBranch(kCondEq, class_reg, 0, NULL); - LIR* slow_path_target = NewLIR0(kPseudoTargetLabel); + GenIfNullUseHelperImmMethod(class_reg, kQuickInitializeType, type_idx, r_method); // Should load value here. LoadValueDirectFixed(rl_src, ref_reg); // kArg0 <= ref - - class InitTypeSlowPath : public Mir2Lir::LIRSlowPath { - public: - InitTypeSlowPath(Mir2Lir* m2l, LIR* branch, LIR* cont, uint32_t type_idx_in, - RegLocation rl_src_in) - : LIRSlowPath(m2l, branch, cont), type_idx_(type_idx_in), - rl_src_(rl_src_in) { - } - - void Compile() OVERRIDE { - GenerateTargetLabel(); - - m2l_->CallRuntimeHelperImm(kQuickInitializeType, type_idx_, true); - m2l_->OpRegCopy(m2l_->TargetReg(kArg2, kRef), - m2l_->TargetReg(kRet0, kRef)); // Align usage with fast path - m2l_->OpUnconditionalBranch(cont_); - } - - private: - uint32_t type_idx_; - RegLocation rl_src_; - }; - - AddSlowPath(new (arena_) InitTypeSlowPath(this, slow_path_branch, slow_path_target, - type_idx, rl_src)); } } /* kArg0 is ref, kArg2 is class. 
If ref==null, use directly as bool result */ @@ -1426,55 +1405,34 @@ void Mir2Lir::GenCheckCast(int opt_flags, uint32_t insn_idx, uint32_t type_idx, FlushAllRegs(); // May generate a call - use explicit registers LockCallTemps(); - RegStorage method_reg = TargetReg(kArg1, kRef); - LoadCurrMethodDirect(method_reg); // kArg1 <= current Method* RegStorage class_reg = TargetReg(kArg2, kRef); // kArg2 will hold the Class* if (needs_access_check) { // Check we have access to type_idx and if not throw IllegalAccessError, // returns Class* in kRet0 // InitializeTypeAndVerifyAccess(idx, method) - CallRuntimeHelperImm(kQuickInitializeTypeAndVerifyAccess, type_idx, true); + CallRuntimeHelperImmMethod(kQuickInitializeTypeAndVerifyAccess, type_idx, true); OpRegCopy(class_reg, TargetReg(kRet0, kRef)); // Align usage with fast path } else if (use_declaring_class) { + RegStorage method_reg = LoadCurrMethodWithHint(TargetReg(kArg1, kRef)); LoadRefDisp(method_reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), class_reg, kNotVolatile); } else { // Load dex cache entry into class_reg (kArg2) - LoadRefDisp(method_reg, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), - class_reg, kNotVolatile); - int32_t offset_of_type = ClassArray::OffsetOfElement(type_idx).Int32Value(); - LoadRefDisp(class_reg, offset_of_type, class_reg, kNotVolatile); + RegStorage r_method = RegStorage::InvalidReg(); + if (CanUseOpPcRelDexCacheArrayLoad()) { + size_t offset = dex_cache_arrays_layout_.TypeOffset(type_idx); + OpPcRelDexCacheArrayLoad(cu_->dex_file, offset, class_reg); + } else { + r_method = LoadCurrMethodWithHint(TargetReg(kArg1, kRef)); + + LoadRefDisp(r_method, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), + class_reg, kNotVolatile); + int32_t offset_of_type = ClassArray::OffsetOfElement(type_idx).Int32Value(); + LoadRefDisp(class_reg, offset_of_type, class_reg, kNotVolatile); + } if (!cu_->compiler_driver->CanAssumeTypeIsPresentInDexCache(*cu_->dex_file, type_idx)) { // Need to test presence of type in dex cache at runtime - LIR* hop_branch = OpCmpImmBranch(kCondEq, class_reg, 0, NULL); - LIR* cont = NewLIR0(kPseudoTargetLabel); - - // Slow path to initialize the type. Executed if the type is NULL. 
- class SlowPath : public LIRSlowPath { - public: - SlowPath(Mir2Lir* m2l, LIR* fromfast, LIR* cont_in, const int type_idx_in, - const RegStorage class_reg_in) - : LIRSlowPath(m2l, fromfast, cont_in), - type_idx_(type_idx_in), class_reg_(class_reg_in) { - } - - void Compile() { - GenerateTargetLabel(); - - // Call out to helper, which will return resolved type in kArg0 - // InitializeTypeFromCode(idx, method) - m2l_->CallRuntimeHelperImmReg(kQuickInitializeType, type_idx_, - m2l_->TargetReg(kArg1, kRef), true); - m2l_->OpRegCopy(class_reg_, m2l_->TargetReg(kRet0, kRef)); // Align usage with fast path - m2l_->OpUnconditionalBranch(cont_); - } - - public: - const int type_idx_; - const RegStorage class_reg_; - }; - - AddSlowPath(new (arena_) SlowPath(this, hop_branch, cont, type_idx, class_reg)); + GenIfNullUseHelperImmMethod(class_reg, kQuickInitializeType, type_idx, r_method); } } // At this point, class_reg (kArg2) has class diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc index 2d41ba1795..e747239894 100755 --- a/compiler/dex/quick/gen_invoke.cc +++ b/compiler/dex/quick/gen_invoke.cc @@ -24,6 +24,7 @@ #include "dex/quick/dex_file_to_method_inliner_map.h" #include "dex_file-inl.h" #include "driver/compiler_driver.h" +#include "driver/compiler_options.h" #include "entrypoints/quick/quick_entrypoints.h" #include "invoke_type.h" #include "mirror/array.h" diff --git a/compiler/dex/quick/gen_loadstore.cc b/compiler/dex/quick/gen_loadstore.cc index b71691f20a..54e5742837 100644 --- a/compiler/dex/quick/gen_loadstore.cc +++ b/compiler/dex/quick/gen_loadstore.cc @@ -340,6 +340,20 @@ void Mir2Lir::LoadCurrMethodDirect(RegStorage r_tgt) { LoadValueDirectFixed(mir_graph_->GetMethodLoc(), r_tgt); } +RegStorage Mir2Lir::LoadCurrMethodWithHint(RegStorage r_hint) { + // If the method is promoted to a register, return that register, otherwise load it to r_hint. + // (Replacement for LoadCurrMethod() usually used when LockCallTemps() is in effect.) + DCHECK(r_hint.Valid()); + RegLocation rl_method = mir_graph_->GetMethodLoc(); + if (rl_method.location == kLocPhysReg) { + DCHECK(!IsTemp(rl_method.reg)); + return rl_method.reg; + } else { + LoadCurrMethodDirect(r_hint); + return r_hint; + } +} + RegLocation Mir2Lir::LoadCurrMethod() { return LoadValue(mir_graph_->GetMethodLoc(), kRefReg); } diff --git a/compiler/dex/quick/local_optimizations.cc b/compiler/dex/quick/local_optimizations.cc index e5738998a0..6cdf56773e 100644 --- a/compiler/dex/quick/local_optimizations.cc +++ b/compiler/dex/quick/local_optimizations.cc @@ -493,15 +493,14 @@ void Mir2Lir::ApplyLoadHoisting(LIR* head_lir, LIR* tail_lir) { /* Found a slot to hoist to */ if (slot >= 0) { LIR* cur_lir = prev_inst_list[slot]; - LIR* new_load_lir = - static_cast<LIR*>(arena_->Alloc(sizeof(LIR), kArenaAllocLIR)); - *new_load_lir = *this_lir; + LIR* prev_lir = PREV_LIR(this_lir); + UnlinkLIR(this_lir); /* * Insertion is guaranteed to succeed since check_lir * is never the first LIR on the list */ - InsertLIRBefore(cur_lir, new_load_lir); - NopLIR(this_lir); + InsertLIRBefore(cur_lir, this_lir); + this_lir = prev_lir; // Continue the loop with the next LIR. 
} } } diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc index de66b35418..c932df6dc9 100644 --- a/compiler/dex/quick/mips/call_mips.cc +++ b/compiler/dex/quick/mips/call_mips.cc @@ -275,7 +275,6 @@ void MipsMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) */ skip_overflow_check = mir_graph_->MethodIsLeaf() && !FrameNeedsStackCheck(frame_size_, target); - NewLIR0(kPseudoMethodEntry); RegStorage check_reg = AllocPtrSizeTemp(); RegStorage new_sp = AllocPtrSizeTemp(); const RegStorage rs_sp = TargetPtrReg(kSp); @@ -345,7 +344,6 @@ void MipsMir2Lir::GenExitSequence() { LockTemp(TargetPtrReg(kRet0)); LockTemp(TargetPtrReg(kRet1)); - NewLIR0(kPseudoMethodExit); UnSpillCoreRegs(); OpReg(kOpBx, TargetPtrReg(kLr)); } diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc index 0b480a09c6..ed8e21e817 100644 --- a/compiler/dex/quick/mir_to_lir.cc +++ b/compiler/dex/quick/mir_to_lir.cc @@ -1250,10 +1250,14 @@ bool Mir2Lir::MethodBlockCodeGen(BasicBlock* bb) { if (bb->block_type == kEntryBlock) { ResetRegPool(); int start_vreg = mir_graph_->GetFirstInVR(); + AppendLIR(NewLIR0(kPseudoPrologueBegin)); GenEntrySequence(&mir_graph_->reg_location_[start_vreg], mir_graph_->GetMethodLoc()); + AppendLIR(NewLIR0(kPseudoPrologueEnd)); } else if (bb->block_type == kExitBlock) { ResetRegPool(); + AppendLIR(NewLIR0(kPseudoEpilogueBegin)); GenExitSequence(); + AppendLIR(NewLIR0(kPseudoEpilogueEnd)); } for (mir = bb->first_mir_insn; mir != NULL; mir = mir->next) { diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h index cca4e5a30a..bb8fbae8f6 100644 --- a/compiler/dex/quick/mir_to_lir.h +++ b/compiler/dex/quick/mir_to_lir.h @@ -32,6 +32,7 @@ #include "leb128.h" #include "safe_map.h" #include "utils/array_ref.h" +#include "utils/dex_cache_arrays_layout.h" #include "utils/stack_checks.h" namespace art { @@ -956,6 +957,7 @@ class Mir2Lir { // Shared by all targets - implemented in gen_loadstore.cc. RegLocation LoadCurrMethod(); void LoadCurrMethodDirect(RegStorage r_tgt); + RegStorage LoadCurrMethodWithHint(RegStorage r_hint); virtual LIR* LoadConstant(RegStorage r_dest, int value); // Natural word size. LIR* LoadWordDisp(RegStorage r_base, int displacement, RegStorage r_dest) { @@ -1093,6 +1095,18 @@ class Mir2Lir { virtual void LoadClassType(const DexFile& dex_file, uint32_t type_idx, SpecialTargetRegister symbolic_reg); + // TODO: Support PC-relative dex cache array loads on all platforms and + // replace CanUseOpPcRelDexCacheArrayLoad() with dex_cache_arrays_layout_.Valid(). + virtual bool CanUseOpPcRelDexCacheArrayLoad() const; + + /* + * @brief Load an element of one of the dex cache arrays. + * @param dex_file the dex file associated with the target dex cache. + * @param offset the offset of the element in the fixed dex cache arrays' layout. + * @param r_dest the register where to load the element. + */ + virtual void OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset, RegStorage r_dest); + // Routines that work for the generic case, but may be overriden by target. /* * @brief Compare memory to immediate, and branch if condition true. 
@@ -1596,7 +1610,6 @@ class Mir2Lir { */ virtual bool GenSpecialCase(BasicBlock* bb, MIR* mir, const InlineMethod& special); - protected: void ClobberBody(RegisterInfo* p); void SetCurrentDexPc(DexOffset dexpc) { current_dalvik_offset_ = dexpc; @@ -1669,6 +1682,16 @@ class Mir2Lir { */ bool GenSpecialIdentity(MIR* mir, const InlineMethod& special); + /** + * @brief Generate code to check if result is null and, if it is, call helper to load it. + * @param r_result the result register. + * @param trampoline the helper to call in slow path. + * @param imm the immediate passed to the helper. + * @param r_method the register with ArtMethod* if available, otherwise RegStorage::Invalid(). + */ + void GenIfNullUseHelperImmMethod( + RegStorage r_result, QuickEntrypointEnum trampoline, int imm, RegStorage r_method); + void AddDivZeroCheckSlowPath(LIR* branch); // Copy arg0 and arg1 to kArg0 and kArg1 safely, possibly using @@ -1815,7 +1838,9 @@ class Mir2Lir { // Record the MIR that generated a given safepoint (nullptr for prologue safepoints). ArenaVector<std::pair<LIR*, MIR*>> safepoints_; - protected: + // The layout of the cu_->dex_file's dex cache arrays for PC-relative addressing. + const DexCacheArraysLayout dex_cache_arrays_layout_; + // ABI support class ShortyArg { public: diff --git a/compiler/dex/quick/quick_compiler.cc b/compiler/dex/quick/quick_compiler.cc index 6d289843e8..8baafc7fd2 100644 --- a/compiler/dex/quick/quick_compiler.cc +++ b/compiler/dex/quick/quick_compiler.cc @@ -634,6 +634,12 @@ CompiledMethod* QuickCompiler::Compile(const DexFile::CodeItem* code_item, instruction_set = kThumb2; } CompilationUnit cu(runtime->GetArenaPool(), instruction_set, driver, class_linker); + cu.dex_file = &dex_file; + cu.class_def_idx = class_def_idx; + cu.method_idx = method_idx; + cu.access_flags = access_flags; + cu.invoke_type = invoke_type; + cu.shorty = dex_file.GetMethodShorty(dex_file.GetMethodId(method_idx)); CHECK((cu.instruction_set == kThumb2) || (cu.instruction_set == kArm64) || diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc index abee87254b..fd23692d24 100644 --- a/compiler/dex/quick/x86/call_x86.cc +++ b/compiler/dex/quick/x86/call_x86.cc @@ -21,9 +21,11 @@ #include "base/logging.h" #include "dex/quick/mir_to_lir-inl.h" #include "driver/compiler_driver.h" +#include "driver/compiler_options.h" #include "gc/accounting/card_table.h" #include "mirror/art_method.h" #include "mirror/object_array-inl.h" +#include "utils/dex_cache_arrays_layout-inl.h" #include "x86_lir.h" namespace art { @@ -185,7 +187,6 @@ void X86Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { stack_decrement_ = OpRegImm(kOpSub, rs_rSP, frame_size_ - GetInstructionSetPointerSize(cu_->instruction_set)); - NewLIR0(kPseudoMethodEntry); /* Spill core callee saves */ SpillCoreRegs(); SpillFPRegs(); @@ -258,7 +259,6 @@ void X86Mir2Lir::GenExitSequence() { LockTemp(rs_rX86_RET0); LockTemp(rs_rX86_RET1); - NewLIR0(kPseudoMethodExit); UnSpillCoreRegs(); UnSpillFPRegs(); /* Remove frame except for return address */ @@ -321,13 +321,13 @@ void X86Mir2Lir::GenImplicitNullCheck(RegStorage reg, int opt_flags) { * Bit of a hack here - in the absence of a real scheduling pass, * emit the next instruction in static & direct invoke sequences. 
*/ -static int X86NextSDCallInsn(CompilationUnit* cu, CallInfo* info, - int state, const MethodReference& target_method, - uint32_t, - uintptr_t direct_code, uintptr_t direct_method, - InvokeType type) { +int X86Mir2Lir::X86NextSDCallInsn(CompilationUnit* cu, CallInfo* info, + int state, const MethodReference& target_method, + uint32_t, + uintptr_t direct_code, uintptr_t direct_method, + InvokeType type) { UNUSED(info, direct_code); - Mir2Lir* cg = static_cast<Mir2Lir*>(cu->cg.get()); + X86Mir2Lir* cg = static_cast<X86Mir2Lir*>(cu->cg.get()); if (direct_method != 0) { switch (state) { case 0: // Get the current Method* [sets kArg0] @@ -345,6 +345,17 @@ static int X86NextSDCallInsn(CompilationUnit* cu, CallInfo* info, default: return -1; } + } else if (cg->CanUseOpPcRelDexCacheArrayLoad()) { + switch (state) { + case 0: { + CHECK_EQ(cu->dex_file, target_method.dex_file); + size_t offset = cg->dex_cache_arrays_layout_.MethodOffset(target_method.dex_method_index); + cg->OpPcRelDexCacheArrayLoad(cu->dex_file, offset, cg->TargetReg(kArg0, kRef)); + break; + } + default: + return -1; + } } else { RegStorage arg0_ref = cg->TargetReg(kArg0, kRef); switch (state) { diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h index 040a8c4bef..758684e835 100644 --- a/compiler/dex/quick/x86/codegen_x86.h +++ b/compiler/dex/quick/x86/codegen_x86.h @@ -104,6 +104,9 @@ class X86Mir2Lir : public Mir2Lir { /// @copydoc Mir2Lir::UnconditionallyMarkGCCard(RegStorage) void UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) OVERRIDE; + bool CanUseOpPcRelDexCacheArrayLoad() const OVERRIDE; + void OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset, RegStorage r_dest) OVERRIDE; + void GenImplicitNullCheck(RegStorage reg, int opt_flags) OVERRIDE; // Required for target - register utilities. @@ -952,6 +955,9 @@ class X86Mir2Lir : public Mir2Lir { // Instructions needing patching with PC relative code addresses. ArenaVector<LIR*> call_method_insns_; + // Instructions needing patching with PC relative code addresses. + ArenaVector<LIR*> dex_cache_access_insns_; + // Prologue decrement of stack pointer. LIR* stack_decrement_; @@ -992,6 +998,12 @@ class X86Mir2Lir : public Mir2Lir { void SwapBits(RegStorage result_reg, int shift, int32_t value); void SwapBits64(RegStorage result_reg, int shift, int64_t value); + static int X86NextSDCallInsn(CompilationUnit* cu, CallInfo* info, + int state, const MethodReference& target_method, + uint32_t, + uintptr_t direct_code, uintptr_t direct_method, + InvokeType type); + static const X86EncodingMap EncodingMap[kX86Last]; friend std::ostream& operator<<(std::ostream& os, const X86OpCode& rhs); diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc index 4eb626c14f..5def5c8bb0 100755 --- a/compiler/dex/quick/x86/int_x86.cc +++ b/compiler/dex/quick/x86/int_x86.cc @@ -1324,14 +1324,16 @@ bool X86Mir2Lir::GenInlinedReverseBits(CallInfo* info, OpSize size) { return true; } +// When we don't know the proper offset for the value, pick one that will force +// 4 byte offset. We will fix this up in the assembler or linker later to have +// the right value. +static constexpr int kDummy32BitOffset = 256; + void X86Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) { if (cu_->target64) { // We can do this directly using RIP addressing. - // We don't know the proper offset for the value, so pick one that will force - // 4 byte offset. We will fix this up in the assembler later to have the right - // value. 
ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral); - LIR* res = NewLIR3(kX86Mov32RM, reg.GetReg(), kRIPReg, 256); + LIR* res = NewLIR3(kX86Mov32RM, reg.GetReg(), kRIPReg, kDummy32BitOffset); res->target = target; res->flags.fixup = kFixupLoad; return; @@ -1349,15 +1351,32 @@ void X86Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) { store_method_addr_used_ = true; // Load the proper value from the literal area. - // We don't know the proper offset for the value, so pick one that will force - // 4 byte offset. We will fix this up in the assembler later to have the right - // value. ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral); - LIR* res = NewLIR3(kX86Mov32RM, reg.GetReg(), reg.GetReg(), 256); + LIR* res = NewLIR3(kX86Mov32RM, reg.GetReg(), reg.GetReg(), kDummy32BitOffset); res->target = target; res->flags.fixup = kFixupLoad; } +bool X86Mir2Lir::CanUseOpPcRelDexCacheArrayLoad() const { + // TODO: Implement for 32-bit. + return cu_->target64 && dex_cache_arrays_layout_.Valid(); +} + +void X86Mir2Lir::OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset, + RegStorage r_dest) { + if (cu_->target64) { + LIR* mov = NewLIR3(kX86Mov32RM, r_dest.GetReg(), kRIPReg, kDummy32BitOffset); + mov->flags.fixup = kFixupLabel; + mov->operands[3] = WrapPointer(dex_file); + mov->operands[4] = offset; + dex_cache_access_insns_.push_back(mov); + } else { + // TODO: Implement for 32-bit. + LOG(FATAL) << "Unimplemented."; + UNREACHABLE(); + } +} + LIR* X86Mir2Lir::OpVldm(RegStorage r_base, int count) { UNUSED(r_base, count); LOG(FATAL) << "Unexpected use of OpVldm for x86"; diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc index f128eb78a3..cad82a183e 100755 --- a/compiler/dex/quick/x86/target_x86.cc +++ b/compiler/dex/quick/x86/target_x86.cc @@ -829,6 +829,7 @@ X86Mir2Lir::X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* method_address_insns_(arena->Adapter()), class_type_address_insns_(arena->Adapter()), call_method_insns_(arena->Adapter()), + dex_cache_access_insns_(arena->Adapter()), stack_decrement_(nullptr), stack_increment_(nullptr), const_vectors_(nullptr) { method_address_insns_.reserve(100); @@ -1058,6 +1059,9 @@ void X86Mir2Lir::InstallLiteralPools() { } } + patches_.reserve(method_address_insns_.size() + class_type_address_insns_.size() + + call_method_insns_.size() + dex_cache_access_insns_.size()); + // Handle the fixups for methods. for (LIR* p : method_address_insns_) { DCHECK_EQ(p->opcode, kX86Mov32RI); @@ -1084,7 +1088,6 @@ void X86Mir2Lir::InstallLiteralPools() { } // And now the PC-relative calls to methods. - patches_.reserve(call_method_insns_.size()); for (LIR* p : call_method_insns_) { DCHECK_EQ(p->opcode, kX86CallI); uint32_t target_method_idx = p->operands[1]; @@ -1096,6 +1099,17 @@ void X86Mir2Lir::InstallLiteralPools() { target_dex_file, target_method_idx)); } + // PC-relative references to dex cache arrays. + for (LIR* p : dex_cache_access_insns_) { + DCHECK(p->opcode == kX86Mov32RM); + const DexFile* dex_file = UnwrapPointer<DexFile>(p->operands[3]); + uint32_t offset = p->operands[4]; + // The offset to patch is the last 4 bytes of the instruction. + int patch_offset = p->offset + p->flags.size - 4; + DCHECK(!p->flags.is_nop); + patches_.push_back(LinkerPatch::DexCacheArrayPatch(patch_offset, dex_file, p->offset, offset)); + } + // And do the normal processing. 
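Each LIR recorded in dex_cache_access_insns_ above stashes the DexFile* and the element offset in operands[3]/[4], and the emitted LinkerPatch points at the last four bytes of the RIP-relative mov. A minimal sketch of the fix-up the x86-64 side then has to perform at link time; this only illustrates the arithmetic, it is not the actual patcher source, and it assumes the standard x86 rule that disp32 is measured from the end of the instruction (patch_offset + 4 here, since the displacement bytes are the final four):

#include <cstdint>
#include <vector>

void PatchRipRelativeDisp32(std::vector<uint8_t>* code, uint32_t literal_offset,
                            uint32_t patch_offset, uint32_t target_offset) {
  // RIP-relative addressing is anchored at the next instruction, i.e. right
  // after the 4 displacement bytes that were emitted as kDummy32BitOffset.
  uint32_t displacement = target_offset - (patch_offset + 4u);
  (*code)[literal_offset + 0u] = static_cast<uint8_t>(displacement);
  (*code)[literal_offset + 1u] = static_cast<uint8_t>(displacement >> 8);
  (*code)[literal_offset + 2u] = static_cast<uint8_t>(displacement >> 16);
  (*code)[literal_offset + 3u] = static_cast<uint8_t>(displacement >> 24);
}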
Mir2Lir::InstallLiteralPools(); } diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index 100d49a99e..f52f50eda5 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -31,6 +31,7 @@ #include "base/timing_logger.h" #include "class_linker.h" #include "compiled_class.h" +#include "compiled_method.h" #include "compiler.h" #include "compiler_driver-inl.h" #include "dex_compilation_unit.h" @@ -62,6 +63,7 @@ #include "thread_pool.h" #include "trampolines/trampoline_compiler.h" #include "transaction.h" +#include "utils/dex_cache_arrays_layout-inl.h" #include "utils/swap_space.h" #include "verifier/method_verifier.h" #include "verifier/method_verifier-inl.h" @@ -1173,6 +1175,13 @@ uint32_t CompilerDriver::GetReferenceDisableFlagOffset() const { return klass->GetDisableIntrinsicFlagOffset().Uint32Value(); } +DexCacheArraysLayout CompilerDriver::GetDexCacheArraysLayout(const DexFile* dex_file) { + // Currently only image dex caches have fixed array layout. + return IsImage() && GetSupportBootImageFixup() + ? DexCacheArraysLayout(dex_file) + : DexCacheArraysLayout(); +} + void CompilerDriver::ProcessedInstanceField(bool resolved) { if (!resolved) { stats_->UnresolvedInstanceField(); @@ -2246,7 +2255,7 @@ void CompilerDriver::CompileMethod(Thread* self, const DexFile::CodeItem* code_i // Count non-relative linker patches. size_t non_relative_linker_patch_count = 0u; for (const LinkerPatch& patch : compiled_method->GetPatches()) { - if (patch.Type() != kLinkerPatchCallRelative) { + if (!patch.IsPcRelative()) { ++non_relative_linker_patch_count; } } @@ -2438,7 +2447,7 @@ std::string CompilerDriver::GetMemoryUsageString(bool extended) const { gc::Heap* const heap = runtime->GetHeap(); oss << "arena alloc=" << PrettySize(arena_pool->GetBytesAllocated()); oss << " java alloc=" << PrettySize(heap->GetBytesAllocated()); -#ifdef HAVE_MALLOC_H +#if defined(__BIONIC__) || defined(__GLIBC__) struct mallinfo info = mallinfo(); const size_t allocated_space = static_cast<size_t>(info.uordblks); const size_t free_space = static_cast<size_t>(info.fordblks); diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h index b825293c33..efcaae4cdd 100644 --- a/compiler/driver/compiler_driver.h +++ b/compiler/driver/compiler_driver.h @@ -26,11 +26,8 @@ #include "base/mutex.h" #include "base/timing_logger.h" #include "class_reference.h" -#include "compiled_method.h" #include "compiler.h" #include "dex_file.h" -#include "dex/verified_method.h" -#include "driver/compiler_options.h" #include "invoke_type.h" #include "method_reference.h" #include "mirror/class.h" // For mirror::Class::Status. 
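GetDexCacheArraysLayout() above returns a default-constructed (invalid) layout for anything other than a boot-image compile with fixup support, so callers are expected to gate on Valid() rather than assume fixed offsets. A hedged usage sketch; the variable names are illustrative:

DexCacheArraysLayout layout = driver->GetDexCacheArraysLayout(dex_file);
if (layout.Valid()) {
  // Boot-image compile: offsets such as layout.MethodOffset(idx) or
  // layout.StringsOffset() are stable and can be baked into PC-relative loads.
} else {
  // Regular compile: the dex cache arrays have no fixed layout, so the
  // conventional dex cache loads stay in place.
}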
@@ -39,7 +36,9 @@ #include "runtime.h" #include "safe_map.h" #include "thread_pool.h" +#include "utils/array_ref.h" #include "utils/dedupe_set.h" +#include "utils/dex_cache_arrays_layout.h" #include "utils/swap_space.h" #include "utils.h" @@ -54,6 +53,7 @@ class MethodVerifier; } // namespace verifier class CompiledClass; +class CompiledMethod; class CompilerOptions; class DexCompilationUnit; class DexFileToMethodInlinerMap; @@ -62,6 +62,9 @@ class InstructionSetFeatures; class OatWriter; class ParallelCompilationManager; class ScopedObjectAccess; +template <class Allocator> class SrcMap; +class SrcMapElem; +using SwapSrcMap = SrcMap<SwapAllocator<SrcMapElem>>; template<class T> class Handle; class TimingLogger; class VerificationResults; @@ -318,6 +321,10 @@ class CompilerDriver { bool IsMethodsClassInitialized(mirror::Class* referrer_class, mirror::ArtMethod* resolved_method) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + // Get the layout of dex cache arrays for a dex file. Returns invalid layout if the + // dex cache arrays don't have a fixed layout. + DexCacheArraysLayout GetDexCacheArraysLayout(const DexFile* dex_file); + void ProcessedInstanceField(bool resolved); void ProcessedStaticField(bool resolved, bool local); void ProcessedInvoke(InvokeType invoke_type, int flags); diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc index e436f52db3..fc00c926b2 100644 --- a/compiler/driver/compiler_options.cc +++ b/compiler/driver/compiler_options.cc @@ -42,6 +42,11 @@ CompilerOptions::CompilerOptions() init_failure_output_(nullptr) { } +CompilerOptions::~CompilerOptions() { + // The destructor looks empty but it destroys a PassManagerOptions object. We keep it here + // because we don't want to include the PassManagerOptions definition from the header file. 
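The deliberately out-of-line destructor above is the usual trick for keeping a heavyweight type out of the header: compiler_options.h only needs a forward declaration as long as no implicitly generated destructor has to see the complete type. A minimal sketch of the header-side pattern this enables; the std::unique_ptr member and its name are assumptions, not part of this diff:

#include <memory>

class PassManagerOptions;  // Forward declaration is all the header needs.

class CompilerOptions {
 public:
  CompilerOptions();
  ~CompilerOptions();  // Defined in the .cc file, where PassManagerOptions is complete.

 private:
  // Assumed member; holding it behind a pointer is what makes the forward
  // declaration sufficient, provided the destructor stays out-of-line.
  std::unique_ptr<PassManagerOptions> pass_manager_options_;
};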
+} + CompilerOptions::CompilerOptions(CompilerFilter compiler_filter, size_t huge_method_threshold, size_t large_method_threshold, diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h index d06ec278ab..f7ea385e19 100644 --- a/compiler/driver/compiler_options.h +++ b/compiler/driver/compiler_options.h @@ -53,6 +53,7 @@ class CompilerOptions FINAL { static const bool kDefaultIncludePatchInformation = false; CompilerOptions(); + ~CompilerOptions(); CompilerOptions(CompilerFilter compiler_filter, size_t huge_method_threshold, diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc index a822b24cde..3ce19ab0df 100644 --- a/compiler/elf_writer_quick.cc +++ b/compiler/elf_writer_quick.cc @@ -21,8 +21,12 @@ #include "base/logging.h" #include "base/unix_file/fd_file.h" #include "buffered_output_stream.h" +#include "compiled_method.h" #include "driver/compiler_driver.h" +#include "driver/compiler_options.h" #include "dwarf.h" +#include "dwarf/debug_frame_writer.h" +#include "dwarf/debug_line_writer.h" #include "elf_builder.h" #include "elf_file.h" #include "elf_utils.h" @@ -273,96 +277,8 @@ bool ElfWriterQuick<Elf_Word, Elf_Sword, Elf_Addr, Elf_Dyn, return builder->Write(); } -class LineTableGenerator FINAL : public Leb128Encoder { - public: - LineTableGenerator(int line_base, int line_range, int opcode_base, - std::vector<uint8_t>* data, uintptr_t current_address, - size_t current_line) - : Leb128Encoder(data), line_base_(line_base), line_range_(line_range), - opcode_base_(opcode_base), current_address_(current_address), - current_line_(current_line), current_file_index_(0) {} - - void PutDelta(unsigned delta_addr, int delta_line) { - current_line_ += delta_line; - current_address_ += delta_addr; - - if (delta_line >= line_base_ && delta_line < line_base_ + line_range_) { - unsigned special_opcode = (delta_line - line_base_) + - (line_range_ * delta_addr) + opcode_base_; - if (special_opcode <= 255) { - PushByte(data_, special_opcode); - return; - } - } - - // generate standart opcode for address advance - if (delta_addr != 0) { - PushByte(data_, DW_LNS_advance_pc); - PushBackUnsigned(delta_addr); - } - - // generate standart opcode for line delta - if (delta_line != 0) { - PushByte(data_, DW_LNS_advance_line); - PushBackSigned(delta_line); - } - - // generate standart opcode for new LTN entry - PushByte(data_, DW_LNS_copy); - } - - void SetAddr(uintptr_t addr) { - if (current_address_ == addr) { - return; - } - - current_address_ = addr; - - PushByte(data_, 0); // extended opcode: - PushByte(data_, 1 + 4); // length: opcode_size + address_size - PushByte(data_, DW_LNE_set_address); - Push32(data_, addr); - } - - void SetLine(unsigned line) { - int delta_line = line - current_line_; - if (delta_line) { - current_line_ = line; - PushByte(data_, DW_LNS_advance_line); - PushBackSigned(delta_line); - } - } - - void SetFile(unsigned file_index) { - if (current_file_index_ != file_index) { - current_file_index_ = file_index; - PushByte(data_, DW_LNS_set_file); - PushBackUnsigned(file_index); - } - } - - void EndSequence() { - // End of Line Table Program - // 0(=ext), 1(len), DW_LNE_end_sequence - PushByte(data_, 0); - PushByte(data_, 1); - PushByte(data_, DW_LNE_end_sequence); - } - - private: - const int line_base_; - const int line_range_; - const int opcode_base_; - uintptr_t current_address_; - size_t current_line_; - unsigned current_file_index_; - - DISALLOW_COPY_AND_ASSIGN(LineTableGenerator); -}; - // TODO: rewriting it using 
DexFile::DecodeDebugInfo needs unneeded stuff. -static void GetLineInfoForJava(const uint8_t* dbgstream, const SwapSrcMap& pc2dex, - DefaultSrcMap* result, uint32_t start_pc = 0) { +static void GetLineInfoForJava(const uint8_t* dbgstream, DefaultSrcMap* dex2line) { if (dbgstream == nullptr) { return; } @@ -417,12 +333,7 @@ static void GetLineInfoForJava(const uint8_t* dbgstream, const SwapSrcMap& pc2de adjopcode = opcode - DexFile::DBG_FIRST_SPECIAL; dex_offset += adjopcode / DexFile::DBG_LINE_RANGE; java_line += DexFile::DBG_LINE_BASE + (adjopcode % DexFile::DBG_LINE_RANGE); - - for (SwapSrcMap::const_iterator found = pc2dex.FindByTo(dex_offset); - found != pc2dex.end() && found->to_ == static_cast<int32_t>(dex_offset); - found++) { - result->push_back({found->from_ + start_pc, static_cast<int32_t>(java_line)}); - } + dex2line->push_back({dex_offset, static_cast<int32_t>(java_line)}); break; } } @@ -441,71 +352,78 @@ static void FillInCFIInformation(OatWriter* oat_writer, std::vector<uint8_t>* dbg_str, std::vector<uint8_t>* dbg_line, uint32_t text_section_offset) { - const std::vector<OatWriter::DebugInfo>& method_info = oat_writer->GetCFIMethodInfo(); + const std::vector<OatWriter::DebugInfo>& method_infos = oat_writer->GetCFIMethodInfo(); uint32_t producer_str_offset = PushStr(dbg_str, "Android dex2oat"); + constexpr bool use_64bit_addresses = false; + // Create the debug_abbrev section with boilerplate information. // We only care about low_pc and high_pc right now for the compilation // unit and methods. // Tag 1: Compilation unit: DW_TAG_compile_unit. PushByte(dbg_abbrev, 1); - PushByte(dbg_abbrev, DW_TAG_compile_unit); + PushByte(dbg_abbrev, dwarf::DW_TAG_compile_unit); // There are children (the methods). - PushByte(dbg_abbrev, DW_CHILDREN_yes); + PushByte(dbg_abbrev, dwarf::DW_CHILDREN_yes); // DW_AT_producer DW_FORM_data1. // REVIEW: we can get rid of dbg_str section if // DW_FORM_string (immediate string) was used everywhere instead of // DW_FORM_strp (ref to string from .debug_str section). // DW_FORM_strp makes sense only if we reuse the strings. - PushByte(dbg_abbrev, DW_AT_producer); - PushByte(dbg_abbrev, DW_FORM_strp); + PushByte(dbg_abbrev, dwarf::DW_AT_producer); + PushByte(dbg_abbrev, dwarf::DW_FORM_strp); // DW_LANG_Java DW_FORM_data1. - PushByte(dbg_abbrev, DW_AT_language); - PushByte(dbg_abbrev, DW_FORM_data1); + PushByte(dbg_abbrev, dwarf::DW_AT_language); + PushByte(dbg_abbrev, dwarf::DW_FORM_data1); // DW_AT_low_pc DW_FORM_addr. - PushByte(dbg_abbrev, DW_AT_low_pc); - PushByte(dbg_abbrev, DW_FORM_addr); + PushByte(dbg_abbrev, dwarf::DW_AT_low_pc); + PushByte(dbg_abbrev, dwarf::DW_FORM_addr); // DW_AT_high_pc DW_FORM_addr. - PushByte(dbg_abbrev, DW_AT_high_pc); - PushByte(dbg_abbrev, DW_FORM_addr); + PushByte(dbg_abbrev, dwarf::DW_AT_high_pc); + PushByte(dbg_abbrev, dwarf::DW_FORM_addr); if (dbg_line != nullptr) { // DW_AT_stmt_list DW_FORM_sec_offset. - PushByte(dbg_abbrev, DW_AT_stmt_list); - PushByte(dbg_abbrev, DW_FORM_sec_offset); + PushByte(dbg_abbrev, dwarf::DW_AT_stmt_list); + PushByte(dbg_abbrev, dwarf::DW_FORM_data4); } // End of DW_TAG_compile_unit. - PushHalf(dbg_abbrev, 0); + PushByte(dbg_abbrev, 0); // DW_AT. + PushByte(dbg_abbrev, 0); // DW_FORM. // Tag 2: Compilation unit: DW_TAG_subprogram. PushByte(dbg_abbrev, 2); - PushByte(dbg_abbrev, DW_TAG_subprogram); + PushByte(dbg_abbrev, dwarf::DW_TAG_subprogram); // There are no children. - PushByte(dbg_abbrev, DW_CHILDREN_no); + PushByte(dbg_abbrev, dwarf::DW_CHILDREN_no); // Name of the method. 
- PushByte(dbg_abbrev, DW_AT_name); - PushByte(dbg_abbrev, DW_FORM_strp); + PushByte(dbg_abbrev, dwarf::DW_AT_name); + PushByte(dbg_abbrev, dwarf::DW_FORM_strp); // DW_AT_low_pc DW_FORM_addr. - PushByte(dbg_abbrev, DW_AT_low_pc); - PushByte(dbg_abbrev, DW_FORM_addr); + PushByte(dbg_abbrev, dwarf::DW_AT_low_pc); + PushByte(dbg_abbrev, dwarf::DW_FORM_addr); // DW_AT_high_pc DW_FORM_addr. - PushByte(dbg_abbrev, DW_AT_high_pc); - PushByte(dbg_abbrev, DW_FORM_addr); + PushByte(dbg_abbrev, dwarf::DW_AT_high_pc); + PushByte(dbg_abbrev, dwarf::DW_FORM_addr); // End of DW_TAG_subprogram. - PushHalf(dbg_abbrev, 0); + PushByte(dbg_abbrev, 0); // DW_AT. + PushByte(dbg_abbrev, 0); // DW_FORM. + + // End of abbrevs for compilation unit + PushByte(dbg_abbrev, 0); // Start the debug_info section with the header information // 'unit_length' will be filled in later. @@ -518,8 +436,8 @@ static void FillInCFIInformation(OatWriter* oat_writer, // Offset into .debug_abbrev section (always 0). Push32(dbg_info, 0); - // Address size: 4. - PushByte(dbg_info, 4); + // Address size: 4 or 8. + PushByte(dbg_info, use_64bit_addresses ? 8 : 4); // Start the description for the compilation unit. // This uses tag 1. @@ -529,31 +447,34 @@ static void FillInCFIInformation(OatWriter* oat_writer, Push32(dbg_info, producer_str_offset); // The language is Java. - PushByte(dbg_info, DW_LANG_Java); + PushByte(dbg_info, dwarf::DW_LANG_Java); // low_pc and high_pc. - uint32_t cunit_low_pc = 0 - 1; + uint32_t cunit_low_pc = static_cast<uint32_t>(-1); uint32_t cunit_high_pc = 0; - int cunit_low_pc_pos = dbg_info->size(); - Push32(dbg_info, 0); - Push32(dbg_info, 0); + for (auto method_info : method_infos) { + cunit_low_pc = std::min(cunit_low_pc, method_info.low_pc_); + cunit_high_pc = std::max(cunit_high_pc, method_info.high_pc_); + } + Push32(dbg_info, cunit_low_pc + text_section_offset); + Push32(dbg_info, cunit_high_pc + text_section_offset); - if (dbg_line == nullptr) { - for (size_t i = 0; i < method_info.size(); ++i) { - const OatWriter::DebugInfo &dbg = method_info[i]; + if (dbg_line != nullptr) { + // Line number table offset. + Push32(dbg_info, dbg_line->size()); + } - cunit_low_pc = std::min(cunit_low_pc, dbg.low_pc_); - cunit_high_pc = std::max(cunit_high_pc, dbg.high_pc_); + for (auto method_info : method_infos) { + // Start a new TAG: subroutine (2). + PushByte(dbg_info, 2); - // Start a new TAG: subroutine (2). - PushByte(dbg_info, 2); + // Enter name, low_pc, high_pc. + Push32(dbg_info, PushStr(dbg_str, method_info.method_name_)); + Push32(dbg_info, method_info.low_pc_ + text_section_offset); + Push32(dbg_info, method_info.high_pc_ + text_section_offset); + } - // Enter name, low_pc, high_pc. - Push32(dbg_info, PushStr(dbg_str, dbg.method_name_)); - Push32(dbg_info, dbg.low_pc_ + text_section_offset); - Push32(dbg_info, dbg.high_pc_ + text_section_offset); - } - } else { + if (dbg_line != nullptr) { // TODO: in gdb info functions <regexp> - reports Java functions, but // source file is <unknown> because .debug_line is formed as one // compilation unit. To fix this it is possible to generate @@ -561,110 +482,135 @@ static void FillInCFIInformation(OatWriter* oat_writer, // Each of the these compilation units can have several non-adjacent // method ranges. 
- // Line number table offset - Push32(dbg_info, dbg_line->size()); + std::vector<dwarf::DebugLineWriter<>::FileEntry> files; + std::unordered_map<std::string, size_t> files_map; + std::vector<std::string> directories; + std::unordered_map<std::string, size_t> directories_map; + + int code_factor_bits_ = 0; + int isa = -1; + switch (oat_writer->GetOatHeader().GetInstructionSet()) { + case kThumb2: + code_factor_bits_ = 1; // 16-bit instuctions + isa = 1; // DW_ISA_ARM_thumb. + break; + case kArm: + code_factor_bits_ = 1; // 16-bit instructions + isa = 2; // DW_ISA_ARM_arm. + break; + case kArm64: + case kMips: + case kMips64: + code_factor_bits_ = 2; // 32-bit instructions + break; + case kNone: + case kX86: + case kX86_64: + break; + } - size_t lnt_length = dbg_line->size(); - Push32(dbg_line, 0); - - PushHalf(dbg_line, 4); // LNT Version DWARF v4 => 4 - - size_t lnt_hdr_length = dbg_line->size(); - Push32(dbg_line, 0); // TODO: 64-bit uses 8-byte here - - PushByte(dbg_line, 1); // minimum_instruction_length (ubyte) - PushByte(dbg_line, 1); // maximum_operations_per_instruction (ubyte) = always 1 - PushByte(dbg_line, 1); // default_is_stmt (ubyte) - - const int8_t LINE_BASE = -5; - PushByte(dbg_line, LINE_BASE); // line_base (sbyte) - - const uint8_t LINE_RANGE = 14; - PushByte(dbg_line, LINE_RANGE); // line_range (ubyte) - - const uint8_t OPCODE_BASE = 13; - PushByte(dbg_line, OPCODE_BASE); // opcode_base (ubyte) - - // Standard_opcode_lengths (array of ubyte). - PushByte(dbg_line, 0); PushByte(dbg_line, 1); PushByte(dbg_line, 1); - PushByte(dbg_line, 1); PushByte(dbg_line, 1); PushByte(dbg_line, 0); - PushByte(dbg_line, 0); PushByte(dbg_line, 0); PushByte(dbg_line, 1); - PushByte(dbg_line, 0); PushByte(dbg_line, 0); PushByte(dbg_line, 1); - - PushByte(dbg_line, 0); // include_directories (sequence of path names) = EMPTY - - // File_names (sequence of file entries). - std::unordered_map<const char*, size_t> files; - for (size_t i = 0; i < method_info.size(); ++i) { - const OatWriter::DebugInfo &dbg = method_info[i]; - // TODO: add package directory to the file name - const char* file_name = dbg.src_file_name_ == nullptr ? "null" : dbg.src_file_name_; - auto found = files.find(file_name); - if (found == files.end()) { - size_t file_index = 1 + files.size(); - files[file_name] = file_index; - PushStr(dbg_line, file_name); - PushByte(dbg_line, 0); // include directory index = LEB128(0) - no directory - PushByte(dbg_line, 0); // modification time = LEB128(0) - NA - PushByte(dbg_line, 0); // file length = LEB128(0) - NA - } + dwarf::DebugLineOpCodeWriter<> opcodes(use_64bit_addresses, code_factor_bits_); + opcodes.SetAddress(text_section_offset + cunit_low_pc); + if (isa != -1) { + opcodes.SetISA(isa); } - PushByte(dbg_line, 0); // End of file_names. - - // Set lnt header length. - UpdateWord(dbg_line, lnt_hdr_length, dbg_line->size() - lnt_hdr_length - 4); - - // Generate Line Number Program code, one long program for all methods. - LineTableGenerator line_table_generator(LINE_BASE, LINE_RANGE, OPCODE_BASE, - dbg_line, 0, 1); - - DefaultSrcMap pc2java_map; - for (size_t i = 0; i < method_info.size(); ++i) { - const OatWriter::DebugInfo &dbg = method_info[i]; - const char* file_name = (dbg.src_file_name_ == nullptr) ? "null" : dbg.src_file_name_; - size_t file_index = files[file_name]; - DCHECK_NE(file_index, 0U) << file_name; - - cunit_low_pc = std::min(cunit_low_pc, dbg.low_pc_); - cunit_high_pc = std::max(cunit_high_pc, dbg.high_pc_); - - // Start a new TAG: subroutine (2). 
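For the new .debug_line generation the heavy lifting moves into dwarf::DebugLineOpCodeWriter, constructed above with the address width and code factor. A minimal usage sketch restricted to the calls visible in this change; the addresses, line numbers and file index are made up:

// Sketch only; the real code derives these values from the method infos above.
dwarf::DebugLineOpCodeWriter<> opcodes(/* use_64bit_addresses */ false,
                                       /* code_factor_bits */ 1 /* Thumb2 */);
opcodes.SetAddress(0x1000);  // Text-relative start of the first method.
opcodes.SetFile(1);          // Index into the deduplicated file table.
opcodes.AddRow(0x1000, 23);  // First PC of the method maps to source line 23.
opcodes.AddRow(0x1010, 24);  // Further rows only when the line actually changes.
opcodes.AdvancePC(0x1040);   // End of the covered range.
opcodes.EndSequence();

dwarf::DebugLineWriter<> writer(&dbg_line_bytes);  // dbg_line_bytes: std::vector<uint8_t>.
writer.WriteTable(directories, files, opcodes);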
- PushByte(dbg_info, 2); - - // Enter name, low_pc, high_pc. - Push32(dbg_info, PushStr(dbg_str, dbg.method_name_)); - Push32(dbg_info, dbg.low_pc_ + text_section_offset); - Push32(dbg_info, dbg.high_pc_ + text_section_offset); - - GetLineInfoForJava(dbg.dbgstream_, dbg.compiled_method_->GetSrcMappingTable(), - &pc2java_map, dbg.low_pc_); - pc2java_map.DeltaFormat({dbg.low_pc_, 1}, dbg.high_pc_); - if (!pc2java_map.empty()) { - line_table_generator.SetFile(file_index); - line_table_generator.SetAddr(dbg.low_pc_ + text_section_offset); - line_table_generator.SetLine(1); - for (auto& src_map_elem : pc2java_map) { - line_table_generator.PutDelta(src_map_elem.from_, src_map_elem.to_); + DefaultSrcMap dex2line_map; + for (size_t i = 0; i < method_infos.size(); i++) { + const OatWriter::DebugInfo& method_info = method_infos[i]; + + // Addresses in the line table should be unique and increasing. + if (method_info.deduped_) { + continue; + } + + // Get and deduplicate directory and filename. + int file_index = 0; // 0 - primary source file of the compilation. + if (method_info.src_file_name_ != nullptr) { + std::string file_name(method_info.src_file_name_); + size_t file_name_slash = file_name.find_last_of('/'); + std::string class_name(method_info.class_descriptor_); + size_t class_name_slash = class_name.find_last_of('/'); + std::string full_path(file_name); + + // Guess directory from package name. + int directory_index = 0; // 0 - current directory of the compilation. + if (file_name_slash == std::string::npos && // Just filename. + class_name.front() == 'L' && // Type descriptor for a class. + class_name_slash != std::string::npos) { // Has package name. + std::string package_name = class_name.substr(1, class_name_slash - 1); + auto it = directories_map.find(package_name); + if (it == directories_map.end()) { + directory_index = 1 + directories.size(); + directories_map.emplace(package_name, directory_index); + directories.push_back(package_name); + } else { + directory_index = it->second; + } + full_path = package_name + "/" + file_name; + } + + // Add file entry. + auto it2 = files_map.find(full_path); + if (it2 == files_map.end()) { + file_index = 1 + files.size(); + files_map.emplace(full_path, file_index); + files.push_back(dwarf::DebugLineWriter<>::FileEntry { + file_name, + directory_index, + 0, // Modification time - NA. + 0, // File size - NA. + }); + } else { + file_index = it2->second; } - pc2java_map.clear(); + } + opcodes.SetFile(file_index); + + // Generate mapping opcodes from PC to Java lines. + dex2line_map.clear(); + GetLineInfoForJava(method_info.dbgstream_, &dex2line_map); + uint32_t low_pc = text_section_offset + method_info.low_pc_; + if (file_index != 0 && !dex2line_map.empty()) { + bool first = true; + for (SrcMapElem pc2dex : method_info.compiled_method_->GetSrcMappingTable()) { + uint32_t pc = pc2dex.from_; + int dex = pc2dex.to_; + auto dex2line = dex2line_map.Find(static_cast<uint32_t>(dex)); + if (dex2line.first) { + int line = dex2line.second; + if (first) { + first = false; + if (pc > 0) { + // Assume that any preceding code is prologue. + int first_line = dex2line_map.front().to_; + // Prologue is not a sensible place for a breakpoint. + opcodes.NegateStmt(); + opcodes.AddRow(low_pc, first_line); + opcodes.NegateStmt(); + opcodes.SetPrologueEnd(); + } + opcodes.AddRow(low_pc + pc, line); + } else if (line != opcodes.CurrentLine()) { + opcodes.AddRow(low_pc + pc, line); + } + } + } + } else { + // line 0 - instruction cannot be attributed to any source line. 
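The directory guessing above recovers a plausible source path when the dex debug info only stores a bare file name: the package from the class descriptor becomes the DWARF directory entry. A self-contained sketch of that logic with a worked example; the function name is illustrative:

#include <string>

// "Activity.java" + "Lcom/android/app/Activity;" -> "com/android/app/Activity.java".
std::string GuessSourcePath(const std::string& file_name, const std::string& class_descriptor) {
  size_t class_name_slash = class_descriptor.find_last_of('/');
  if (file_name.find('/') == std::string::npos &&                       // Bare file name only.
      !class_descriptor.empty() && class_descriptor.front() == 'L' &&   // Class type descriptor.
      class_name_slash != std::string::npos) {                          // Has a package.
    std::string package = class_descriptor.substr(1, class_name_slash - 1);  // "com/android/app".
    return package + "/" + file_name;
  }
  return file_name;  // Keep as-is; directory index 0 (compilation directory).
}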
+ opcodes.AddRow(low_pc, 0); } } - // End Sequence should have the highest address set. - line_table_generator.SetAddr(cunit_high_pc + text_section_offset); - line_table_generator.EndSequence(); + opcodes.AdvancePC(text_section_offset + cunit_high_pc); + opcodes.EndSequence(); - // set lnt length - UpdateWord(dbg_line, lnt_length, dbg_line->size() - lnt_length - 4); + dwarf::DebugLineWriter<> dbg_line_writer(dbg_line); + dbg_line_writer.WriteTable(directories, files, opcodes); } - // One byte terminator + // One byte terminator. PushByte(dbg_info, 0); - // Fill in cunit's low_pc and high_pc. - UpdateWord(dbg_info, cunit_low_pc_pos, cunit_low_pc + text_section_offset); - UpdateWord(dbg_info, cunit_low_pc_pos + 4, cunit_high_pc + text_section_offset); - // We have now walked all the methods. Fill in lengths. UpdateWord(dbg_info, cunit_length, dbg_info->size() - cunit_length - 4); } @@ -688,8 +634,11 @@ static void WriteDebugSymbols(const CompilerDriver* compiler_driver, ElfSymtabBuilder<Elf_Word, Elf_Sword, Elf_Addr, Elf_Sym, Elf_Shdr>* symtab = builder->GetSymtabBuilder(); for (auto it = method_info.begin(); it != method_info.end(); ++it) { - symtab->AddSymbol(it->method_name_, &builder->GetTextBuilder(), it->low_pc_, true, - it->high_pc_ - it->low_pc_, STB_GLOBAL, STT_FUNC); + uint32_t low_pc = it->low_pc_; + // Add in code delta, e.g., thumb bit 0 for Thumb2 code. + low_pc += it->compiled_method_->CodeDelta(); + symtab->AddSymbol(it->method_name_, &builder->GetTextBuilder(), low_pc, + true, it->high_pc_ - it->low_pc_, STB_GLOBAL, STT_FUNC); // Conforming to aaelf, add $t mapping symbol to indicate start of a sequence of thumb2 // instructions, so that disassembler tools can correctly disassemble. diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc index c1555aa523..1ede228c4f 100644 --- a/compiler/image_writer.cc +++ b/compiler/image_writer.cc @@ -19,6 +19,7 @@ #include <sys/stat.h> #include <memory> +#include <numeric> #include <vector> #include "base/logging.h" @@ -54,8 +55,7 @@ #include "runtime.h" #include "scoped_thread_state_change.h" #include "handle_scope-inl.h" - -#include <numeric> +#include "utils/dex_cache_arrays_layout-inl.h" using ::art::mirror::ArtField; using ::art::mirror::ArtMethod; @@ -238,7 +238,7 @@ void ImageWriter::AssignImageOffset(mirror::Object* object, ImageWriter::BinSlot DCHECK(object != nullptr); DCHECK_NE(image_objects_offset_begin_, 0u); - size_t previous_bin_sizes = GetBinSizeSum(bin_slot.GetBin()); // sum sizes in [0..bin#) + size_t previous_bin_sizes = bin_slot_previous_sizes_[bin_slot.GetBin()]; size_t new_offset = image_objects_offset_begin_ + previous_bin_sizes + bin_slot.GetIndex(); DCHECK_ALIGNED(new_offset, kObjectAlignment); @@ -293,6 +293,28 @@ void ImageWriter::SetImageBinSlot(mirror::Object* object, BinSlot bin_slot) { DCHECK(IsImageBinSlotAssigned(object)); } +void ImageWriter::PrepareDexCacheArraySlots() { + ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); + ReaderMutexLock mu(Thread::Current(), *class_linker->DexLock()); + size_t dex_cache_count = class_linker->GetDexCacheCount(); + uint32_t size = 0u; + for (size_t idx = 0; idx < dex_cache_count; ++idx) { + DexCache* dex_cache = class_linker->GetDexCache(idx); + const DexFile* dex_file = dex_cache->GetDexFile(); + dex_cache_array_starts_.Put(dex_file, size); + DexCacheArraysLayout layout(dex_file); + DCHECK(layout.Valid()); + dex_cache_array_indexes_.Put(dex_cache->GetResolvedTypes(), size + layout.TypesOffset()); + 
dex_cache_array_indexes_.Put(dex_cache->GetResolvedMethods(), size + layout.MethodsOffset()); + dex_cache_array_indexes_.Put(dex_cache->GetResolvedFields(), size + layout.FieldsOffset()); + dex_cache_array_indexes_.Put(dex_cache->GetStrings(), size + layout.StringsOffset()); + size += layout.Size(); + } + // Set the slot size early to avoid DCHECK() failures in IsImageBinSlotAssigned() + // when AssignImageBinSlot() assigns their indexes out or order. + bin_slot_sizes_[kBinDexCacheArray] = size; +} + void ImageWriter::AssignImageBinSlot(mirror::Object* object) { DCHECK(object != nullptr); size_t object_size = object->SizeOf(); @@ -307,6 +329,7 @@ void ImageWriter::AssignImageBinSlot(mirror::Object* object) { // This means more pages will stay either clean or shared dirty (with zygote) and // the app will use less of its own (private) memory. Bin bin = kBinRegular; + size_t current_offset = 0u; if (kBinObjects) { // @@ -316,6 +339,12 @@ void ImageWriter::AssignImageBinSlot(mirror::Object* object) { // Memory analysis has determined that the following types of objects get dirtied // the most: // + // * Dex cache arrays are stored in a special bin. The arrays for each dex cache have + // a fixed layout which helps improve generated code (using PC-relative addressing), + // so we pre-calculate their offsets separately in PrepareDexCacheArraySlots(). + // Since these arrays are huge, most pages do not overlap other objects and it's not + // really important where they are for the clean/dirty separation. Due to their + // special PC-relative addressing, we arbitrarily keep them at the beginning. // * Class'es which are verified [their clinit runs only at runtime] // - classes in general [because their static fields get overwritten] // - initialized classes with all-final statics are unlikely to be ever dirty, @@ -376,13 +405,21 @@ void ImageWriter::AssignImageBinSlot(mirror::Object* object) { } } else if (object->GetClass<kVerifyNone>()->IsStringClass()) { bin = kBinString; // Strings are almost always immutable (except for object header). + } else if (object->IsObjectArray()) { + auto it = dex_cache_array_indexes_.find(object); + if (it != dex_cache_array_indexes_.end()) { + bin = kBinDexCacheArray; + current_offset = it->second; // Use prepared offset defined by the DexCacheLayout. + } // else bin = kBinRegular } // else bin = kBinRegular } - size_t current_offset = bin_slot_sizes_[bin]; // How many bytes the current bin is at (aligned). - // Move the current bin size up to accomodate the object we just assigned a bin slot. size_t offset_delta = RoundUp(object_size, kObjectAlignment); // 64-bit alignment - bin_slot_sizes_[bin] += offset_delta; + if (bin != kBinDexCacheArray) { + current_offset = bin_slot_sizes_[bin]; // How many bytes the current bin is at (aligned). + // Move the current bin size up to accomodate the object we just assigned a bin slot. + bin_slot_sizes_[bin] += offset_delta; + } BinSlot new_bin_slot(bin, current_offset); SetImageBinSlot(object, new_bin_slot); @@ -887,8 +924,17 @@ void ImageWriter::CalculateNewObjectOffsets() { // TODO: Image spaces only? DCHECK_LT(image_end_, image_->Size()); image_objects_offset_begin_ = image_end_; + // Prepare bin slots for dex cache arrays. + PrepareDexCacheArraySlots(); // Clear any pre-existing monitors which may have been in the monitor words, assign bin slots. heap->VisitObjects(WalkFieldsCallback, this); + // Calculate cumulative bin slot sizes. 
+ size_t previous_sizes = 0u; + for (size_t i = 0; i != kBinSize; ++i) { + bin_slot_previous_sizes_[i] = previous_sizes; + previous_sizes += bin_slot_sizes_[i]; + } + DCHECK_EQ(previous_sizes, GetBinSizeSum()); // Transform each object's bin slot into an offset which will be used to do the final copy. heap->VisitObjects(UnbinObjectsIntoOffsetCallback, this); DCHECK(saved_hashes_map_.empty()); // All binslot hashes should've been put into vector by now. @@ -1187,8 +1233,8 @@ size_t ImageWriter::GetBinSizeSum(ImageWriter::Bin up_to) const { ImageWriter::BinSlot::BinSlot(uint32_t lockword) : lockword_(lockword) { // These values may need to get updated if more bins are added to the enum Bin - static_assert(kBinBits == 3, "wrong number of bin bits"); - static_assert(kBinShift == 29, "wrong number of shift"); + static_assert(kBinBits == 4, "wrong number of bin bits"); + static_assert(kBinShift == 28, "wrong number of shift"); static_assert(sizeof(BinSlot) == sizeof(LockWord), "BinSlot/LockWord must have equal sizes"); DCHECK_LT(GetBin(), kBinSize); diff --git a/compiler/image_writer.h b/compiler/image_writer.h index 53f5ce4545..71044f7b6e 100644 --- a/compiler/image_writer.h +++ b/compiler/image_writer.h @@ -52,7 +52,8 @@ class ImageWriter FINAL { quick_imt_conflict_trampoline_offset_(0), quick_resolution_trampoline_offset_(0), quick_to_interpreter_bridge_offset_(0), compile_pic_(compile_pic), target_ptr_size_(InstructionSetPointerSize(compiler_driver_.GetInstructionSet())), - bin_slot_sizes_(), bin_slot_count_() { + bin_slot_sizes_(), bin_slot_previous_sizes_(), bin_slot_count_(), + string_data_array_(nullptr) { CHECK_NE(image_begin, 0U); } @@ -80,6 +81,14 @@ class ImageWriter FINAL { return reinterpret_cast<mirror::Object*>(image_begin_ + GetImageOffset(object)); } + mirror::HeapReference<mirror::Object>* GetDexCacheArrayElementImageAddress( + const DexFile* dex_file, uint32_t offset) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + auto it = dex_cache_array_starts_.find(dex_file); + DCHECK(it != dex_cache_array_starts_.end()); + return reinterpret_cast<mirror::HeapReference<mirror::Object>*>( + image_begin_ + RoundUp(sizeof(ImageHeader), kObjectAlignment) + it->second + offset); + } + uint8_t* GetOatFileBegin() const { return image_begin_ + RoundUp(image_end_, kPageSize); } @@ -101,6 +110,10 @@ class ImageWriter FINAL { // Classify different kinds of bins that objects end up getting packed into during image writing. enum Bin { + // Dex cache arrays have a special slot for PC-relative addressing. Since they are + // huge, and as such their dirtiness is not important for the clean/dirty separation, + // we arbitrarily keep them at the beginning. + kBinDexCacheArray, // Object arrays belonging to dex cache. // Likely-clean: kBinString, // [String] Almost always immutable (except for obj header). kBinArtMethodsManagedInitialized, // [ArtMethod] Not-native, and initialized. Unlikely to dirty @@ -113,7 +126,6 @@ class ImageWriter FINAL { kBinClassVerified, // Class verified, but initializers haven't been run kBinArtMethodNative, // Art method that is actually native kBinArtMethodNotInitialized, // Art method with a declaring class that wasn't initialized - // Don't care about other art methods since they don't dirty // Add more bins here if we add more segregation code. 
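The updated static_asserts reflect the extra bin: with kBinBits == 4 and kBinShift == 28 the bin number now occupies the top four bits of the 32-bit lock word and the byte index within the bin the low 28 bits. A self-contained illustration of that packing, consistent with the asserts above but not the actual BinSlot implementation:

#include <cstdint>

constexpr uint32_t kBinShift = 28;                       // As asserted above.
constexpr uint32_t kIndexMask = (1u << kBinShift) - 1u;  // Low 28 bits.

constexpr uint32_t PackBinSlot(uint32_t bin, uint32_t index) {
  return (bin << kBinShift) | index;  // 'index' is assumed to fit in 28 bits.
}
constexpr uint32_t BinOf(uint32_t lockword) { return lockword >> kBinShift; }
constexpr uint32_t IndexOf(uint32_t lockword) { return lockword & kIndexMask; }

// kBinDexCacheArray is the first enumerator, i.e. bin 0.
static_assert(BinOf(PackBinSlot(0u, 0x90u)) == 0u, "bin round-trips");
static_assert(IndexOf(PackBinSlot(5u, 0x90u)) == 0x90u, "index round-trips");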
kBinSize, }; @@ -157,6 +169,7 @@ class ImageWriter FINAL { SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); size_t GetImageOffset(mirror::Object* object) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + void PrepareDexCacheArraySlots() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); void AssignImageBinSlot(mirror::Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); void SetImageBinSlot(mirror::Object* object, BinSlot bin_slot) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); @@ -282,6 +295,12 @@ class ImageWriter FINAL { // Memory mapped for generating the image. std::unique_ptr<MemMap> image_; + // Indexes for dex cache arrays (objects are inside of the image so that they don't move). + SafeMap<mirror::Object*, size_t> dex_cache_array_indexes_; + + // The start offsets of the dex cache arrays. + SafeMap<const DexFile*, size_t> dex_cache_array_starts_; + // Saved hashes (objects are inside of the image so that they don't move). std::vector<std::pair<mirror::Object*, uint32_t>> saved_hashes_; @@ -309,6 +328,7 @@ class ImageWriter FINAL { // Bin slot tracking for dirty object packing size_t bin_slot_sizes_[kBinSize]; // Number of bytes in a bin + size_t bin_slot_previous_sizes_[kBinSize]; // Number of bytes in previous bins. size_t bin_slot_count_[kBinSize]; // Number of objects in a bin void* string_data_array_; // The backing for the interned strings. diff --git a/compiler/linker/arm/relative_patcher_arm_base.cc b/compiler/linker/arm/relative_patcher_arm_base.cc new file mode 100644 index 0000000000..ceace824ea --- /dev/null +++ b/compiler/linker/arm/relative_patcher_arm_base.cc @@ -0,0 +1,182 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "linker/arm/relative_patcher_arm_base.h" + +#include "compiled_method.h" +#include "oat.h" +#include "output_stream.h" + +namespace art { +namespace linker { + +uint32_t ArmBaseRelativePatcher::ReserveSpace(uint32_t offset, + const CompiledMethod* compiled_method, + MethodReference method_ref) { + return ReserveSpaceInternal(offset, compiled_method, method_ref, 0u); +} + +uint32_t ArmBaseRelativePatcher::ReserveSpaceEnd(uint32_t offset) { + // NOTE: The final thunk can be reserved from InitCodeMethodVisitor::EndClass() while it + // may be written early by WriteCodeMethodVisitor::VisitMethod() for a deduplicated chunk + // of code. To avoid any alignment discrepancies for the final chunk, we always align the + // offset after reserving of writing any chunk. 
+ uint32_t aligned_offset = CompiledMethod::AlignCode(offset, instruction_set_); + bool needs_thunk = ReserveSpaceProcessPatches(aligned_offset, MethodReference(nullptr, 0u), + aligned_offset); + if (needs_thunk) { + thunk_locations_.push_back(aligned_offset); + offset = CompiledMethod::AlignCode(aligned_offset + thunk_code_.size(), instruction_set_); + } + return offset; +} + +uint32_t ArmBaseRelativePatcher::WriteThunks(OutputStream* out, uint32_t offset) { + if (current_thunk_to_write_ == thunk_locations_.size()) { + return offset; + } + uint32_t aligned_offset = CompiledMethod::AlignCode(offset, instruction_set_); + if (UNLIKELY(aligned_offset == thunk_locations_[current_thunk_to_write_])) { + ++current_thunk_to_write_; + uint32_t aligned_code_delta = aligned_offset - offset; + if (aligned_code_delta != 0u && !WriteCodeAlignment(out, aligned_code_delta)) { + return 0u; + } + if (UNLIKELY(!WriteRelCallThunk(out, ArrayRef<const uint8_t>(thunk_code_)))) { + return 0u; + } + uint32_t thunk_end_offset = aligned_offset + thunk_code_.size(); + // Align after writing chunk, see the ReserveSpace() above. + offset = CompiledMethod::AlignCode(thunk_end_offset, instruction_set_); + aligned_code_delta = offset - thunk_end_offset; + if (aligned_code_delta != 0u && !WriteCodeAlignment(out, aligned_code_delta)) { + return 0u; + } + } + return offset; +} + +ArmBaseRelativePatcher::ArmBaseRelativePatcher(RelativePatcherTargetProvider* provider, + InstructionSet instruction_set, + std::vector<uint8_t> thunk_code, + uint32_t max_positive_displacement, + uint32_t max_negative_displacement) + : provider_(provider), instruction_set_(instruction_set), thunk_code_(thunk_code), + max_positive_displacement_(max_positive_displacement), + max_negative_displacement_(max_negative_displacement), + thunk_locations_(), current_thunk_to_write_(0u), unprocessed_patches_() { +} + +uint32_t ArmBaseRelativePatcher::ReserveSpaceInternal(uint32_t offset, + const CompiledMethod* compiled_method, + MethodReference method_ref, + uint32_t max_extra_space) { + DCHECK(compiled_method->GetQuickCode() != nullptr); + uint32_t quick_code_size = compiled_method->GetQuickCode()->size(); + uint32_t quick_code_offset = compiled_method->AlignCode(offset) + sizeof(OatQuickMethodHeader); + uint32_t next_aligned_offset = compiled_method->AlignCode(quick_code_offset + quick_code_size); + // Adjust for extra space required by the subclass. + next_aligned_offset = compiled_method->AlignCode(next_aligned_offset + max_extra_space); + // TODO: ignore unprocessed patches targeting this method if they can reach quick_code_offset. + // We need the MethodReference for that. + if (!unprocessed_patches_.empty() && + next_aligned_offset - unprocessed_patches_.front().second > max_positive_displacement_) { + bool needs_thunk = ReserveSpaceProcessPatches(quick_code_offset, method_ref, + next_aligned_offset); + if (needs_thunk) { + // A single thunk will cover all pending patches. 
+ unprocessed_patches_.clear(); + uint32_t thunk_location = compiled_method->AlignCode(offset); + thunk_locations_.push_back(thunk_location); + offset = CompiledMethod::AlignCode(thunk_location + thunk_code_.size(), instruction_set_); + } + } + for (const LinkerPatch& patch : compiled_method->GetPatches()) { + if (patch.Type() == kLinkerPatchCallRelative) { + unprocessed_patches_.emplace_back(patch.TargetMethod(), + quick_code_offset + patch.LiteralOffset()); + } + } + return offset; +} + +uint32_t ArmBaseRelativePatcher::CalculateDisplacement(uint32_t patch_offset, + uint32_t target_offset) { + // Unsigned arithmetic with its well-defined overflow behavior is just fine here. + uint32_t displacement = target_offset - patch_offset; + // NOTE: With unsigned arithmetic we do mean to use && rather than || below. + if (displacement > max_positive_displacement_ && displacement < -max_negative_displacement_) { + // Unwritten thunks have higher offsets, check if it's within range. + DCHECK(current_thunk_to_write_ == thunk_locations_.size() || + thunk_locations_[current_thunk_to_write_] > patch_offset); + if (current_thunk_to_write_ != thunk_locations_.size() && + thunk_locations_[current_thunk_to_write_] - patch_offset < max_positive_displacement_) { + displacement = thunk_locations_[current_thunk_to_write_] - patch_offset; + } else { + // We must have a previous thunk then. + DCHECK_NE(current_thunk_to_write_, 0u); + DCHECK_LT(thunk_locations_[current_thunk_to_write_ - 1], patch_offset); + displacement = thunk_locations_[current_thunk_to_write_ - 1] - patch_offset; + DCHECK(displacement >= -max_negative_displacement_); + } + } + return displacement; +} + +bool ArmBaseRelativePatcher::ReserveSpaceProcessPatches(uint32_t quick_code_offset, + MethodReference method_ref, + uint32_t next_aligned_offset) { + // Process as many patches as possible, stop only on unresolved targets or calls too far back. + while (!unprocessed_patches_.empty()) { + MethodReference patch_ref = unprocessed_patches_.front().first; + uint32_t patch_offset = unprocessed_patches_.front().second; + DCHECK(thunk_locations_.empty() || thunk_locations_.back() <= patch_offset); + if (patch_ref.dex_file == method_ref.dex_file && + patch_ref.dex_method_index == method_ref.dex_method_index) { + DCHECK_GT(quick_code_offset, patch_offset); + if (quick_code_offset - patch_offset > max_positive_displacement_) { + return true; + } + } else { + auto result = provider_->FindMethodOffset(patch_ref); + if (!result.first) { + // If still unresolved, check if we have a thunk within range. + if (thunk_locations_.empty() || + patch_offset - thunk_locations_.back() > max_negative_displacement_) { + return next_aligned_offset - patch_offset > max_positive_displacement_; + } + } else { + uint32_t target_offset = result.second - CompiledCode::CodeDelta(instruction_set_); + if (target_offset >= patch_offset) { + DCHECK_LE(target_offset - patch_offset, max_positive_displacement_); + } else { + // When calling back, check if we have a thunk that's closer than the actual target. 
+ if (!thunk_locations_.empty()) { + target_offset = std::max(target_offset, thunk_locations_.back()); + } + if (patch_offset - target_offset > max_negative_displacement_) { + return true; + } + } + } + } + unprocessed_patches_.pop_front(); + } + return false; +} + +} // namespace linker +} // namespace art diff --git a/compiler/linker/arm/relative_patcher_arm_base.h b/compiler/linker/arm/relative_patcher_arm_base.h new file mode 100644 index 0000000000..f80dd962ce --- /dev/null +++ b/compiler/linker/arm/relative_patcher_arm_base.h @@ -0,0 +1,69 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_ARM_BASE_H_ +#define ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_ARM_BASE_H_ + +#include <deque> + +#include "linker/relative_patcher.h" +#include "method_reference.h" + +namespace art { +namespace linker { + +class ArmBaseRelativePatcher : public RelativePatcher { + public: + uint32_t ReserveSpace(uint32_t offset, const CompiledMethod* compiled_method, + MethodReference method_ref) OVERRIDE; + uint32_t ReserveSpaceEnd(uint32_t offset) OVERRIDE; + uint32_t WriteThunks(OutputStream* out, uint32_t offset) OVERRIDE; + + protected: + ArmBaseRelativePatcher(RelativePatcherTargetProvider* provider, + InstructionSet instruction_set, std::vector<uint8_t> thunk_code, + uint32_t max_positive_displacement, uint32_t max_negative_displacement); + + uint32_t ReserveSpaceInternal(uint32_t offset, const CompiledMethod* compiled_method, + MethodReference method_ref, uint32_t max_extra_space); + uint32_t CalculateDisplacement(uint32_t patch_offset, uint32_t target_offset); + + private: + bool ReserveSpaceProcessPatches(uint32_t quick_code_offset, MethodReference method_ref, + uint32_t next_aligned_offset); + + RelativePatcherTargetProvider* const provider_; + const InstructionSet instruction_set_; + const std::vector<uint8_t> thunk_code_; + const uint32_t max_positive_displacement_; + const uint32_t max_negative_displacement_; + std::vector<uint32_t> thunk_locations_; + size_t current_thunk_to_write_; + + // ReserveSpace() tracks unprocessed patches. + typedef std::pair<MethodReference, uint32_t> UnprocessedPatch; + std::deque<UnprocessedPatch> unprocessed_patches_; + + friend class Arm64RelativePatcherTest; + friend class Thumb2RelativePatcherTest; + + DISALLOW_COPY_AND_ASSIGN(ArmBaseRelativePatcher); +}; + +} // namespace linker +} // namespace art + +#endif // ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_ARM_BASE_H_ diff --git a/compiler/linker/arm/relative_patcher_thumb2.cc b/compiler/linker/arm/relative_patcher_thumb2.cc new file mode 100644 index 0000000000..4267743097 --- /dev/null +++ b/compiler/linker/arm/relative_patcher_thumb2.cc @@ -0,0 +1,84 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
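CalculateDisplacement() above leans on well-defined unsigned wrap-around: for a backward reference, target_offset - patch_offset becomes a very large uint32_t, so a patch is out of range only when the displacement is both above max_positive_displacement_ and below -max_negative_displacement_ (also evaluated as uint32_t), hence the && rather than ||. A self-contained check of that reasoning using the Thumb2 limits from the header below:

#include <cstdint>

constexpr uint32_t kMaxPos = (1u << 24) - 2u + 4u;  // Thumb2 kMaxPositiveDisplacement.
constexpr uint32_t kMaxNeg = (1u << 24) - 4u;       // Thumb2 kMaxNegativeDisplacement.

constexpr bool OutOfRange(uint32_t patch_offset, uint32_t target_offset) {
  // Same unsigned arithmetic as CalculateDisplacement().
  return (target_offset - patch_offset) > kMaxPos &&
         (target_offset - patch_offset) < -kMaxNeg;
}

static_assert(!OutOfRange(0x1000u, 0x1000u + kMaxPos), "maximum forward reach is in range");
static_assert(OutOfRange(0x1000u, 0x1000u + kMaxPos + 4u), "just beyond forward reach");
static_assert(!OutOfRange(0x2000000u, 0x2000000u - kMaxNeg), "maximum backward reach is in range");
static_assert(OutOfRange(0x2000000u, 0x2000000u - kMaxNeg - 4u), "just beyond backward reach");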
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "linker/arm/relative_patcher_thumb2.h" + +#include "compiled_method.h" +#include "mirror/art_method.h" +#include "utils/arm/assembler_thumb2.h" + +namespace art { +namespace linker { + +Thumb2RelativePatcher::Thumb2RelativePatcher(RelativePatcherTargetProvider* provider) + : ArmBaseRelativePatcher(provider, kThumb2, CompileThunkCode(), + kMaxPositiveDisplacement, kMaxNegativeDisplacement) { +} + +void Thumb2RelativePatcher::PatchCall(std::vector<uint8_t>* code, uint32_t literal_offset, + uint32_t patch_offset, uint32_t target_offset) { + DCHECK_LE(literal_offset + 4u, code->size()); + DCHECK_EQ(literal_offset & 1u, 0u); + DCHECK_EQ(patch_offset & 1u, 0u); + DCHECK_EQ(target_offset & 1u, 1u); // Thumb2 mode bit. + uint32_t displacement = CalculateDisplacement(patch_offset, target_offset & ~1u); + displacement -= kPcDisplacement; // The base PC is at the end of the 4-byte patch. + DCHECK_EQ(displacement & 1u, 0u); + DCHECK((displacement >> 24) == 0u || (displacement >> 24) == 255u); // 25-bit signed. + uint32_t signbit = (displacement >> 31) & 0x1; + uint32_t i1 = (displacement >> 23) & 0x1; + uint32_t i2 = (displacement >> 22) & 0x1; + uint32_t imm10 = (displacement >> 12) & 0x03ff; + uint32_t imm11 = (displacement >> 1) & 0x07ff; + uint32_t j1 = i1 ^ (signbit ^ 1); + uint32_t j2 = i2 ^ (signbit ^ 1); + uint32_t value = (signbit << 26) | (j1 << 13) | (j2 << 11) | (imm10 << 16) | imm11; + value |= 0xf000d000; // BL + + uint8_t* addr = &(*code)[literal_offset]; + // Check that we're just overwriting an existing BL. + DCHECK_EQ(addr[1] & 0xf8, 0xf0); + DCHECK_EQ(addr[3] & 0xd0, 0xd0); + // Write the new BL. + addr[0] = (value >> 16) & 0xff; + addr[1] = (value >> 24) & 0xff; + addr[2] = (value >> 0) & 0xff; + addr[3] = (value >> 8) & 0xff; +} + +void Thumb2RelativePatcher::PatchDexCacheReference(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, + const LinkerPatch& patch ATTRIBUTE_UNUSED, + uint32_t patch_offset ATTRIBUTE_UNUSED, + uint32_t target_offset ATTRIBUTE_UNUSED) { + LOG(FATAL) << "Unexpected relative dex cache array patch."; +} + +std::vector<uint8_t> Thumb2RelativePatcher::CompileThunkCode() { + // The thunk just uses the entry point in the ArtMethod. This works even for calls + // to the generic JNI and interpreter trampolines. 
+ arm::Thumb2Assembler assembler; + assembler.LoadFromOffset( + arm::kLoadWord, arm::PC, arm::R0, + mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value()); + assembler.bkpt(0); + std::vector<uint8_t> thunk_code(assembler.CodeSize()); + MemoryRegion code(thunk_code.data(), thunk_code.size()); + assembler.FinalizeInstructions(code); + return thunk_code; +} + +} // namespace linker +} // namespace art diff --git a/compiler/linker/arm/relative_patcher_thumb2.h b/compiler/linker/arm/relative_patcher_thumb2.h new file mode 100644 index 0000000000..561130305e --- /dev/null +++ b/compiler/linker/arm/relative_patcher_thumb2.h @@ -0,0 +1,52 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_THUMB2_H_ +#define ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_THUMB2_H_ + +#include "linker/arm/relative_patcher_arm_base.h" + +namespace art { +namespace linker { + +class Thumb2RelativePatcher FINAL : public ArmBaseRelativePatcher { + public: + explicit Thumb2RelativePatcher(RelativePatcherTargetProvider* provider); + + void PatchCall(std::vector<uint8_t>* code, uint32_t literal_offset, + uint32_t patch_offset, uint32_t target_offset) OVERRIDE; + void PatchDexCacheReference(std::vector<uint8_t>* code, const LinkerPatch& patch, + uint32_t patch_offset, uint32_t target_offset) OVERRIDE; + + private: + static std::vector<uint8_t> CompileThunkCode(); + + // PC displacement from patch location; Thumb2 PC is always at instruction address + 4. + static constexpr int32_t kPcDisplacement = 4; + + // Maximum positive and negative displacement measured from the patch location. + // (Signed 25 bit displacement with the last bit 0 has range [-2^24, 2^24-2] measured from + // the Thumb2 PC pointing right after the BL, i.e. 4 bytes later than the patch location.) + static constexpr uint32_t kMaxPositiveDisplacement = (1u << 24) - 2 + kPcDisplacement; + static constexpr uint32_t kMaxNegativeDisplacement = (1u << 24) - kPcDisplacement; + + DISALLOW_COPY_AND_ASSIGN(Thumb2RelativePatcher); +}; + +} // namespace linker +} // namespace art + +#endif // ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_THUMB2_H_ diff --git a/compiler/linker/arm/relative_patcher_thumb2_test.cc b/compiler/linker/arm/relative_patcher_thumb2_test.cc new file mode 100644 index 0000000000..abdfd6d64b --- /dev/null +++ b/compiler/linker/arm/relative_patcher_thumb2_test.cc @@ -0,0 +1,289 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
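PatchCall() above rewrites an existing BL in place; the bit shuffling follows the Thumb2 BL encoding, where the 25-bit signed displacement (bit 0 always zero) is split into S, I1, I2, imm10 and imm11 with J1 = I1 ^ !S and J2 = I2 ^ !S, and the two half-words are stored little-endian. A self-contained encoder mirroring those lines, checked against the values the tests below expect:

#include <cstdint>

// Produces the 32-bit value PatchCall() writes (high half-word in the top 16 bits).
constexpr uint32_t EncodeThumb2Bl(uint32_t displacement) {
  return 0xf000d000u |                                                                 // BL opcode.
         (((displacement >> 31) & 1u) << 26) |                                         // S.
         ((((displacement >> 23) & 1u) ^ (((displacement >> 31) & 1u) ^ 1u)) << 13) |  // J1.
         ((((displacement >> 22) & 1u) ^ (((displacement >> 31) & 1u) ^ 1u)) << 11) |  // J2.
         (((displacement >> 12) & 0x03ffu) << 16) |                                    // imm10.
         ((displacement >> 1) & 0x07ffu);                                              // imm11.
}

// Displacement 0 (target right after the BL, since the base PC is BL + 4) gives
// 0xf000f800, i.e. bytes 00 f0 00 f8 with each half-word stored little-endian.
static_assert(EncodeThumb2Bl(0u) == 0xf000f800u, "BL +0");
// Displacement -4 points the BL back at itself: 0xf7fffffe, matching the
// CallSelf expectation ff f7 fe ff.
static_assert(EncodeThumb2Bl(static_cast<uint32_t>(-4)) == 0xf7fffffeu, "BL to self");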
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "linker/relative_patcher_test.h" +#include "linker/arm/relative_patcher_thumb2.h" + +namespace art { +namespace linker { + +class Thumb2RelativePatcherTest : public RelativePatcherTest { + public: + Thumb2RelativePatcherTest() : RelativePatcherTest(kThumb2, "default") { } + + protected: + static const uint8_t kCallRawCode[]; + static const ArrayRef<const uint8_t> kCallCode; + static const uint8_t kNopRawCode[]; + static const ArrayRef<const uint8_t> kNopCode; + + // Branches within range [-256, 256) can be created from these by adding the low 8 bits. + static constexpr uint32_t kBlPlus0 = 0xf000f800; + static constexpr uint32_t kBlMinus256 = 0xf7ffff00; + + // Special BL values. + static constexpr uint32_t kBlPlusMax = 0xf3ffd7ff; + static constexpr uint32_t kBlMinusMax = 0xf400d000; + + bool Create2MethodsWithGap(const ArrayRef<const uint8_t>& method1_code, + const ArrayRef<LinkerPatch>& method1_patches, + const ArrayRef<const uint8_t>& method3_code, + const ArrayRef<LinkerPatch>& method3_patches, + uint32_t distance_without_thunks) { + CHECK_EQ(distance_without_thunks % kArmAlignment, 0u); + const uint32_t method1_offset = + CompiledCode::AlignCode(kTrampolineSize, kThumb2) + sizeof(OatQuickMethodHeader); + AddCompiledMethod(MethodRef(1u), method1_code, ArrayRef<LinkerPatch>(method1_patches)); + + // We want to put the method3 at a very precise offset. + const uint32_t method3_offset = method1_offset + distance_without_thunks; + CHECK(IsAligned<kArmAlignment>(method3_offset - sizeof(OatQuickMethodHeader))); + + // Calculate size of method2 so that we put method3 at the correct place. + const uint32_t method2_offset = + CompiledCode::AlignCode(method1_offset + method1_code.size(), kThumb2) + + sizeof(OatQuickMethodHeader); + const uint32_t method2_size = (method3_offset - sizeof(OatQuickMethodHeader) - method2_offset); + std::vector<uint8_t> method2_raw_code(method2_size); + ArrayRef<const uint8_t> method2_code(method2_raw_code); + AddCompiledMethod(MethodRef(2u), method2_code, ArrayRef<LinkerPatch>()); + + AddCompiledMethod(MethodRef(3u), method3_code, method3_patches); + + Link(); + + // Check assumptions. + CHECK_EQ(GetMethodOffset(1), method1_offset); + CHECK_EQ(GetMethodOffset(2), method2_offset); + auto result3 = method_offset_map_.FindMethodOffset(MethodRef(3)); + CHECK(result3.first); + // There may be a thunk before method2. + if (result3.second == method3_offset + 1 /* thumb mode */) { + return false; // No thunk. + } else { + uint32_t aligned_thunk_size = CompiledCode::AlignCode(ThunkSize(), kThumb2); + CHECK_EQ(result3.second, method3_offset + aligned_thunk_size + 1 /* thumb mode */); + return true; // Thunk present. 
+ } + } + + uint32_t GetMethodOffset(uint32_t method_idx) { + auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx)); + CHECK(result.first); + CHECK_NE(result.second & 1u, 0u); + return result.second - 1 /* thumb mode */; + } + + uint32_t ThunkSize() { + return static_cast<Thumb2RelativePatcher*>(patcher_.get())->thunk_code_.size(); + } + + bool CheckThunk(uint32_t thunk_offset) { + Thumb2RelativePatcher* patcher = static_cast<Thumb2RelativePatcher*>(patcher_.get()); + ArrayRef<const uint8_t> expected_code(patcher->thunk_code_); + if (output_.size() < thunk_offset + expected_code.size()) { + LOG(ERROR) << "output_.size() == " << output_.size() << " < " + << "thunk_offset + expected_code.size() == " << (thunk_offset + expected_code.size()); + return false; + } + ArrayRef<const uint8_t> linked_code(&output_[thunk_offset], expected_code.size()); + if (linked_code == expected_code) { + return true; + } + // Log failure info. + DumpDiff(expected_code, linked_code); + return false; + } + + std::vector<uint8_t> GenNopsAndBl(size_t num_nops, uint32_t bl) { + std::vector<uint8_t> result; + result.reserve(num_nops * 2u + 4u); + for (size_t i = 0; i != num_nops; ++i) { + result.push_back(0x00); + result.push_back(0xbf); + } + result.push_back(static_cast<uint8_t>(bl >> 16)); + result.push_back(static_cast<uint8_t>(bl >> 24)); + result.push_back(static_cast<uint8_t>(bl)); + result.push_back(static_cast<uint8_t>(bl >> 8)); + return result; + } +}; + +const uint8_t Thumb2RelativePatcherTest::kCallRawCode[] = { + 0x00, 0xf0, 0x00, 0xf8 +}; + +const ArrayRef<const uint8_t> Thumb2RelativePatcherTest::kCallCode(kCallRawCode); + +const uint8_t Thumb2RelativePatcherTest::kNopRawCode[] = { + 0x00, 0xbf +}; + +const ArrayRef<const uint8_t> Thumb2RelativePatcherTest::kNopCode(kNopRawCode); + +TEST_F(Thumb2RelativePatcherTest, CallSelf) { + LinkerPatch patches[] = { + LinkerPatch::RelativeCodePatch(0u, nullptr, 1u), + }; + AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<LinkerPatch>(patches)); + Link(); + + static const uint8_t expected_code[] = { + 0xff, 0xf7, 0xfe, 0xff + }; + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); +} + +TEST_F(Thumb2RelativePatcherTest, CallOther) { + LinkerPatch method1_patches[] = { + LinkerPatch::RelativeCodePatch(0u, nullptr, 2u), + }; + AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<LinkerPatch>(method1_patches)); + LinkerPatch method2_patches[] = { + LinkerPatch::RelativeCodePatch(0u, nullptr, 1u), + }; + AddCompiledMethod(MethodRef(2u), kCallCode, ArrayRef<LinkerPatch>(method2_patches)); + Link(); + + uint32_t method1_offset = GetMethodOffset(1u); + uint32_t method2_offset = GetMethodOffset(2u); + uint32_t diff_after = method2_offset - (method1_offset + 4u /* PC adjustment */); + ASSERT_EQ(diff_after & 1u, 0u); + ASSERT_LT(diff_after >> 1, 1u << 8); // Simple encoding, (diff_after >> 1) fits into 8 bits. + static const uint8_t method1_expected_code[] = { + 0x00, 0xf0, static_cast<uint8_t>(diff_after >> 1), 0xf8 + }; + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(method1_expected_code))); + uint32_t diff_before = method1_offset - (method2_offset + 4u /* PC adjustment */); + ASSERT_EQ(diff_before & 1u, 0u); + ASSERT_GE(diff_before, -1u << 9); // Simple encoding, -256 <= (diff >> 1) < 0. 
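// Illustrative aside (hypothetical helper, not part of this patch): GenNopsAndBl() above emits
// the BL bytes in the order 16, 24, 0, 8 because a 32-bit Thumb2 instruction is stored as two
// little-endian 16-bit halfwords, with the first halfword held in the high 16 bits of the value.
static void AppendThumb2Insn32(std::vector<uint8_t>* out, uint32_t insn) {
  out->push_back(static_cast<uint8_t>(insn >> 16));  // Low byte of the first halfword.
  out->push_back(static_cast<uint8_t>(insn >> 24));  // High byte of the first halfword.
  out->push_back(static_cast<uint8_t>(insn >> 0));   // Low byte of the second halfword.
  out->push_back(static_cast<uint8_t>(insn >> 8));   // High byte of the second halfword.
}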
+ auto method2_expected_code = GenNopsAndBl(0u, kBlMinus256 | ((diff_before >> 1) & 0xffu));
+ EXPECT_TRUE(CheckLinkedMethod(MethodRef(2u), ArrayRef<const uint8_t>(method2_expected_code)));
+}
+
+TEST_F(Thumb2RelativePatcherTest, CallTrampoline) {
+ LinkerPatch patches[] = {
+ LinkerPatch::RelativeCodePatch(0u, nullptr, 2u),
+ };
+ AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<LinkerPatch>(patches));
+ Link();
+
+ uint32_t method1_offset = GetMethodOffset(1u);
+ uint32_t diff = kTrampolineOffset - (method1_offset + 4u);
+ ASSERT_EQ(diff & 1u, 0u);
+ ASSERT_GE(diff, -1u << 9); // Simple encoding, -256 <= (diff >> 1) < 0 (checked as unsigned).
+ auto expected_code = GenNopsAndBl(0u, kBlMinus256 | ((diff >> 1) & 0xffu));
+ EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
+}
+
+TEST_F(Thumb2RelativePatcherTest, CallOtherAlmostTooFarAfter) {
+ auto method1_raw_code = GenNopsAndBl(3u, kBlPlus0);
+ constexpr uint32_t bl_offset_in_method1 = 3u * 2u; // After NOPs.
+ ArrayRef<const uint8_t> method1_code(method1_raw_code);
+ ASSERT_EQ(bl_offset_in_method1 + 4u, method1_code.size());
+ LinkerPatch method1_patches[] = {
+ LinkerPatch::RelativeCodePatch(bl_offset_in_method1, nullptr, 3u),
+ };
+
+ constexpr uint32_t max_positive_disp = 16 * MB - 2u + 4u /* PC adjustment */;
+ bool thunk_in_gap = Create2MethodsWithGap(method1_code, method1_patches,
+ kNopCode, ArrayRef<LinkerPatch>(),
+ bl_offset_in_method1 + max_positive_disp);
+ ASSERT_FALSE(thunk_in_gap); // There should be no thunk.
+
+ // Check linked code.
+ auto expected_code = GenNopsAndBl(3u, kBlPlusMax);
+ EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
+}
+
+TEST_F(Thumb2RelativePatcherTest, CallOtherAlmostTooFarBefore) {
+ auto method3_raw_code = GenNopsAndBl(2u, kBlPlus0);
+ constexpr uint32_t bl_offset_in_method3 = 2u * 2u; // After NOPs.
+ ArrayRef<const uint8_t> method3_code(method3_raw_code);
+ ASSERT_EQ(bl_offset_in_method3 + 4u, method3_code.size());
+ LinkerPatch method3_patches[] = {
+ LinkerPatch::RelativeCodePatch(bl_offset_in_method3, nullptr, 1u),
+ };
+
+ constexpr uint32_t max_negative_disp = 16 * MB - 4u /* PC adjustment */;
+ bool thunk_in_gap = Create2MethodsWithGap(kNopCode, ArrayRef<LinkerPatch>(),
+ method3_code, method3_patches,
+ max_negative_disp - bl_offset_in_method3);
+ ASSERT_FALSE(thunk_in_gap); // There should be no thunk.
+
+ // Check linked code.
+ auto expected_code = GenNopsAndBl(2u, kBlMinusMax);
+ EXPECT_TRUE(CheckLinkedMethod(MethodRef(3u), ArrayRef<const uint8_t>(expected_code)));
+}
+
+TEST_F(Thumb2RelativePatcherTest, CallOtherJustTooFarAfter) {
+ auto method1_raw_code = GenNopsAndBl(2u, kBlPlus0);
+ constexpr uint32_t bl_offset_in_method1 = 2u * 2u; // After NOPs.
+ ArrayRef<const uint8_t> method1_code(method1_raw_code); + ASSERT_EQ(bl_offset_in_method1 + 4u, method1_code.size()); + LinkerPatch method1_patches[] = { + LinkerPatch::RelativeCodePatch(bl_offset_in_method1, nullptr, 3u), + }; + + constexpr uint32_t just_over_max_positive_disp = 16 * MB + 4u /* PC adjustment */; + bool thunk_in_gap = Create2MethodsWithGap(method1_code, method1_patches, + kNopCode, ArrayRef<LinkerPatch>(), + bl_offset_in_method1 + just_over_max_positive_disp); + ASSERT_TRUE(thunk_in_gap); + + uint32_t method1_offset = GetMethodOffset(1u); + uint32_t method3_offset = GetMethodOffset(3u); + uint32_t method3_header_offset = method3_offset - sizeof(OatQuickMethodHeader); + ASSERT_TRUE(IsAligned<kArmAlignment>(method3_header_offset)); + uint32_t thunk_offset = method3_header_offset - CompiledCode::AlignCode(ThunkSize(), kThumb2); + ASSERT_TRUE(IsAligned<kArmAlignment>(thunk_offset)); + uint32_t diff = thunk_offset - (method1_offset + bl_offset_in_method1 + 4u /* PC adjustment */); + ASSERT_EQ(diff & 1u, 0u); + ASSERT_GE(diff, 16 * MB - (1u << 9)); // Simple encoding, unknown bits fit into the low 8 bits. + auto expected_code = GenNopsAndBl(2u, 0xf3ffd700 | ((diff >> 1) & 0xffu)); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); + CheckThunk(thunk_offset); +} + +TEST_F(Thumb2RelativePatcherTest, CallOtherJustTooFarBefore) { + auto method3_raw_code = GenNopsAndBl(3u, kBlPlus0); + constexpr uint32_t bl_offset_in_method3 = 3u * 2u; // After NOPs. + ArrayRef<const uint8_t> method3_code(method3_raw_code); + ASSERT_EQ(bl_offset_in_method3 + 4u, method3_code.size()); + LinkerPatch method3_patches[] = { + LinkerPatch::RelativeCodePatch(bl_offset_in_method3, nullptr, 1u), + }; + + constexpr uint32_t just_over_max_negative_disp = 16 * MB + 2 - 4u /* PC adjustment */; + bool thunk_in_gap = Create2MethodsWithGap(kNopCode, ArrayRef<LinkerPatch>(), + method3_code, method3_patches, + just_over_max_negative_disp - bl_offset_in_method3); + ASSERT_FALSE(thunk_in_gap); // There should be a thunk but it should be after the method2. + + // Check linked code. + uint32_t method3_offset = GetMethodOffset(3u); + uint32_t thunk_offset = CompiledCode::AlignCode(method3_offset + method3_code.size(), kThumb2); + uint32_t diff = thunk_offset - (method3_offset + bl_offset_in_method3 + 4u /* PC adjustment */); + ASSERT_EQ(diff & 1u, 0u); + ASSERT_LT(diff >> 1, 1u << 8); // Simple encoding, (diff >> 1) fits into 8 bits. + auto expected_code = GenNopsAndBl(3u, kBlPlus0 | ((diff >> 1) & 0xffu)); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(3u), ArrayRef<const uint8_t>(expected_code))); + EXPECT_TRUE(CheckThunk(thunk_offset)); +} + +} // namespace linker +} // namespace art diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc new file mode 100644 index 0000000000..1cbe481321 --- /dev/null +++ b/compiler/linker/arm64/relative_patcher_arm64.cc @@ -0,0 +1,298 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "linker/arm64/relative_patcher_arm64.h" + +#include "arch/arm64/instruction_set_features_arm64.h" +#include "compiled_method.h" +#include "driver/compiler_driver.h" +#include "mirror/art_method.h" +#include "utils/arm64/assembler_arm64.h" +#include "oat.h" +#include "output_stream.h" + +namespace art { +namespace linker { + +Arm64RelativePatcher::Arm64RelativePatcher(RelativePatcherTargetProvider* provider, + const Arm64InstructionSetFeatures* features) + : ArmBaseRelativePatcher(provider, kArm64, CompileThunkCode(), + kMaxPositiveDisplacement, kMaxNegativeDisplacement), + fix_cortex_a53_843419_(features->NeedFixCortexA53_843419()), + reserved_adrp_thunks_(0u), + processed_adrp_thunks_(0u) { + if (fix_cortex_a53_843419_) { + adrp_thunk_locations_.reserve(16u); + current_method_thunks_.reserve(16u * kAdrpThunkSize); + } +} + +uint32_t Arm64RelativePatcher::ReserveSpace(uint32_t offset, + const CompiledMethod* compiled_method, + MethodReference method_ref) { + if (!fix_cortex_a53_843419_) { + DCHECK(adrp_thunk_locations_.empty()); + return ReserveSpaceInternal(offset, compiled_method, method_ref, 0u); + } + + // Add thunks for previous method if any. + if (reserved_adrp_thunks_ != adrp_thunk_locations_.size()) { + size_t num_adrp_thunks = adrp_thunk_locations_.size() - reserved_adrp_thunks_; + offset = CompiledMethod::AlignCode(offset, kArm64) + kAdrpThunkSize * num_adrp_thunks; + reserved_adrp_thunks_ = adrp_thunk_locations_.size(); + } + + // Count the number of ADRP insns as the upper bound on the number of thunks needed + // and use it to reserve space for other linker patches. + size_t num_adrp = 0u; + DCHECK(compiled_method != nullptr); + for (const LinkerPatch& patch : compiled_method->GetPatches()) { + if (patch.Type() == kLinkerPatchDexCacheArray && + patch.LiteralOffset() == patch.PcInsnOffset()) { // ADRP patch + ++num_adrp; + } + } + offset = ReserveSpaceInternal(offset, compiled_method, method_ref, kAdrpThunkSize * num_adrp); + if (num_adrp == 0u) { + return offset; + } + + // Now that we have the actual offset where the code will be placed, locate the ADRP insns + // that actually require the thunk. + uint32_t quick_code_offset = compiled_method->AlignCode(offset) + sizeof(OatQuickMethodHeader); + ArrayRef<const uint8_t> code(*compiled_method->GetQuickCode()); + uint32_t thunk_offset = compiled_method->AlignCode(quick_code_offset + code.size()); + DCHECK(compiled_method != nullptr); + for (const LinkerPatch& patch : compiled_method->GetPatches()) { + if (patch.Type() == kLinkerPatchDexCacheArray && + patch.LiteralOffset() == patch.PcInsnOffset()) { // ADRP patch + uint32_t patch_offset = quick_code_offset + patch.LiteralOffset(); + if (NeedsErratum843419Thunk(code, patch.LiteralOffset(), patch_offset)) { + adrp_thunk_locations_.emplace_back(patch_offset, thunk_offset); + thunk_offset += kAdrpThunkSize; + } + } + } + return offset; +} + +uint32_t Arm64RelativePatcher::ReserveSpaceEnd(uint32_t offset) { + if (!fix_cortex_a53_843419_) { + DCHECK(adrp_thunk_locations_.empty()); + } else { + // Add thunks for the last method if any. 
+ if (reserved_adrp_thunks_ != adrp_thunk_locations_.size()) { + size_t num_adrp_thunks = adrp_thunk_locations_.size() - reserved_adrp_thunks_; + offset = CompiledMethod::AlignCode(offset, kArm64) + kAdrpThunkSize * num_adrp_thunks; + reserved_adrp_thunks_ = adrp_thunk_locations_.size(); + } + } + return ArmBaseRelativePatcher::ReserveSpaceEnd(offset); +} + +uint32_t Arm64RelativePatcher::WriteThunks(OutputStream* out, uint32_t offset) { + if (fix_cortex_a53_843419_) { + if (!current_method_thunks_.empty()) { + uint32_t aligned_offset = CompiledMethod::AlignCode(offset, kArm64); + if (kIsDebugBuild) { + CHECK(IsAligned<kAdrpThunkSize>(current_method_thunks_.size())); + size_t num_thunks = current_method_thunks_.size() / kAdrpThunkSize; + CHECK_LE(num_thunks, processed_adrp_thunks_); + for (size_t i = 0u; i != num_thunks; ++i) { + const auto& entry = adrp_thunk_locations_[processed_adrp_thunks_ - num_thunks + i]; + CHECK_EQ(entry.second, aligned_offset + i * kAdrpThunkSize); + } + } + uint32_t aligned_code_delta = aligned_offset - offset; + if (aligned_code_delta != 0u && !WriteCodeAlignment(out, aligned_code_delta)) { + return 0u; + } + if (!WriteMiscThunk(out, ArrayRef<const uint8_t>(current_method_thunks_))) { + return 0u; + } + offset = aligned_offset + current_method_thunks_.size(); + current_method_thunks_.clear(); + } + } + return ArmBaseRelativePatcher::WriteThunks(out, offset); +} + +void Arm64RelativePatcher::PatchCall(std::vector<uint8_t>* code, uint32_t literal_offset, + uint32_t patch_offset, uint32_t target_offset) { + DCHECK_LE(literal_offset + 4u, code->size()); + DCHECK_EQ(literal_offset & 3u, 0u); + DCHECK_EQ(patch_offset & 3u, 0u); + DCHECK_EQ(target_offset & 3u, 0u); + uint32_t displacement = CalculateDisplacement(patch_offset, target_offset & ~1u); + DCHECK_EQ(displacement & 3u, 0u); + DCHECK((displacement >> 27) == 0u || (displacement >> 27) == 31u); // 28-bit signed. + uint32_t insn = (displacement & 0x0fffffffu) >> 2; + insn |= 0x94000000; // BL + + // Check that we're just overwriting an existing BL. + DCHECK_EQ(GetInsn(code, literal_offset) & 0xfc000000u, 0x94000000u); + // Write the new BL. + SetInsn(code, literal_offset, insn); +} + +void Arm64RelativePatcher::PatchDexCacheReference(std::vector<uint8_t>* code, + const LinkerPatch& patch, + uint32_t patch_offset, + uint32_t target_offset) { + DCHECK_EQ(patch_offset & 3u, 0u); + DCHECK_EQ(target_offset & 3u, 0u); + uint32_t literal_offset = patch.LiteralOffset(); + uint32_t insn = GetInsn(code, literal_offset); + uint32_t pc_insn_offset = patch.PcInsnOffset(); + uint32_t disp = target_offset - ((patch_offset - literal_offset + pc_insn_offset) & ~0xfffu); + if (literal_offset == pc_insn_offset) { + // Check it's an ADRP with imm == 0 (unset). + DCHECK_EQ((insn & 0xffffffe0u), 0x90000000u) + << literal_offset << ", " << pc_insn_offset << ", 0x" << std::hex << insn; + if (fix_cortex_a53_843419_ && processed_adrp_thunks_ != adrp_thunk_locations_.size() && + adrp_thunk_locations_[processed_adrp_thunks_].first == patch_offset) { + DCHECK(NeedsErratum843419Thunk(ArrayRef<const uint8_t>(*code), + literal_offset, patch_offset)); + uint32_t thunk_offset = adrp_thunk_locations_[processed_adrp_thunks_].second; + uint32_t adrp_disp = target_offset - (thunk_offset & ~0xfffu); + uint32_t adrp = PatchAdrp(insn, adrp_disp); + + uint32_t out_disp = thunk_offset - patch_offset; + DCHECK_EQ(out_disp & 3u, 0u); + DCHECK((out_disp >> 27) == 0u || (out_disp >> 27) == 31u); // 28-bit signed. 
+ insn = (out_disp & 0x0fffffffu) >> 2;
+ insn |= 0x14000000; // B <thunk>
+
+ uint32_t back_disp = -out_disp;
+ DCHECK_EQ(back_disp & 3u, 0u);
+ DCHECK((back_disp >> 27) == 0u || (back_disp >> 27) == 31u); // 28-bit signed.
+ uint32_t b_back = (back_disp & 0x0fffffffu) >> 2;
+ b_back |= 0x14000000; // B <back>
+ size_t thunks_code_offset = current_method_thunks_.size();
+ current_method_thunks_.resize(thunks_code_offset + kAdrpThunkSize);
+ SetInsn(&current_method_thunks_, thunks_code_offset, adrp);
+ SetInsn(&current_method_thunks_, thunks_code_offset + 4u, b_back);
+ static_assert(kAdrpThunkSize == 2 * 4u, "thunk has 2 instructions");
+
+ processed_adrp_thunks_ += 1u;
+ } else {
+ insn = PatchAdrp(insn, disp);
+ }
+ // Write the new ADRP (or B to the erratum 843419 thunk).
+ SetInsn(code, literal_offset, insn);
+ } else {
+ DCHECK_EQ(insn & 0xfffffc00, 0xb9400000); // LDR 32-bit with imm12 == 0 (unset).
+ if (kIsDebugBuild) {
+ uint32_t adrp = GetInsn(code, pc_insn_offset);
+ if ((adrp & 0x9f000000u) != 0x90000000u) {
+ CHECK(fix_cortex_a53_843419_);
+ CHECK_EQ(adrp & 0xfc000000u, 0x14000000u); // B <thunk>
+ CHECK(IsAligned<kAdrpThunkSize>(current_method_thunks_.size()));
+ size_t num_thunks = current_method_thunks_.size() / kAdrpThunkSize;
+ CHECK_LE(num_thunks, processed_adrp_thunks_);
+ uint32_t b_offset = patch_offset - literal_offset + pc_insn_offset;
+ for (size_t i = processed_adrp_thunks_ - num_thunks; ; ++i) {
+ CHECK_NE(i, processed_adrp_thunks_);
+ if (adrp_thunk_locations_[i].first == b_offset) {
+ size_t idx = num_thunks - (processed_adrp_thunks_ - i);
+ adrp = GetInsn(&current_method_thunks_, idx * kAdrpThunkSize);
+ break;
+ }
+ }
+ }
+ CHECK_EQ(adrp & 0x9f00001fu, // Check that pc_insn_offset points
+ 0x90000000 | ((insn >> 5) & 0x1fu)); // to ADRP with matching register.
+ }
+ uint32_t imm12 = (disp & 0xfffu) >> 2;
+ insn = (insn & ~(0xfffu << 10)) | (imm12 << 10);
+ SetInsn(code, literal_offset, insn);
+ }
+}
+
+std::vector<uint8_t> Arm64RelativePatcher::CompileThunkCode() {
+ // The thunk just uses the entry point in the ArtMethod. This works even for calls
+ // to the generic JNI and interpreter trampolines.
+ arm64::Arm64Assembler assembler;
+ Offset offset(mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+ kArm64PointerSize).Int32Value());
+ assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0));
+ // Ensure we emit the literal pool.
+ assembler.EmitSlowPaths();
+ std::vector<uint8_t> thunk_code(assembler.CodeSize());
+ MemoryRegion code(thunk_code.data(), thunk_code.size());
+ assembler.FinalizeInstructions(code);
+ return thunk_code;
+}
+
+uint32_t Arm64RelativePatcher::PatchAdrp(uint32_t adrp, uint32_t disp) {
+ return (adrp & 0x9f00001fu) | // Clear offset bits, keep ADRP with destination reg.
+ // Bottom 12 bits are ignored, the next 2 lowest bits are encoded in bits 29-30.
+ ((disp & 0x00003000u) << (29 - 12)) |
+ // The next 16 bits are encoded in bits 5-22.
+ ((disp & 0xffffc000u) >> (12 + 2 - 5)) |
+ // Since the target_offset is based on the beginning of the oat file and the
+ // image space precedes the oat file, the target_offset into image space will
+ // be negative yet passed as uint32_t. Therefore we limit the displacement
+ // to +-2GiB (rather than the maximum +-4GiB) and determine the sign bit from
+ // the highest bit of the displacement. This is encoded in bit 23.
+ ((disp & 0x80000000u) >> (31 - 23)); +} + +bool Arm64RelativePatcher::NeedsErratum843419Thunk(ArrayRef<const uint8_t> code, + uint32_t literal_offset, + uint32_t patch_offset) { + DCHECK_EQ(patch_offset & 0x3u, 0u); + if ((patch_offset & 0xff8) == 0xff8) { // ...ff8 or ...ffc + uint32_t adrp = GetInsn(code, literal_offset); + DCHECK_EQ(adrp & 0xff000000, 0x90000000); + // TODO: Improve the check. For now, we're just checking if the next insn is + // the LDR using the result of the ADRP, otherwise we implement the workaround. + uint32_t next_insn = GetInsn(code, literal_offset + 4u); + bool ok = (next_insn & 0xffc00000) == 0xb9400000 && // LDR <Wt>, [<Xn>, #pimm] + (((next_insn >> 5) ^ adrp) & 0x1f) == 0; // <Xn> == ADRP destination reg + return !ok; + } + return false; +} + +void Arm64RelativePatcher::SetInsn(std::vector<uint8_t>* code, uint32_t offset, uint32_t value) { + DCHECK_LE(offset + 4u, code->size()); + DCHECK_EQ(offset & 3u, 0u); + uint8_t* addr = &(*code)[offset]; + addr[0] = (value >> 0) & 0xff; + addr[1] = (value >> 8) & 0xff; + addr[2] = (value >> 16) & 0xff; + addr[3] = (value >> 24) & 0xff; +} + +uint32_t Arm64RelativePatcher::GetInsn(ArrayRef<const uint8_t> code, uint32_t offset) { + DCHECK_LE(offset + 4u, code.size()); + DCHECK_EQ(offset & 3u, 0u); + const uint8_t* addr = &code[offset]; + return + (static_cast<uint32_t>(addr[0]) << 0) + + (static_cast<uint32_t>(addr[1]) << 8) + + (static_cast<uint32_t>(addr[2]) << 16)+ + (static_cast<uint32_t>(addr[3]) << 24); +} + +template <typename Alloc> +uint32_t Arm64RelativePatcher::GetInsn(std::vector<uint8_t, Alloc>* code, uint32_t offset) { + return GetInsn(ArrayRef<const uint8_t>(*code), offset); +} + +} // namespace linker +} // namespace art diff --git a/compiler/linker/arm64/relative_patcher_arm64.h b/compiler/linker/arm64/relative_patcher_arm64.h new file mode 100644 index 0000000000..2d07e75c85 --- /dev/null +++ b/compiler/linker/arm64/relative_patcher_arm64.h @@ -0,0 +1,74 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_LINKER_ARM64_RELATIVE_PATCHER_ARM64_H_ +#define ART_COMPILER_LINKER_ARM64_RELATIVE_PATCHER_ARM64_H_ + +#include "linker/arm/relative_patcher_arm_base.h" +#include "utils/array_ref.h" + +namespace art { +namespace linker { + +class Arm64RelativePatcher FINAL : public ArmBaseRelativePatcher { + public: + Arm64RelativePatcher(RelativePatcherTargetProvider* provider, + const Arm64InstructionSetFeatures* features); + + uint32_t ReserveSpace(uint32_t offset, const CompiledMethod* compiled_method, + MethodReference method_ref) OVERRIDE; + uint32_t ReserveSpaceEnd(uint32_t offset) OVERRIDE; + uint32_t WriteThunks(OutputStream* out, uint32_t offset) OVERRIDE; + void PatchCall(std::vector<uint8_t>* code, uint32_t literal_offset, + uint32_t patch_offset, uint32_t target_offset) OVERRIDE; + void PatchDexCacheReference(std::vector<uint8_t>* code, const LinkerPatch& patch, + uint32_t patch_offset, uint32_t target_offset) OVERRIDE; + + private: + static std::vector<uint8_t> CompileThunkCode(); + static uint32_t PatchAdrp(uint32_t adrp, uint32_t disp); + + static bool NeedsErratum843419Thunk(ArrayRef<const uint8_t> code, uint32_t literal_offset, + uint32_t patch_offset); + void SetInsn(std::vector<uint8_t>* code, uint32_t offset, uint32_t value); + static uint32_t GetInsn(ArrayRef<const uint8_t> code, uint32_t offset); + + template <typename Alloc> + static uint32_t GetInsn(std::vector<uint8_t, Alloc>* code, uint32_t offset); + + // Maximum positive and negative displacement measured from the patch location. + // (Signed 28 bit displacement with the last bit 0 has range [-2^27, 2^27-4] measured from + // the ARM64 PC pointing to the BL.) + static constexpr uint32_t kMaxPositiveDisplacement = (1u << 27) - 4u; + static constexpr uint32_t kMaxNegativeDisplacement = (1u << 27); + + // The ADRP thunk for erratum 843419 is 2 instructions, i.e. 8 bytes. + static constexpr uint32_t kAdrpThunkSize = 8u; + + const bool fix_cortex_a53_843419_; + // Map original patch_offset to thunk offset. + std::vector<std::pair<uint32_t, uint32_t>> adrp_thunk_locations_; + size_t reserved_adrp_thunks_; + size_t processed_adrp_thunks_; + std::vector<uint8_t> current_method_thunks_; + + DISALLOW_COPY_AND_ASSIGN(Arm64RelativePatcher); +}; + +} // namespace linker +} // namespace art + +#endif // ART_COMPILER_LINKER_ARM64_RELATIVE_PATCHER_ARM64_H_ diff --git a/compiler/linker/arm64/relative_patcher_arm64_test.cc b/compiler/linker/arm64/relative_patcher_arm64_test.cc new file mode 100644 index 0000000000..b0399369a6 --- /dev/null +++ b/compiler/linker/arm64/relative_patcher_arm64_test.cc @@ -0,0 +1,511 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "linker/relative_patcher_test.h" +#include "linker/arm64/relative_patcher_arm64.h" + +namespace art { +namespace linker { + +class Arm64RelativePatcherTest : public RelativePatcherTest { + public: + explicit Arm64RelativePatcherTest(const std::string& variant) + : RelativePatcherTest(kArm64, variant) { } + + protected: + static const uint8_t kCallRawCode[]; + static const ArrayRef<const uint8_t> kCallCode; + static const uint8_t kNopRawCode[]; + static const ArrayRef<const uint8_t> kNopCode; + + // All branches can be created from kBlPlus0 or kBPlus0 by adding the low 26 bits. + static constexpr uint32_t kBlPlus0 = 0x94000000u; + static constexpr uint32_t kBPlus0 = 0x14000000u; + + // Special BL values. + static constexpr uint32_t kBlPlusMax = 0x95ffffffu; + static constexpr uint32_t kBlMinusMax = 0x96000000u; + + // LDUR x2, [sp, #4], i.e. unaligned load crossing 64-bit boundary (assuming aligned sp). + static constexpr uint32_t kLdurInsn = 0xf840405fu; + + uint32_t Create2MethodsWithGap(const ArrayRef<const uint8_t>& method1_code, + const ArrayRef<LinkerPatch>& method1_patches, + const ArrayRef<const uint8_t>& last_method_code, + const ArrayRef<LinkerPatch>& last_method_patches, + uint32_t distance_without_thunks) { + CHECK_EQ(distance_without_thunks % kArm64Alignment, 0u); + const uint32_t method1_offset = + CompiledCode::AlignCode(kTrampolineSize, kArm64) + sizeof(OatQuickMethodHeader); + AddCompiledMethod(MethodRef(1u), method1_code, ArrayRef<LinkerPatch>(method1_patches)); + const uint32_t gap_start = + CompiledCode::AlignCode(method1_offset + method1_code.size(), kArm64); + + // We want to put the method3 at a very precise offset. + const uint32_t last_method_offset = method1_offset + distance_without_thunks; + const uint32_t gap_end = last_method_offset - sizeof(OatQuickMethodHeader); + CHECK(IsAligned<kArm64Alignment>(gap_end)); + + // Fill the gap with intermediate methods in chunks of 2MiB and the last in [2MiB, 4MiB). + // (This allows deduplicating the small chunks to avoid using 256MiB of memory for +-128MiB + // offsets by this test.) + uint32_t method_idx = 2u; + constexpr uint32_t kSmallChunkSize = 2 * MB; + std::vector<uint8_t> gap_code; + size_t gap_size = gap_end - gap_start; + for (; gap_size >= 2u * kSmallChunkSize; gap_size -= kSmallChunkSize) { + uint32_t chunk_code_size = kSmallChunkSize - sizeof(OatQuickMethodHeader); + gap_code.resize(chunk_code_size, 0u); + AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(gap_code), + ArrayRef<LinkerPatch>()); + method_idx += 1u; + } + uint32_t chunk_code_size = gap_size - sizeof(OatQuickMethodHeader); + gap_code.resize(chunk_code_size, 0u); + AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(gap_code), + ArrayRef<LinkerPatch>()); + method_idx += 1u; + + // Add the last method and link + AddCompiledMethod(MethodRef(method_idx), last_method_code, last_method_patches); + Link(); + + // Check assumptions. + CHECK_EQ(GetMethodOffset(1), method1_offset); + auto last_result = method_offset_map_.FindMethodOffset(MethodRef(method_idx)); + CHECK(last_result.first); + // There may be a thunk before method2. + if (last_result.second != last_method_offset) { + // Thunk present. Check that there's only one. 
+ uint32_t aligned_thunk_size = CompiledCode::AlignCode(ThunkSize(), kArm64); + CHECK_EQ(last_result.second, last_method_offset + aligned_thunk_size); + } + return method_idx; + } + + uint32_t GetMethodOffset(uint32_t method_idx) { + auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx)); + CHECK(result.first); + CHECK_EQ(result.second & 3u, 0u); + return result.second; + } + + uint32_t ThunkSize() { + return static_cast<Arm64RelativePatcher*>(patcher_.get())->thunk_code_.size(); + } + + bool CheckThunk(uint32_t thunk_offset) { + Arm64RelativePatcher* patcher = static_cast<Arm64RelativePatcher*>(patcher_.get()); + ArrayRef<const uint8_t> expected_code(patcher->thunk_code_); + if (output_.size() < thunk_offset + expected_code.size()) { + LOG(ERROR) << "output_.size() == " << output_.size() << " < " + << "thunk_offset + expected_code.size() == " << (thunk_offset + expected_code.size()); + return false; + } + ArrayRef<const uint8_t> linked_code(&output_[thunk_offset], expected_code.size()); + if (linked_code == expected_code) { + return true; + } + // Log failure info. + DumpDiff(expected_code, linked_code); + return false; + } + + std::vector<uint8_t> GenNopsAndBl(size_t num_nops, uint32_t bl) { + std::vector<uint8_t> result; + result.reserve(num_nops * 4u + 4u); + for (size_t i = 0; i != num_nops; ++i) { + result.insert(result.end(), kNopCode.begin(), kNopCode.end()); + } + result.push_back(static_cast<uint8_t>(bl)); + result.push_back(static_cast<uint8_t>(bl >> 8)); + result.push_back(static_cast<uint8_t>(bl >> 16)); + result.push_back(static_cast<uint8_t>(bl >> 24)); + return result; + } + + std::vector<uint8_t> GenNopsAndAdrpLdr(size_t num_nops, + uint32_t method_offset, uint32_t target_offset) { + std::vector<uint8_t> result; + result.reserve(num_nops * 4u + 8u); + for (size_t i = 0; i != num_nops; ++i) { + result.insert(result.end(), kNopCode.begin(), kNopCode.end()); + } + DCHECK_EQ(method_offset & 3u, 0u); + DCHECK_EQ(target_offset & 3u, 0u); + uint32_t adrp_offset = method_offset + num_nops * 4u; + uint32_t disp = target_offset - (adrp_offset & ~0xfffu); + DCHECK_EQ(disp & 3u, 0u); + uint32_t ldr = 0xb9400001 | // LDR w1, [x0, #(imm12 * 2)] + ((disp & 0xfffu) << (10 - 2)); // imm12 = ((disp & 0xfffu) >> 2) is at bit 10. + uint32_t adrp = 0x90000000 | // ADRP x0, +SignExtend(immhi:immlo:Zeros(12), 64) + ((disp & 0x3000u) << (29 - 12)) | // immlo = ((disp & 0x3000u) >> 12) is at bit 29, + ((disp & 0xffffc000) >> (14 - 5)) | // immhi = (disp >> 14) is at bit 5, + // We take the sign bit from the disp, limiting disp to +- 2GiB. + ((disp & 0x80000000) >> (31 - 23)); // sign bit in immhi is at bit 23. + result.push_back(static_cast<uint8_t>(adrp)); + result.push_back(static_cast<uint8_t>(adrp >> 8)); + result.push_back(static_cast<uint8_t>(adrp >> 16)); + result.push_back(static_cast<uint8_t>(adrp >> 24)); + result.push_back(static_cast<uint8_t>(ldr)); + result.push_back(static_cast<uint8_t>(ldr >> 8)); + result.push_back(static_cast<uint8_t>(ldr >> 16)); + result.push_back(static_cast<uint8_t>(ldr >> 24)); + return result; + } + + void TestNopsAdrpLdr(size_t num_nops, uint32_t dex_cache_arrays_begin, uint32_t element_offset) { + dex_cache_arrays_begin_ = dex_cache_arrays_begin; + auto code = GenNopsAndAdrpLdr(num_nops, 0u, 0u); // Unpatched. 
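// Illustrative aside (hypothetical helper, not part of this patch): the inverse of the ADRP
// encoding used in GenNopsAndAdrpLdr() above. immlo sits in bits 29-30 and immhi in bits 5-23;
// together they form a signed 21-bit page offset scaled by the 4KiB page size.
static int64_t DecodeAdrpPageDisplacement(uint32_t adrp) {
  uint32_t immlo = (adrp >> 29) & 0x3u;
  uint32_t immhi = (adrp >> 5) & 0x7ffffu;
  uint32_t imm21 = (immhi << 2) | immlo;
  if ((imm21 & (1u << 20)) != 0u) {
    imm21 |= 0xffe00000u;  // Sign-extend the 21-bit page offset.
  }
  return static_cast<int64_t>(static_cast<int32_t>(imm21)) << 12;  // Scale by the page size.
}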
+ LinkerPatch patches[] = { + LinkerPatch::DexCacheArrayPatch(num_nops * 4u , nullptr, num_nops * 4u, element_offset), + LinkerPatch::DexCacheArrayPatch(num_nops * 4u + 4u, nullptr, num_nops * 4u, element_offset), + }; + AddCompiledMethod(MethodRef(1u), ArrayRef<const uint8_t>(code), ArrayRef<LinkerPatch>(patches)); + Link(); + + uint32_t method1_offset = GetMethodOffset(1u); + uint32_t target_offset = dex_cache_arrays_begin_ + element_offset; + auto expected_code = GenNopsAndAdrpLdr(num_nops, method1_offset, target_offset); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); + } + + void InsertInsn(std::vector<uint8_t>* code, size_t pos, uint32_t insn) { + CHECK_LE(pos, code->size()); + const uint8_t insn_code[] = { + static_cast<uint8_t>(insn), static_cast<uint8_t>(insn >> 8), + static_cast<uint8_t>(insn >> 16), static_cast<uint8_t>(insn >> 24), + }; + static_assert(sizeof(insn_code) == 4u, "Invalid sizeof(insn_code)."); + code->insert(code->begin() + pos, insn_code, insn_code + sizeof(insn_code)); + } + + void PrepareNopsAdrpInsn2Ldr(size_t num_nops, uint32_t insn2, + uint32_t dex_cache_arrays_begin, uint32_t element_offset) { + dex_cache_arrays_begin_ = dex_cache_arrays_begin; + auto code = GenNopsAndAdrpLdr(num_nops, 0u, 0u); // Unpatched. + InsertInsn(&code, num_nops * 4u + 4u, insn2); + LinkerPatch patches[] = { + LinkerPatch::DexCacheArrayPatch(num_nops * 4u , nullptr, num_nops * 4u, element_offset), + LinkerPatch::DexCacheArrayPatch(num_nops * 4u + 8u, nullptr, num_nops * 4u, element_offset), + }; + AddCompiledMethod(MethodRef(1u), ArrayRef<const uint8_t>(code), ArrayRef<LinkerPatch>(patches)); + Link(); + } + + void TestNopsAdrpInsn2Ldr(size_t num_nops, uint32_t insn2, + uint32_t dex_cache_arrays_begin, uint32_t element_offset) { + PrepareNopsAdrpInsn2Ldr(num_nops, insn2, dex_cache_arrays_begin, element_offset); + + uint32_t method1_offset = GetMethodOffset(1u); + uint32_t target_offset = dex_cache_arrays_begin_ + element_offset; + auto expected_code = GenNopsAndAdrpLdr(num_nops, method1_offset, target_offset); + InsertInsn(&expected_code, num_nops * 4u + 4u, insn2); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); + } + + void TestNopsAdrpInsn2LdrHasThunk(size_t num_nops, uint32_t insn2, + uint32_t dex_cache_arrays_begin, uint32_t element_offset) { + PrepareNopsAdrpInsn2Ldr(num_nops, insn2, dex_cache_arrays_begin, element_offset); + + uint32_t method1_offset = GetMethodOffset(1u); + CHECK(!compiled_method_refs_.empty()); + CHECK_EQ(compiled_method_refs_[0].dex_method_index, 1u); + CHECK_EQ(compiled_method_refs_.size(), compiled_methods_.size()); + uint32_t method1_size = compiled_methods_[0]->GetQuickCode()->size(); + uint32_t thunk_offset = CompiledCode::AlignCode(method1_offset + method1_size, kArm64); + uint32_t b_diff = thunk_offset - (method1_offset + num_nops * 4u); + ASSERT_EQ(b_diff & 3u, 0u); + ASSERT_LT(b_diff, 128 * MB); + uint32_t b_out = kBPlus0 + ((b_diff >> 2) & 0x03ffffffu); + uint32_t b_in = kBPlus0 + ((-b_diff >> 2) & 0x03ffffffu); + + uint32_t target_offset = dex_cache_arrays_begin_ + element_offset; + auto expected_code = GenNopsAndAdrpLdr(num_nops, method1_offset, target_offset); + InsertInsn(&expected_code, num_nops * 4u + 4u, insn2); + // Replace adrp with bl. 
+ expected_code.erase(expected_code.begin() + num_nops * 4u, + expected_code.begin() + num_nops * 4u + 4u); + InsertInsn(&expected_code, num_nops * 4u, b_out); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); + + auto expected_thunk_code = GenNopsAndAdrpLdr(0u, thunk_offset, target_offset); + ASSERT_EQ(expected_thunk_code.size(), 8u); + expected_thunk_code.erase(expected_thunk_code.begin() + 4u, expected_thunk_code.begin() + 8u); + InsertInsn(&expected_thunk_code, 4u, b_in); + ASSERT_EQ(expected_thunk_code.size(), 8u); + + uint32_t thunk_size = ThunkSize(); + ASSERT_EQ(thunk_offset + thunk_size, output_.size()); + ASSERT_EQ(thunk_size, expected_thunk_code.size()); + ArrayRef<const uint8_t> thunk_code(&output_[thunk_offset], thunk_size); + if (ArrayRef<const uint8_t>(expected_thunk_code) != thunk_code) { + DumpDiff(ArrayRef<const uint8_t>(expected_thunk_code), thunk_code); + FAIL(); + } + } + + void TestAdrpLdurLdr(uint32_t adrp_offset, bool has_thunk, + uint32_t dex_cache_arrays_begin, uint32_t element_offset) { + uint32_t method1_offset = + CompiledCode::AlignCode(kTrampolineSize, kArm64) + sizeof(OatQuickMethodHeader); + ASSERT_LT(method1_offset, adrp_offset); + ASSERT_EQ(adrp_offset & 3u, 0u); + uint32_t num_nops = (adrp_offset - method1_offset) / 4u; + if (has_thunk) { + TestNopsAdrpInsn2LdrHasThunk(num_nops, kLdurInsn, dex_cache_arrays_begin, element_offset); + } else { + TestNopsAdrpInsn2Ldr(num_nops, kLdurInsn, dex_cache_arrays_begin, element_offset); + } + ASSERT_EQ(method1_offset, GetMethodOffset(1u)); // If this fails, num_nops is wrong. + } +}; + +const uint8_t Arm64RelativePatcherTest::kCallRawCode[] = { + 0x00, 0x00, 0x00, 0x94 +}; + +const ArrayRef<const uint8_t> Arm64RelativePatcherTest::kCallCode(kCallRawCode); + +const uint8_t Arm64RelativePatcherTest::kNopRawCode[] = { + 0x1f, 0x20, 0x03, 0xd5 +}; + +const ArrayRef<const uint8_t> Arm64RelativePatcherTest::kNopCode(kNopRawCode); + +class Arm64RelativePatcherTestDefault : public Arm64RelativePatcherTest { + public: + Arm64RelativePatcherTestDefault() : Arm64RelativePatcherTest("default") { } +}; + +class Arm64RelativePatcherTestDenver64 : public Arm64RelativePatcherTest { + public: + Arm64RelativePatcherTestDenver64() : Arm64RelativePatcherTest("denver64") { } +}; + +TEST_F(Arm64RelativePatcherTestDefault, CallSelf) { + LinkerPatch patches[] = { + LinkerPatch::RelativeCodePatch(0u, nullptr, 1u), + }; + AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<LinkerPatch>(patches)); + Link(); + + static const uint8_t expected_code[] = { + 0x00, 0x00, 0x00, 0x94 + }; + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); +} + +TEST_F(Arm64RelativePatcherTestDefault, CallOther) { + LinkerPatch method1_patches[] = { + LinkerPatch::RelativeCodePatch(0u, nullptr, 2u), + }; + AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<LinkerPatch>(method1_patches)); + LinkerPatch method2_patches[] = { + LinkerPatch::RelativeCodePatch(0u, nullptr, 1u), + }; + AddCompiledMethod(MethodRef(2u), kCallCode, ArrayRef<LinkerPatch>(method2_patches)); + Link(); + + uint32_t method1_offset = GetMethodOffset(1u); + uint32_t method2_offset = GetMethodOffset(2u); + uint32_t diff_after = method2_offset - method1_offset; + ASSERT_EQ(diff_after & 3u, 0u); + ASSERT_LT(diff_after >> 2, 1u << 8); // Simple encoding, (diff_after >> 2) fits into 8 bits. 
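// Illustrative aside (hypothetical helper, not part of this patch): the BL values checked by
// these tests follow the ARM64 encoding of a signed 26-bit word displacement in the low bits,
// i.e. 0x94000000 | imm26, which is how kBlPlus0, kBlPlusMax and kBlMinusMax above are formed.
static uint32_t EncodeArm64Bl(int32_t displacement) {
  // Assumes the displacement is 4-byte aligned and within the +-128MiB branch range.
  return 0x94000000u | ((static_cast<uint32_t>(displacement) >> 2) & 0x03ffffffu);
}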
+ static const uint8_t method1_expected_code[] = { + static_cast<uint8_t>(diff_after >> 2), 0x00, 0x00, 0x94 + }; + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(method1_expected_code))); + uint32_t diff_before = method1_offset - method2_offset; + ASSERT_EQ(diff_before & 3u, 0u); + ASSERT_GE(diff_before, -1u << 27); + auto method2_expected_code = GenNopsAndBl(0u, kBlPlus0 | ((diff_before >> 2) & 0x03ffffffu)); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(2u), ArrayRef<const uint8_t>(method2_expected_code))); +} + +TEST_F(Arm64RelativePatcherTestDefault, CallTrampoline) { + LinkerPatch patches[] = { + LinkerPatch::RelativeCodePatch(0u, nullptr, 2u), + }; + AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<LinkerPatch>(patches)); + Link(); + + uint32_t method1_offset = GetMethodOffset(1u); + uint32_t diff = kTrampolineOffset - method1_offset; + ASSERT_EQ(diff & 1u, 0u); + ASSERT_GE(diff, -1u << 9); // Simple encoding, -256 <= (diff >> 1) < 0 (checked as unsigned). + auto expected_code = GenNopsAndBl(0u, kBlPlus0 | ((diff >> 2) & 0x03ffffffu)); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); +} + +TEST_F(Arm64RelativePatcherTestDefault, CallOtherAlmostTooFarAfter) { + auto method1_raw_code = GenNopsAndBl(1u, kBlPlus0); + constexpr uint32_t bl_offset_in_method1 = 1u * 4u; // After NOPs. + ArrayRef<const uint8_t> method1_code(method1_raw_code); + ASSERT_EQ(bl_offset_in_method1 + 4u, method1_code.size()); + uint32_t expected_last_method_idx = 65; // Based on 2MiB chunks in Create2MethodsWithGap(). + LinkerPatch method1_patches[] = { + LinkerPatch::RelativeCodePatch(bl_offset_in_method1, nullptr, expected_last_method_idx), + }; + + constexpr uint32_t max_positive_disp = 128 * MB - 4u; + uint32_t last_method_idx = Create2MethodsWithGap(method1_code, method1_patches, + kNopCode, ArrayRef<LinkerPatch>(), + bl_offset_in_method1 + max_positive_disp); + ASSERT_EQ(expected_last_method_idx, last_method_idx); + + uint32_t method1_offset = GetMethodOffset(1u); + uint32_t last_method_offset = GetMethodOffset(last_method_idx); + ASSERT_EQ(method1_offset + bl_offset_in_method1 + max_positive_disp, last_method_offset); + + // Check linked code. + auto expected_code = GenNopsAndBl(1u, kBlPlusMax); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); +} + +TEST_F(Arm64RelativePatcherTestDefault, CallOtherAlmostTooFarBefore) { + auto last_method_raw_code = GenNopsAndBl(0u, kBlPlus0); + constexpr uint32_t bl_offset_in_last_method = 0u * 4u; // After NOPs. + ArrayRef<const uint8_t> last_method_code(last_method_raw_code); + ASSERT_EQ(bl_offset_in_last_method + 4u, last_method_code.size()); + LinkerPatch last_method_patches[] = { + LinkerPatch::RelativeCodePatch(bl_offset_in_last_method, nullptr, 1u), + }; + + constexpr uint32_t max_negative_disp = 128 * MB; + uint32_t last_method_idx = Create2MethodsWithGap(kNopCode, ArrayRef<LinkerPatch>(), + last_method_code, last_method_patches, + max_negative_disp - bl_offset_in_last_method); + uint32_t method1_offset = GetMethodOffset(1u); + uint32_t last_method_offset = GetMethodOffset(last_method_idx); + ASSERT_EQ(method1_offset, last_method_offset + bl_offset_in_last_method - max_negative_disp); + + // Check linked code. 
+ auto expected_code = GenNopsAndBl(0u, kBlMinusMax); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(last_method_idx), + ArrayRef<const uint8_t>(expected_code))); +} + +TEST_F(Arm64RelativePatcherTestDefault, CallOtherJustTooFarAfter) { + auto method1_raw_code = GenNopsAndBl(0u, kBlPlus0); + constexpr uint32_t bl_offset_in_method1 = 0u * 4u; // After NOPs. + ArrayRef<const uint8_t> method1_code(method1_raw_code); + ASSERT_EQ(bl_offset_in_method1 + 4u, method1_code.size()); + uint32_t expected_last_method_idx = 65; // Based on 2MiB chunks in Create2MethodsWithGap(). + LinkerPatch method1_patches[] = { + LinkerPatch::RelativeCodePatch(bl_offset_in_method1, nullptr, expected_last_method_idx), + }; + + constexpr uint32_t just_over_max_positive_disp = 128 * MB; + uint32_t last_method_idx = Create2MethodsWithGap( + method1_code, method1_patches, kNopCode, ArrayRef<LinkerPatch>(), + bl_offset_in_method1 + just_over_max_positive_disp); + ASSERT_EQ(expected_last_method_idx, last_method_idx); + + uint32_t method1_offset = GetMethodOffset(1u); + uint32_t last_method_offset = GetMethodOffset(last_method_idx); + uint32_t last_method_header_offset = last_method_offset - sizeof(OatQuickMethodHeader); + ASSERT_TRUE(IsAligned<kArm64Alignment>(last_method_header_offset)); + uint32_t thunk_offset = last_method_header_offset - CompiledCode::AlignCode(ThunkSize(), kArm64); + ASSERT_TRUE(IsAligned<kArm64Alignment>(thunk_offset)); + uint32_t diff = thunk_offset - (method1_offset + bl_offset_in_method1); + ASSERT_EQ(diff & 3u, 0u); + ASSERT_LT(diff, 128 * MB); + auto expected_code = GenNopsAndBl(0u, kBlPlus0 | (diff >> 2)); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); + CheckThunk(thunk_offset); +} + +TEST_F(Arm64RelativePatcherTestDefault, CallOtherJustTooFarBefore) { + auto last_method_raw_code = GenNopsAndBl(1u, kBlPlus0); + constexpr uint32_t bl_offset_in_last_method = 1u * 4u; // After NOPs. + ArrayRef<const uint8_t> last_method_code(last_method_raw_code); + ASSERT_EQ(bl_offset_in_last_method + 4u, last_method_code.size()); + LinkerPatch last_method_patches[] = { + LinkerPatch::RelativeCodePatch(bl_offset_in_last_method, nullptr, 1u), + }; + + constexpr uint32_t just_over_max_negative_disp = 128 * MB + 4; + uint32_t last_method_idx = Create2MethodsWithGap( + kNopCode, ArrayRef<LinkerPatch>(), last_method_code, last_method_patches, + just_over_max_negative_disp - bl_offset_in_last_method); + uint32_t method1_offset = GetMethodOffset(1u); + uint32_t last_method_offset = GetMethodOffset(last_method_idx); + ASSERT_EQ(method1_offset, + last_method_offset + bl_offset_in_last_method - just_over_max_negative_disp); + + // Check linked code. 
+ uint32_t thunk_offset = + CompiledCode::AlignCode(last_method_offset + last_method_code.size(), kArm64); + uint32_t diff = thunk_offset - (last_method_offset + bl_offset_in_last_method); + ASSERT_EQ(diff & 3u, 0u); + ASSERT_LT(diff, 128 * MB); + auto expected_code = GenNopsAndBl(1u, kBlPlus0 | (diff >> 2)); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(last_method_idx), + ArrayRef<const uint8_t>(expected_code))); + EXPECT_TRUE(CheckThunk(thunk_offset)); +} + +TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference1) { + TestNopsAdrpLdr(0u, 0x12345678u, 0x1234u); +} + +TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference2) { + TestNopsAdrpLdr(0u, -0x12345678u, 0x4444u); +} + +TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference3) { + TestNopsAdrpLdr(0u, 0x12345000u, 0x3ffcu); +} + +TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference4) { + TestNopsAdrpLdr(0u, 0x12345000u, 0x4000u); +} + +TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference0xff4) { + TestAdrpLdurLdr(0xff4u, false, 0x12345678u, 0x1234u); +} + +TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference0xff8) { + TestAdrpLdurLdr(0xff8u, true, 0x12345678u, 0x1234u); +} + +TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference0xffc) { + TestAdrpLdurLdr(0xffcu, true, 0x12345678u, 0x1234u); +} + +TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference0x1000) { + TestAdrpLdurLdr(0x1000u, false, 0x12345678u, 0x1234u); +} + +TEST_F(Arm64RelativePatcherTestDenver64, DexCacheReference0xff4) { + TestAdrpLdurLdr(0xff4u, false, 0x12345678u, 0x1234u); +} + +TEST_F(Arm64RelativePatcherTestDenver64, DexCacheReference0xff8) { + TestAdrpLdurLdr(0xff8u, false, 0x12345678u, 0x1234u); +} + +TEST_F(Arm64RelativePatcherTestDenver64, DexCacheReference0xffc) { + TestAdrpLdurLdr(0xffcu, false, 0x12345678u, 0x1234u); +} + +TEST_F(Arm64RelativePatcherTestDenver64, DexCacheReference0x1000) { + TestAdrpLdurLdr(0x1000u, false, 0x12345678u, 0x1234u); +} + +} // namespace linker +} // namespace art diff --git a/compiler/linker/relative_patcher.cc b/compiler/linker/relative_patcher.cc new file mode 100644 index 0000000000..8ee87aa339 --- /dev/null +++ b/compiler/linker/relative_patcher.cc @@ -0,0 +1,118 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "linker/relative_patcher.h" + +#include "linker/arm/relative_patcher_thumb2.h" +#include "linker/arm64/relative_patcher_arm64.h" +#include "linker/x86/relative_patcher_x86.h" +#include "linker/x86_64/relative_patcher_x86_64.h" +#include "output_stream.h" + +namespace art { +namespace linker { + +std::unique_ptr<RelativePatcher> RelativePatcher::Create( + InstructionSet instruction_set, const InstructionSetFeatures* features, + RelativePatcherTargetProvider* provider) { + class RelativePatcherNone FINAL : public RelativePatcher { + public: + RelativePatcherNone() { } + + uint32_t ReserveSpace(uint32_t offset, + const CompiledMethod* compiled_method ATTRIBUTE_UNUSED, + MethodReference method_ref ATTRIBUTE_UNUSED) OVERRIDE { + return offset; // No space reserved; no patches expected. + } + + uint32_t ReserveSpaceEnd(uint32_t offset) OVERRIDE { + return offset; // No space reserved; no patches expected. + } + + uint32_t WriteThunks(OutputStream* out ATTRIBUTE_UNUSED, uint32_t offset) OVERRIDE { + return offset; // No thunks added; no patches expected. + } + + void PatchCall(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, + uint32_t literal_offset ATTRIBUTE_UNUSED, + uint32_t patch_offset ATTRIBUTE_UNUSED, + uint32_t target_offset ATTRIBUTE_UNUSED) OVERRIDE { + LOG(FATAL) << "Unexpected relative call patch."; + } + + virtual void PatchDexCacheReference(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, + const LinkerPatch& patch ATTRIBUTE_UNUSED, + uint32_t patch_offset ATTRIBUTE_UNUSED, + uint32_t target_offset ATTRIBUTE_UNUSED) { + LOG(FATAL) << "Unexpected relative dex cache array patch."; + } + + private: + DISALLOW_COPY_AND_ASSIGN(RelativePatcherNone); + }; + + switch (instruction_set) { + case kX86: + return std::unique_ptr<RelativePatcher>(new X86RelativePatcher()); + break; + case kX86_64: + return std::unique_ptr<RelativePatcher>(new X86_64RelativePatcher()); + break; + case kArm: + // Fall through: we generate Thumb2 code for "arm". 
+ case kThumb2: + return std::unique_ptr<RelativePatcher>(new Thumb2RelativePatcher(provider)); + break; + case kArm64: + return std::unique_ptr<RelativePatcher>( + new Arm64RelativePatcher(provider, features->AsArm64InstructionSetFeatures())); + break; + default: + return std::unique_ptr<RelativePatcher>(new RelativePatcherNone); + break; + } +} + +bool RelativePatcher::WriteCodeAlignment(OutputStream* out, uint32_t aligned_code_delta) { + static const uint8_t kPadding[] = { + 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u + }; + DCHECK_LE(aligned_code_delta, sizeof(kPadding)); + if (UNLIKELY(!out->WriteFully(kPadding, aligned_code_delta))) { + return false; + } + size_code_alignment_ += aligned_code_delta; + return true; +} + +bool RelativePatcher::WriteRelCallThunk(OutputStream* out, const ArrayRef<const uint8_t>& thunk) { + if (UNLIKELY(!out->WriteFully(thunk.data(), thunk.size()))) { + return false; + } + size_relative_call_thunks_ += thunk.size(); + return true; +} + +bool RelativePatcher::WriteMiscThunk(OutputStream* out, const ArrayRef<const uint8_t>& thunk) { + if (UNLIKELY(!out->WriteFully(thunk.data(), thunk.size()))) { + return false; + } + size_misc_thunks_ += thunk.size(); + return true; +} + +} // namespace linker +} // namespace art diff --git a/compiler/linker/relative_patcher.h b/compiler/linker/relative_patcher.h new file mode 100644 index 0000000000..8a9f3f8364 --- /dev/null +++ b/compiler/linker/relative_patcher.h @@ -0,0 +1,126 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_LINKER_RELATIVE_PATCHER_H_ +#define ART_COMPILER_LINKER_RELATIVE_PATCHER_H_ + +#include <vector> + +#include "arch/instruction_set.h" +#include "arch/instruction_set_features.h" +#include "base/macros.h" +#include "method_reference.h" +#include "utils/array_ref.h" + +namespace art { + +class CompiledMethod; +class LinkerPatch; +class OutputStream; + +namespace linker { + +/** + * @class RelativePatcherTargetProvider + * @brief Interface for providing method offsets for relative call targets. + */ +class RelativePatcherTargetProvider { + public: + /** + * Find the offset of the target method of a relative call if known. + * + * The process of assigning target method offsets includes calls to the relative patcher's + * ReserveSpace() which in turn can use FindMethodOffset() to determine if a method already + * has an offset assigned and, if so, what's that offset. If the offset has not yet been + * assigned or if it's too far for the particular architecture's relative call, + * ReserveSpace() may need to allocate space for a special dispatch thunk. + * + * @param ref the target method of the relative call. + * @return true in the first element of the pair if the method was found, false otherwise; + * if found, the second element specifies the offset. 
+ */ + virtual std::pair<bool, uint32_t> FindMethodOffset(MethodReference ref) = 0; + + protected: + virtual ~RelativePatcherTargetProvider() { } +}; + +/** + * @class RelativePatcher + * @brief Interface for architecture-specific link-time patching of PC-relative references. + */ +class RelativePatcher { + public: + static std::unique_ptr<RelativePatcher> Create( + InstructionSet instruction_set, const InstructionSetFeatures* features, + RelativePatcherTargetProvider* provider); + + virtual ~RelativePatcher() { } + + uint32_t CodeAlignmentSize() const { + return size_code_alignment_; + } + + uint32_t RelativeCallThunksSize() const { + return size_relative_call_thunks_; + } + + uint32_t MiscThunksSize() const { + return size_misc_thunks_; + } + + // Reserve space for thunks if needed before a method, return adjusted offset. + virtual uint32_t ReserveSpace(uint32_t offset, const CompiledMethod* compiled_method, + MethodReference method_ref) = 0; + + // Reserve space for thunks if needed after the last method, return adjusted offset. + virtual uint32_t ReserveSpaceEnd(uint32_t offset) = 0; + + // Write relative call thunks if needed, return adjusted offset. + virtual uint32_t WriteThunks(OutputStream* out, uint32_t offset) = 0; + + // Patch method code. The input displacement is relative to the patched location, + // the patcher may need to adjust it if the correct base is different. + virtual void PatchCall(std::vector<uint8_t>* code, uint32_t literal_offset, + uint32_t patch_offset, uint32_t target_offset) = 0; + + // Patch a reference to a dex cache location. + virtual void PatchDexCacheReference(std::vector<uint8_t>* code, const LinkerPatch& patch, + uint32_t patch_offset, uint32_t target_offset) = 0; + + protected: + RelativePatcher() + : size_code_alignment_(0u), + size_relative_call_thunks_(0u), + size_misc_thunks_(0u) { + } + + bool WriteCodeAlignment(OutputStream* out, uint32_t aligned_code_delta); + bool WriteRelCallThunk(OutputStream* out, const ArrayRef<const uint8_t>& thunk); + bool WriteMiscThunk(OutputStream* out, const ArrayRef<const uint8_t>& thunk); + + private: + uint32_t size_code_alignment_; + uint32_t size_relative_call_thunks_; + uint32_t size_misc_thunks_; + + DISALLOW_COPY_AND_ASSIGN(RelativePatcher); +}; + +} // namespace linker +} // namespace art + +#endif // ART_COMPILER_LINKER_RELATIVE_PATCHER_H_ diff --git a/compiler/linker/relative_patcher_test.h b/compiler/linker/relative_patcher_test.h new file mode 100644 index 0000000000..08167b3309 --- /dev/null +++ b/compiler/linker/relative_patcher_test.h @@ -0,0 +1,255 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_LINKER_RELATIVE_PATCHER_TEST_H_ +#define ART_COMPILER_LINKER_RELATIVE_PATCHER_TEST_H_ + +#include "arch/instruction_set.h" +#include "arch/instruction_set_features.h" +#include "base/macros.h" +#include "compiled_method.h" +#include "dex/quick/dex_file_to_method_inliner_map.h" +#include "dex/verification_results.h" +#include "driver/compiler_driver.h" +#include "driver/compiler_options.h" +#include "globals.h" +#include "gtest/gtest.h" +#include "linker/relative_patcher.h" +#include "method_reference.h" +#include "oat.h" +#include "utils/array_ref.h" +#include "vector_output_stream.h" + +namespace art { +namespace linker { + +// Base class providing infrastructure for architecture-specific tests. +class RelativePatcherTest : public testing::Test { + protected: + RelativePatcherTest(InstructionSet instruction_set, const std::string& variant) + : compiler_options_(), + verification_results_(&compiler_options_), + inliner_map_(), + driver_(&compiler_options_, &verification_results_, &inliner_map_, + Compiler::kQuick, instruction_set, nullptr, + false, nullptr, nullptr, 1u, + false, false, "", nullptr, -1, ""), + error_msg_(), + instruction_set_(instruction_set), + features_(InstructionSetFeatures::FromVariant(instruction_set, variant, &error_msg_)), + method_offset_map_(), + patcher_(RelativePatcher::Create(instruction_set, features_.get(), &method_offset_map_)), + dex_cache_arrays_begin_(0u), + compiled_method_refs_(), + compiled_methods_(), + patched_code_(), + output_(), + out_("test output stream", &output_) { + CHECK(error_msg_.empty()) << instruction_set << "/" << variant; + patched_code_.reserve(16 * KB); + } + + MethodReference MethodRef(uint32_t method_idx) { + CHECK_NE(method_idx, 0u); + return MethodReference(nullptr, method_idx); + } + + void AddCompiledMethod(MethodReference method_ref, + const ArrayRef<const uint8_t>& code, + const ArrayRef<LinkerPatch>& patches) { + compiled_method_refs_.push_back(method_ref); + compiled_methods_.emplace_back(new CompiledMethod( + &driver_, instruction_set_, code, + 0u, 0u, 0u, nullptr, ArrayRef<const uint8_t>(), ArrayRef<const uint8_t>(), + ArrayRef<const uint8_t>(), ArrayRef<const uint8_t>(), + patches)); + } + + void Link() { + // Reserve space. + static_assert(kTrampolineOffset == 0u, "Unexpected trampoline offset."); + uint32_t offset = kTrampolineSize; + size_t idx = 0u; + for (auto& compiled_method : compiled_methods_) { + offset = patcher_->ReserveSpace(offset, compiled_method.get(), compiled_method_refs_[idx]); + + uint32_t aligned_offset = compiled_method->AlignCode(offset); + uint32_t aligned_code_delta = aligned_offset - offset; + offset += aligned_code_delta; + + offset += sizeof(OatQuickMethodHeader); + uint32_t quick_code_offset = offset + compiled_method->CodeDelta(); + const auto& code = *compiled_method->GetQuickCode(); + offset += code.size(); + + method_offset_map_.map.Put(compiled_method_refs_[idx], quick_code_offset); + ++idx; + } + offset = patcher_->ReserveSpaceEnd(offset); + uint32_t output_size = offset; + output_.reserve(output_size); + + // Write data. 
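+ // Mirror the reservation pass above: emit a dummy trampoline, then for each method
+ // emit its thunks, any alignment padding and a dummy OatQuickMethodHeader before the
+ // (possibly patched) code, so that the data written matches the offsets reserved.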
+ DCHECK(output_.empty()); + uint8_t dummy_trampoline[kTrampolineSize]; + memset(dummy_trampoline, 0, sizeof(dummy_trampoline)); + out_.WriteFully(dummy_trampoline, kTrampolineSize); + offset = kTrampolineSize; + static const uint8_t kPadding[] = { + 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u + }; + uint8_t dummy_header[sizeof(OatQuickMethodHeader)]; + memset(dummy_header, 0, sizeof(dummy_header)); + for (auto& compiled_method : compiled_methods_) { + offset = patcher_->WriteThunks(&out_, offset); + + uint32_t aligned_offset = compiled_method->AlignCode(offset); + uint32_t aligned_code_delta = aligned_offset - offset; + CHECK_LE(aligned_code_delta, sizeof(kPadding)); + out_.WriteFully(kPadding, aligned_code_delta); + offset += aligned_code_delta; + + out_.WriteFully(dummy_header, sizeof(OatQuickMethodHeader)); + offset += sizeof(OatQuickMethodHeader); + ArrayRef<const uint8_t> code(*compiled_method->GetQuickCode()); + if (!compiled_method->GetPatches().empty()) { + patched_code_.assign(code.begin(), code.end()); + code = ArrayRef<const uint8_t>(patched_code_); + for (const LinkerPatch& patch : compiled_method->GetPatches()) { + if (patch.Type() == kLinkerPatchCallRelative) { + auto result = method_offset_map_.FindMethodOffset(patch.TargetMethod()); + uint32_t target_offset = + result.first ? result.second : kTrampolineOffset + compiled_method->CodeDelta(); + patcher_->PatchCall(&patched_code_, patch.LiteralOffset(), + offset + patch.LiteralOffset(), target_offset); + } else if (patch.Type() == kLinkerPatchDexCacheArray) { + uint32_t target_offset = dex_cache_arrays_begin_ + patch.TargetDexCacheElementOffset(); + patcher_->PatchDexCacheReference(&patched_code_, patch, + offset + patch.LiteralOffset(), target_offset); + } else { + LOG(FATAL) << "Bad patch type."; + } + } + } + out_.WriteFully(&code[0], code.size()); + offset += code.size(); + } + offset = patcher_->WriteThunks(&out_, offset); + CHECK_EQ(offset, output_size); + CHECK_EQ(output_.size(), output_size); + } + + bool CheckLinkedMethod(MethodReference method_ref, const ArrayRef<const uint8_t>& expected_code) { + // Sanity check: original code size must match linked_code.size(). + size_t idx = 0u; + for (auto ref : compiled_method_refs_) { + if (ref.dex_file == method_ref.dex_file && + ref.dex_method_index == method_ref.dex_method_index) { + break; + } + ++idx; + } + CHECK_NE(idx, compiled_method_refs_.size()); + CHECK_EQ(compiled_methods_[idx]->GetQuickCode()->size(), expected_code.size()); + + auto result = method_offset_map_.FindMethodOffset(method_ref); + CHECK(result.first); // Must have been linked. + size_t offset = result.second - compiled_methods_[idx]->CodeDelta(); + CHECK_LT(offset, output_.size()); + CHECK_LE(offset + expected_code.size(), output_.size()); + ArrayRef<const uint8_t> linked_code(&output_[offset], expected_code.size()); + if (linked_code == expected_code) { + return true; + } + // Log failure info. 
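+ // DumpDiff() logs both byte sequences as hex and marks the first differing byte.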
+ DumpDiff(expected_code, linked_code);
+ return false;
+ }
+
+ void DumpDiff(const ArrayRef<const uint8_t>& expected_code,
+ const ArrayRef<const uint8_t>& linked_code) {
+ std::ostringstream expected_hex;
+ std::ostringstream linked_hex;
+ std::ostringstream diff_indicator;
+ static const char digits[] = "0123456789abcdef";
+ bool found_diff = false;
+ for (size_t i = 0; i != expected_code.size(); ++i) {
+ expected_hex << " " << digits[expected_code[i] >> 4] << digits[expected_code[i] & 0xf];
+ linked_hex << " " << digits[linked_code[i] >> 4] << digits[linked_code[i] & 0xf];
+ if (!found_diff) {
+ found_diff = (expected_code[i] != linked_code[i]);
+ diff_indicator << (found_diff ? " ^^" : " ");
+ }
+ }
+ CHECK(found_diff);
+ std::string expected_hex_str = expected_hex.str();
+ std::string linked_hex_str = linked_hex.str();
+ std::string diff_indicator_str = diff_indicator.str();
+ if (diff_indicator_str.length() > 60) {
+ CHECK_EQ(diff_indicator_str.length() % 3u, 0u);
+ size_t remove = diff_indicator_str.length() / 3 - 5;
+ std::ostringstream oss;
+ oss << "[stripped " << remove << "]";
+ std::string replacement = oss.str();
+ expected_hex_str.replace(0u, remove * 3u, replacement);
+ linked_hex_str.replace(0u, remove * 3u, replacement);
+ diff_indicator_str.replace(0u, remove * 3u, replacement);
+ }
+ LOG(ERROR) << "diff expected_code linked_code";
+ LOG(ERROR) << "<" << expected_hex_str;
+ LOG(ERROR) << ">" << linked_hex_str;
+ LOG(ERROR) << " " << diff_indicator_str;
+ }
+
+ // Map method reference to assigned offset.
+ // Wrap the map in a class implementing linker::RelativePatcherTargetProvider.
+ class MethodOffsetMap FINAL : public linker::RelativePatcherTargetProvider {
+ public:
+ std::pair<bool, uint32_t> FindMethodOffset(MethodReference ref) OVERRIDE {
+ auto it = map.find(ref);
+ if (it == map.end()) {
+ return std::pair<bool, uint32_t>(false, 0u);
+ } else {
+ return std::pair<bool, uint32_t>(true, it->second);
+ }
+ }
+ SafeMap<MethodReference, uint32_t, MethodReferenceComparator> map;
+ };
+
+ static const uint32_t kTrampolineSize = 4u;
+ static const uint32_t kTrampolineOffset = 0u;
+
+ CompilerOptions compiler_options_;
+ VerificationResults verification_results_;
+ DexFileToMethodInlinerMap inliner_map_;
+ CompilerDriver driver_; // Needed for constructing CompiledMethod.
+ std::string error_msg_;
+ InstructionSet instruction_set_;
+ std::unique_ptr<const InstructionSetFeatures> features_;
+ MethodOffsetMap method_offset_map_;
+ std::unique_ptr<RelativePatcher> patcher_;
+ uint32_t dex_cache_arrays_begin_;
+ std::vector<MethodReference> compiled_method_refs_;
+ std::vector<std::unique_ptr<CompiledMethod>> compiled_methods_;
+ std::vector<uint8_t> patched_code_;
+ std::vector<uint8_t> output_;
+ VectorOutputStream out_;
+};
+
+} // namespace linker
+} // namespace art
+
+#endif // ART_COMPILER_LINKER_RELATIVE_PATCHER_TEST_H_ diff --git a/compiler/linker/x86/relative_patcher_x86.cc b/compiler/linker/x86/relative_patcher_x86.cc new file mode 100644 index 0000000000..246cf11dae --- /dev/null +++ b/compiler/linker/x86/relative_patcher_x86.cc @@ -0,0 +1,30 @@ +/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "linker/x86/relative_patcher_x86.h" + +namespace art { +namespace linker { + +void X86RelativePatcher::PatchDexCacheReference(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, + const LinkerPatch& patch ATTRIBUTE_UNUSED, + uint32_t patch_offset ATTRIBUTE_UNUSED, + uint32_t target_offset ATTRIBUTE_UNUSED) { + LOG(FATAL) << "Unexpected relative dex cache array patch."; +} + +} // namespace linker +} // namespace art diff --git a/compiler/linker/x86/relative_patcher_x86.h b/compiler/linker/x86/relative_patcher_x86.h new file mode 100644 index 0000000000..0c881f00ba --- /dev/null +++ b/compiler/linker/x86/relative_patcher_x86.h @@ -0,0 +1,36 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_LINKER_X86_RELATIVE_PATCHER_X86_H_ +#define ART_COMPILER_LINKER_X86_RELATIVE_PATCHER_X86_H_ + +#include "linker/x86/relative_patcher_x86_base.h" + +namespace art { +namespace linker { + +class X86RelativePatcher FINAL : public X86BaseRelativePatcher { + public: + X86RelativePatcher() { } + + void PatchDexCacheReference(std::vector<uint8_t>* code, const LinkerPatch& patch, + uint32_t patch_offset, uint32_t target_offset) OVERRIDE; +}; + +} // namespace linker +} // namespace art + +#endif // ART_COMPILER_LINKER_X86_RELATIVE_PATCHER_X86_H_ diff --git a/compiler/linker/x86/relative_patcher_x86_base.cc b/compiler/linker/x86/relative_patcher_x86_base.cc new file mode 100644 index 0000000000..bc285a7849 --- /dev/null +++ b/compiler/linker/x86/relative_patcher_x86_base.cc @@ -0,0 +1,49 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "linker/x86/relative_patcher_x86_base.h" + +namespace art { +namespace linker { + +uint32_t X86BaseRelativePatcher::ReserveSpace( + uint32_t offset, + const CompiledMethod* compiled_method ATTRIBUTE_UNUSED, + MethodReference method_ref ATTRIBUTE_UNUSED) { + return offset; // No space reserved; no limit on relative call distance. 
+} + +uint32_t X86BaseRelativePatcher::ReserveSpaceEnd(uint32_t offset) { + return offset; // No space reserved; no limit on relative call distance. +} + +uint32_t X86BaseRelativePatcher::WriteThunks(OutputStream* out ATTRIBUTE_UNUSED, uint32_t offset) { + return offset; // No thunks added; no limit on relative call distance. +} + +void X86BaseRelativePatcher::PatchCall(std::vector<uint8_t>* code, uint32_t literal_offset, + uint32_t patch_offset, uint32_t target_offset) { + DCHECK_LE(literal_offset + 4u, code->size()); + // Unsigned arithmetic with its well-defined overflow behavior is just fine here. + uint32_t displacement = target_offset - patch_offset; + displacement -= kPcDisplacement; // The base PC is at the end of the 4-byte patch. + + typedef __attribute__((__aligned__(1))) int32_t unaligned_int32_t; + reinterpret_cast<unaligned_int32_t*>(&(*code)[literal_offset])[0] = displacement; +} + +} // namespace linker +} // namespace art diff --git a/compiler/linker/x86/relative_patcher_x86_base.h b/compiler/linker/x86/relative_patcher_x86_base.h new file mode 100644 index 0000000000..9200709398 --- /dev/null +++ b/compiler/linker/x86/relative_patcher_x86_base.h @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_LINKER_X86_RELATIVE_PATCHER_X86_BASE_H_ +#define ART_COMPILER_LINKER_X86_RELATIVE_PATCHER_X86_BASE_H_ + +#include "linker/relative_patcher.h" + +namespace art { +namespace linker { + +class X86BaseRelativePatcher : public RelativePatcher { + public: + uint32_t ReserveSpace(uint32_t offset, + const CompiledMethod* compiled_method, + MethodReference method_ref) OVERRIDE; + uint32_t ReserveSpaceEnd(uint32_t offset) OVERRIDE; + uint32_t WriteThunks(OutputStream* out, uint32_t offset) OVERRIDE; + void PatchCall(std::vector<uint8_t>* code, uint32_t literal_offset, + uint32_t patch_offset, uint32_t target_offset) OVERRIDE; + + protected: + X86BaseRelativePatcher() { } + + // PC displacement from patch location; the base address of x86/x86-64 relative + // calls and x86-64 RIP-relative addressing is the PC of the next instruction and + // the patch location is 4 bytes earlier. + static constexpr int32_t kPcDisplacement = 4; + + private: + DISALLOW_COPY_AND_ASSIGN(X86BaseRelativePatcher); +}; + +} // namespace linker +} // namespace art + +#endif // ART_COMPILER_LINKER_X86_RELATIVE_PATCHER_X86_BASE_H_ diff --git a/compiler/linker/x86/relative_patcher_x86_test.cc b/compiler/linker/x86/relative_patcher_x86_test.cc new file mode 100644 index 0000000000..c18a743b6b --- /dev/null +++ b/compiler/linker/x86/relative_patcher_x86_test.cc @@ -0,0 +1,105 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "linker/relative_patcher_test.h" +#include "linker/x86/relative_patcher_x86.h" + +namespace art { +namespace linker { + +class X86RelativePatcherTest : public RelativePatcherTest { + public: + X86RelativePatcherTest() : RelativePatcherTest(kX86, "default") { } + + protected: + static const uint8_t kCallRawCode[]; + static const ArrayRef<const uint8_t> kCallCode; + + uint32_t GetMethodOffset(uint32_t method_idx) { + auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx)); + CHECK(result.first); + return result.second; + } +}; + +const uint8_t X86RelativePatcherTest::kCallRawCode[] = { + 0xe8, 0x00, 0x01, 0x00, 0x00 +}; + +const ArrayRef<const uint8_t> X86RelativePatcherTest::kCallCode(kCallRawCode); + +TEST_F(X86RelativePatcherTest, CallSelf) { + LinkerPatch patches[] = { + LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 1u), + }; + AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<LinkerPatch>(patches)); + Link(); + + static const uint8_t expected_code[] = { + 0xe8, 0xfb, 0xff, 0xff, 0xff + }; + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); +} + +TEST_F(X86RelativePatcherTest, CallOther) { + LinkerPatch method1_patches[] = { + LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 2u), + }; + AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<LinkerPatch>(method1_patches)); + LinkerPatch method2_patches[] = { + LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 1u), + }; + AddCompiledMethod(MethodRef(2u), kCallCode, ArrayRef<LinkerPatch>(method2_patches)); + Link(); + + uint32_t method1_offset = GetMethodOffset(1u); + uint32_t method2_offset = GetMethodOffset(2u); + uint32_t diff_after = method2_offset - (method1_offset + kCallCode.size() /* PC adjustment */); + static const uint8_t method1_expected_code[] = { + 0xe8, + static_cast<uint8_t>(diff_after), static_cast<uint8_t>(diff_after >> 8), + static_cast<uint8_t>(diff_after >> 16), static_cast<uint8_t>(diff_after >> 24) + }; + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(method1_expected_code))); + uint32_t diff_before = method1_offset - (method2_offset + kCallCode.size() /* PC adjustment */); + static const uint8_t method2_expected_code[] = { + 0xe8, + static_cast<uint8_t>(diff_before), static_cast<uint8_t>(diff_before >> 8), + static_cast<uint8_t>(diff_before >> 16), static_cast<uint8_t>(diff_before >> 24) + }; + EXPECT_TRUE(CheckLinkedMethod(MethodRef(2u), ArrayRef<const uint8_t>(method2_expected_code))); +} + +TEST_F(X86RelativePatcherTest, CallTrampoline) { + LinkerPatch patches[] = { + LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 2u), + }; + AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<LinkerPatch>(patches)); + Link(); + + auto result = method_offset_map_.FindMethodOffset(MethodRef(1)); + ASSERT_TRUE(result.first); + uint32_t diff = kTrampolineOffset - (result.second + kCallCode.size()); + static const uint8_t expected_code[] = { + 0xe8, + static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8), + static_cast<uint8_t>(diff >> 16), 
static_cast<uint8_t>(diff >> 24) + }; + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); +} + +} // namespace linker +} // namespace art diff --git a/compiler/linker/x86_64/relative_patcher_x86_64.cc b/compiler/linker/x86_64/relative_patcher_x86_64.cc new file mode 100644 index 0000000000..598f3ac4a8 --- /dev/null +++ b/compiler/linker/x86_64/relative_patcher_x86_64.cc @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "linker/x86_64/relative_patcher_x86_64.h" + +#include "compiled_method.h" + +namespace art { +namespace linker { + +void X86_64RelativePatcher::PatchDexCacheReference(std::vector<uint8_t>* code, + const LinkerPatch& patch, + uint32_t patch_offset, uint32_t target_offset) { + DCHECK_LE(patch.LiteralOffset() + 4u, code->size()); + // Unsigned arithmetic with its well-defined overflow behavior is just fine here. + uint32_t displacement = target_offset - patch_offset; + displacement -= kPcDisplacement; // The base PC is at the end of the 4-byte patch. + + typedef __attribute__((__aligned__(1))) int32_t unaligned_int32_t; + reinterpret_cast<unaligned_int32_t*>(&(*code)[patch.LiteralOffset()])[0] = displacement; +} + +} // namespace linker +} // namespace art diff --git a/compiler/linker/x86_64/relative_patcher_x86_64.h b/compiler/linker/x86_64/relative_patcher_x86_64.h new file mode 100644 index 0000000000..af687b4a2f --- /dev/null +++ b/compiler/linker/x86_64/relative_patcher_x86_64.h @@ -0,0 +1,36 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_LINKER_X86_64_RELATIVE_PATCHER_X86_64_H_ +#define ART_COMPILER_LINKER_X86_64_RELATIVE_PATCHER_X86_64_H_ + +#include "linker/x86/relative_patcher_x86_base.h" + +namespace art { +namespace linker { + +class X86_64RelativePatcher FINAL : public X86BaseRelativePatcher { + public: + X86_64RelativePatcher() { } + + void PatchDexCacheReference(std::vector<uint8_t>* code, const LinkerPatch& patch, + uint32_t patch_offset, uint32_t target_offset) OVERRIDE; +}; + +} // namespace linker +} // namespace art + +#endif // ART_COMPILER_LINKER_X86_64_RELATIVE_PATCHER_X86_64_H_ diff --git a/compiler/linker/x86_64/relative_patcher_x86_64_test.cc b/compiler/linker/x86_64/relative_patcher_x86_64_test.cc new file mode 100644 index 0000000000..9d9529ced7 --- /dev/null +++ b/compiler/linker/x86_64/relative_patcher_x86_64_test.cc @@ -0,0 +1,136 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "linker/relative_patcher_test.h" +#include "linker/x86_64/relative_patcher_x86_64.h" + +namespace art { +namespace linker { + +class X86_64RelativePatcherTest : public RelativePatcherTest { + public: + X86_64RelativePatcherTest() : RelativePatcherTest(kX86_64, "default") { } + + protected: + static const uint8_t kCallRawCode[]; + static const ArrayRef<const uint8_t> kCallCode; + static const uint8_t kDexCacheLoadRawCode[]; + static const ArrayRef<const uint8_t> kDexCacheLoadCode; + + uint32_t GetMethodOffset(uint32_t method_idx) { + auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx)); + CHECK(result.first); + return result.second; + } +}; + +const uint8_t X86_64RelativePatcherTest::kCallRawCode[] = { + 0xe8, 0x00, 0x01, 0x00, 0x00 +}; + +const ArrayRef<const uint8_t> X86_64RelativePatcherTest::kCallCode(kCallRawCode); + +const uint8_t X86_64RelativePatcherTest::kDexCacheLoadRawCode[] = { + 0x8b, 0x05, // mov eax, [rip + <offset>] + 0x00, 0x01, 0x00, 0x00 +}; + +const ArrayRef<const uint8_t> X86_64RelativePatcherTest::kDexCacheLoadCode( + kDexCacheLoadRawCode); + +TEST_F(X86_64RelativePatcherTest, CallSelf) { + LinkerPatch patches[] = { + LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 1u), + }; + AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<LinkerPatch>(patches)); + Link(); + + static const uint8_t expected_code[] = { + 0xe8, 0xfb, 0xff, 0xff, 0xff + }; + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); +} + +TEST_F(X86_64RelativePatcherTest, CallOther) { + LinkerPatch method1_patches[] = { + LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 2u), + }; + AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<LinkerPatch>(method1_patches)); + LinkerPatch method2_patches[] = { + LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 1u), + }; + AddCompiledMethod(MethodRef(2u), kCallCode, ArrayRef<LinkerPatch>(method2_patches)); + Link(); + + uint32_t method1_offset = GetMethodOffset(1u); + uint32_t method2_offset = 
GetMethodOffset(2u); + uint32_t diff_after = method2_offset - (method1_offset + kCallCode.size() /* PC adjustment */); + static const uint8_t method1_expected_code[] = { + 0xe8, + static_cast<uint8_t>(diff_after), static_cast<uint8_t>(diff_after >> 8), + static_cast<uint8_t>(diff_after >> 16), static_cast<uint8_t>(diff_after >> 24) + }; + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(method1_expected_code))); + uint32_t diff_before = method1_offset - (method2_offset + kCallCode.size() /* PC adjustment */); + static const uint8_t method2_expected_code[] = { + 0xe8, + static_cast<uint8_t>(diff_before), static_cast<uint8_t>(diff_before >> 8), + static_cast<uint8_t>(diff_before >> 16), static_cast<uint8_t>(diff_before >> 24) + }; + EXPECT_TRUE(CheckLinkedMethod(MethodRef(2u), ArrayRef<const uint8_t>(method2_expected_code))); +} + +TEST_F(X86_64RelativePatcherTest, CallTrampoline) { + LinkerPatch patches[] = { + LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 2u), + }; + AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<LinkerPatch>(patches)); + Link(); + + auto result = method_offset_map_.FindMethodOffset(MethodRef(1u)); + ASSERT_TRUE(result.first); + uint32_t diff = kTrampolineOffset - (result.second + kCallCode.size()); + static const uint8_t expected_code[] = { + 0xe8, + static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8), + static_cast<uint8_t>(diff >> 16), static_cast<uint8_t>(diff >> 24) + }; + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); +} + +TEST_F(X86_64RelativePatcherTest, DexCacheReference) { + dex_cache_arrays_begin_ = 0x12345678; + constexpr size_t kElementOffset = 0x1234; + LinkerPatch patches[] = { + LinkerPatch::DexCacheArrayPatch(kDexCacheLoadCode.size() - 4u, nullptr, 0u, kElementOffset), + }; + AddCompiledMethod(MethodRef(1u), kDexCacheLoadCode, ArrayRef<LinkerPatch>(patches)); + Link(); + + auto result = method_offset_map_.FindMethodOffset(MethodRef(1u)); + ASSERT_TRUE(result.first); + uint32_t diff = + dex_cache_arrays_begin_ + kElementOffset - (result.second + kDexCacheLoadCode.size()); + static const uint8_t expected_code[] = { + 0x8b, 0x05, + static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8), + static_cast<uint8_t>(diff >> 16), static_cast<uint8_t>(diff >> 24) + }; + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); +} + +} // namespace linker +} // namespace art diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc index afd39e8874..989b04fa36 100644 --- a/compiler/oat_test.cc +++ b/compiler/oat_test.cc @@ -17,11 +17,14 @@ #include "arch/instruction_set_features.h" #include "class_linker.h" #include "common_compiler_test.h" +#include "compiled_method.h" #include "compiler.h" #include "dex/pass_manager.h" #include "dex/quick/dex_file_to_method_inliner_map.h" #include "dex/quick_compiler_callbacks.h" #include "dex/verification_results.h" +#include "driver/compiler_driver.h" +#include "driver/compiler_options.h" #include "entrypoints/quick/quick_entrypoints.h" #include "mirror/art_method-inl.h" #include "mirror/class-inl.h" @@ -173,7 +176,7 @@ TEST_F(OatTest, OatHeaderSizeCheck) { EXPECT_EQ(72U, sizeof(OatHeader)); EXPECT_EQ(4U, sizeof(OatMethodOffsets)); EXPECT_EQ(28U, sizeof(OatQuickMethodHeader)); - EXPECT_EQ(91 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints)); + EXPECT_EQ(92 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints)); } TEST_F(OatTest, OatHeaderIsValid) { diff --git 
a/compiler/oat_writer.cc b/compiler/oat_writer.cc index b3bb438bac..7120920773 100644 --- a/compiler/oat_writer.cc +++ b/compiler/oat_writer.cc @@ -18,16 +18,21 @@ #include <zlib.h> +#include "arch/arm64/instruction_set_features_arm64.h" #include "base/allocator.h" #include "base/bit_vector.h" #include "base/stl_util.h" #include "base/unix_file/fd_file.h" #include "class_linker.h" #include "compiled_class.h" +#include "compiled_method.h" #include "dex_file-inl.h" #include "dex/verification_results.h" +#include "driver/compiler_driver.h" +#include "driver/compiler_options.h" #include "gc/space/space.h" #include "image_writer.h" +#include "linker/relative_patcher.h" #include "mirror/art_method-inl.h" #include "mirror/array.h" #include "mirror/class_loader.h" @@ -37,352 +42,10 @@ #include "safe_map.h" #include "scoped_thread_state_change.h" #include "handle_scope-inl.h" -#include "utils/arm/assembler_thumb2.h" -#include "utils/arm64/assembler_arm64.h" #include "verifier/method_verifier.h" namespace art { -class OatWriter::RelativeCallPatcher { - public: - virtual ~RelativeCallPatcher() { } - - // Reserve space for relative call thunks if needed, return adjusted offset. - // After all methods have been processed it's call one last time with compiled_method == nullptr. - virtual uint32_t ReserveSpace(uint32_t offset, const CompiledMethod* compiled_method) = 0; - - // Write relative call thunks if needed, return adjusted offset. - virtual uint32_t WriteThunks(OutputStream* out, uint32_t offset) = 0; - - // Patch method code. The input displacement is relative to the patched location, - // the patcher may need to adjust it if the correct base is different. - virtual void Patch(std::vector<uint8_t>* code, uint32_t literal_offset, uint32_t patch_offset, - uint32_t target_offset) = 0; - - protected: - RelativeCallPatcher() { } - - private: - DISALLOW_COPY_AND_ASSIGN(RelativeCallPatcher); -}; - -class OatWriter::NoRelativeCallPatcher FINAL : public RelativeCallPatcher { - public: - NoRelativeCallPatcher() { } - - uint32_t ReserveSpace(uint32_t offset, - const CompiledMethod* compiled_method ATTRIBUTE_UNUSED) OVERRIDE { - return offset; // No space reserved; no patches expected. - } - - uint32_t WriteThunks(OutputStream* out ATTRIBUTE_UNUSED, uint32_t offset) OVERRIDE { - return offset; // No thunks added; no patches expected. - } - - void Patch(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, uint32_t literal_offset ATTRIBUTE_UNUSED, - uint32_t patch_offset ATTRIBUTE_UNUSED, - uint32_t target_offset ATTRIBUTE_UNUSED) OVERRIDE { - LOG(FATAL) << "Unexpected relative patch."; - } - - private: - DISALLOW_COPY_AND_ASSIGN(NoRelativeCallPatcher); -}; - -class OatWriter::X86RelativeCallPatcher FINAL : public RelativeCallPatcher { - public: - X86RelativeCallPatcher() { } - - uint32_t ReserveSpace(uint32_t offset, - const CompiledMethod* compiled_method ATTRIBUTE_UNUSED) OVERRIDE { - return offset; // No space reserved; no limit on relative call distance. - } - - uint32_t WriteThunks(OutputStream* out ATTRIBUTE_UNUSED, uint32_t offset) OVERRIDE { - return offset; // No thunks added; no limit on relative call distance. - } - - void Patch(std::vector<uint8_t>* code, uint32_t literal_offset, uint32_t patch_offset, - uint32_t target_offset) OVERRIDE { - DCHECK_LE(literal_offset + 4u, code->size()); - // Unsigned arithmetic with its well-defined overflow behavior is just fine here. 
- uint32_t displacement = target_offset - patch_offset; - displacement -= kPcDisplacement; // The base PC is at the end of the 4-byte patch. - - typedef __attribute__((__aligned__(1))) int32_t unaligned_int32_t; - reinterpret_cast<unaligned_int32_t*>(&(*code)[literal_offset])[0] = displacement; - } - - private: - // PC displacement from patch location; x86 PC for relative calls points to the next - // instruction and the patch location is 4 bytes earlier. - static constexpr int32_t kPcDisplacement = 4; - - DISALLOW_COPY_AND_ASSIGN(X86RelativeCallPatcher); -}; - -class OatWriter::ArmBaseRelativeCallPatcher : public RelativeCallPatcher { - public: - ArmBaseRelativeCallPatcher(OatWriter* writer, - InstructionSet instruction_set, std::vector<uint8_t> thunk_code, - uint32_t max_positive_displacement, uint32_t max_negative_displacement) - : writer_(writer), instruction_set_(instruction_set), thunk_code_(thunk_code), - max_positive_displacement_(max_positive_displacement), - max_negative_displacement_(max_negative_displacement), - thunk_locations_(), current_thunk_to_write_(0u), unprocessed_patches_() { - } - - uint32_t ReserveSpace(uint32_t offset, const CompiledMethod* compiled_method) OVERRIDE { - // NOTE: The final thunk can be reserved from InitCodeMethodVisitor::EndClass() while it - // may be written early by WriteCodeMethodVisitor::VisitMethod() for a deduplicated chunk - // of code. To avoid any alignment discrepancies for the final chunk, we always align the - // offset after reserving of writing any chunk. - if (UNLIKELY(compiled_method == nullptr)) { - uint32_t aligned_offset = CompiledMethod::AlignCode(offset, instruction_set_); - bool needs_thunk = ReserveSpaceProcessPatches(aligned_offset); - if (needs_thunk) { - thunk_locations_.push_back(aligned_offset); - offset = CompiledMethod::AlignCode(aligned_offset + thunk_code_.size(), instruction_set_); - } - return offset; - } - DCHECK(compiled_method->GetQuickCode() != nullptr); - uint32_t quick_code_size = compiled_method->GetQuickCode()->size(); - uint32_t quick_code_offset = compiled_method->AlignCode(offset) + sizeof(OatQuickMethodHeader); - uint32_t next_aligned_offset = compiled_method->AlignCode(quick_code_offset + quick_code_size); - if (!unprocessed_patches_.empty() && - next_aligned_offset - unprocessed_patches_.front().second > max_positive_displacement_) { - bool needs_thunk = ReserveSpaceProcessPatches(next_aligned_offset); - if (needs_thunk) { - // A single thunk will cover all pending patches. 
- unprocessed_patches_.clear(); - uint32_t thunk_location = compiled_method->AlignCode(offset); - thunk_locations_.push_back(thunk_location); - offset = CompiledMethod::AlignCode(thunk_location + thunk_code_.size(), instruction_set_); - } - } - for (const LinkerPatch& patch : compiled_method->GetPatches()) { - if (patch.Type() == kLinkerPatchCallRelative) { - unprocessed_patches_.emplace_back(patch.TargetMethod(), - quick_code_offset + patch.LiteralOffset()); - } - } - return offset; - } - - uint32_t WriteThunks(OutputStream* out, uint32_t offset) OVERRIDE { - if (current_thunk_to_write_ == thunk_locations_.size()) { - return offset; - } - uint32_t aligned_offset = CompiledMethod::AlignCode(offset, instruction_set_); - if (UNLIKELY(aligned_offset == thunk_locations_[current_thunk_to_write_])) { - ++current_thunk_to_write_; - uint32_t aligned_code_delta = aligned_offset - offset; - if (aligned_code_delta != 0u && !writer_->WriteCodeAlignment(out, aligned_code_delta)) { - return 0u; - } - if (!out->WriteFully(thunk_code_.data(), thunk_code_.size())) { - return 0u; - } - writer_->size_relative_call_thunks_ += thunk_code_.size(); - uint32_t thunk_end_offset = aligned_offset + thunk_code_.size(); - // Align after writing chunk, see the ReserveSpace() above. - offset = CompiledMethod::AlignCode(thunk_end_offset, instruction_set_); - aligned_code_delta = offset - thunk_end_offset; - if (aligned_code_delta != 0u && !writer_->WriteCodeAlignment(out, aligned_code_delta)) { - return 0u; - } - } - return offset; - } - - protected: - uint32_t CalculateDisplacement(uint32_t patch_offset, uint32_t target_offset) { - // Unsigned arithmetic with its well-defined overflow behavior is just fine here. - uint32_t displacement = target_offset - patch_offset; - // NOTE: With unsigned arithmetic we do mean to use && rather than || below. - if (displacement > max_positive_displacement_ && displacement < -max_negative_displacement_) { - // Unwritten thunks have higher offsets, check if it's within range. - DCHECK(current_thunk_to_write_ == thunk_locations_.size() || - thunk_locations_[current_thunk_to_write_] > patch_offset); - if (current_thunk_to_write_ != thunk_locations_.size() && - thunk_locations_[current_thunk_to_write_] - patch_offset < max_positive_displacement_) { - displacement = thunk_locations_[current_thunk_to_write_] - patch_offset; - } else { - // We must have a previous thunk then. - DCHECK_NE(current_thunk_to_write_, 0u); - DCHECK_LT(thunk_locations_[current_thunk_to_write_ - 1], patch_offset); - displacement = thunk_locations_[current_thunk_to_write_ - 1] - patch_offset; - DCHECK(displacement >= -max_negative_displacement_); - } - } - return displacement; - } - - private: - bool ReserveSpaceProcessPatches(uint32_t next_aligned_offset) { - // Process as many patches as possible, stop only on unresolved targets or calls too far back. - while (!unprocessed_patches_.empty()) { - uint32_t patch_offset = unprocessed_patches_.front().second; - auto it = writer_->method_offset_map_.find(unprocessed_patches_.front().first); - if (it == writer_->method_offset_map_.end()) { - // If still unresolved, check if we have a thunk within range. 
- DCHECK(thunk_locations_.empty() || thunk_locations_.back() <= patch_offset); - if (thunk_locations_.empty() || - patch_offset - thunk_locations_.back() > max_negative_displacement_) { - return next_aligned_offset - patch_offset > max_positive_displacement_; - } - } else if (it->second >= patch_offset) { - DCHECK_LE(it->second - patch_offset, max_positive_displacement_); - } else { - // When calling back, check if we have a thunk that's closer than the actual target. - uint32_t target_offset = (thunk_locations_.empty() || it->second > thunk_locations_.back()) - ? it->second - : thunk_locations_.back(); - DCHECK_GT(patch_offset, target_offset); - if (patch_offset - target_offset > max_negative_displacement_) { - return true; - } - } - unprocessed_patches_.pop_front(); - } - return false; - } - - OatWriter* const writer_; - const InstructionSet instruction_set_; - const std::vector<uint8_t> thunk_code_; - const uint32_t max_positive_displacement_; - const uint32_t max_negative_displacement_; - std::vector<uint32_t> thunk_locations_; - size_t current_thunk_to_write_; - - // ReserveSpace() tracks unprocessed patches. - typedef std::pair<MethodReference, uint32_t> UnprocessedPatch; - std::deque<UnprocessedPatch> unprocessed_patches_; - - DISALLOW_COPY_AND_ASSIGN(ArmBaseRelativeCallPatcher); -}; - -class OatWriter::Thumb2RelativeCallPatcher FINAL : public ArmBaseRelativeCallPatcher { - public: - explicit Thumb2RelativeCallPatcher(OatWriter* writer) - : ArmBaseRelativeCallPatcher(writer, kThumb2, CompileThunkCode(), - kMaxPositiveDisplacement, kMaxNegativeDisplacement) { - } - - void Patch(std::vector<uint8_t>* code, uint32_t literal_offset, uint32_t patch_offset, - uint32_t target_offset) OVERRIDE { - DCHECK_LE(literal_offset + 4u, code->size()); - DCHECK_EQ(literal_offset & 1u, 0u); - DCHECK_EQ(patch_offset & 1u, 0u); - DCHECK_EQ(target_offset & 1u, 1u); // Thumb2 mode bit. - uint32_t displacement = CalculateDisplacement(patch_offset, target_offset & ~1u); - displacement -= kPcDisplacement; // The base PC is at the end of the 4-byte patch. - DCHECK_EQ(displacement & 1u, 0u); - DCHECK((displacement >> 24) == 0u || (displacement >> 24) == 255u); // 25-bit signed. - uint32_t signbit = (displacement >> 31) & 0x1; - uint32_t i1 = (displacement >> 23) & 0x1; - uint32_t i2 = (displacement >> 22) & 0x1; - uint32_t imm10 = (displacement >> 12) & 0x03ff; - uint32_t imm11 = (displacement >> 1) & 0x07ff; - uint32_t j1 = i1 ^ (signbit ^ 1); - uint32_t j2 = i2 ^ (signbit ^ 1); - uint32_t value = (signbit << 26) | (j1 << 13) | (j2 << 11) | (imm10 << 16) | imm11; - value |= 0xf000d000; // BL - - uint8_t* addr = &(*code)[literal_offset]; - // Check that we're just overwriting an existing BL. - DCHECK_EQ(addr[1] & 0xf8, 0xf0); - DCHECK_EQ(addr[3] & 0xd0, 0xd0); - // Write the new BL. - addr[0] = (value >> 16) & 0xff; - addr[1] = (value >> 24) & 0xff; - addr[2] = (value >> 0) & 0xff; - addr[3] = (value >> 8) & 0xff; - } - - private: - static std::vector<uint8_t> CompileThunkCode() { - // The thunk just uses the entry point in the ArtMethod. This works even for calls - // to the generic JNI and interpreter trampolines. 
- arm::Thumb2Assembler assembler; - assembler.LoadFromOffset( - arm::kLoadWord, arm::PC, arm::R0, - mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value()); - assembler.bkpt(0); - std::vector<uint8_t> thunk_code(assembler.CodeSize()); - MemoryRegion code(thunk_code.data(), thunk_code.size()); - assembler.FinalizeInstructions(code); - return thunk_code; - } - - // PC displacement from patch location; Thumb2 PC is always at instruction address + 4. - static constexpr int32_t kPcDisplacement = 4; - - // Maximum positive and negative displacement measured from the patch location. - // (Signed 25 bit displacement with the last bit 0 has range [-2^24, 2^24-2] measured from - // the Thumb2 PC pointing right after the BL, i.e. 4 bytes later than the patch location.) - static constexpr uint32_t kMaxPositiveDisplacement = (1u << 24) - 2 + kPcDisplacement; - static constexpr uint32_t kMaxNegativeDisplacement = (1u << 24) - kPcDisplacement; - - DISALLOW_COPY_AND_ASSIGN(Thumb2RelativeCallPatcher); -}; - -class OatWriter::Arm64RelativeCallPatcher FINAL : public ArmBaseRelativeCallPatcher { - public: - explicit Arm64RelativeCallPatcher(OatWriter* writer) - : ArmBaseRelativeCallPatcher(writer, kArm64, CompileThunkCode(), - kMaxPositiveDisplacement, kMaxNegativeDisplacement) { - } - - void Patch(std::vector<uint8_t>* code, uint32_t literal_offset, uint32_t patch_offset, - uint32_t target_offset) OVERRIDE { - DCHECK_LE(literal_offset + 4u, code->size()); - DCHECK_EQ(literal_offset & 3u, 0u); - DCHECK_EQ(patch_offset & 3u, 0u); - DCHECK_EQ(target_offset & 3u, 0u); - uint32_t displacement = CalculateDisplacement(patch_offset, target_offset & ~1u); - DCHECK_EQ(displacement & 3u, 0u); - DCHECK((displacement >> 27) == 0u || (displacement >> 27) == 31u); // 28-bit signed. - uint32_t value = (displacement & 0x0fffffffu) >> 2; - value |= 0x94000000; // BL - - uint8_t* addr = &(*code)[literal_offset]; - // Check that we're just overwriting an existing BL. - DCHECK_EQ(addr[3] & 0xfc, 0x94); - // Write the new BL. - addr[0] = (value >> 0) & 0xff; - addr[1] = (value >> 8) & 0xff; - addr[2] = (value >> 16) & 0xff; - addr[3] = (value >> 24) & 0xff; - } - - private: - static std::vector<uint8_t> CompileThunkCode() { - // The thunk just uses the entry point in the ArtMethod. This works even for calls - // to the generic JNI and interpreter trampolines. - arm64::Arm64Assembler assembler; - Offset offset(mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset( - kArm64PointerSize).Int32Value()); - assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0)); - // Ensure we emit the literal pool. - assembler.EmitSlowPaths(); - std::vector<uint8_t> thunk_code(assembler.CodeSize()); - MemoryRegion code(thunk_code.data(), thunk_code.size()); - assembler.FinalizeInstructions(code); - return thunk_code; - } - - // Maximum positive and negative displacement measured from the patch location. - // (Signed 28 bit displacement with the last bit 0 has range [-2^27, 2^27-4] measured from - // the ARM64 PC pointing to the BL.) 
- static constexpr uint32_t kMaxPositiveDisplacement = (1u << 27) - 4u; - static constexpr uint32_t kMaxNegativeDisplacement = (1u << 27); - - DISALLOW_COPY_AND_ASSIGN(Arm64RelativeCallPatcher); -}; - #define DCHECK_OFFSET() \ DCHECK_EQ(static_cast<off_t>(file_offset + relative_offset), out->Seek(0, kSeekCurrent)) \ << "file_offset=" << file_offset << " relative_offset=" << relative_offset @@ -427,6 +90,7 @@ OatWriter::OatWriter(const std::vector<const DexFile*>& dex_files, size_code_(0), size_code_alignment_(0), size_relative_call_thunks_(0), + size_misc_thunks_(0), size_mapping_table_(0), size_vmap_table_(0), size_gc_map_(0), @@ -442,23 +106,10 @@ OatWriter::OatWriter(const std::vector<const DexFile*>& dex_files, method_offset_map_() { CHECK(key_value_store != nullptr); - switch (compiler_driver_->GetInstructionSet()) { - case kX86: - case kX86_64: - relative_call_patcher_.reset(new X86RelativeCallPatcher); - break; - case kArm: - // Fall through: we generate Thumb2 code for "arm". - case kThumb2: - relative_call_patcher_.reset(new Thumb2RelativeCallPatcher(this)); - break; - case kArm64: - relative_call_patcher_.reset(new Arm64RelativeCallPatcher(this)); - break; - default: - relative_call_patcher_.reset(new NoRelativeCallPatcher); - break; - } + InstructionSet instruction_set = compiler_driver_->GetInstructionSet(); + const InstructionSetFeatures* features = compiler_driver_->GetInstructionSetFeatures(); + relative_patcher_ = linker::RelativePatcher::Create(instruction_set, features, + &method_offset_map_); size_t offset; { @@ -706,7 +357,7 @@ class OatWriter::InitCodeMethodVisitor : public OatDexMethodVisitor { bool EndClass() { OatDexMethodVisitor::EndClass(); if (oat_class_index_ == writer_->oat_classes_.size()) { - offset_ = writer_->relative_call_patcher_->ReserveSpace(offset_, nullptr); + offset_ = writer_->relative_patcher_->ReserveSpaceEnd(offset_); } return true; } @@ -722,36 +373,36 @@ class OatWriter::InitCodeMethodVisitor : public OatDexMethodVisitor { const SwapVector<uint8_t>* quick_code = compiled_method->GetQuickCode(); CHECK(quick_code != nullptr); - offset_ = writer_->relative_call_patcher_->ReserveSpace(offset_, compiled_method); - offset_ = compiled_method->AlignCode(offset_); - DCHECK_ALIGNED_PARAM(offset_, - GetInstructionSetAlignment(compiled_method->GetInstructionSet())); uint32_t code_size = quick_code->size() * sizeof(uint8_t); CHECK_NE(code_size, 0U); uint32_t thumb_offset = compiled_method->CodeDelta(); - quick_code_offset = offset_ + sizeof(OatQuickMethodHeader) + thumb_offset; - - bool deduped = false; // Deduplicate code arrays. 
+ bool deduped = false; auto lb = dedupe_map_.lower_bound(compiled_method); if (lb != dedupe_map_.end() && !dedupe_map_.key_comp()(compiled_method, lb->first)) { quick_code_offset = lb->second; deduped = true; } else { + offset_ = writer_->relative_patcher_->ReserveSpace( + offset_, compiled_method, MethodReference(dex_file_, it.GetMemberIndex())); + offset_ = compiled_method->AlignCode(offset_); + DCHECK_ALIGNED_PARAM(offset_, + GetInstructionSetAlignment(compiled_method->GetInstructionSet())); + quick_code_offset = offset_ + sizeof(OatQuickMethodHeader) + thumb_offset; dedupe_map_.PutBefore(lb, compiled_method, quick_code_offset); } MethodReference method_ref(dex_file_, it.GetMemberIndex()); - auto method_lb = writer_->method_offset_map_.lower_bound(method_ref); - if (method_lb != writer_->method_offset_map_.end() && - !writer_->method_offset_map_.key_comp()(method_ref, method_lb->first)) { + auto method_lb = writer_->method_offset_map_.map.lower_bound(method_ref); + if (method_lb != writer_->method_offset_map_.map.end() && + !writer_->method_offset_map_.map.key_comp()(method_ref, method_lb->first)) { // TODO: Should this be a hard failure? LOG(WARNING) << "Multiple definitions of " << PrettyMethod(method_ref.dex_method_index, *method_ref.dex_file) << ((method_lb->second != quick_code_offset) ? "; OFFSET MISMATCH" : ""); } else { - writer_->method_offset_map_.PutBefore(method_lb, method_ref, quick_code_offset); + writer_->method_offset_map_.map.PutBefore(method_lb, method_ref, quick_code_offset); } // Update quick method header. @@ -790,7 +441,7 @@ class OatWriter::InitCodeMethodVisitor : public OatDexMethodVisitor { if (!compiled_method->GetPatches().empty()) { uintptr_t base_loc = offset_ - code_size - writer_->oat_header_->GetExecutableOffset(); for (const LinkerPatch& patch : compiled_method->GetPatches()) { - if (patch.Type() != kLinkerPatchCallRelative) { + if (!patch.IsPcRelative()) { writer_->absolute_patch_locations_.push_back(base_loc + patch.LiteralOffset()); } } @@ -808,10 +459,12 @@ class OatWriter::InitCodeMethodVisitor : public OatDexMethodVisitor { } const uint32_t quick_code_start = quick_code_offset - - writer_->oat_header_->GetExecutableOffset(); + writer_->oat_header_->GetExecutableOffset() - thumb_offset; const DexFile::CodeItem *code_item = it.GetMethodCodeItem(); - writer_->method_info_.push_back(DebugInfo(name, - dex_file_->GetSourceFile(dex_file_->GetClassDef(class_def_index_)), + const DexFile::ClassDef& class_def = dex_file_->GetClassDef(class_def_index_); + writer_->method_info_.push_back(DebugInfo(name, deduped, + dex_file_->GetClassDescriptor(class_def), + dex_file_->GetSourceFile(class_def), quick_code_start, quick_code_start + code_size, code_item == nullptr ? nullptr : dex_file_->GetDebugInfoStream(code_item), compiled_method)); @@ -851,6 +504,37 @@ class OatWriter::InitCodeMethodVisitor : public OatDexMethodVisitor { } private: + struct CodeOffsetsKeyComparator { + bool operator()(const CompiledMethod* lhs, const CompiledMethod* rhs) const { + if (lhs->GetQuickCode() != rhs->GetQuickCode()) { + return lhs->GetQuickCode() < rhs->GetQuickCode(); + } + // If the code is the same, all other fields are likely to be the same as well. 
+ if (UNLIKELY(lhs->GetMappingTable() != rhs->GetMappingTable())) { + return lhs->GetMappingTable() < rhs->GetMappingTable(); + } + if (UNLIKELY(lhs->GetVmapTable() != rhs->GetVmapTable())) { + return lhs->GetVmapTable() < rhs->GetVmapTable(); + } + if (UNLIKELY(lhs->GetGcMap() != rhs->GetGcMap())) { + return lhs->GetGcMap() < rhs->GetGcMap(); + } + const auto& lhs_patches = lhs->GetPatches(); + const auto& rhs_patches = rhs->GetPatches(); + if (UNLIKELY(lhs_patches.size() != rhs_patches.size())) { + return lhs_patches.size() < rhs_patches.size(); + } + auto rit = rhs_patches.begin(); + for (const LinkerPatch& lpatch : lhs_patches) { + if (UNLIKELY(!(lpatch == *rit))) { + return lpatch < *rit; + } + ++rit; + } + return false; + } + }; + // Deduplication is already done on a pointer basis by the compiler driver, // so we can simply compare the pointers to find out if things are duplicated. SafeMap<const CompiledMethod*, uint32_t, CodeOffsetsKeyComparator> dedupe_map_; @@ -978,7 +662,7 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { bool result = OatDexMethodVisitor::EndClass(); if (oat_class_index_ == writer_->oat_classes_.size()) { DCHECK(result); // OatDexMethodVisitor::EndClass() never fails. - offset_ = writer_->relative_call_patcher_->WriteThunks(out_, offset_); + offset_ = writer_->relative_patcher_->WriteThunks(out_, offset_); if (UNLIKELY(offset_ == 0u)) { PLOG(ERROR) << "Failed to write final relative call thunks"; result = false; @@ -1000,33 +684,32 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { if (quick_code != nullptr) { // Need a wrapper if we create a copy for patching. ArrayRef<const uint8_t> wrapped(*quick_code); - - offset_ = writer_->relative_call_patcher_->WriteThunks(out, offset_); - if (offset_ == 0u) { - ReportWriteFailure("relative call thunk", it); - return false; - } - uint32_t aligned_offset = compiled_method->AlignCode(offset_); - uint32_t aligned_code_delta = aligned_offset - offset_; - if (aligned_code_delta != 0) { - if (!writer_->WriteCodeAlignment(out, aligned_code_delta)) { - ReportWriteFailure("code alignment padding", it); - return false; - } - offset_ += aligned_code_delta; - DCHECK_OFFSET_(); - } - DCHECK_ALIGNED_PARAM(offset_, - GetInstructionSetAlignment(compiled_method->GetInstructionSet())); uint32_t code_size = quick_code->size() * sizeof(uint8_t); CHECK_NE(code_size, 0U); // Deduplicate code arrays. 
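+ // A method whose code was deduplicated shares the code_offset_ of an earlier copy,
+ // so the check below skips writing its code again.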
const OatMethodOffsets& method_offsets = oat_class->method_offsets_[method_offsets_index_]; - DCHECK(method_offsets.code_offset_ < offset_ || method_offsets.code_offset_ == - offset_ + sizeof(OatQuickMethodHeader) + compiled_method->CodeDelta()) - << PrettyMethod(it.GetMemberIndex(), *dex_file_); if (method_offsets.code_offset_ >= offset_) { + offset_ = writer_->relative_patcher_->WriteThunks(out, offset_); + if (offset_ == 0u) { + ReportWriteFailure("relative call thunk", it); + return false; + } + uint32_t aligned_offset = compiled_method->AlignCode(offset_); + uint32_t aligned_code_delta = aligned_offset - offset_; + if (aligned_code_delta != 0) { + if (!writer_->WriteCodeAlignment(out, aligned_code_delta)) { + ReportWriteFailure("code alignment padding", it); + return false; + } + offset_ += aligned_code_delta; + DCHECK_OFFSET_(); + } + DCHECK_ALIGNED_PARAM(offset_, + GetInstructionSetAlignment(compiled_method->GetInstructionSet())); + DCHECK_EQ(method_offsets.code_offset_, + offset_ + sizeof(OatQuickMethodHeader) + compiled_method->CodeDelta()) + << PrettyMethod(it.GetMemberIndex(), *dex_file_); const OatQuickMethodHeader& method_header = oat_class->method_headers_[method_offsets_index_]; writer_->oat_header_->UpdateChecksum(&method_header, sizeof(method_header)); @@ -1039,15 +722,21 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { DCHECK_OFFSET_(); if (!compiled_method->GetPatches().empty()) { - patched_code_ = std::vector<uint8_t>(quick_code->begin(), quick_code->end()); + patched_code_.assign(quick_code->begin(), quick_code->end()); wrapped = ArrayRef<const uint8_t>(patched_code_); for (const LinkerPatch& patch : compiled_method->GetPatches()) { if (patch.Type() == kLinkerPatchCallRelative) { // NOTE: Relative calls across oat files are not supported. uint32_t target_offset = GetTargetOffset(patch); uint32_t literal_offset = patch.LiteralOffset(); - writer_->relative_call_patcher_->Patch(&patched_code_, literal_offset, + writer_->relative_patcher_->PatchCall(&patched_code_, literal_offset, offset_ + literal_offset, target_offset); + } else if (patch.Type() == kLinkerPatchDexCacheArray) { + uint32_t target_offset = GetDexCacheOffset(patch); + uint32_t literal_offset = patch.LiteralOffset(); + writer_->relative_patcher_->PatchDexCacheReference(&patched_code_, patch, + offset_ + literal_offset, + target_offset); } else if (patch.Type() == kLinkerPatchCall) { uint32_t target_offset = GetTargetOffset(patch); PatchCodeAddress(&patched_code_, patch.LiteralOffset(), target_offset); @@ -1102,9 +791,9 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { } uint32_t GetTargetOffset(const LinkerPatch& patch) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - auto target_it = writer_->method_offset_map_.find(patch.TargetMethod()); + auto target_it = writer_->method_offset_map_.map.find(patch.TargetMethod()); uint32_t target_offset = - (target_it != writer_->method_offset_map_.end()) ? target_it->second : 0u; + (target_it != writer_->method_offset_map_.map.end()) ? target_it->second : 0u; // If there's no compiled code, point to the correct trampoline. 
if (UNLIKELY(target_offset == 0)) { mirror::ArtMethod* target = GetTargetMethod(patch); @@ -1134,6 +823,18 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { return type; } + uint32_t GetDexCacheOffset(const LinkerPatch& patch) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + if (writer_->image_writer_ != nullptr) { + auto* element = writer_->image_writer_->GetDexCacheArrayElementImageAddress( + patch.TargetDexCacheDexFile(), patch.TargetDexCacheElementOffset()); + const uint8_t* oat_data = writer_->image_writer_->GetOatFileBegin() + file_offset_; + return reinterpret_cast<const uint8_t*>(element) - oat_data; + } else { + LOG(FATAL) << "Unimplemented."; + UNREACHABLE(); + } + } + void PatchObjectAddress(std::vector<uint8_t>* code, uint32_t offset, mirror::Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { // NOTE: Direct method pointers across oat files don't use linker patches. However, direct @@ -1472,6 +1173,7 @@ bool OatWriter::Write(OutputStream* out) { DO_STAT(size_code_); DO_STAT(size_code_alignment_); DO_STAT(size_relative_call_thunks_); + DO_STAT(size_misc_thunks_); DO_STAT(size_mapping_table_); DO_STAT(size_vmap_table_); DO_STAT(size_gc_map_); @@ -1630,6 +1332,10 @@ size_t OatWriter::WriteCodeDexFiles(OutputStream* out, #undef VISIT + size_code_alignment_ += relative_patcher_->CodeAlignmentSize(); + size_relative_call_thunks_ += relative_patcher_->RelativeCallThunksSize(); + size_misc_thunks_ += relative_patcher_->MiscThunksSize(); + return relative_offset; } @@ -1645,6 +1351,15 @@ bool OatWriter::WriteCodeAlignment(OutputStream* out, uint32_t aligned_code_delt return true; } +std::pair<bool, uint32_t> OatWriter::MethodOffsetMap::FindMethodOffset(MethodReference ref) { + auto it = map.find(ref); + if (it == map.end()) { + return std::pair<bool, uint32_t>(false, 0u); + } else { + return std::pair<bool, uint32_t>(true, it->second); + } +} + OatWriter::OatDexFile::OatDexFile(size_t offset, const DexFile& dex_file) { offset_ = offset; const std::string& location(dex_file.GetLocation()); diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h index fd2ccae4a5..c472000f37 100644 --- a/compiler/oat_writer.h +++ b/compiler/oat_writer.h @@ -21,7 +21,7 @@ #include <cstddef> #include <memory> -#include "driver/compiler_driver.h" +#include "linker/relative_patcher.h" // For linker::RelativePatcherTargetProvider. 
#include "mem_map.h" #include "method_reference.h" #include "oat.h" @@ -32,8 +32,10 @@ namespace art { class BitVector; class CompiledMethod; +class CompilerDriver; class ImageWriter; class OutputStream; +class TimingLogger; // OatHeader variable length with count of D OatDexFiles // @@ -113,14 +115,18 @@ class OatWriter { ~OatWriter(); struct DebugInfo { - DebugInfo(const std::string& method_name, const char* src_file_name, - uint32_t low_pc, uint32_t high_pc, const uint8_t* dbgstream, - CompiledMethod* compiled_method) - : method_name_(method_name), src_file_name_(src_file_name), - low_pc_(low_pc), high_pc_(high_pc), dbgstream_(dbgstream), - compiled_method_(compiled_method) { + DebugInfo(const std::string& method_name, bool deduped, + const char* class_descriptor, const char* src_file_name, + uint32_t low_pc, uint32_t high_pc, + const uint8_t* dbgstream, CompiledMethod* compiled_method) + : method_name_(method_name), deduped_(deduped), + class_descriptor_(class_descriptor), src_file_name_(src_file_name), + low_pc_(low_pc), high_pc_(high_pc), + dbgstream_(dbgstream), compiled_method_(compiled_method) { } std::string method_name_; // Note: this name is a pretty-printed name. + bool deduped_; + const char* class_descriptor_; const char* src_file_name_; uint32_t low_pc_; uint32_t high_pc_; @@ -132,6 +138,10 @@ class OatWriter { return method_info_; } + const CompilerDriver* GetCompilerDriver() { + return compiler_driver_; + } + private: // The DataAccess classes are helper classes that provide access to members related to // a given map, i.e. GC map, mapping table or vmap table. By abstracting these away @@ -312,6 +322,7 @@ class OatWriter { uint32_t size_code_; uint32_t size_code_alignment_; uint32_t size_relative_call_thunks_; + uint32_t size_misc_thunks_; uint32_t size_mapping_table_; uint32_t size_vmap_table_; uint32_t size_gc_map_; @@ -325,50 +336,19 @@ class OatWriter { uint32_t size_oat_class_method_bitmaps_; uint32_t size_oat_class_method_offsets_; - class RelativeCallPatcher; - class NoRelativeCallPatcher; - class X86RelativeCallPatcher; - class ArmBaseRelativeCallPatcher; - class Thumb2RelativeCallPatcher; - class Arm64RelativeCallPatcher; - - std::unique_ptr<RelativeCallPatcher> relative_call_patcher_; + std::unique_ptr<linker::RelativePatcher> relative_patcher_; // The locations of absolute patches relative to the start of the executable section. std::vector<uintptr_t> absolute_patch_locations_; - SafeMap<MethodReference, uint32_t, MethodReferenceComparator> method_offset_map_; - - struct CodeOffsetsKeyComparator { - bool operator()(const CompiledMethod* lhs, const CompiledMethod* rhs) const { - if (lhs->GetQuickCode() != rhs->GetQuickCode()) { - return lhs->GetQuickCode() < rhs->GetQuickCode(); - } - // If the code is the same, all other fields are likely to be the same as well. 
- if (UNLIKELY(lhs->GetMappingTable() != rhs->GetMappingTable())) { - return lhs->GetMappingTable() < rhs->GetMappingTable(); - } - if (UNLIKELY(lhs->GetVmapTable() != rhs->GetVmapTable())) { - return lhs->GetVmapTable() < rhs->GetVmapTable(); - } - if (UNLIKELY(lhs->GetGcMap() != rhs->GetGcMap())) { - return lhs->GetGcMap() < rhs->GetGcMap(); - } - const auto& lhs_patches = lhs->GetPatches(); - const auto& rhs_patches = rhs->GetPatches(); - if (UNLIKELY(lhs_patches.size() != rhs_patches.size())) { - return lhs_patches.size() < rhs_patches.size(); - } - auto rit = rhs_patches.begin(); - for (const LinkerPatch& lpatch : lhs_patches) { - if (UNLIKELY(!(lpatch == *rit))) { - return lpatch < *rit; - } - ++rit; - } - return false; - } + // Map method reference to assigned offset. + // Wrap the map in a class implementing linker::RelativePatcherTargetProvider. + class MethodOffsetMap FINAL : public linker::RelativePatcherTargetProvider { + public: + std::pair<bool, uint32_t> FindMethodOffset(MethodReference ref) OVERRIDE; + SafeMap<MethodReference, uint32_t, MethodReferenceComparator> map; }; + MethodOffsetMap method_offset_map_; DISALLOW_COPY_AND_ASSIGN(OatWriter); }; diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index 1d167949f4..dce02f794d 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -443,9 +443,31 @@ class MonotonicValueRange : public ValueRange { class BCEVisitor : public HGraphVisitor { public: + // The least number of bounds checks that should be eliminated by triggering + // the deoptimization technique. + static constexpr size_t kThresholdForAddingDeoptimize = 2; + + // Very large constant index is considered as an anomaly. This is a threshold + // beyond which we don't bother to apply the deoptimization technique since + // it's likely some AIOOBE will be thrown. + static constexpr int32_t kMaxConstantForAddingDeoptimize = INT_MAX - 1024 * 1024; + explicit BCEVisitor(HGraph* graph) : HGraphVisitor(graph), - maps_(graph->GetBlocks().Size()) {} + maps_(graph->GetBlocks().Size()), + need_to_revisit_block_(false) {} + + void VisitBasicBlock(HBasicBlock* block) OVERRIDE { + first_constant_index_bounds_check_map_.clear(); + HGraphVisitor::VisitBasicBlock(block); + if (need_to_revisit_block_) { + AddComparesWithDeoptimization(block); + need_to_revisit_block_ = false; + first_constant_index_bounds_check_map_.clear(); + GetValueRangeMap(block)->clear(); + HGraphVisitor::VisitBasicBlock(block); + } + } private: // Return the map of proven value ranges at the beginning of a basic block. @@ -701,9 +723,26 @@ class BCEVisitor : public HGraphVisitor { } } + if (first_constant_index_bounds_check_map_.find(array_length->GetId()) == + first_constant_index_bounds_check_map_.end()) { + // Remember the first bounds check against array_length of a constant index. + // That bounds check instruction has an associated HEnvironment where we + // may add an HDeoptimize to eliminate bounds checks of constant indices + // against array_length. + first_constant_index_bounds_check_map_.Put(array_length->GetId(), bounds_check); + } else { + // We've seen it at least twice. It's beneficial to introduce a compare with + // deoptimization fallback to eliminate the bounds checks. + need_to_revisit_block_ = true; + } + // Once we have an array access like 'array[5] = 1', we record array.length >= 6. 
// We currently don't do it for non-constant index since a valid array[i] can't prove // a valid array[i-1] yet due to the lower bound side. + if (constant == INT_MAX) { + // INT_MAX as an index will definitely throw AIOOBE. + return; + } ValueBound lower = ValueBound(nullptr, constant + 1); ValueBound upper = ValueBound::Max(); ValueRange* range = new (GetGraph()->GetArena()) @@ -938,8 +977,90 @@ class BCEVisitor : public HGraphVisitor { } } + void VisitDeoptimize(HDeoptimize* deoptimize) { + // Right now it's only HLessThanOrEqual. + DCHECK(deoptimize->InputAt(0)->IsLessThanOrEqual()); + HLessThanOrEqual* less_than_or_equal = deoptimize->InputAt(0)->AsLessThanOrEqual(); + HInstruction* instruction = less_than_or_equal->InputAt(0); + if (instruction->IsArrayLength()) { + HInstruction* constant = less_than_or_equal->InputAt(1); + DCHECK(constant->IsIntConstant()); + DCHECK(constant->AsIntConstant()->GetValue() <= kMaxConstantForAddingDeoptimize); + ValueBound lower = ValueBound(nullptr, constant->AsIntConstant()->GetValue() + 1); + ValueRange* range = new (GetGraph()->GetArena()) + ValueRange(GetGraph()->GetArena(), lower, ValueBound::Max()); + GetValueRangeMap(deoptimize->GetBlock())->Overwrite(instruction->GetId(), range); + } + } + + void AddCompareWithDeoptimization(HInstruction* array_length, + HIntConstant* const_instr, + HBasicBlock* block) { + DCHECK(array_length->IsArrayLength()); + ValueRange* range = LookupValueRange(array_length, block); + ValueBound lower_bound = range->GetLower(); + DCHECK(lower_bound.IsConstant()); + DCHECK(const_instr->GetValue() <= kMaxConstantForAddingDeoptimize); + DCHECK_EQ(lower_bound.GetConstant(), const_instr->GetValue() + 1); + + // If array_length is less than lower_const, deoptimize. + HBoundsCheck* bounds_check = first_constant_index_bounds_check_map_.Get( + array_length->GetId())->AsBoundsCheck(); + HCondition* cond = new (GetGraph()->GetArena()) HLessThanOrEqual(array_length, const_instr); + HDeoptimize* deoptimize = new (GetGraph()->GetArena()) + HDeoptimize(cond, bounds_check->GetDexPc()); + block->InsertInstructionBefore(cond, bounds_check); + block->InsertInstructionBefore(deoptimize, bounds_check); + deoptimize->CopyEnvironmentFrom(bounds_check->GetEnvironment()); + } + + void AddComparesWithDeoptimization(HBasicBlock* block) { + for (ArenaSafeMap<int, HBoundsCheck*>::iterator it = + first_constant_index_bounds_check_map_.begin(); + it != first_constant_index_bounds_check_map_.end(); + ++it) { + HBoundsCheck* bounds_check = it->second; + HArrayLength* array_length = bounds_check->InputAt(1)->AsArrayLength(); + HIntConstant* lower_bound_const_instr = nullptr; + int32_t lower_bound_const = INT_MIN; + size_t counter = 0; + // Count the constant indexing for which bounds checks haven't + // been removed yet. 
+ for (HUseIterator<HInstruction*> it2(array_length->GetUses()); + !it2.Done(); + it2.Advance()) { + HInstruction* user = it2.Current()->GetUser(); + if (user->GetBlock() == block && + user->IsBoundsCheck() && + user->AsBoundsCheck()->InputAt(0)->IsIntConstant()) { + DCHECK_EQ(array_length, user->AsBoundsCheck()->InputAt(1)); + HIntConstant* const_instr = user->AsBoundsCheck()->InputAt(0)->AsIntConstant(); + if (const_instr->GetValue() > lower_bound_const) { + lower_bound_const = const_instr->GetValue(); + lower_bound_const_instr = const_instr; + } + counter++; + } + } + if (counter >= kThresholdForAddingDeoptimize && + lower_bound_const_instr->GetValue() <= kMaxConstantForAddingDeoptimize) { + AddCompareWithDeoptimization(array_length, lower_bound_const_instr, block); + } + } + } + std::vector<std::unique_ptr<ArenaSafeMap<int, ValueRange*>>> maps_; + // Map an HArrayLength instruction's id to the first HBoundsCheck instruction in + // a block that checks a constant index against that HArrayLength. + SafeMap<int, HBoundsCheck*> first_constant_index_bounds_check_map_; + + // For the block, there is at least one HArrayLength instruction for which there + // is more than one bounds check instruction with constant indexing. And it's + // beneficial to add a compare instruction that has deoptimization fallback and + // eliminate those bounds checks. + bool need_to_revisit_block_; + DISALLOW_COPY_AND_ASSIGN(BCEVisitor); }; diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc index b3653fe903..75cf1cf063 100644 --- a/compiler/optimizing/bounds_check_elimination_test.cc +++ b/compiler/optimizing/bounds_check_elimination_test.cc @@ -284,9 +284,9 @@ TEST(BoundsCheckEliminationTest, UnderflowArrayBoundsElimination) { ASSERT_FALSE(IsRemoved(bounds_check)); } -// array[5] = 1; // Can't eliminate. -// array[4] = 1; // Can eliminate. // array[6] = 1; // Can't eliminate. +// array[5] = 1; // Can eliminate. +// array[4] = 1; // Can eliminate. 
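The bounds-check-elimination change above works by deoptimization: once a block contains at least kThresholdForAddingDeoptimize bounds checks with constant indices against the same array length, a single HLessThanOrEqual guard plus HDeoptimize is inserted at the first such check, and the block is revisited so the now-redundant checks can be dropped. A rough scalar sketch of the idea, with ThrowBoundsError and Deoptimize as hypothetical stand-ins for the AIOOBE path and the deoptimization entrypoint:

    #include <cstdint>
    #include <cstdlib>

    // Hypothetical hooks: the out-of-bounds throw path and the bail-out to the interpreter.
    [[noreturn]] void ThrowBoundsError() { std::abort(); }
    [[noreturn]] void Deoptimize() { std::abort(); }

    // Before: every constant-index store carries its own bounds check.
    void StoreNaive(int32_t* a, int32_t len) {
      if (6 >= len) ThrowBoundsError();
      a[6] = 1;
      if (5 >= len) ThrowBoundsError();
      a[5] = 1;
      if (4 >= len) ThrowBoundsError();
      a[4] = 1;
    }

    // After: one compare on the largest constant index guards the block;
    // inside it the remaining constant-index checks are redundant.
    void StoreGuarded(int32_t* a, int32_t len) {
      if (len <= 6) Deoptimize();  // analogous to HLessThanOrEqual + HDeoptimize
      a[6] = 1;
      a[5] = 1;
      a[4] = 1;
    }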
TEST(BoundsCheckEliminationTest, ConstantArrayBoundsElimination) { ArenaPool pool; ArenaAllocator allocator(&pool); @@ -311,35 +311,35 @@ TEST(BoundsCheckEliminationTest, ConstantArrayBoundsElimination) { HNullCheck* null_check = new (&allocator) HNullCheck(parameter, 0); HArrayLength* array_length = new (&allocator) HArrayLength(null_check); - HBoundsCheck* bounds_check5 = new (&allocator) - HBoundsCheck(constant_5, array_length, 0); + HBoundsCheck* bounds_check6 = new (&allocator) + HBoundsCheck(constant_6, array_length, 0); HInstruction* array_set = new (&allocator) HArraySet( - null_check, bounds_check5, constant_1, Primitive::kPrimInt, 0); + null_check, bounds_check6, constant_1, Primitive::kPrimInt, 0); block->AddInstruction(null_check); block->AddInstruction(array_length); - block->AddInstruction(bounds_check5); + block->AddInstruction(bounds_check6); block->AddInstruction(array_set); null_check = new (&allocator) HNullCheck(parameter, 0); array_length = new (&allocator) HArrayLength(null_check); - HBoundsCheck* bounds_check4 = new (&allocator) - HBoundsCheck(constant_4, array_length, 0); + HBoundsCheck* bounds_check5 = new (&allocator) + HBoundsCheck(constant_5, array_length, 0); array_set = new (&allocator) HArraySet( - null_check, bounds_check4, constant_1, Primitive::kPrimInt, 0); + null_check, bounds_check5, constant_1, Primitive::kPrimInt, 0); block->AddInstruction(null_check); block->AddInstruction(array_length); - block->AddInstruction(bounds_check4); + block->AddInstruction(bounds_check5); block->AddInstruction(array_set); null_check = new (&allocator) HNullCheck(parameter, 0); array_length = new (&allocator) HArrayLength(null_check); - HBoundsCheck* bounds_check6 = new (&allocator) - HBoundsCheck(constant_6, array_length, 0); + HBoundsCheck* bounds_check4 = new (&allocator) + HBoundsCheck(constant_4, array_length, 0); array_set = new (&allocator) HArraySet( - null_check, bounds_check6, constant_1, Primitive::kPrimInt, 0); + null_check, bounds_check4, constant_1, Primitive::kPrimInt, 0); block->AddInstruction(null_check); block->AddInstruction(array_length); - block->AddInstruction(bounds_check6); + block->AddInstruction(bounds_check4); block->AddInstruction(array_set); block->AddInstruction(new (&allocator) HGoto()); @@ -353,9 +353,9 @@ TEST(BoundsCheckEliminationTest, ConstantArrayBoundsElimination) { RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination(graph); bounds_check_elimination.Run(); - ASSERT_FALSE(IsRemoved(bounds_check5)); - ASSERT_TRUE(IsRemoved(bounds_check4)); ASSERT_FALSE(IsRemoved(bounds_check6)); + ASSERT_TRUE(IsRemoved(bounds_check5)); + ASSERT_TRUE(IsRemoved(bounds_check4)); } // for (int i=initial; i<array.length; i+=increment) { array[i] = 10; } diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index 2cdd5af9f3..a912d4ccc4 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -23,6 +23,7 @@ #include "dex_instruction.h" #include "dex_instruction-inl.h" #include "driver/compiler_driver-inl.h" +#include "driver/compiler_options.h" #include "mirror/art_field.h" #include "mirror/art_field-inl.h" #include "mirror/class_loader.h" @@ -230,8 +231,7 @@ void HGraphBuilder::MaybeRecordStat(MethodCompilationStat compilation_stat) { } } -bool HGraphBuilder::SkipCompilation(size_t number_of_dex_instructions, - size_t number_of_blocks ATTRIBUTE_UNUSED, +bool HGraphBuilder::SkipCompilation(const DexFile::CodeItem& code_item, size_t number_of_branches) { const CompilerOptions& 
compiler_options = compiler_driver_->GetCompilerOptions(); CompilerOptions::CompilerFilter compiler_filter = compiler_options.GetCompilerFilter(); @@ -239,19 +239,20 @@ bool HGraphBuilder::SkipCompilation(size_t number_of_dex_instructions, return false; } - if (compiler_options.IsHugeMethod(number_of_dex_instructions)) { + if (compiler_options.IsHugeMethod(code_item.insns_size_in_code_units_)) { VLOG(compiler) << "Skip compilation of huge method " << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_) - << ": " << number_of_dex_instructions << " dex instructions"; + << ": " << code_item.insns_size_in_code_units_ << " code units"; MaybeRecordStat(MethodCompilationStat::kNotCompiledHugeMethod); return true; } // If it's large and contains no branches, it's likely to be machine generated initialization. - if (compiler_options.IsLargeMethod(number_of_dex_instructions) && (number_of_branches == 0)) { + if (compiler_options.IsLargeMethod(code_item.insns_size_in_code_units_) + && (number_of_branches == 0)) { VLOG(compiler) << "Skip compilation of large method with no branch " << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_) - << ": " << number_of_dex_instructions << " dex instructions"; + << ": " << code_item.insns_size_in_code_units_ << " code units"; MaybeRecordStat(MethodCompilationStat::kNotCompiledLargeMethodNoBranches); return true; } @@ -278,18 +279,14 @@ bool HGraphBuilder::BuildGraph(const DexFile::CodeItem& code_item) { // Compute the number of dex instructions, blocks, and branches. We will // check these values against limits given to the compiler. - size_t number_of_dex_instructions = 0; - size_t number_of_blocks = 0; size_t number_of_branches = 0; // To avoid splitting blocks, we compute ahead of time the instructions that // start a new block, and create these blocks. - ComputeBranchTargets( - code_ptr, code_end, &number_of_dex_instructions, &number_of_blocks, &number_of_branches); + ComputeBranchTargets(code_ptr, code_end, &number_of_branches); // Note that the compiler driver is null when unit testing. - if ((compiler_driver_ != nullptr) - && SkipCompilation(number_of_dex_instructions, number_of_blocks, number_of_branches)) { + if ((compiler_driver_ != nullptr) && SkipCompilation(code_item, number_of_branches)) { return false; } @@ -355,8 +352,6 @@ void HGraphBuilder::MaybeUpdateCurrentBlock(size_t index) { void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, const uint16_t* code_end, - size_t* number_of_dex_instructions, - size_t* number_of_blocks, size_t* number_of_branches) { branch_targets_.SetSize(code_end - code_ptr); @@ -369,7 +364,6 @@ void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, // the locations these instructions branch to. 
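SkipCompilation above now takes the method size straight from code_item.insns_size_in_code_units_ rather than counting dex instructions in a separate pass, which is why ComputeBranchTargets no longer needs to track instruction and block counts. A simplified sketch of the filter logic; the threshold values and the Options type here are illustrative stand-ins for CompilerOptions, not taken from this change:

    #include <cstddef>
    #include <cstdint>

    // Simplified stand-in for the CompilerOptions size thresholds.
    struct Options {
      size_t huge_method_threshold = 10000;  // code units, illustrative default
      size_t large_method_threshold = 600;   // code units, illustrative default
      bool IsHugeMethod(size_t code_units) const { return code_units > huge_method_threshold; }
      bool IsLargeMethod(size_t code_units) const { return code_units > large_method_threshold; }
    };

    // Returns true when the optimizing compiler should skip the method:
    // it is huge, or it is large with no branches (likely machine-generated
    // initialization code with little to gain from optimization).
    bool ShouldSkip(const Options& opts,
                    uint32_t insns_size_in_code_units,
                    size_t number_of_branches) {
      if (opts.IsHugeMethod(insns_size_in_code_units)) {
        return true;
      }
      if (opts.IsLargeMethod(insns_size_in_code_units) && number_of_branches == 0) {
        return true;
      }
      return false;
    }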
uint32_t dex_pc = 0; while (code_ptr < code_end) { - (*number_of_dex_instructions)++; const Instruction& instruction = *Instruction::At(code_ptr); if (instruction.IsBranch()) { (*number_of_branches)++; @@ -378,14 +372,12 @@ void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, if (FindBlockStartingAt(target) == nullptr) { block = new (arena_) HBasicBlock(graph_, target); branch_targets_.Put(target, block); - (*number_of_blocks)++; } dex_pc += instruction.SizeInCodeUnits(); code_ptr += instruction.SizeInCodeUnits(); if ((code_ptr < code_end) && (FindBlockStartingAt(dex_pc) == nullptr)) { block = new (arena_) HBasicBlock(graph_, dex_pc); branch_targets_.Put(dex_pc, block); - (*number_of_blocks)++; } } else if (instruction.IsSwitch()) { SwitchTable table(instruction, dex_pc, instruction.Opcode() == Instruction::SPARSE_SWITCH); @@ -403,14 +395,12 @@ void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, if (FindBlockStartingAt(target) == nullptr) { block = new (arena_) HBasicBlock(graph_, target); branch_targets_.Put(target, block); - (*number_of_blocks)++; } // The next case gets its own block. if (i < num_entries) { block = new (arena_) HBasicBlock(graph_, target); branch_targets_.Put(table.GetDexPcForIndex(i), block); - (*number_of_blocks)++; } } @@ -420,7 +410,6 @@ void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, if ((code_ptr < code_end) && (FindBlockStartingAt(dex_pc) == nullptr)) { block = new (arena_) HBasicBlock(graph_, dex_pc); branch_targets_.Put(dex_pc, block); - (*number_of_blocks)++; } } else { code_ptr += instruction.SizeInCodeUnits(); diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h index 6a0738a7b9..dc6d97eb0c 100644 --- a/compiler/optimizing/builder.h +++ b/compiler/optimizing/builder.h @@ -90,8 +90,6 @@ class HGraphBuilder : public ValueObject { // branches. 
void ComputeBranchTargets(const uint16_t* start, const uint16_t* end, - size_t* number_of_dex_instructions, - size_t* number_of_block, size_t* number_of_branches); void MaybeUpdateCurrentBlock(size_t index); HBasicBlock* FindBlockStartingAt(int32_t index) const; @@ -217,9 +215,7 @@ class HGraphBuilder : public ValueObject { HInstruction* value, int32_t case_value_int, int32_t target_offset, uint32_t dex_pc); - bool SkipCompilation(size_t number_of_dex_instructions, - size_t number_of_blocks, - size_t number_of_branches); + bool SkipCompilation(const DexFile::CodeItem& code_item, size_t number_of_branches); void MaybeRecordStat(MethodCompilationStat compilation_stat); diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index bd6e943bf0..9b1ef17274 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -378,10 +378,14 @@ CodeGenerator* CodeGenerator::Create(HGraph* graph, case kMips: return nullptr; case kX86: { - return new x86::CodeGeneratorX86(graph, compiler_options); + return new x86::CodeGeneratorX86(graph, + *isa_features.AsX86InstructionSetFeatures(), + compiler_options); } case kX86_64: { - return new x86_64::CodeGeneratorX86_64(graph, compiler_options); + return new x86_64::CodeGeneratorX86_64(graph, + *isa_features.AsX86_64InstructionSetFeatures(), + compiler_options); } default: return nullptr; diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 1f95041a92..f5e4df1390 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -287,6 +287,26 @@ class TypeCheckSlowPathARM : public SlowPathCodeARM { DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARM); }; +class DeoptimizationSlowPathARM : public SlowPathCodeARM { + public: + explicit DeoptimizationSlowPathARM(HInstruction* instruction) + : instruction_(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, instruction_->GetLocations()); + DCHECK(instruction_->IsDeoptimize()); + HDeoptimize* deoptimize = instruction_->AsDeoptimize(); + uint32_t dex_pc = deoptimize->GetDexPc(); + CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); + arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this); + } + + private: + HInstruction* const instruction_; + DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM); +}; + #undef __ #undef __ @@ -887,24 +907,17 @@ void InstructionCodeGeneratorARM::VisitExit(HExit* exit) { UNUSED(exit); } -void LocationsBuilderARM::VisitIf(HIf* if_instr) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall); - HInstruction* cond = if_instr->InputAt(0); - if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { - locations->SetInAt(0, Location::RequiresRegister()); - } -} - -void InstructionCodeGeneratorARM::VisitIf(HIf* if_instr) { - HInstruction* cond = if_instr->InputAt(0); +void InstructionCodeGeneratorARM::GenerateTestAndBranch(HInstruction* instruction, + Label* true_target, + Label* false_target, + Label* always_true_target) { + HInstruction* cond = instruction->InputAt(0); if (cond->IsIntConstant()) { // Constant condition, statically compared against 1. 
int32_t cond_value = cond->AsIntConstant()->GetValue(); if (cond_value == 1) { - if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), - if_instr->IfTrueSuccessor())) { - __ b(codegen_->GetLabelOf(if_instr->IfTrueSuccessor())); + if (always_true_target != nullptr) { + __ b(always_true_target); } return; } else { @@ -913,10 +926,10 @@ void InstructionCodeGeneratorARM::VisitIf(HIf* if_instr) { } else { if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { // Condition has been materialized, compare the output to 0 - DCHECK(if_instr->GetLocations()->InAt(0).IsRegister()); - __ cmp(if_instr->GetLocations()->InAt(0).AsRegister<Register>(), + DCHECK(instruction->GetLocations()->InAt(0).IsRegister()); + __ cmp(instruction->GetLocations()->InAt(0).AsRegister<Register>(), ShifterOperand(0)); - __ b(codegen_->GetLabelOf(if_instr->IfTrueSuccessor()), NE); + __ b(true_target, NE); } else { // Condition has not been materialized, use its inputs as the // comparison and its condition as the branch condition. @@ -938,16 +951,55 @@ void InstructionCodeGeneratorARM::VisitIf(HIf* if_instr) { __ cmp(left, ShifterOperand(temp)); } } - __ b(codegen_->GetLabelOf(if_instr->IfTrueSuccessor()), - ARMCondition(cond->AsCondition()->GetCondition())); + __ b(true_target, ARMCondition(cond->AsCondition()->GetCondition())); } } - if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), - if_instr->IfFalseSuccessor())) { - __ b(codegen_->GetLabelOf(if_instr->IfFalseSuccessor())); + if (false_target != nullptr) { + __ b(false_target); } } +void LocationsBuilderARM::VisitIf(HIf* if_instr) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall); + HInstruction* cond = if_instr->InputAt(0); + if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { + locations->SetInAt(0, Location::RequiresRegister()); + } +} + +void InstructionCodeGeneratorARM::VisitIf(HIf* if_instr) { + Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor()); + Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor()); + Label* always_true_target = true_target; + if (codegen_->GoesToNextBlock(if_instr->GetBlock(), + if_instr->IfTrueSuccessor())) { + always_true_target = nullptr; + } + if (codegen_->GoesToNextBlock(if_instr->GetBlock(), + if_instr->IfFalseSuccessor())) { + false_target = nullptr; + } + GenerateTestAndBranch(if_instr, true_target, false_target, always_true_target); +} + +void LocationsBuilderARM::VisitDeoptimize(HDeoptimize* deoptimize) { + LocationSummary* locations = new (GetGraph()->GetArena()) + LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); + HInstruction* cond = deoptimize->InputAt(0); + DCHECK(cond->IsCondition()); + if (cond->AsCondition()->NeedsMaterialization()) { + locations->SetInAt(0, Location::RequiresRegister()); + } +} + +void InstructionCodeGeneratorARM::VisitDeoptimize(HDeoptimize* deoptimize) { + SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) + DeoptimizationSlowPathARM(deoptimize); + codegen_->AddSlowPath(slow_path); + Label* slow_path_entry = slow_path->GetEntryLabel(); + GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry); +} void LocationsBuilderARM::VisitCondition(HCondition* comp) { LocationSummary* locations = diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index bcdea7a639..06f425ea21 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -188,6 
+188,10 @@ class InstructionCodeGeneratorARM : public HGraphVisitor { void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); void GenerateImplicitNullCheck(HNullCheck* instruction); void GenerateExplicitNullCheck(HNullCheck* instruction); + void GenerateTestAndBranch(HInstruction* instruction, + Label* true_target, + Label* false_target, + Label* always_true_target); ArmAssembler* const assembler_; CodeGeneratorARM* const codegen_; diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 32ada3837e..439e85ca6c 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -352,6 +352,26 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 { DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARM64); }; +class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 { + public: + explicit DeoptimizationSlowPathARM64(HInstruction* instruction) + : instruction_(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, instruction_->GetLocations()); + DCHECK(instruction_->IsDeoptimize()); + HDeoptimize* deoptimize = instruction_->AsDeoptimize(); + uint32_t dex_pc = deoptimize->GetDexPc(); + CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); + arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this); + } + + private: + HInstruction* const instruction_; + DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM64); +}; + #undef __ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) { @@ -1611,25 +1631,18 @@ void InstructionCodeGeneratorARM64::VisitGoto(HGoto* got) { } } -void LocationsBuilderARM64::VisitIf(HIf* if_instr) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr); - HInstruction* cond = if_instr->InputAt(0); - if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { - locations->SetInAt(0, Location::RequiresRegister()); - } -} - -void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) { - HInstruction* cond = if_instr->InputAt(0); +void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruction, + vixl::Label* true_target, + vixl::Label* false_target, + vixl::Label* always_true_target) { + HInstruction* cond = instruction->InputAt(0); HCondition* condition = cond->AsCondition(); - vixl::Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor()); - vixl::Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor()); if (cond->IsIntConstant()) { int32_t cond_value = cond->AsIntConstant()->GetValue(); if (cond_value == 1) { - if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), if_instr->IfTrueSuccessor())) { - __ B(true_target); + if (always_true_target != nullptr) { + __ B(always_true_target); } return; } else { @@ -1637,9 +1650,9 @@ void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) { } } else if (!cond->IsCondition() || condition->NeedsMaterialization()) { // The condition instruction has been materialized, compare the output to 0. 
- Location cond_val = if_instr->GetLocations()->InAt(0); + Location cond_val = instruction->GetLocations()->InAt(0); DCHECK(cond_val.IsRegister()); - __ Cbnz(InputRegisterAt(if_instr, 0), true_target); + __ Cbnz(InputRegisterAt(instruction, 0), true_target); } else { // The condition instruction has not been materialized, use its inputs as // the comparison and its condition as the branch condition. @@ -1657,11 +1670,52 @@ void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) { __ B(arm64_cond, true_target); } } - if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), if_instr->IfFalseSuccessor())) { + if (false_target != nullptr) { __ B(false_target); } } +void LocationsBuilderARM64::VisitIf(HIf* if_instr) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr); + HInstruction* cond = if_instr->InputAt(0); + if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { + locations->SetInAt(0, Location::RequiresRegister()); + } +} + +void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) { + vixl::Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor()); + vixl::Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor()); + vixl::Label* always_true_target = true_target; + if (codegen_->GoesToNextBlock(if_instr->GetBlock(), + if_instr->IfTrueSuccessor())) { + always_true_target = nullptr; + } + if (codegen_->GoesToNextBlock(if_instr->GetBlock(), + if_instr->IfFalseSuccessor())) { + false_target = nullptr; + } + GenerateTestAndBranch(if_instr, true_target, false_target, always_true_target); +} + +void LocationsBuilderARM64::VisitDeoptimize(HDeoptimize* deoptimize) { + LocationSummary* locations = new (GetGraph()->GetArena()) + LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); + HInstruction* cond = deoptimize->InputAt(0); + DCHECK(cond->IsCondition()); + if (cond->AsCondition()->NeedsMaterialization()) { + locations->SetInAt(0, Location::RequiresRegister()); + } +} + +void InstructionCodeGeneratorARM64::VisitDeoptimize(HDeoptimize* deoptimize) { + SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) + DeoptimizationSlowPathARM64(deoptimize); + codegen_->AddSlowPath(slow_path); + vixl::Label* slow_path_entry = slow_path->GetEntryLabel(); + GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry); +} + void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 2c624d2926..7edb129880 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -23,8 +23,8 @@ #include "nodes.h" #include "parallel_move_resolver.h" #include "utils/arm64/assembler_arm64.h" -#include "a64/disasm-a64.h" -#include "a64/macro-assembler-a64.h" +#include "vixl/a64/disasm-a64.h" +#include "vixl/a64/macro-assembler-a64.h" #include "arch/arm64/quick_method_frame_info_arm64.h" namespace art { @@ -165,6 +165,10 @@ class InstructionCodeGeneratorARM64 : public HGraphVisitor { void HandleShift(HBinaryOperation* instr); void GenerateImplicitNullCheck(HNullCheck* instruction); void GenerateExplicitNullCheck(HNullCheck* instruction); + void GenerateTestAndBranch(HInstruction* instruction, + vixl::Label* true_target, + vixl::Label* false_target, + vixl::Label* always_true_target); Arm64Assembler* const assembler_; 
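The recurring pattern in these code-generator changes is that each backend's VisitIf body has been factored into a GenerateTestAndBranch helper that HDeoptimize can reuse: VisitIf passes the successor labels, nulling out whichever one is the fall-through block, while VisitDeoptimize passes the slow-path entry as both the true target and the always-true target. A backend-neutral sketch of that call shape, using placeholder Label and emit helpers rather than any real assembler API:

    #include <cstdio>

    struct Label { const char* name; };

    // Placeholder emitters; a real backend would produce branch instructions here.
    void EmitBranchIfConditionTrue(Label* target) {
      if (target != nullptr) std::printf("b.cond -> %s\n", target->name);
    }
    void EmitUnconditionalBranch(Label* target) {
      if (target != nullptr) std::printf("b -> %s\n", target->name);
    }

    // Shared lowering used by both HIf and HDeoptimize in this sketch.
    // A null target means "falls through to the next block, no jump needed".
    void GenerateTestAndBranch(bool condition_is_constant_true,
                               Label* true_target, Label* false_target,
                               Label* always_true_target) {
      if (condition_is_constant_true) {
        EmitUnconditionalBranch(always_true_target);
        return;
      }
      EmitBranchIfConditionTrue(true_target);
      EmitUnconditionalBranch(false_target);
    }

    void VisitIf(bool condition_is_constant_true,
                 Label* true_succ, Label* false_succ,
                 bool true_is_next_block, bool false_is_next_block) {
      Label* always_true_target = true_is_next_block ? nullptr : true_succ;
      Label* false_target = false_is_next_block ? nullptr : false_succ;
      GenerateTestAndBranch(condition_is_constant_true,
                            true_succ, false_target, always_true_target);
    }

    void VisitDeoptimize(Label* slow_path_entry) {
      // Deoptimization has no false successor: fall through on the happy path.
      GenerateTestAndBranch(false, slow_path_entry, nullptr, slow_path_entry);
    }

    int main() {
      Label t{"true_block"}, f{"false_block"}, slow{"deopt_slow_path"};
      VisitIf(false, &t, &f, /*true_is_next_block=*/false, /*false_is_next_block=*/true);
      VisitDeoptimize(&slow);
      return 0;
    }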
CodeGeneratorARM64* const codegen_; diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 0d5fe49c1d..f79dbc3513 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -51,7 +51,7 @@ class NullCheckSlowPathX86 : public SlowPathCodeX86 { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { __ Bind(GetEntryLabel()); __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pThrowNullPointer))); - codegen->RecordPcInfo(instruction_, instruction_->GetDexPc()); + RecordPcInfo(codegen, instruction_, instruction_->GetDexPc()); } private: @@ -66,7 +66,7 @@ class DivZeroCheckSlowPathX86 : public SlowPathCodeX86 { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { __ Bind(GetEntryLabel()); __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pThrowDivZero))); - codegen->RecordPcInfo(instruction_, instruction_->GetDexPc()); + RecordPcInfo(codegen, instruction_, instruction_->GetDexPc()); } private: @@ -115,7 +115,7 @@ class BoundsCheckSlowPathX86 : public SlowPathCodeX86 { length_location_, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pThrowArrayBounds))); - codegen->RecordPcInfo(instruction_, instruction_->GetDexPc()); + RecordPcInfo(codegen, instruction_, instruction_->GetDexPc()); } private: @@ -136,7 +136,7 @@ class SuspendCheckSlowPathX86 : public SlowPathCodeX86 { __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, instruction_->GetLocations()); __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pTestSuspend))); - codegen->RecordPcInfo(instruction_, instruction_->GetDexPc()); + RecordPcInfo(codegen, instruction_, instruction_->GetDexPc()); RestoreLiveRegisters(codegen, instruction_->GetLocations()); if (successor_ == nullptr) { __ jmp(GetReturnLabel()); @@ -294,6 +294,27 @@ class TypeCheckSlowPathX86 : public SlowPathCodeX86 { DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86); }; +class DeoptimizationSlowPathX86 : public SlowPathCodeX86 { + public: + explicit DeoptimizationSlowPathX86(HInstruction* instruction) + : instruction_(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, instruction_->GetLocations()); + __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pDeoptimize))); + // No need to restore live registers. 
+ DCHECK(instruction_->IsDeoptimize()); + HDeoptimize* deoptimize = instruction_->AsDeoptimize(); + uint32_t dex_pc = deoptimize->GetDexPc(); + codegen->RecordPcInfo(instruction_, dex_pc, this); + } + + private: + HInstruction* const instruction_; + DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86); +}; + #undef __ #define __ reinterpret_cast<X86Assembler*>(GetAssembler())-> @@ -339,7 +360,9 @@ size_t CodeGeneratorX86::RestoreFloatingPointRegister(size_t stack_index, uint32 return GetFloatingPointSpillSlotSize(); } -CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, const CompilerOptions& compiler_options) +CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, + const X86InstructionSetFeatures& isa_features, + const CompilerOptions& compiler_options) : CodeGenerator(graph, kNumberOfCpuRegisters, kNumberOfXmmRegisters, @@ -352,7 +375,8 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, const CompilerOptions& compile block_labels_(graph->GetArena(), 0), location_builder_(graph, this), instruction_visitor_(graph, this), - move_resolver_(graph->GetArena(), this) { + move_resolver_(graph->GetArena(), this), + isa_features_(isa_features) { // Use a fake return address register to mimic Quick. AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister)); } @@ -784,24 +808,17 @@ void InstructionCodeGeneratorX86::VisitExit(HExit* exit) { UNUSED(exit); } -void LocationsBuilderX86::VisitIf(HIf* if_instr) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall); - HInstruction* cond = if_instr->InputAt(0); - if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { - locations->SetInAt(0, Location::Any()); - } -} - -void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) { - HInstruction* cond = if_instr->InputAt(0); +void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instruction, + Label* true_target, + Label* false_target, + Label* always_true_target) { + HInstruction* cond = instruction->InputAt(0); if (cond->IsIntConstant()) { // Constant condition, statically compared against 1. int32_t cond_value = cond->AsIntConstant()->GetValue(); if (cond_value == 1) { - if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), - if_instr->IfTrueSuccessor())) { - __ jmp(codegen_->GetLabelOf(if_instr->IfTrueSuccessor())); + if (always_true_target != nullptr) { + __ jmp(always_true_target); } return; } else { @@ -814,20 +831,19 @@ void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) { // evaluated just before the if, we don't need to evaluate it // again. bool eflags_set = cond->IsCondition() - && cond->AsCondition()->IsBeforeWhenDisregardMoves(if_instr); + && cond->AsCondition()->IsBeforeWhenDisregardMoves(instruction); if (materialized) { if (!eflags_set) { // Materialized condition, compare against 0. 
- Location lhs = if_instr->GetLocations()->InAt(0); + Location lhs = instruction->GetLocations()->InAt(0); if (lhs.IsRegister()) { __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>()); } else { __ cmpl(Address(ESP, lhs.GetStackIndex()), Immediate(0)); } - __ j(kNotEqual, codegen_->GetLabelOf(if_instr->IfTrueSuccessor())); + __ j(kNotEqual, true_target); } else { - __ j(X86Condition(cond->AsCondition()->GetCondition()), - codegen_->GetLabelOf(if_instr->IfTrueSuccessor())); + __ j(X86Condition(cond->AsCondition()->GetCondition()), true_target); } } else { Location lhs = cond->GetLocations()->InAt(0); @@ -846,16 +862,56 @@ void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) { } else { __ cmpl(lhs.AsRegister<Register>(), Address(ESP, rhs.GetStackIndex())); } - __ j(X86Condition(cond->AsCondition()->GetCondition()), - codegen_->GetLabelOf(if_instr->IfTrueSuccessor())); + __ j(X86Condition(cond->AsCondition()->GetCondition()), true_target); } } - if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), - if_instr->IfFalseSuccessor())) { - __ jmp(codegen_->GetLabelOf(if_instr->IfFalseSuccessor())); + if (false_target != nullptr) { + __ jmp(false_target); + } +} + +void LocationsBuilderX86::VisitIf(HIf* if_instr) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall); + HInstruction* cond = if_instr->InputAt(0); + if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { + locations->SetInAt(0, Location::Any()); } } +void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) { + Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor()); + Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor()); + Label* always_true_target = true_target; + if (codegen_->GoesToNextBlock(if_instr->GetBlock(), + if_instr->IfTrueSuccessor())) { + always_true_target = nullptr; + } + if (codegen_->GoesToNextBlock(if_instr->GetBlock(), + if_instr->IfFalseSuccessor())) { + false_target = nullptr; + } + GenerateTestAndBranch(if_instr, true_target, false_target, always_true_target); +} + +void LocationsBuilderX86::VisitDeoptimize(HDeoptimize* deoptimize) { + LocationSummary* locations = new (GetGraph()->GetArena()) + LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); + HInstruction* cond = deoptimize->InputAt(0); + DCHECK(cond->IsCondition()); + if (cond->AsCondition()->NeedsMaterialization()) { + locations->SetInAt(0, Location::Any()); + } +} + +void InstructionCodeGeneratorX86::VisitDeoptimize(HDeoptimize* deoptimize) { + SlowPathCodeX86* slow_path = new (GetGraph()->GetArena()) + DeoptimizationSlowPathX86(deoptimize); + codegen_->AddSlowPath(slow_path); + Label* slow_path_entry = slow_path->GetEntryLabel(); + GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry); +} + void LocationsBuilderX86::VisitLocal(HLocal* local) { local->SetLocations(nullptr); } @@ -1110,7 +1166,7 @@ void InstructionCodeGeneratorX86::VisitReturn(HReturn* ret) { } void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { - IntrinsicLocationsBuilderX86 intrinsic(GetGraph()->GetArena()); + IntrinsicLocationsBuilderX86 intrinsic(codegen_); if (intrinsic.TryDispatch(invoke)) { return; } diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 6a4d42dd01..0cc3c6533a 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -176,6 +176,10 @@ class InstructionCodeGeneratorX86 : public 
HGraphVisitor { void GenerateImplicitNullCheck(HNullCheck* instruction); void GenerateExplicitNullCheck(HNullCheck* instruction); + void GenerateTestAndBranch(HInstruction* instruction, + Label* true_target, + Label* false_target, + Label* always_true_target); X86Assembler* const assembler_; CodeGeneratorX86* const codegen_; @@ -185,7 +189,9 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { class CodeGeneratorX86 : public CodeGenerator { public: - CodeGeneratorX86(HGraph* graph, const CompilerOptions& compiler_options); + CodeGeneratorX86(HGraph* graph, + const X86InstructionSetFeatures& isa_features, + const CompilerOptions& compiler_options); virtual ~CodeGeneratorX86() {} void GenerateFrameEntry() OVERRIDE; @@ -271,6 +277,10 @@ class CodeGeneratorX86 : public CodeGenerator { Label* GetFrameEntryLabel() { return &frame_entry_label_; } + const X86InstructionSetFeatures& GetInstructionSetFeatures() const { + return isa_features_; + } + private: // Labels for each block that will be compiled. GrowableArray<Label> block_labels_; @@ -279,6 +289,7 @@ class CodeGeneratorX86 : public CodeGenerator { InstructionCodeGeneratorX86 instruction_visitor_; ParallelMoveResolverX86 move_resolver_; X86Assembler assembler_; + const X86InstructionSetFeatures& isa_features_; DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86); }; diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index ef60280016..9958451f31 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -314,6 +314,27 @@ class TypeCheckSlowPathX86_64 : public SlowPathCodeX86_64 { DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86_64); }; +class DeoptimizationSlowPathX86_64 : public SlowPathCodeX86_64 { + public: + explicit DeoptimizationSlowPathX86_64(HInstruction* instruction) + : instruction_(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, instruction_->GetLocations()); + __ gs()->call( + Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pDeoptimize), true)); + DCHECK(instruction_->IsDeoptimize()); + HDeoptimize* deoptimize = instruction_->AsDeoptimize(); + uint32_t dex_pc = deoptimize->GetDexPc(); + codegen->RecordPcInfo(instruction_, dex_pc, this); + } + + private: + HInstruction* const instruction_; + DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64); +}; + #undef __ #define __ reinterpret_cast<X86_64Assembler*>(GetAssembler())-> @@ -390,7 +411,9 @@ size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uin static constexpr int kNumberOfCpuRegisterPairs = 0; // Use a fake return address register to mimic Quick. 
static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1); -CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, const CompilerOptions& compiler_options) +CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, + const X86_64InstructionSetFeatures& isa_features, + const CompilerOptions& compiler_options) : CodeGenerator(graph, kNumberOfCpuRegisters, kNumberOfFloatRegisters, @@ -404,7 +427,8 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, const CompilerOptions& c block_labels_(graph->GetArena(), 0), location_builder_(graph, this), instruction_visitor_(graph, this), - move_resolver_(graph->GetArena(), this) { + move_resolver_(graph->GetArena(), this), + isa_features_(isa_features) { AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister)); } @@ -606,7 +630,7 @@ void CodeGeneratorX86_64::Move(Location destination, Location source) { source.AsFpuRegister<XmmRegister>()); } else if (source.IsConstant()) { HConstant* constant = source.GetConstant(); - int64_t value = constant->AsLongConstant()->GetValue(); + int64_t value; if (constant->IsDoubleConstant()) { value = bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue()); } else { @@ -734,24 +758,17 @@ void InstructionCodeGeneratorX86_64::VisitExit(HExit* exit) { UNUSED(exit); } -void LocationsBuilderX86_64::VisitIf(HIf* if_instr) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall); - HInstruction* cond = if_instr->InputAt(0); - if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { - locations->SetInAt(0, Location::Any()); - } -} - -void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) { - HInstruction* cond = if_instr->InputAt(0); +void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruction, + Label* true_target, + Label* false_target, + Label* always_true_target) { + HInstruction* cond = instruction->InputAt(0); if (cond->IsIntConstant()) { // Constant condition, statically compared against 1. int32_t cond_value = cond->AsIntConstant()->GetValue(); if (cond_value == 1) { - if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), - if_instr->IfTrueSuccessor())) { - __ jmp(codegen_->GetLabelOf(if_instr->IfTrueSuccessor())); + if (always_true_target != nullptr) { + __ jmp(always_true_target); } return; } else { @@ -764,21 +781,20 @@ void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) { // evaluated just before the if, we don't need to evaluate it // again. bool eflags_set = cond->IsCondition() - && cond->AsCondition()->IsBeforeWhenDisregardMoves(if_instr); + && cond->AsCondition()->IsBeforeWhenDisregardMoves(instruction); if (materialized) { if (!eflags_set) { // Materialized condition, compare against 0. 
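One small correctness fix folded into the x86-64 changes above: CodeGeneratorX86_64::Move previously initialized the 64-bit constant value via AsLongConstant()->GetValue() before checking whether the constant was actually a double, so the value is now read only after the kind check. The shape of the fix, with a simplified stand-in Constant type rather than the HConstant hierarchy:

    #include <cstdint>
    #include <cstring>

    // Simplified stand-in for a 64-bit HConstant that may be long or double.
    struct Constant {
      bool is_double;
      double double_value;
      int64_t long_value;
    };

    // Read the 64-bit payload only after the kind is known; bit-cast doubles.
    int64_t ConstantBits(const Constant& c) {
      int64_t value;
      if (c.is_double) {
        std::memcpy(&value, &c.double_value, sizeof(value));  // bit_cast
      } else {
        value = c.long_value;
      }
      return value;
    }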
- Location lhs = if_instr->GetLocations()->InAt(0); + Location lhs = instruction->GetLocations()->InAt(0); if (lhs.IsRegister()) { __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>()); } else { __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0)); } - __ j(kNotEqual, codegen_->GetLabelOf(if_instr->IfTrueSuccessor())); + __ j(kNotEqual, true_target); } else { - __ j(X86_64Condition(cond->AsCondition()->GetCondition()), - codegen_->GetLabelOf(if_instr->IfTrueSuccessor())); + __ j(X86_64Condition(cond->AsCondition()->GetCondition()), true_target); } } else { Location lhs = cond->GetLocations()->InAt(0); @@ -796,16 +812,56 @@ void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) { __ cmpl(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex())); } - __ j(X86_64Condition(cond->AsCondition()->GetCondition()), - codegen_->GetLabelOf(if_instr->IfTrueSuccessor())); + __ j(X86_64Condition(cond->AsCondition()->GetCondition()), true_target); } } - if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), - if_instr->IfFalseSuccessor())) { - __ jmp(codegen_->GetLabelOf(if_instr->IfFalseSuccessor())); + if (false_target != nullptr) { + __ jmp(false_target); + } +} + +void LocationsBuilderX86_64::VisitIf(HIf* if_instr) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall); + HInstruction* cond = if_instr->InputAt(0); + if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { + locations->SetInAt(0, Location::Any()); + } +} + +void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) { + Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor()); + Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor()); + Label* always_true_target = true_target; + if (codegen_->GoesToNextBlock(if_instr->GetBlock(), + if_instr->IfTrueSuccessor())) { + always_true_target = nullptr; + } + if (codegen_->GoesToNextBlock(if_instr->GetBlock(), + if_instr->IfFalseSuccessor())) { + false_target = nullptr; + } + GenerateTestAndBranch(if_instr, true_target, false_target, always_true_target); +} + +void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) { + LocationSummary* locations = new (GetGraph()->GetArena()) + LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); + HInstruction* cond = deoptimize->InputAt(0); + DCHECK(cond->IsCondition()); + if (cond->AsCondition()->NeedsMaterialization()) { + locations->SetInAt(0, Location::Any()); } } +void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) { + SlowPathCodeX86_64* slow_path = new (GetGraph()->GetArena()) + DeoptimizationSlowPathX86_64(deoptimize); + codegen_->AddSlowPath(slow_path); + Label* slow_path_entry = slow_path->GetEntryLabel(); + GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry); +} + void LocationsBuilderX86_64::VisitLocal(HLocal* local) { local->SetLocations(nullptr); } @@ -1180,7 +1236,7 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type } void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { - IntrinsicLocationsBuilderX86_64 intrinsic(GetGraph()->GetArena()); + IntrinsicLocationsBuilderX86_64 intrinsic(codegen_); if (intrinsic.TryDispatch(invoke)) { return; } @@ -1241,7 +1297,7 @@ void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) { } void LocationsBuilderX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) { - IntrinsicLocationsBuilderX86_64 
intrinsic(GetGraph()->GetArena()); + IntrinsicLocationsBuilderX86_64 intrinsic(codegen_); if (intrinsic.TryDispatch(invoke)) { return; } diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index a380b6a04c..375c0b03b9 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -182,6 +182,10 @@ class InstructionCodeGeneratorX86_64 : public HGraphVisitor { void GenerateExplicitNullCheck(HNullCheck* instruction); void PushOntoFPStack(Location source, uint32_t temp_offset, uint32_t stack_adjustment, bool is_float); + void GenerateTestAndBranch(HInstruction* instruction, + Label* true_target, + Label* false_target, + Label* always_true_target); X86_64Assembler* const assembler_; CodeGeneratorX86_64* const codegen_; @@ -191,7 +195,9 @@ class InstructionCodeGeneratorX86_64 : public HGraphVisitor { class CodeGeneratorX86_64 : public CodeGenerator { public: - CodeGeneratorX86_64(HGraph* graph, const CompilerOptions& compiler_options); + CodeGeneratorX86_64(HGraph* graph, + const X86_64InstructionSetFeatures& isa_features, + const CompilerOptions& compiler_options); virtual ~CodeGeneratorX86_64() {} void GenerateFrameEntry() OVERRIDE; @@ -264,6 +270,10 @@ class CodeGeneratorX86_64 : public CodeGenerator { void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, CpuRegister temp); + const X86_64InstructionSetFeatures& GetInstructionSetFeatures() const { + return isa_features_; + } + private: // Labels for each block that will be compiled. GrowableArray<Label> block_labels_; @@ -272,6 +282,7 @@ class CodeGeneratorX86_64 : public CodeGenerator { InstructionCodeGeneratorX86_64 instruction_visitor_; ParallelMoveResolverX86_64 move_resolver_; X86_64Assembler assembler_; + const X86_64InstructionSetFeatures& isa_features_; DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86_64); }; diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc index 6053ad51f4..2be117bf38 100644 --- a/compiler/optimizing/codegen_test.cc +++ b/compiler/optimizing/codegen_test.cc @@ -19,6 +19,8 @@ #include "arch/instruction_set.h" #include "arch/arm/instruction_set_features_arm.h" #include "arch/arm64/instruction_set_features_arm64.h" +#include "arch/x86/instruction_set_features_x86.h" +#include "arch/x86_64/instruction_set_features_x86_64.h" #include "base/macros.h" #include "builder.h" #include "code_generator_arm.h" @@ -108,7 +110,9 @@ static void RunCodeBaseline(HGraph* graph, bool has_result, Expected expected) { InternalCodeAllocator allocator; CompilerOptions compiler_options; - x86::CodeGeneratorX86 codegenX86(graph, compiler_options); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), compiler_options); // We avoid doing a stack overflow check that requires the runtime being setup, // by making sure the compiler knows the methods we are running are leaf methods. 
codegenX86.CompileBaseline(&allocator, true); @@ -124,7 +128,9 @@ static void RunCodeBaseline(HGraph* graph, bool has_result, Expected expected) { Run(allocator, codegenARM, has_result, expected); } - x86_64::CodeGeneratorX86_64 codegenX86_64(graph, compiler_options); + std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64( + X86_64InstructionSetFeatures::FromCppDefines()); + x86_64::CodeGeneratorX86_64 codegenX86_64(graph, *features_x86_64.get(), compiler_options); codegenX86_64.CompileBaseline(&allocator, true); if (kRuntimeISA == kX86_64) { Run(allocator, codegenX86_64, has_result, expected); @@ -175,10 +181,14 @@ static void RunCodeOptimized(HGraph* graph, compiler_options); RunCodeOptimized(&codegenARM64, graph, hook_before_codegen, has_result, expected); } else if (kRuntimeISA == kX86) { - x86::CodeGeneratorX86 codegenX86(graph, compiler_options); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), compiler_options); RunCodeOptimized(&codegenX86, graph, hook_before_codegen, has_result, expected); } else if (kRuntimeISA == kX86_64) { - x86_64::CodeGeneratorX86_64 codegenX86_64(graph, compiler_options); + std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64( + X86_64InstructionSetFeatures::FromCppDefines()); + x86_64::CodeGeneratorX86_64 codegenX86_64(graph, *features_x86_64.get(), compiler_options); RunCodeOptimized(&codegenX86_64, graph, hook_before_codegen, has_result, expected); } } diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h index fd8c0c6242..966165bf4c 100644 --- a/compiler/optimizing/common_arm64.h +++ b/compiler/optimizing/common_arm64.h @@ -20,8 +20,8 @@ #include "locations.h" #include "nodes.h" #include "utils/arm64/assembler_arm64.h" -#include "a64/disasm-a64.h" -#include "a64/macro-assembler-a64.h" +#include "vixl/a64/disasm-a64.h" +#include "vixl/a64/macro-assembler-a64.h" namespace art { namespace arm64 { diff --git a/compiler/optimizing/constant_folding_test.cc b/compiler/optimizing/constant_folding_test.cc index 6853d54c48..02ad675dc3 100644 --- a/compiler/optimizing/constant_folding_test.cc +++ b/compiler/optimizing/constant_folding_test.cc @@ -16,6 +16,7 @@ #include <functional> +#include "arch/x86/instruction_set_features_x86.h" #include "code_generator_x86.h" #include "constant_folding.h" #include "dead_code_elimination.h" @@ -46,7 +47,9 @@ static void TestCode(const uint16_t* data, std::string actual_before = printer_before.str(); ASSERT_EQ(expected_before, actual_before); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), CompilerOptions()); HConstantFolding(graph).Run(); SSAChecker ssa_checker_cf(&allocator, graph); ssa_checker_cf.Run(); diff --git a/compiler/optimizing/dead_code_elimination_test.cc b/compiler/optimizing/dead_code_elimination_test.cc index a644719622..98ae1ec5d3 100644 --- a/compiler/optimizing/dead_code_elimination_test.cc +++ b/compiler/optimizing/dead_code_elimination_test.cc @@ -14,6 +14,7 @@ * limitations under the License. 
*/ +#include "arch/x86/instruction_set_features_x86.h" #include "code_generator_x86.h" #include "dead_code_elimination.h" #include "driver/compiler_options.h" @@ -40,7 +41,9 @@ static void TestCode(const uint16_t* data, std::string actual_before = printer_before.str(); ASSERT_EQ(actual_before, expected_before); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), CompilerOptions()); HDeadCodeElimination(graph).Run(); SSAChecker ssa_checker(&allocator, graph); ssa_checker.Run(); diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 72d303c870..d1176c460f 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -28,8 +28,8 @@ #include "utils/arm64/assembler_arm64.h" #include "utils/arm64/constants_arm64.h" -#include "a64/disasm-a64.h" -#include "a64/macro-assembler-a64.h" +#include "vixl/a64/disasm-a64.h" +#include "vixl/a64/macro-assembler-a64.h" using namespace vixl; // NOLINT(build/namespaces) diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index 384737f55a..0740471e8d 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -16,6 +16,7 @@ #include "intrinsics_x86.h" +#include "arch/x86/instruction_set_features_x86.h" #include "code_generator_x86.h" #include "entrypoints/quick/quick_entrypoints.h" #include "intrinsics.h" @@ -34,6 +35,11 @@ static constexpr int kDoubleNaNHigh = 0x7FF80000; static constexpr int kDoubleNaNLow = 0x00000000; static constexpr int kFloatNaN = 0x7FC00000; +IntrinsicLocationsBuilderX86::IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen) + : arena_(codegen->GetGraph()->GetArena()), codegen_(codegen) { +} + + X86Assembler* IntrinsicCodeGeneratorX86::GetAssembler() { return reinterpret_cast<X86Assembler*>(codegen_->GetAssembler()); } @@ -719,6 +725,148 @@ void IntrinsicCodeGeneratorX86::VisitMathSqrt(HInvoke* invoke) { GetAssembler()->sqrtsd(out, in); } +static void InvokeOutOfLineIntrinsic(CodeGeneratorX86* codegen, HInvoke* invoke) { + MoveArguments(invoke, codegen->GetGraph()->GetArena(), codegen); + + DCHECK(invoke->IsInvokeStaticOrDirect()); + codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), EAX); + + // Copy the result back to the expected output. + Location out = invoke->GetLocations()->Out(); + if (out.IsValid()) { + DCHECK(out.IsRegister()); + MoveFromReturnRegister(out, invoke->GetType(), codegen); + } +} + +static void CreateSSE41FPToFPLocations(ArenaAllocator* arena, + HInvoke* invoke, + CodeGeneratorX86* codegen) { + // Do we have instruction support? + if (codegen->GetInstructionSetFeatures().HasSSE4_1()) { + CreateFPToFPLocations(arena, invoke); + return; + } + + // We have to fall back to a call to the intrinsic. + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kCall); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0))); + locations->SetOut(Location::FpuRegisterLocation(XMM0)); + // Needs to be EAX for the invoke. 
+ locations->AddTemp(Location::RegisterLocation(EAX)); +} + +static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86* codegen, + HInvoke* invoke, + X86Assembler* assembler, + int round_mode) { + LocationSummary* locations = invoke->GetLocations(); + if (locations->WillCall()) { + InvokeOutOfLineIntrinsic(codegen, invoke); + } else { + XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>(); + XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); + __ roundsd(out, in, Immediate(round_mode)); + } +} + +void IntrinsicLocationsBuilderX86::VisitMathCeil(HInvoke* invoke) { + CreateSSE41FPToFPLocations(arena_, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86::VisitMathCeil(HInvoke* invoke) { + GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2); +} + +void IntrinsicLocationsBuilderX86::VisitMathFloor(HInvoke* invoke) { + CreateSSE41FPToFPLocations(arena_, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86::VisitMathFloor(HInvoke* invoke) { + GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1); +} + +void IntrinsicLocationsBuilderX86::VisitMathRint(HInvoke* invoke) { + CreateSSE41FPToFPLocations(arena_, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86::VisitMathRint(HInvoke* invoke) { + GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0); +} + +// Note that 32 bit x86 doesn't have the capability to inline MathRoundDouble, +// as it needs 64 bit instructions. +void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) { + // Do we have instruction support? + if (codegen_->GetInstructionSetFeatures().HasSSE4_1()) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); + return; + } + + // We have to fall back to a call to the intrinsic. + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCall); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0))); + locations->SetOut(Location::RegisterLocation(EAX)); + // Needs to be EAX for the invoke. + locations->AddTemp(Location::RegisterLocation(EAX)); +} + +void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + if (locations->WillCall()) { + InvokeOutOfLineIntrinsic(codegen_, invoke); + return; + } + + // Implement RoundFloat as t1 = floor(input + 0.5f); convert to int. + XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>(); + Register out = locations->Out().AsRegister<Register>(); + XmmRegister maxInt = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + XmmRegister inPlusPointFive = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); + Label done, nan; + X86Assembler* assembler = GetAssembler(); + + // Generate 0.5 into inPlusPointFive. + __ movl(out, Immediate(bit_cast<int32_t, float>(0.5f))); + __ movd(inPlusPointFive, out); + + // Add in the input. + __ addss(inPlusPointFive, in); + + // And truncate to an integer. 
+ __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1)); + + __ movl(out, Immediate(kPrimIntMax)); + // maxInt = int-to-float(out) + __ cvtsi2ss(maxInt, out); + + // if inPlusPointFive >= maxInt goto done + __ comiss(inPlusPointFive, maxInt); + __ j(kAboveEqual, &done); + + // if input == NaN goto nan + __ j(kUnordered, &nan); + + // output = float-to-int-truncate(input) + __ cvttss2si(out, inPlusPointFive); + __ jmp(&done); + __ Bind(&nan); + + // output = 0 + __ xorl(out, out); + __ Bind(&done); +} + void IntrinsicLocationsBuilderX86::VisitStringCharAt(HInvoke* invoke) { // The inputs plus one temp. LocationSummary* locations = new (arena_) LocationSummary(invoke, @@ -1191,11 +1339,7 @@ void IntrinsicCodeGeneratorX86::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) UNIMPLEMENTED_INTRINSIC(IntegerReverse) UNIMPLEMENTED_INTRINSIC(LongReverse) UNIMPLEMENTED_INTRINSIC(LongReverseBytes) -UNIMPLEMENTED_INTRINSIC(MathFloor) -UNIMPLEMENTED_INTRINSIC(MathCeil) -UNIMPLEMENTED_INTRINSIC(MathRint) UNIMPLEMENTED_INTRINSIC(MathRoundDouble) -UNIMPLEMENTED_INTRINSIC(MathRoundFloat) UNIMPLEMENTED_INTRINSIC(StringIndexOf) UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter) UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) diff --git a/compiler/optimizing/intrinsics_x86.h b/compiler/optimizing/intrinsics_x86.h index e1e8260a5f..4292ec7b99 100644 --- a/compiler/optimizing/intrinsics_x86.h +++ b/compiler/optimizing/intrinsics_x86.h @@ -32,7 +32,7 @@ class X86Assembler; class IntrinsicLocationsBuilderX86 FINAL : public IntrinsicVisitor { public: - explicit IntrinsicLocationsBuilderX86(ArenaAllocator* arena) : arena_(arena) {} + explicit IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen); // Define visitor methods. @@ -50,6 +50,7 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) private: ArenaAllocator* arena_; + CodeGeneratorX86* codegen_; DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderX86); }; diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 736cea88cb..f6fa013cc6 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -16,6 +16,7 @@ #include "intrinsics_x86_64.h" +#include "arch/x86_64/instruction_set_features_x86_64.h" #include "code_generator_x86_64.h" #include "entrypoints/quick/quick_entrypoints.h" #include "intrinsics.h" @@ -30,6 +31,11 @@ namespace art { namespace x86_64 { +IntrinsicLocationsBuilderX86_64::IntrinsicLocationsBuilderX86_64(CodeGeneratorX86_64* codegen) + : arena_(codegen->GetGraph()->GetArena()), codegen_(codegen) { +} + + X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() { return reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler()); } @@ -614,6 +620,203 @@ void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) { GetAssembler()->sqrtsd(out, in); } +static void InvokeOutOfLineIntrinsic(CodeGeneratorX86_64* codegen, HInvoke* invoke) { + MoveArguments(invoke, codegen->GetGraph()->GetArena(), codegen); + + DCHECK(invoke->IsInvokeStaticOrDirect()); + codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), CpuRegister(RDI)); + codegen->RecordPcInfo(invoke, invoke->GetDexPc()); + + // Copy the result back to the expected output. + Location out = invoke->GetLocations()->Out(); + if (out.IsValid()) { + DCHECK(out.IsRegister()); + MoveFromReturnRegister(out, invoke->GetType(), codegen); + } +} + +static void CreateSSE41FPToFPLocations(ArenaAllocator* arena, + HInvoke* invoke, + CodeGeneratorX86_64* codegen) { + // Do we have instruction support? 
+ if (codegen->GetInstructionSetFeatures().HasSSE4_1()) { + CreateFPToFPLocations(arena, invoke); + return; + } + + // We have to fall back to a call to the intrinsic. + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kCall); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0))); + locations->SetOut(Location::FpuRegisterLocation(XMM0)); + // Needs to be RDI for the invoke. + locations->AddTemp(Location::RegisterLocation(RDI)); +} + +static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86_64* codegen, + HInvoke* invoke, + X86_64Assembler* assembler, + int round_mode) { + LocationSummary* locations = invoke->GetLocations(); + if (locations->WillCall()) { + InvokeOutOfLineIntrinsic(codegen, invoke); + } else { + XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>(); + XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); + __ roundsd(out, in, Immediate(round_mode)); + } +} + +void IntrinsicLocationsBuilderX86_64::VisitMathCeil(HInvoke* invoke) { + CreateSSE41FPToFPLocations(arena_, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathCeil(HInvoke* invoke) { + GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathFloor(HInvoke* invoke) { + CreateSSE41FPToFPLocations(arena_, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathFloor(HInvoke* invoke) { + GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathRint(HInvoke* invoke) { + CreateSSE41FPToFPLocations(arena_, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathRint(HInvoke* invoke) { + GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0); +} + +static void CreateSSE41FPToIntLocations(ArenaAllocator* arena, + HInvoke* invoke, + CodeGeneratorX86_64* codegen) { + // Do we have instruction support? + if (codegen->GetInstructionSetFeatures().HasSSE4_1()) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); + return; + } + + // We have to fall back to a call to the intrinsic. + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kCall); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0))); + locations->SetOut(Location::RegisterLocation(RAX)); + // Needs to be RDI for the invoke. + locations->AddTemp(Location::RegisterLocation(RDI)); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathRoundFloat(HInvoke* invoke) { + CreateSSE41FPToIntLocations(arena_, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + if (locations->WillCall()) { + InvokeOutOfLineIntrinsic(codegen_, invoke); + return; + } + + // Implement RoundFloat as t1 = floor(input + 0.5f); convert to int. 
+ XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>(); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + XmmRegister maxInt = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + XmmRegister inPlusPointFive = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); + Label done, nan; + X86_64Assembler* assembler = GetAssembler(); + + // Generate 0.5 into inPlusPointFive. + __ movl(out, Immediate(bit_cast<int32_t, float>(0.5f))); + __ movd(inPlusPointFive, out, false); + + // Add in the input. + __ addss(inPlusPointFive, in); + + // And truncate to an integer. + __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1)); + + __ movl(out, Immediate(kPrimIntMax)); + // maxInt = int-to-float(out) + __ cvtsi2ss(maxInt, out); + + // if inPlusPointFive >= maxInt goto done + __ comiss(inPlusPointFive, maxInt); + __ j(kAboveEqual, &done); + + // if input == NaN goto nan + __ j(kUnordered, &nan); + + // output = float-to-int-truncate(input) + __ cvttss2si(out, inPlusPointFive); + __ jmp(&done); + __ Bind(&nan); + + // output = 0 + __ xorl(out, out); + __ Bind(&done); +} + +void IntrinsicLocationsBuilderX86_64::VisitMathRoundDouble(HInvoke* invoke) { + CreateSSE41FPToIntLocations(arena_, invoke, codegen_); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + if (locations->WillCall()) { + InvokeOutOfLineIntrinsic(codegen_, invoke); + return; + } + + // Implement RoundDouble as t1 = floor(input + 0.5); convert to long. + XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>(); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + XmmRegister maxLong = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + XmmRegister inPlusPointFive = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); + Label done, nan; + X86_64Assembler* assembler = GetAssembler(); + + // Generate 0.5 into inPlusPointFive. + __ movq(out, Immediate(bit_cast<int64_t, double>(0.5))); + __ movd(inPlusPointFive, out, true); + + // Add in the input. + __ addsd(inPlusPointFive, in); + + // And truncate to an integer. + __ roundsd(inPlusPointFive, inPlusPointFive, Immediate(1)); + + __ movq(out, Immediate(kPrimLongMax)); + // maxLong = long-to-double(out) + __ cvtsi2sd(maxLong, out, true); + + // if inPlusPointFive >= maxLong goto done + __ comisd(inPlusPointFive, maxLong); + __ j(kAboveEqual, &done); + + // if input == NaN goto nan + __ j(kUnordered, &nan); + + // output = double-to-long-truncate(input) + __ cvttsd2si(out, inPlusPointFive, true); + __ jmp(&done); + __ Bind(&nan); + + // output = 0 + __ xorq(out, out); + __ Bind(&done); +} + void IntrinsicLocationsBuilderX86_64::VisitStringCharAt(HInvoke* invoke) { // The inputs plus one temp. 
LocationSummary* locations = new (arena_) LocationSummary(invoke, @@ -1009,11 +1212,6 @@ void IntrinsicCodeGeneratorX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSE UNIMPLEMENTED_INTRINSIC(IntegerReverse) UNIMPLEMENTED_INTRINSIC(LongReverse) -UNIMPLEMENTED_INTRINSIC(MathFloor) -UNIMPLEMENTED_INTRINSIC(MathCeil) -UNIMPLEMENTED_INTRINSIC(MathRint) -UNIMPLEMENTED_INTRINSIC(MathRoundDouble) -UNIMPLEMENTED_INTRINSIC(MathRoundFloat) UNIMPLEMENTED_INTRINSIC(StringIndexOf) UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter) UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) diff --git a/compiler/optimizing/intrinsics_x86_64.h b/compiler/optimizing/intrinsics_x86_64.h index dfae7fa90e..0e0e72c1fc 100644 --- a/compiler/optimizing/intrinsics_x86_64.h +++ b/compiler/optimizing/intrinsics_x86_64.h @@ -32,7 +32,7 @@ class X86_64Assembler; class IntrinsicLocationsBuilderX86_64 FINAL : public IntrinsicVisitor { public: - explicit IntrinsicLocationsBuilderX86_64(ArenaAllocator* arena) : arena_(arena) {} + explicit IntrinsicLocationsBuilderX86_64(CodeGeneratorX86_64* codegen); // Define visitor methods. @@ -50,6 +50,7 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) private: ArenaAllocator* arena_; + CodeGeneratorX86_64* codegen_; DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderX86_64); }; diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc index f22b7a7e82..28c5555d57 100644 --- a/compiler/optimizing/linearize_test.cc +++ b/compiler/optimizing/linearize_test.cc @@ -16,6 +16,7 @@ #include <fstream> +#include "arch/x86/instruction_set_features_x86.h" #include "base/arena_allocator.h" #include "base/stringprintf.h" #include "builder.h" @@ -46,7 +47,9 @@ static void TestCode(const uint16_t* data, const int* expected_order, size_t num graph->TryBuildingSsa(); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc index c102c4f02f..61d6593f2b 100644 --- a/compiler/optimizing/live_ranges_test.cc +++ b/compiler/optimizing/live_ranges_test.cc @@ -14,6 +14,7 @@ * limitations under the License. 
*/ +#include "arch/x86/instruction_set_features_x86.h" #include "base/arena_allocator.h" #include "builder.h" #include "code_generator.h" @@ -65,7 +66,9 @@ TEST(LiveRangesTest, CFG1) { ArenaAllocator allocator(&pool); HGraph* graph = BuildGraph(data, &allocator); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -111,7 +114,9 @@ TEST(LiveRangesTest, CFG2) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = BuildGraph(data, &allocator); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -160,7 +165,9 @@ TEST(LiveRangesTest, CFG3) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = BuildGraph(data, &allocator); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -237,7 +244,9 @@ TEST(LiveRangesTest, Loop1) { ArenaAllocator allocator(&pool); HGraph* graph = BuildGraph(data, &allocator); RemoveSuspendChecks(graph); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -315,7 +324,9 @@ TEST(LiveRangesTest, Loop2) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = BuildGraph(data, &allocator); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -391,7 +402,9 @@ TEST(LiveRangesTest, CFG4) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = BuildGraph(data, &allocator); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc index 0b0cfde0cf..81250ca133 100644 --- a/compiler/optimizing/liveness_test.cc +++ b/compiler/optimizing/liveness_test.cc @@ -14,6 +14,7 @@ * limitations under the License. */ +#include "arch/x86/instruction_set_features_x86.h" #include "base/arena_allocator.h" #include "builder.h" #include "code_generator.h" @@ -53,7 +54,9 @@ static void TestCode(const uint16_t* data, const char* expected) { graph->TryBuildingSsa(); // `Inline` conditions into ifs. 
PrepareForRegisterAllocation(graph).Run(); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index dca612e6b7..d8a8554610 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -752,8 +752,8 @@ HInstruction* HBinaryOperation::GetLeastConstantLeft() const { } } -bool HCondition::IsBeforeWhenDisregardMoves(HIf* if_) const { - return this == if_->GetPreviousDisregardingMoves(); +bool HCondition::IsBeforeWhenDisregardMoves(HInstruction* instruction) const { + return this == instruction->GetPreviousDisregardingMoves(); } bool HInstruction::Equals(HInstruction* other) const { diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 21ed3504f1..f764eb421f 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -682,6 +682,7 @@ class HLoopInformationOutwardIterator : public ValueObject { M(ClinitCheck, Instruction) \ M(Compare, BinaryOperation) \ M(Condition, BinaryOperation) \ + M(Deoptimize, Instruction) \ M(Div, BinaryOperation) \ M(DivZeroCheck, Instruction) \ M(DoubleConstant, Constant) \ @@ -1191,7 +1192,17 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> { bool HasEnvironment() const { return environment_ != nullptr; } HEnvironment* GetEnvironment() const { return environment_; } - void SetEnvironment(HEnvironment* environment) { environment_ = environment; } + // Set the `environment_` field. Raw because this method does not + // update the uses lists. + void SetRawEnvironment(HEnvironment* environment) { environment_ = environment; } + + // Set the environment of this instruction, copying it from `environment`. While + // copying, the uses lists are being updated. + void CopyEnvironmentFrom(HEnvironment* environment) { + ArenaAllocator* allocator = GetBlock()->GetGraph()->GetArena(); + environment_ = new (allocator) HEnvironment(allocator, environment->Size()); + environment_->CopyFrom(environment); + } // Returns the number of entries in the environment. Typically, that is the // number of dex registers in a method. It could be more in case of inlining. @@ -1544,12 +1555,31 @@ class HIf : public HTemplateInstruction<1> { DECLARE_INSTRUCTION(If); - virtual bool IsIfInstruction() const { return true; } - private: DISALLOW_COPY_AND_ASSIGN(HIf); }; +// Deoptimize to interpreter, upon checking a condition. +class HDeoptimize : public HTemplateInstruction<1> { + public: + HDeoptimize(HInstruction* cond, uint32_t dex_pc) + : HTemplateInstruction(SideEffects::None()), + dex_pc_(dex_pc) { + SetRawInputAt(0, cond); + } + + bool NeedsEnvironment() const OVERRIDE { return true; } + bool CanThrow() const OVERRIDE { return true; } + uint32_t GetDexPc() const { return dex_pc_; } + + DECLARE_INSTRUCTION(Deoptimize); + + private: + uint32_t dex_pc_; + + DISALLOW_COPY_AND_ASSIGN(HDeoptimize); +}; + class HUnaryOperation : public HExpression<1> { public: HUnaryOperation(Primitive::Type result_type, HInstruction* input) @@ -1667,8 +1697,8 @@ class HCondition : public HBinaryOperation { void ClearNeedsMaterialization() { needs_materialization_ = false; } // For code generation purposes, returns whether this instruction is just before - // `if_`, and disregard moves in between. 
- bool IsBeforeWhenDisregardMoves(HIf* if_) const; + // `instruction`, and disregard moves in between. + bool IsBeforeWhenDisregardMoves(HInstruction* instruction) const; DECLARE_INSTRUCTION(Condition); @@ -2307,6 +2337,9 @@ class HNewArray : public HExpression<1> { // Calls runtime so needs an environment. bool NeedsEnvironment() const OVERRIDE { return true; } + // May throw NegativeArraySizeException, OutOfMemoryError, etc. + bool CanThrow() const OVERRIDE { return true; } + bool CanBeNull() const OVERRIDE { return false; } QuickEntrypointEnum GetEntrypoint() const { return entrypoint_; } diff --git a/compiler/optimizing/nodes_test.cc b/compiler/optimizing/nodes_test.cc index 4cf22d3b2e..4e83ce576c 100644 --- a/compiler/optimizing/nodes_test.cc +++ b/compiler/optimizing/nodes_test.cc @@ -50,7 +50,7 @@ TEST(Node, RemoveInstruction) { exit_block->AddInstruction(new (&allocator) HExit()); HEnvironment* environment = new (&allocator) HEnvironment(&allocator, 1); - null_check->SetEnvironment(environment); + null_check->SetRawEnvironment(environment); environment->SetRawEnvAt(0, parameter); parameter->AddEnvUseAt(null_check->GetEnvironment(), 0); diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index b2f9c65153..e474c49121 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -26,11 +26,13 @@ #include "bounds_check_elimination.h" #include "builder.h" #include "code_generator.h" +#include "compiled_method.h" #include "compiler.h" #include "constant_folding.h" #include "dead_code_elimination.h" #include "dex/quick/dex_file_to_method_inliner_map.h" #include "driver/compiler_driver.h" +#include "driver/compiler_options.h" #include "driver/dex_compilation_unit.h" #include "elf_writer_quick.h" #include "graph_visualizer.h" diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index 2d9a2bf330..f5d8d82571 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -60,11 +60,11 @@ void PrepareForRegisterAllocation::VisitClinitCheck(HClinitCheck* check) { void PrepareForRegisterAllocation::VisitCondition(HCondition* condition) { bool needs_materialization = false; - if (!condition->GetUses().HasOnlyOneUse()) { + if (!condition->GetUses().HasOnlyOneUse() || !condition->GetEnvUses().IsEmpty()) { needs_materialization = true; } else { HInstruction* user = condition->GetUses().GetFirst()->GetUser(); - if (!user->IsIf()) { + if (!user->IsIf() && !user->IsDeoptimize()) { needs_materialization = true; } else { // TODO: if there is no intervening instructions with side-effect between this condition diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index cf38bd3f8c..4bca43499f 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -1408,26 +1408,36 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { // Walk over all uses covered by this interval, and update the location // information. 
- while (use != nullptr && use->GetPosition() <= current->GetEnd()) { - LocationSummary* locations = use->GetUser()->GetLocations(); - if (use->GetIsEnvironment()) { - locations->SetEnvironmentAt(use->GetInputIndex(), source); - } else { - Location expected_location = locations->InAt(use->GetInputIndex()); - // The expected (actual) location may be invalid in case the input is unused. Currently - // this only happens for intrinsics. - if (expected_location.IsValid()) { - if (expected_location.IsUnallocated()) { - locations->SetInAt(use->GetInputIndex(), source); - } else if (!expected_location.IsConstant()) { - AddInputMoveFor(interval->GetDefinedBy(), use->GetUser(), source, expected_location); - } + + LiveRange* range = current->GetFirstRange(); + while (range != nullptr) { + while (use != nullptr && use->GetPosition() < range->GetStart()) { + DCHECK(use->GetIsEnvironment()); + use = use->GetNext(); + } + while (use != nullptr && use->GetPosition() <= range->GetEnd()) { + DCHECK(current->Covers(use->GetPosition()) || (use->GetPosition() == range->GetEnd())); + LocationSummary* locations = use->GetUser()->GetLocations(); + if (use->GetIsEnvironment()) { + locations->SetEnvironmentAt(use->GetInputIndex(), source); } else { - DCHECK(use->GetUser()->IsInvoke()); - DCHECK(use->GetUser()->AsInvoke()->GetIntrinsic() != Intrinsics::kNone); + Location expected_location = locations->InAt(use->GetInputIndex()); + // The expected (actual) location may be invalid in case the input is unused. Currently + // this only happens for intrinsics. + if (expected_location.IsValid()) { + if (expected_location.IsUnallocated()) { + locations->SetInAt(use->GetInputIndex(), source); + } else if (!expected_location.IsConstant()) { + AddInputMoveFor(interval->GetDefinedBy(), use->GetUser(), source, expected_location); + } + } else { + DCHECK(use->GetUser()->IsInvoke()); + DCHECK(use->GetUser()->AsInvoke()->GetIntrinsic() != Intrinsics::kNone); + } } + use = use->GetNext(); } - use = use->GetNext(); + range = range->GetNext(); } // If the next interval starts just after this one, and has a register, @@ -1503,7 +1513,15 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { } current = next_sibling; } while (current != nullptr); - DCHECK(use == nullptr); + + if (kIsDebugBuild) { + // Following uses can only be environment uses. The location for + // these environments will be none. + while (use != nullptr) { + DCHECK(use->GetIsEnvironment()); + use = use->GetNext(); + } + } } void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval, diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc index 7c3a0357d6..3951439881 100644 --- a/compiler/optimizing/register_allocator_test.cc +++ b/compiler/optimizing/register_allocator_test.cc @@ -14,6 +14,7 @@ * limitations under the License. 
*/ +#include "arch/x86/instruction_set_features_x86.h" #include "base/arena_allocator.h" #include "builder.h" #include "code_generator.h" @@ -42,7 +43,9 @@ static bool Check(const uint16_t* data) { const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); builder.BuildGraph(*item); graph->TryBuildingSsa(); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); RegisterAllocator register_allocator(&allocator, &codegen, liveness); @@ -58,7 +61,9 @@ TEST(RegisterAllocatorTest, ValidateIntervals) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = new (&allocator) HGraph(&allocator); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); GrowableArray<LiveInterval*> intervals(&allocator, 0); // Test with two intervals of the same range. @@ -298,7 +303,9 @@ TEST(RegisterAllocatorTest, Loop3) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = BuildSSAGraph(data, &allocator); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); RegisterAllocator register_allocator(&allocator, &codegen, liveness); @@ -330,7 +337,9 @@ TEST(RegisterAllocatorTest, FirstRegisterUse) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = BuildSSAGraph(data, &allocator); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -383,7 +392,9 @@ TEST(RegisterAllocatorTest, DeadPhi) { ArenaAllocator allocator(&pool); HGraph* graph = BuildSSAGraph(data, &allocator); SsaDeadPhiElimination(graph).Run(); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); RegisterAllocator register_allocator(&allocator, &codegen, liveness); @@ -405,7 +416,9 @@ TEST(RegisterAllocatorTest, FreeUntil) { ArenaAllocator allocator(&pool); HGraph* graph = BuildSSAGraph(data, &allocator); SsaDeadPhiElimination(graph).Run(); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); RegisterAllocator register_allocator(&allocator, &codegen, liveness); @@ -507,7 +520,9 @@ TEST(RegisterAllocatorTest, PhiHint) { { HGraph* graph = BuildIfElseWithPhi(&allocator, &phi, &input1, &input2); - 
x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -522,7 +537,9 @@ TEST(RegisterAllocatorTest, PhiHint) { { HGraph* graph = BuildIfElseWithPhi(&allocator, &phi, &input1, &input2); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -539,7 +556,9 @@ TEST(RegisterAllocatorTest, PhiHint) { { HGraph* graph = BuildIfElseWithPhi(&allocator, &phi, &input1, &input2); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -556,7 +575,9 @@ TEST(RegisterAllocatorTest, PhiHint) { { HGraph* graph = BuildIfElseWithPhi(&allocator, &phi, &input1, &input2); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -608,7 +629,9 @@ TEST(RegisterAllocatorTest, ExpectedInRegisterHint) { { HGraph* graph = BuildFieldReturn(&allocator, &field, &ret); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -621,7 +644,9 @@ TEST(RegisterAllocatorTest, ExpectedInRegisterHint) { { HGraph* graph = BuildFieldReturn(&allocator, &field, &ret); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -671,7 +696,9 @@ TEST(RegisterAllocatorTest, SameAsFirstInputHint) { { HGraph* graph = BuildTwoSubs(&allocator, &first_sub, &second_sub); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -685,7 +712,9 @@ TEST(RegisterAllocatorTest, SameAsFirstInputHint) { { HGraph* graph = BuildTwoSubs(&allocator, &first_sub, &second_sub); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -734,7 +763,9 @@ 
TEST(RegisterAllocatorTest, ExpectedExactInRegisterAndSameOutputHint) { { HGraph* graph = BuildDiv(&allocator, &div); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); liveness.Analyze(); @@ -822,7 +853,9 @@ TEST(RegisterAllocatorTest, SpillInactive) { locations = new (&allocator) LocationSummary(fourth->GetDefinedBy(), LocationSummary::kNoCall); locations->SetOut(Location::RequiresRegister()); - x86::CodeGeneratorX86 codegen(graph, CompilerOptions()); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); SsaLivenessAnalysis liveness(*graph, &codegen); RegisterAllocator register_allocator(&allocator, &codegen, liveness); diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index fcc4e69b37..e154ea4ee6 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -487,7 +487,7 @@ void SsaBuilder::VisitInstruction(HInstruction* instruction) { HEnvironment* environment = new (GetGraph()->GetArena()) HEnvironment( GetGraph()->GetArena(), current_locals_->Size()); environment->CopyFrom(current_locals_); - instruction->SetEnvironment(environment); + instruction->SetRawEnvironment(environment); } void SsaBuilder::VisitTemporary(HTemporary* temp) { diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc index 0f3973e5fb..95da6ef551 100644 --- a/compiler/optimizing/ssa_liveness_analysis.cc +++ b/compiler/optimizing/ssa_liveness_analysis.cc @@ -218,28 +218,34 @@ void SsaLivenessAnalysis::ComputeLiveRanges() { current->GetLiveInterval()->SetFrom(current->GetLifetimePosition()); } - // All inputs of an instruction must be live. - for (size_t i = 0, e = current->InputCount(); i < e; ++i) { - HInstruction* input = current->InputAt(i); - // Some instructions 'inline' their inputs, that is they do not need - // to be materialized. - if (input->HasSsaIndex()) { - live_in->SetBit(input->GetSsaIndex()); - input->GetLiveInterval()->AddUse(current, i, false); - } - } - + // Process the environment first, because we know their uses come after + // or at the same liveness position of inputs. if (current->HasEnvironment()) { // Handle environment uses. See statements (b) and (c) of the // SsaLivenessAnalysis. HEnvironment* environment = current->GetEnvironment(); for (size_t i = 0, e = environment->Size(); i < e; ++i) { HInstruction* instruction = environment->GetInstructionAt(i); - if (ShouldBeLiveForEnvironment(instruction)) { + bool should_be_live = ShouldBeLiveForEnvironment(instruction); + if (should_be_live) { DCHECK(instruction->HasSsaIndex()); live_in->SetBit(instruction->GetSsaIndex()); - instruction->GetLiveInterval()->AddUse(current, i, true); } + if (instruction != nullptr) { + instruction->GetLiveInterval()->AddUse( + current, i, /* is_environment */ true, should_be_live); + } + } + } + + // All inputs of an instruction must be live. + for (size_t i = 0, e = current->InputCount(); i < e; ++i) { + HInstruction* input = current->InputAt(i); + // Some instructions 'inline' their inputs, that is they do not need + // to be materialized. 
+ if (input->HasSsaIndex()) { + live_in->SetBit(input->GetSsaIndex()); + input->GetLiveInterval()->AddUse(current, i, /* is_environment */ false); } } } diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h index bc78dc2e76..d2da84c0c0 100644 --- a/compiler/optimizing/ssa_liveness_analysis.h +++ b/compiler/optimizing/ssa_liveness_analysis.h @@ -189,7 +189,10 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { AddRange(position, position + 1); } - void AddUse(HInstruction* instruction, size_t input_index, bool is_environment) { + void AddUse(HInstruction* instruction, + size_t input_index, + bool is_environment, + bool keep_alive = false) { // Set the use within the instruction. size_t position = instruction->GetLifetimePosition() + 1; LocationSummary* locations = instruction->GetLocations(); @@ -211,6 +214,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { && (first_use_->GetPosition() < position)) { // The user uses the instruction multiple times, and one use dies before the other. // We update the use list so that the latter is first. + DCHECK(!is_environment); UsePosition* cursor = first_use_; while ((cursor->GetNext() != nullptr) && (cursor->GetNext()->GetPosition() < position)) { cursor = cursor->GetNext(); @@ -225,6 +229,15 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { return; } + first_use_ = new (allocator_) UsePosition( + instruction, input_index, is_environment, position, first_use_); + + if (is_environment && !keep_alive) { + // If this environment use does not keep the instruction live, it does not + // affect the live range of that instruction. + return; + } + size_t start_block_position = instruction->GetBlock()->GetLifetimeStart(); if (first_range_ == nullptr) { // First time we see a use of that interval. @@ -246,8 +259,6 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { // and the check line 205 would succeed. first_range_ = new (allocator_) LiveRange(start_block_position, position, first_range_); } - first_use_ = new (allocator_) UsePosition( - instruction, input_index, is_environment, position, first_use_); } void AddPhiUse(HInstruction* instruction, size_t input_index, HBasicBlock* block) { @@ -425,9 +436,11 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { UsePosition* use = first_use_; size_t end = GetEnd(); while (use != nullptr && use->GetPosition() <= end) { - size_t use_position = use->GetPosition(); - if (use_position > position) { - return use_position; + if (!use->GetIsEnvironment()) { + size_t use_position = use->GetPosition(); + if (use_position > position) { + return use_position; + } } use = use->GetNext(); } diff --git a/compiler/utils/arm64/assembler_arm64.h b/compiler/utils/arm64/assembler_arm64.h index a69be2599e..2031fe4e57 100644 --- a/compiler/utils/arm64/assembler_arm64.h +++ b/compiler/utils/arm64/assembler_arm64.h @@ -31,8 +31,8 @@ // TODO: make vixl clean wrt -Wshadow. 
#pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wshadow" -#include "a64/macro-assembler-a64.h" -#include "a64/disasm-a64.h" +#include "vixl/a64/macro-assembler-a64.h" +#include "vixl/a64/disasm-a64.h" #pragma GCC diagnostic pop namespace art { diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h index 6f8b3012a4..b13edb68bf 100644 --- a/compiler/utils/assembler_test.h +++ b/compiler/utils/assembler_test.h @@ -123,6 +123,16 @@ class AssemblerTest : public testing::Test { fmt); } + std::string RepeatFFI(void (Ass::*f)(FPReg, FPReg, const Imm&), size_t imm_bytes, std::string fmt) { + return RepeatTemplatedRegistersImm<FPReg, FPReg>(f, + GetFPRegisters(), + GetFPRegisters(), + &AssemblerTest::GetFPRegName, + &AssemblerTest::GetFPRegName, + imm_bytes, + fmt); + } + std::string RepeatFR(void (Ass::*f)(FPReg, Reg), std::string fmt) { return RepeatTemplatedRegisters<FPReg, Reg>(f, GetFPRegisters(), @@ -448,6 +458,57 @@ class AssemblerTest : public testing::Test { return str; } + template <typename Reg1, typename Reg2> + std::string RepeatTemplatedRegistersImm(void (Ass::*f)(Reg1, Reg2, const Imm&), + const std::vector<Reg1*> reg1_registers, + const std::vector<Reg2*> reg2_registers, + std::string (AssemblerTest::*GetName1)(const Reg1&), + std::string (AssemblerTest::*GetName2)(const Reg2&), + size_t imm_bytes, + std::string fmt) { + std::vector<int64_t> imms = CreateImmediateValues(imm_bytes); + WarnOnCombinations(reg1_registers.size() * reg2_registers.size() * imms.size()); + + std::string str; + for (auto reg1 : reg1_registers) { + for (auto reg2 : reg2_registers) { + for (int64_t imm : imms) { + Imm new_imm = CreateImmediate(imm); + (assembler_.get()->*f)(*reg1, *reg2, new_imm); + std::string base = fmt; + + std::string reg1_string = (this->*GetName1)(*reg1); + size_t reg1_index; + while ((reg1_index = base.find(REG1_TOKEN)) != std::string::npos) { + base.replace(reg1_index, ConstexprStrLen(REG1_TOKEN), reg1_string); + } + + std::string reg2_string = (this->*GetName2)(*reg2); + size_t reg2_index; + while ((reg2_index = base.find(REG2_TOKEN)) != std::string::npos) { + base.replace(reg2_index, ConstexprStrLen(REG2_TOKEN), reg2_string); + } + + size_t imm_index = base.find(IMM_TOKEN); + if (imm_index != std::string::npos) { + std::ostringstream sreg; + sreg << imm; + std::string imm_string = sreg.str(); + base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string); + } + + if (str.size() > 0) { + str += "\n"; + } + str += base; + } + } + } + // Add a newline at the end. + str += "\n"; + return str; + } + template <RegisterView kRegView> std::string GetRegName(const Reg& reg) { std::ostringstream sreg; diff --git a/compiler/utils/dex_cache_arrays_layout-inl.h b/compiler/utils/dex_cache_arrays_layout-inl.h new file mode 100644 index 0000000000..7d02ce35d8 --- /dev/null +++ b/compiler/utils/dex_cache_arrays_layout-inl.h @@ -0,0 +1,77 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_UTILS_DEX_CACHE_ARRAYS_LAYOUT_INL_H_ +#define ART_COMPILER_UTILS_DEX_CACHE_ARRAYS_LAYOUT_INL_H_ + +#include "dex_cache_arrays_layout.h" + +#include "base/logging.h" +#include "globals.h" +#include "mirror/array-inl.h" +#include "primitive.h" +#include "utils.h" + +namespace mirror { +class ArtField; +class ArtMethod; +class Class; +class String; +} // namespace mirror + +namespace art { + +inline DexCacheArraysLayout::DexCacheArraysLayout(const DexFile* dex_file) + : /* types_offset_ is always 0u */ + methods_offset_(types_offset_ + ArraySize<mirror::Class>(dex_file->NumTypeIds())), + strings_offset_(methods_offset_ + ArraySize<mirror::ArtMethod>(dex_file->NumMethodIds())), + fields_offset_(strings_offset_ + ArraySize<mirror::String>(dex_file->NumStringIds())), + size_(fields_offset_ + ArraySize<mirror::ArtField>(dex_file->NumFieldIds())) { +} + +inline size_t DexCacheArraysLayout::TypeOffset(uint32_t type_idx) const { + return types_offset_ + ElementOffset<mirror::Class>(type_idx); +} + +inline size_t DexCacheArraysLayout::MethodOffset(uint32_t method_idx) const { + return methods_offset_ + ElementOffset<mirror::ArtMethod>(method_idx); +} + +inline size_t DexCacheArraysLayout::StringOffset(uint32_t string_idx) const { + return strings_offset_ + ElementOffset<mirror::String>(string_idx); +} + +inline size_t DexCacheArraysLayout::FieldOffset(uint32_t field_idx) const { + return fields_offset_ + ElementOffset<mirror::ArtField>(field_idx); +} + +template <typename MirrorType> +inline size_t DexCacheArraysLayout::ElementOffset(uint32_t idx) { + return mirror::Array::DataOffset(sizeof(mirror::HeapReference<MirrorType>)).Uint32Value() + + sizeof(mirror::HeapReference<MirrorType>) * idx; +} + +template <typename MirrorType> +inline size_t DexCacheArraysLayout::ArraySize(uint32_t num_elements) { + size_t array_size = mirror::ComputeArraySize( + num_elements, ComponentSizeShiftWidth<sizeof(mirror::HeapReference<MirrorType>)>()); + DCHECK_NE(array_size, 0u); // No overflow expected for dex cache arrays. + return RoundUp(array_size, kObjectAlignment); +} + +} // namespace art + +#endif // ART_COMPILER_UTILS_DEX_CACHE_ARRAYS_LAYOUT_INL_H_ diff --git a/compiler/utils/dex_cache_arrays_layout.h b/compiler/utils/dex_cache_arrays_layout.h new file mode 100644 index 0000000000..b461256f63 --- /dev/null +++ b/compiler/utils/dex_cache_arrays_layout.h @@ -0,0 +1,89 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_UTILS_DEX_CACHE_ARRAYS_LAYOUT_H_ +#define ART_COMPILER_UTILS_DEX_CACHE_ARRAYS_LAYOUT_H_ + +namespace art { + +/** + * @class DexCacheArraysLayout + * @details This class provides the layout information for the type, method, field and + * string arrays for a DexCache with a fixed arrays' layout (such as in the boot image), + */ +class DexCacheArraysLayout { + public: + // Construct an invalid layout. 
+ DexCacheArraysLayout() + : /* types_offset_ is always 0u */ + methods_offset_(0u), + strings_offset_(0u), + fields_offset_(0u), + size_(0u) { + } + + // Construct a layout for a particular dex file. + explicit DexCacheArraysLayout(const DexFile* dex_file); + + bool Valid() const { + return Size() != 0u; + } + + size_t Size() const { + return size_; + } + + size_t TypesOffset() const { + return types_offset_; + } + + size_t TypeOffset(uint32_t type_idx) const; + + size_t MethodsOffset() const { + return methods_offset_; + } + + size_t MethodOffset(uint32_t method_idx) const; + + size_t StringsOffset() const { + return strings_offset_; + } + + size_t StringOffset(uint32_t string_idx) const; + + size_t FieldsOffset() const { + return fields_offset_; + } + + size_t FieldOffset(uint32_t field_idx) const; + + private: + static constexpr size_t types_offset_ = 0u; + const size_t methods_offset_; + const size_t strings_offset_; + const size_t fields_offset_; + const size_t size_; + + template <typename MirrorType> + static size_t ElementOffset(uint32_t idx); + + template <typename MirrorType> + static size_t ArraySize(uint32_t num_elements); +}; + +} // namespace art + +#endif // ART_COMPILER_UTILS_DEX_CACHE_ARRAYS_LAYOUT_H_ diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index 5773459ff5..b3a1376727 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -695,6 +695,28 @@ void X86Assembler::ucomisd(XmmRegister a, XmmRegister b) { } +void X86Assembler::roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x3A); + EmitUint8(0x0B); + EmitXmmRegisterOperand(dst, src); + EmitUint8(imm.value()); +} + + +void X86Assembler::roundss(XmmRegister dst, XmmRegister src, const Immediate& imm) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x3A); + EmitUint8(0x0A); + EmitXmmRegisterOperand(dst, src); + EmitUint8(imm.value()); +} + + void X86Assembler::sqrtsd(XmmRegister dst, XmmRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xF2); diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h index 6ccf2e365d..bdf88435a4 100644 --- a/compiler/utils/x86/assembler_x86.h +++ b/compiler/utils/x86/assembler_x86.h @@ -312,6 +312,9 @@ class X86Assembler FINAL : public Assembler { void ucomiss(XmmRegister a, XmmRegister b); void ucomisd(XmmRegister a, XmmRegister b); + void roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm); + void roundss(XmmRegister dst, XmmRegister src, const Immediate& imm); + void sqrtsd(XmmRegister dst, XmmRegister src); void sqrtss(XmmRegister dst, XmmRegister src); diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index bd155ed788..e82d90c5c8 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -796,6 +796,30 @@ void X86_64Assembler::ucomisd(XmmRegister a, XmmRegister b) { } +void X86_64Assembler::roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x3A); + EmitUint8(0x0B); + EmitXmmRegisterOperand(dst.LowBits(), src); + EmitUint8(imm.value()); +} + + +void X86_64Assembler::roundss(XmmRegister dst, XmmRegister src, const Immediate& imm) 
{ + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x3A); + EmitUint8(0x0A); + EmitXmmRegisterOperand(dst.LowBits(), src); + EmitUint8(imm.value()); +} + + void X86_64Assembler::sqrtsd(XmmRegister dst, XmmRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xF2); diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index 495f74f498..39f781cb1c 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -353,6 +353,9 @@ class X86_64Assembler FINAL : public Assembler { void ucomiss(XmmRegister a, XmmRegister b); void ucomisd(XmmRegister a, XmmRegister b); + void roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm); + void roundss(XmmRegister dst, XmmRegister src, const Immediate& imm); + void sqrtsd(XmmRegister dst, XmmRegister src); void sqrtss(XmmRegister dst, XmmRegister src); diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc index 00f508b23f..4402dfcb37 100644 --- a/compiler/utils/x86_64/assembler_x86_64_test.cc +++ b/compiler/utils/x86_64/assembler_x86_64_test.cc @@ -692,6 +692,14 @@ TEST_F(AssemblerX86_64Test, Sqrtsd) { DriverStr(RepeatFF(&x86_64::X86_64Assembler::sqrtsd, "sqrtsd %{reg2}, %{reg1}"), "sqrtsd"); } +TEST_F(AssemblerX86_64Test, Roundss) { + DriverStr(RepeatFFI(&x86_64::X86_64Assembler::roundss, 1, "roundss ${imm}, %{reg2}, %{reg1}"), "roundss"); } +TEST_F(AssemblerX86_64Test, Roundsd) { + DriverStr(RepeatFFI(&x86_64::X86_64Assembler::roundsd, 1, "roundsd ${imm}, %{reg2}, %{reg1}"), "roundsd"); } + TEST_F(AssemblerX86_64Test, Xorps) { DriverStr(RepeatFF(&x86_64::X86_64Assembler::xorps, "xorps %{reg2}, %{reg1}"), "xorps"); }
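
A stand-alone C++ sketch of what the SSE4.1 fast path of the new MathRoundFloat intrinsic above computes (illustrative only; the helper name and the use of <cmath>/<cstdint>/<limits> are assumptions for the sketch, not code from this change):

// Plain C++ model of the addss/roundss/comiss/cvttss2si sequence emitted by
// VisitMathRoundFloat on the SSE4.1 path. RoundFloatModel is a made-up name.
#include <cmath>
#include <cstdint>
#include <limits>

int32_t RoundFloatModel(float in) {
  // addss + roundss(mode 1): floor(in + 0.5f), i.e. round toward -infinity.
  float t = std::floor(in + 0.5f);
  // comiss reports "unordered" for NaN inputs; the generated code branches to
  // the nan label and produces 0.
  if (std::isnan(t)) {
    return 0;
  }
  // kAboveEqual branch: the result saturates to kPrimIntMax once the floored
  // value reaches the float representation of INT32_MAX.
  if (t >= static_cast<float>(std::numeric_limits<int32_t>::max())) {
    return std::numeric_limits<int32_t>::max();
  }
  // cvttss2si truncates toward zero; t is already an integral value here.
  return static_cast<int32_t>(t);
}

The double variant (VisitMathRoundDouble on x86-64) follows the same shape using addsd/roundsd/comisd/cvttsd2si and kPrimLongMax, and when SSE4.1 is not available both locations builders fall back to a LocationSummary::kCall summary so InvokeOutOfLineIntrinsic dispatches to the runtime method instead.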