Diffstat (limited to 'compiler')
81 files changed, 4111 insertions, 3252 deletions
diff --git a/compiler/Android.mk b/compiler/Android.mk index 66ff46163b..fc2f02b59e 100644 --- a/compiler/Android.mk +++ b/compiler/Android.mk @@ -74,7 +74,6 @@ LIBART_COMPILER_SRC_FILES := \ llvm/md_builder.cc \ llvm/runtime_support_builder.cc \ llvm/runtime_support_builder_arm.cc \ - llvm/runtime_support_builder_thumb2.cc \ llvm/runtime_support_builder_x86.cc \ trampolines/trampoline_compiler.cc \ utils/arm/assembler_arm.cc \ diff --git a/compiler/dex/arena_allocator.cc b/compiler/dex/arena_allocator.cc index 36393e7387..95e44b3e0d 100644 --- a/compiler/dex/arena_allocator.cc +++ b/compiler/dex/arena_allocator.cc @@ -19,12 +19,15 @@ #include "arena_allocator.h" #include "base/logging.h" #include "base/mutex.h" +#include "thread-inl.h" +#include <memcheck/memcheck.h> namespace art { // Memmap is a bit slower than malloc according to my measurements. static constexpr bool kUseMemMap = false; static constexpr bool kUseMemSet = true && kUseMemMap; +static constexpr size_t kValgrindRedZoneBytes = 8; static const char* alloc_names[ArenaAllocator::kNumAllocKinds] = { "Misc ", @@ -47,7 +50,9 @@ Arena::Arena(size_t size) map_(nullptr), next_(nullptr) { if (kUseMemMap) { - map_ = MemMap::MapAnonymous("dalvik-arena", NULL, size, PROT_READ | PROT_WRITE); + std::string error_msg; + map_ = MemMap::MapAnonymous("dalvik-arena", NULL, size, PROT_READ | PROT_WRITE, &error_msg); + CHECK(map_ != nullptr) << error_msg; memory_ = map_->Begin(); size_ = map_->Size(); } else { @@ -107,6 +112,9 @@ Arena* ArenaPool::AllocArena(size_t size) { void ArenaPool::FreeArena(Arena* arena) { Thread* self = Thread::Current(); + if (UNLIKELY(RUNNING_ON_VALGRIND)) { + VALGRIND_MAKE_MEM_UNDEFINED(arena->memory_, arena->bytes_allocated_); + } { MutexLock lock(self, lock_); arena->next_ = free_arenas_; @@ -128,7 +136,8 @@ ArenaAllocator::ArenaAllocator(ArenaPool* pool) end_(nullptr), ptr_(nullptr), arena_head_(nullptr), - num_allocations_(0) { + num_allocations_(0), + running_on_valgrind_(RUNNING_ON_VALGRIND) { memset(&alloc_stats_[0], 0, sizeof(alloc_stats_)); } @@ -140,6 +149,29 @@ void ArenaAllocator::UpdateBytesAllocated() { } } +void* ArenaAllocator::AllocValgrind(size_t bytes, ArenaAllocKind kind) { + size_t rounded_bytes = (bytes + 3 + kValgrindRedZoneBytes) & ~3; + if (UNLIKELY(ptr_ + rounded_bytes > end_)) { + // Obtain a new block. + ObtainNewArenaForAllocation(rounded_bytes); + if (UNLIKELY(ptr_ == nullptr)) { + return nullptr; + } + } + if (kCountAllocations) { + alloc_stats_[kind] += rounded_bytes; + ++num_allocations_; + } + uint8_t* ret = ptr_; + ptr_ += rounded_bytes; + // Check that the memory is already zeroed out. + for (uint8_t* ptr = ret; ptr < ptr_; ++ptr) { + CHECK_EQ(*ptr, 0U); + } + VALGRIND_MAKE_MEM_NOACCESS(ret + bytes, rounded_bytes - bytes); + return ret; +} + ArenaAllocator::~ArenaAllocator() { // Reclaim all the arenas by giving them back to the thread pool. UpdateBytesAllocated(); diff --git a/compiler/dex/arena_allocator.h b/compiler/dex/arena_allocator.h index dda52a2ed0..d11d67c795 100644 --- a/compiler/dex/arena_allocator.h +++ b/compiler/dex/arena_allocator.h @@ -103,6 +103,9 @@ class ArenaAllocator { // Returns zeroed memory. void* Alloc(size_t bytes, ArenaAllocKind kind) ALWAYS_INLINE { + if (UNLIKELY(running_on_valgrind_)) { + return AllocValgrind(bytes, kind); + } bytes = (bytes + 3) & ~3; if (UNLIKELY(ptr_ + bytes > end_)) { // Obtain a new block. 
@@ -120,6 +123,7 @@ class ArenaAllocator { return ret; } + void* AllocValgrind(size_t bytes, ArenaAllocKind kind); void ObtainNewArenaForAllocation(size_t allocation_size); size_t BytesAllocated() const; void DumpMemStats(std::ostream& os) const; @@ -132,10 +136,9 @@ class ArenaAllocator { uint8_t* end_; uint8_t* ptr_; Arena* arena_head_; - - // Statistics. size_t num_allocations_; - size_t alloc_stats_[kNumAllocKinds]; // Bytes used by various allocation kinds. + size_t alloc_stats_[kNumAllocKinds]; // Bytes used by various allocation kinds. + bool running_on_valgrind_; DISALLOW_COPY_AND_ASSIGN(ArenaAllocator); }; // ArenaAllocator diff --git a/compiler/dex/arena_allocator_test.cc b/compiler/dex/arena_allocator_test.cc new file mode 100644 index 0000000000..63dc6159eb --- /dev/null +++ b/compiler/dex/arena_allocator_test.cc @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "arena_allocator.h" +#include "arena_bit_vector.h" +#include "gtest/gtest.h" + +namespace art { + +TEST(ArenaAllocator, Test) { + ArenaPool pool; + ArenaAllocator arena(&pool); + ArenaBitVector bv(&arena, 10, true); + bv.SetBit(5); + EXPECT_EQ(1U, bv.GetStorageSize()); + bv.SetBit(35); + EXPECT_EQ(2U, bv.GetStorageSize()); +} + +} // namespace art diff --git a/compiler/dex/arena_bit_vector.cc b/compiler/dex/arena_bit_vector.cc index 3fa9295276..b567ae8d8a 100644 --- a/compiler/dex/arena_bit_vector.cc +++ b/compiler/dex/arena_bit_vector.cc @@ -19,125 +19,29 @@ namespace art { -// TODO: profile to make sure this is still a win relative to just using shifted masks. -static uint32_t check_masks[32] = { - 0x00000001, 0x00000002, 0x00000004, 0x00000008, 0x00000010, - 0x00000020, 0x00000040, 0x00000080, 0x00000100, 0x00000200, - 0x00000400, 0x00000800, 0x00001000, 0x00002000, 0x00004000, - 0x00008000, 0x00010000, 0x00020000, 0x00040000, 0x00080000, - 0x00100000, 0x00200000, 0x00400000, 0x00800000, 0x01000000, - 0x02000000, 0x04000000, 0x08000000, 0x10000000, 0x20000000, - 0x40000000, 0x80000000 }; +class ArenaBitVectorAllocator : public Allocator { + public: + explicit ArenaBitVectorAllocator(ArenaAllocator* arena) : arena_(arena) {} + ~ArenaBitVectorAllocator() {} -ArenaBitVector::ArenaBitVector(ArenaAllocator* arena, unsigned int start_bits, - bool expandable, OatBitMapKind kind) - : arena_(arena), - expandable_(expandable), - kind_(kind), - storage_size_((start_bits + 31) >> 5), - storage_(static_cast<uint32_t*>(arena_->Alloc(storage_size_ * sizeof(uint32_t), - ArenaAllocator::kAllocGrowableBitMap))) { - DCHECK_EQ(sizeof(storage_[0]), 4U); // Assuming 32-bit units. -} - -/* - * Determine whether or not the specified bit is set. - */ -bool ArenaBitVector::IsBitSet(unsigned int num) { - DCHECK_LT(num, storage_size_ * sizeof(uint32_t) * 8); - - unsigned int val = storage_[num >> 5] & check_masks[num & 0x1f]; - return (val != 0); -} - -// Mark all bits bit as "clear". 
-void ArenaBitVector::ClearAllBits() { - memset(storage_, 0, storage_size_ * sizeof(uint32_t)); -} - -// Mark the specified bit as "set". -/* - * TUNING: this could have pathologically bad growth/expand behavior. Make sure we're - * not using it badly or change resize mechanism. - */ -void ArenaBitVector::SetBit(unsigned int num) { - if (num >= storage_size_ * sizeof(uint32_t) * 8) { - DCHECK(expandable_) << "Attempted to expand a non-expandable bitmap to position " << num; - - /* Round up to word boundaries for "num+1" bits */ - unsigned int new_size = (num + 1 + 31) >> 5; - DCHECK_GT(new_size, storage_size_); - uint32_t *new_storage = - static_cast<uint32_t*>(arena_->Alloc(new_size * sizeof(uint32_t), - ArenaAllocator::kAllocGrowableBitMap)); - memcpy(new_storage, storage_, storage_size_ * sizeof(uint32_t)); - // Zero out the new storage words. - memset(&new_storage[storage_size_], 0, (new_size - storage_size_) * sizeof(uint32_t)); - // TOTO: collect stats on space wasted because of resize. - storage_ = new_storage; - storage_size_ = new_size; + virtual void* Alloc(size_t size) { + return arena_->Alloc(size, ArenaAllocator::kAllocGrowableBitMap); } - storage_[num >> 5] |= check_masks[num & 0x1f]; -} - -// Mark the specified bit as "unset". -void ArenaBitVector::ClearBit(unsigned int num) { - DCHECK_LT(num, storage_size_ * sizeof(uint32_t) * 8); - storage_[num >> 5] &= ~check_masks[num & 0x1f]; -} - -// Copy a whole vector to the other. Sizes must match. -void ArenaBitVector::Copy(ArenaBitVector* src) { - DCHECK_EQ(storage_size_, src->GetStorageSize()); - memcpy(storage_, src->GetRawStorage(), sizeof(uint32_t) * storage_size_); -} + virtual void Free(void*) {} // Nop. -// Intersect with another bit vector. Sizes and expandability must be the same. -void ArenaBitVector::Intersect(const ArenaBitVector* src) { - DCHECK_EQ(storage_size_, src->GetStorageSize()); - DCHECK_EQ(expandable_, src->IsExpandable()); - for (unsigned int idx = 0; idx < storage_size_; idx++) { - storage_[idx] &= src->GetRawStorageWord(idx); + static void* operator new(size_t size, ArenaAllocator* arena) { + return arena->Alloc(sizeof(ArenaBitVectorAllocator), ArenaAllocator::kAllocGrowableBitMap); } -} + static void operator delete(void* p) {} // Nop. -/* - * Union with another bit vector. Sizes and expandability must be the same. - */ -void ArenaBitVector::Union(const ArenaBitVector* src) { - DCHECK_EQ(storage_size_, src->GetStorageSize()); - DCHECK_EQ(expandable_, src->IsExpandable()); - for (unsigned int idx = 0; idx < storage_size_; idx++) { - storage_[idx] |= src->GetRawStorageWord(idx); - } -} - -// Count the number of bits that are set. -int ArenaBitVector::NumSetBits() { - unsigned int count = 0; - - for (unsigned int word = 0; word < storage_size_; word++) { - count += __builtin_popcount(storage_[word]); - } - return count; -} + private: + ArenaAllocator* arena_; + DISALLOW_COPY_AND_ASSIGN(ArenaBitVectorAllocator); +}; -/* - * Mark specified number of bits as "set". Cannot set all bits like ClearAll - * since there might be unused bits - setting those to one will confuse the - * iterator. 
- */ -void ArenaBitVector::SetInitialBits(unsigned int num_bits) { - DCHECK_LE(((num_bits + 31) >> 5), storage_size_); - unsigned int idx; - for (idx = 0; idx < (num_bits >> 5); idx++) { - storage_[idx] = -1; - } - unsigned int rem_num_bits = num_bits & 0x1f; - if (rem_num_bits) { - storage_[idx] = (1 << rem_num_bits) - 1; - } -} +ArenaBitVector::ArenaBitVector(ArenaAllocator* arena, unsigned int start_bits, + bool expandable, OatBitMapKind kind) + : BitVector(start_bits, expandable, new (arena) ArenaBitVectorAllocator(arena)), kind_(kind) {} } // namespace art diff --git a/compiler/dex/arena_bit_vector.h b/compiler/dex/arena_bit_vector.h index 8bcd628dc0..4b2193a3f1 100644 --- a/compiler/dex/arena_bit_vector.h +++ b/compiler/dex/arena_bit_vector.h @@ -17,107 +17,28 @@ #ifndef ART_COMPILER_DEX_ARENA_BIT_VECTOR_H_ #define ART_COMPILER_DEX_ARENA_BIT_VECTOR_H_ -#include <stdint.h> -#include <stddef.h> -#include "compiler_enums.h" #include "arena_allocator.h" +#include "base/bit_vector.h" +#include "compiler_enums.h" namespace art { /* - * Expanding bitmap, used for tracking resources. Bits are numbered starting - * from zero. All operations on a BitVector are unsynchronized. + * A BitVector implementation that uses Arena allocation. */ -class ArenaBitVector { +class ArenaBitVector : public BitVector { public: - class Iterator { - public: - explicit Iterator(ArenaBitVector* bit_vector) - : p_bits_(bit_vector), - bit_storage_(bit_vector->GetRawStorage()), - bit_index_(0), - bit_size_(p_bits_->storage_size_ * sizeof(uint32_t) * 8) {} - - // Return the position of the next set bit. -1 means end-of-element reached. - int Next() { - // Did anything obviously change since we started? - DCHECK_EQ(bit_size_, p_bits_->GetStorageSize() * sizeof(uint32_t) * 8); - DCHECK_EQ(bit_storage_, p_bits_->GetRawStorage()); - - if (bit_index_ >= bit_size_) return -1; - - uint32_t word_index = bit_index_ / 32; - uint32_t word = bit_storage_[word_index]; - // Mask out any bits in the first word we've already considered. - word >>= bit_index_ & 0x1f; - if (word == 0) { - bit_index_ &= ~0x1f; - do { - word_index++; - if ((word_index * 32) >= bit_size_) { - bit_index_ = bit_size_; - return -1; - } - word = bit_storage_[word_index]; - bit_index_ += 32; - } while (word == 0); - } - bit_index_ += CTZ(word) + 1; - return bit_index_ - 1; - } - - static void* operator new(size_t size, ArenaAllocator* arena) { - return arena->Alloc(sizeof(ArenaBitVector::Iterator), - ArenaAllocator::kAllocGrowableBitMap); - }; - static void operator delete(void* p) {} // Nop. - - private: - ArenaBitVector* const p_bits_; - uint32_t* const bit_storage_; - uint32_t bit_index_; // Current index (size in bits). - const uint32_t bit_size_; // Size of vector in bits. - }; - - ArenaBitVector(ArenaAllocator* arena, unsigned int start_bits, bool expandable, + ArenaBitVector(ArenaAllocator* arena, uint32_t start_bits, bool expandable, OatBitMapKind kind = kBitMapMisc); ~ArenaBitVector() {} - static void* operator new(size_t size, ArenaAllocator* arena) { - return arena->Alloc(sizeof(ArenaBitVector), ArenaAllocator::kAllocGrowableBitMap); - } - static void operator delete(void* p) {} // Nop. 
- - void SetBit(unsigned int num); - void ClearBit(unsigned int num); - void MarkAllBits(bool set); - void DebugBitVector(char* msg, int length); - bool IsBitSet(unsigned int num); - void ClearAllBits(); - void SetInitialBits(unsigned int num_bits); - void Copy(ArenaBitVector* src); - void Intersect(const ArenaBitVector* src2); - void Union(const ArenaBitVector* src); - // Are we equal to another bit vector? Note: expandability attributes must also match. - bool Equal(const ArenaBitVector* src) { - return (storage_size_ == src->GetStorageSize()) && - (expandable_ == src->IsExpandable()) && - (memcmp(storage_, src->GetRawStorage(), storage_size_ * 4) == 0); - } - int NumSetBits(); - - uint32_t GetStorageSize() const { return storage_size_; } - bool IsExpandable() const { return expandable_; } - uint32_t GetRawStorageWord(size_t idx) const { return storage_[idx]; } - uint32_t* GetRawStorage() { return storage_; } - const uint32_t* GetRawStorage() const { return storage_; } + static void* operator new(size_t size, ArenaAllocator* arena) { + return arena->Alloc(sizeof(ArenaBitVector), ArenaAllocator::kAllocGrowableBitMap); + } + static void operator delete(void* p) {} // Nop. private: - ArenaAllocator* const arena_; - const bool expandable_; // expand bitmap if we run out? - const OatBitMapKind kind_; // for memory use tuning. - uint32_t storage_size_; // current size, in 32-bit words. - uint32_t* storage_; + const OatBitMapKind kind_; // for memory use tuning. TODO: currently unused. }; diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h index 97a682f2aa..56facfd889 100644 --- a/compiler/dex/compiler_enums.h +++ b/compiler/dex/compiler_enums.h @@ -44,6 +44,8 @@ enum SpecialTargetRegister { kRet0, kRet1, kInvokeTgt, + kHiddenArg, + kHiddenFpArg, kCount }; @@ -55,6 +57,7 @@ enum RegLocationType { }; enum BBType { + kNullBlock, kEntryBlock, kDalvikByteCode, kExitBlock, @@ -180,6 +183,8 @@ enum OpKind { kOpBic, kOpCmn, kOpTst, + kOpRev, + kOpRevsh, kOpBkpt, kOpBlx, kOpPush, @@ -412,6 +417,27 @@ enum OatBitMapKind { std::ostream& operator<<(std::ostream& os, const OatBitMapKind& kind); +// LIR fixup kinds for Arm +enum FixupKind { + kFixupNone, + kFixupLabel, // For labels we just adjust the offset. + kFixupLoad, // Mostly for imediates. + kFixupVLoad, // FP load which *may* be pc-relative. + kFixupCBxZ, // Cbz, Cbnz. + kFixupPushPop, // Not really pc relative, but changes size based on args. + kFixupCondBranch, // Conditional branch + kFixupT1Branch, // Thumb1 Unconditional branch + kFixupT2Branch, // Thumb2 Unconditional branch + kFixupBlx1, // Blx1 (start of Blx1/Blx2 pair). + kFixupBl1, // Bl1 (start of Bl1/Bl2 pair). + kFixupAdr, // Adr. + kFixupMovImmLST, // kThumb2MovImm16LST. + kFixupMovImmHST, // kThumb2MovImm16HST. + kFixupAlign4, // Align to 4-byte boundary. 
+}; + +std::ostream& operator<<(std::ostream& os, const FixupKind& kind); + } // namespace art #endif // ART_COMPILER_DEX_COMPILER_ENUMS_H_ diff --git a/compiler/dex/compiler_ir.h b/compiler/dex/compiler_ir.h index 6607562b13..fd46975b9a 100644 --- a/compiler/dex/compiler_ir.h +++ b/compiler/dex/compiler_ir.h @@ -29,6 +29,7 @@ #include "llvm/intrinsic_helper.h" #include "llvm/ir_builder.h" #include "safe_map.h" +#include "base/timing_logger.h" namespace art { @@ -68,7 +69,14 @@ struct CompilationUnit { compiler_flip_match(false), arena(pool), mir_graph(NULL), - cg(NULL) {} + cg(NULL), + timings("QuickCompiler", true, false) { + } + + void StartTimingSplit(const char* label); + void NewTimingSplit(const char* label); + void EndTiming(); + /* * Fields needed/generated by common frontend and generally used throughout * the compiler. @@ -89,15 +97,18 @@ struct CompilationUnit { CompilerBackend compiler_backend; InstructionSet instruction_set; + const InstructionSetFeatures& GetInstructionSetFeatures() { + return compiler_driver->GetInstructionSetFeatures(); + } // TODO: much of this info available elsewhere. Go to the original source? - int num_dalvik_registers; // method->registers_size. + uint16_t num_dalvik_registers; // method->registers_size. const uint16_t* insns; - int num_ins; - int num_outs; - int num_regs; // Unlike num_dalvik_registers, does not include ins. + uint16_t num_ins; + uint16_t num_outs; + uint16_t num_regs; // Unlike num_dalvik_registers, does not include ins. // TODO: may want to move this to MIRGraph. - int num_compiler_temps; + uint16_t num_compiler_temps; // If non-empty, apply optimizer/debug flags only to matching methods. std::string compiler_method_match; @@ -109,6 +120,7 @@ struct CompilationUnit { UniquePtr<MIRGraph> mir_graph; // MIR container. UniquePtr<Backend> cg; // Target-specific codegen. + base::TimingLogger timings; }; } // namespace art diff --git a/compiler/dex/dataflow_iterator-inl.h b/compiler/dex/dataflow_iterator-inl.h index 06cc505a9a..74f36ddd81 100644 --- a/compiler/dex/dataflow_iterator-inl.h +++ b/compiler/dex/dataflow_iterator-inl.h @@ -21,42 +21,63 @@ namespace art { -inline BasicBlock* DataflowIterator::NextBody(bool had_change) { +// Single forward pass over the nodes. +inline BasicBlock* DataflowIterator::ForwardSingleNext() { + BasicBlock* res = NULL; + if (idx_ < end_idx_) { + BasicBlockId bb_id = block_id_list_->Get(idx_++); + res = mir_graph_->GetBasicBlock(bb_id); + } + return res; +} + +// Repeat full forward passes over all nodes until no change occurs during a complete pass. +inline BasicBlock* DataflowIterator::ForwardRepeatNext(bool had_change) { changed_ |= had_change; BasicBlock* res = NULL; - if (reverse_) { - if (is_iterative_ && changed_ && (idx_ < 0)) { - idx_ = start_idx_; - changed_ = false; - } - if (idx_ >= 0) { - int bb_id = block_id_list_->Get(idx_--); - res = mir_graph_->GetBasicBlock(bb_id); - } - } else { - if (is_iterative_ && changed_ && (idx_ >= end_idx_)) { - idx_ = start_idx_; - changed_ = false; - } - if (idx_ < end_idx_) { - int bb_id = block_id_list_->Get(idx_++); - res = mir_graph_->GetBasicBlock(bb_id); - } + if ((idx_ >= end_idx_) && changed_) { + idx_ = start_idx_; + changed_ = false; + } + if (idx_ < end_idx_) { + BasicBlockId bb_id = block_id_list_->Get(idx_++); + res = mir_graph_->GetBasicBlock(bb_id); } return res; } -// AllNodes uses the existing GrowableArray iterator, so use different NextBody(). 
-inline BasicBlock* AllNodesIterator::NextBody(bool had_change) { +// Single reverse pass over the nodes. +inline BasicBlock* DataflowIterator::ReverseSingleNext() { + BasicBlock* res = NULL; + if (idx_ >= 0) { + BasicBlockId bb_id = block_id_list_->Get(idx_--); + res = mir_graph_->GetBasicBlock(bb_id); + } + return res; +} + +// Repeat full backwards passes over all nodes until no change occurs during a complete pass. +inline BasicBlock* DataflowIterator::ReverseRepeatNext(bool had_change) { changed_ |= had_change; BasicBlock* res = NULL; + if ((idx_ < 0) && changed_) { + idx_ = start_idx_; + changed_ = false; + } + if (idx_ >= 0) { + BasicBlockId bb_id = block_id_list_->Get(idx_--); + res = mir_graph_->GetBasicBlock(bb_id); + } + return res; +} + +// AllNodes uses the existing GrowableArray iterator, and should be considered unordered. +inline BasicBlock* AllNodesIterator::Next() { + BasicBlock* res = NULL; bool keep_looking = true; while (keep_looking) { res = all_nodes_iterator_->Next(); - if (is_iterative_ && changed_ && (res == NULL)) { - all_nodes_iterator_->Reset(); - changed_ = false; - } else if ((res == NULL) || (!res->hidden)) { + if ((res == NULL) || (!res->hidden)) { keep_looking = false; } } diff --git a/compiler/dex/dataflow_iterator.h b/compiler/dex/dataflow_iterator.h index da44ffd99c..26e36653be 100644 --- a/compiler/dex/dataflow_iterator.h +++ b/compiler/dex/dataflow_iterator.h @@ -27,124 +27,130 @@ namespace art { * interesting orders. Note that for efficiency, the visit orders have been pre-computed. * The order itself will not change during the iteration. However, for some uses, * auxiliary data associated with the basic blocks may be changed during the iteration, - * necessitating another pass over the list. - * - * To support this usage, we have is_iterative_. If false, the iteration is a one-shot - * pass through the pre-computed list using Next(). If true, the caller must tell the - * iterator whether a change has been made that necessitates another pass. Use - * Next(had_change) for this. The general idea is that the iterative_ use case means - * that the iterator will keep repeating the full basic block list until a complete pass - * is made through it with no changes. Note that calling Next(true) does not affect - * the iteration order or short-curcuit the current pass - it simply tells the iterator - * that once it has finished walking through the block list it should reset and do another - * full pass through the list. + * necessitating another pass over the list. If this behavior is required, use the + * "Repeating" variant. For the repeating variant, the caller must tell the iterator + * whether a change has been made that necessitates another pass. Note that calling Next(true) + * does not affect the iteration order or short-circuit the current pass - it simply tells + * the iterator that once it has finished walking through the block list it should reset and + * do another full pass through the list. */ class DataflowIterator { public: virtual ~DataflowIterator() {} - // Return the next BasicBlock* to visit. - BasicBlock* Next() { - DCHECK(!is_iterative_); - return NextBody(false); - } - - /* - * Return the next BasicBlock* to visit, and tell the iterator whether any change - * has occurred that requires another full pass over the block list. 
- */ - BasicBlock* Next(bool had_change) { - DCHECK(is_iterative_); - return NextBody(had_change); - } - protected: - DataflowIterator(MIRGraph* mir_graph, bool is_iterative, int start_idx, int end_idx, - bool reverse) + DataflowIterator(MIRGraph* mir_graph, int32_t start_idx, int32_t end_idx) : mir_graph_(mir_graph), - is_iterative_(is_iterative), start_idx_(start_idx), end_idx_(end_idx), - reverse_(reverse), block_id_list_(NULL), idx_(0), changed_(false) {} - virtual BasicBlock* NextBody(bool had_change) ALWAYS_INLINE; + virtual BasicBlock* ForwardSingleNext() ALWAYS_INLINE; + virtual BasicBlock* ReverseSingleNext() ALWAYS_INLINE; + virtual BasicBlock* ForwardRepeatNext(bool had_change) ALWAYS_INLINE; + virtual BasicBlock* ReverseRepeatNext(bool had_change) ALWAYS_INLINE; MIRGraph* const mir_graph_; - const bool is_iterative_; - const int start_idx_; - const int end_idx_; - const bool reverse_; - GrowableArray<int>* block_id_list_; - int idx_; + const int32_t start_idx_; + const int32_t end_idx_; + GrowableArray<BasicBlockId>* block_id_list_; + int32_t idx_; bool changed_; }; // DataflowIterator - class ReachableNodesIterator : public DataflowIterator { + class PreOrderDfsIterator : public DataflowIterator { public: - ReachableNodesIterator(MIRGraph* mir_graph, bool is_iterative) - : DataflowIterator(mir_graph, is_iterative, 0, - mir_graph->GetNumReachableBlocks(), false) { + explicit PreOrderDfsIterator(MIRGraph* mir_graph) + : DataflowIterator(mir_graph, 0, mir_graph->GetNumReachableBlocks()) { idx_ = start_idx_; block_id_list_ = mir_graph->GetDfsOrder(); } + + BasicBlock* Next() { + return ForwardSingleNext(); + } }; - class PreOrderDfsIterator : public DataflowIterator { + class RepeatingPreOrderDfsIterator : public DataflowIterator { public: - PreOrderDfsIterator(MIRGraph* mir_graph, bool is_iterative) - : DataflowIterator(mir_graph, is_iterative, 0, - mir_graph->GetNumReachableBlocks(), false) { + explicit RepeatingPreOrderDfsIterator(MIRGraph* mir_graph) + : DataflowIterator(mir_graph, 0, mir_graph->GetNumReachableBlocks()) { idx_ = start_idx_; block_id_list_ = mir_graph->GetDfsOrder(); } + + BasicBlock* Next(bool had_change) { + return ForwardRepeatNext(had_change); + } }; - class PostOrderDfsIterator : public DataflowIterator { + class RepeatingPostOrderDfsIterator : public DataflowIterator { public: - PostOrderDfsIterator(MIRGraph* mir_graph, bool is_iterative) - : DataflowIterator(mir_graph, is_iterative, 0, - mir_graph->GetNumReachableBlocks(), false) { + explicit RepeatingPostOrderDfsIterator(MIRGraph* mir_graph) + : DataflowIterator(mir_graph, 0, mir_graph->GetNumReachableBlocks()) { idx_ = start_idx_; block_id_list_ = mir_graph->GetDfsPostOrder(); } + + BasicBlock* Next(bool had_change) { + return ForwardRepeatNext(had_change); + } }; class ReversePostOrderDfsIterator : public DataflowIterator { public: - ReversePostOrderDfsIterator(MIRGraph* mir_graph, bool is_iterative) - : DataflowIterator(mir_graph, is_iterative, - mir_graph->GetNumReachableBlocks() -1, 0, true) { + explicit ReversePostOrderDfsIterator(MIRGraph* mir_graph) + : DataflowIterator(mir_graph, mir_graph->GetNumReachableBlocks() -1, 0) { + idx_ = start_idx_; + block_id_list_ = mir_graph->GetDfsPostOrder(); + } + + BasicBlock* Next() { + return ReverseSingleNext(); + } + }; + + class RepeatingReversePostOrderDfsIterator : public DataflowIterator { + public: + explicit RepeatingReversePostOrderDfsIterator(MIRGraph* mir_graph) + : DataflowIterator(mir_graph, mir_graph->GetNumReachableBlocks() -1, 0) { idx_ = 
start_idx_; block_id_list_ = mir_graph->GetDfsPostOrder(); } + + BasicBlock* Next(bool had_change) { + return ReverseRepeatNext(had_change); + } }; class PostOrderDOMIterator : public DataflowIterator { public: - PostOrderDOMIterator(MIRGraph* mir_graph, bool is_iterative) - : DataflowIterator(mir_graph, is_iterative, 0, - mir_graph->GetNumReachableBlocks(), false) { + explicit PostOrderDOMIterator(MIRGraph* mir_graph) + : DataflowIterator(mir_graph, 0, mir_graph->GetNumReachableBlocks()) { idx_ = start_idx_; block_id_list_ = mir_graph->GetDomPostOrder(); } + + BasicBlock* Next() { + return ForwardSingleNext(); + } }; class AllNodesIterator : public DataflowIterator { public: - AllNodesIterator(MIRGraph* mir_graph, bool is_iterative) - : DataflowIterator(mir_graph, is_iterative, 0, 0, false) { - all_nodes_iterator_ = - new (mir_graph->GetArena()) GrowableArray<BasicBlock*>::Iterator(mir_graph->GetBlockList()); + explicit AllNodesIterator(MIRGraph* mir_graph) + : DataflowIterator(mir_graph, 0, 0) { + all_nodes_iterator_ = new + (mir_graph->GetArena()) GrowableArray<BasicBlock*>::Iterator(mir_graph->GetBlockList()); } void Reset() { all_nodes_iterator_->Reset(); } - BasicBlock* NextBody(bool had_change) ALWAYS_INLINE; + BasicBlock* Next() ALWAYS_INLINE; private: GrowableArray<BasicBlock*>::Iterator* all_nodes_iterator_; diff --git a/compiler/dex/dex_to_dex_compiler.cc b/compiler/dex/dex_to_dex_compiler.cc index 63d8aa04f8..abafbc5830 100644 --- a/compiler/dex/dex_to_dex_compiler.cc +++ b/compiler/dex/dex_to_dex_compiler.cc @@ -24,6 +24,7 @@ #include "mirror/art_method-inl.h" #include "mirror/class-inl.h" #include "mirror/dex_cache.h" +#include "thread-inl.h" namespace art { namespace optimizer { @@ -216,8 +217,8 @@ void DexCompiler::CompileInstanceFieldAccess(Instruction* inst, uint32_t field_idx = inst->VRegC_22c(); int field_offset; bool is_volatile; - bool fast_path = driver_.ComputeInstanceFieldInfo(field_idx, &unit_, field_offset, - is_volatile, is_put); + bool fast_path = driver_.ComputeInstanceFieldInfo(field_idx, &unit_, is_put, + &field_offset, &is_volatile); if (fast_path && !is_volatile && IsUint(16, field_offset)) { VLOG(compiler) << "Quickening " << Instruction::Name(inst->Opcode()) << " to " << Instruction::Name(new_opcode) @@ -246,11 +247,13 @@ void DexCompiler::CompileInvokeVirtual(Instruction* inst, int vtable_idx; uintptr_t direct_code; uintptr_t direct_method; - bool fast_path = driver_.ComputeInvokeInfo(&unit_, dex_pc, invoke_type, - target_method, vtable_idx, - direct_code, direct_method, - false); // TODO: support devirtualization. 
+ const bool kEnableDevirtualization = false; + bool fast_path = driver_.ComputeInvokeInfo(&unit_, dex_pc, + false, kEnableDevirtualization, + &invoke_type, + &target_method, &vtable_idx, + &direct_code, &direct_method); if (fast_path && original_invoke_type == invoke_type) { if (vtable_idx >= 0 && IsUint(16, vtable_idx)) { VLOG(compiler) << "Quickening " << Instruction::Name(inst->Opcode()) diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc index fefcab9e87..2f8521f788 100644 --- a/compiler/dex/frontend.cc +++ b/compiler/dex/frontend.cc @@ -24,6 +24,7 @@ #include "runtime.h" #include "backend.h" #include "base/logging.h" +#include "base/timing_logger.h" #if defined(ART_USE_PORTABLE_COMPILER) #include "dex/portable/mir_to_gbc.h" @@ -104,8 +105,30 @@ static uint32_t kCompilerDebugFlags = 0 | // Enable debug/testing modes // (1 << kDebugVerifyBitcode) | // (1 << kDebugShowSummaryMemoryUsage) | // (1 << kDebugShowFilterStats) | + // (1 << kDebugTimings) | 0; +// TODO: Add a cumulative version of logging, and combine with dex2oat --dump-timing +void CompilationUnit::StartTimingSplit(const char* label) { + if (enable_debug & (1 << kDebugTimings)) { + timings.StartSplit(label); + } +} + +void CompilationUnit::NewTimingSplit(const char* label) { + if (enable_debug & (1 << kDebugTimings)) { + timings.NewSplit(label); + } +} + +void CompilationUnit::EndTiming() { + if (enable_debug & (1 << kDebugTimings)) { + timings.EndSplit(); + LOG(INFO) << "TIMINGS " << PrettyMethod(method_idx, *dex_file); + LOG(INFO) << Dumpable<base::TimingLogger>(timings); + } +} + static CompiledMethod* CompileMethod(CompilerDriver& compiler, const CompilerBackend compiler_backend, const DexFile::CodeItem* code_item, @@ -117,6 +140,11 @@ static CompiledMethod* CompileMethod(CompilerDriver& compiler, #endif ) { VLOG(compiler) << "Compiling " << PrettyMethod(method_idx, dex_file) << "..."; + if (code_item->insns_size_in_code_units_ >= 0x10000) { + LOG(INFO) << "Method size exceeds compiler limits: " << code_item->insns_size_in_code_units_ + << " in " << PrettyMethod(method_idx, dex_file); + return NULL; + } ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); CompilationUnit cu(&compiler.GetArenaPool()); @@ -151,7 +179,7 @@ static CompiledMethod* CompileMethod(CompilerDriver& compiler, */ if (compiler_backend == kPortable) { - // Fused long branches not currently usseful in bitcode. + // Fused long branches not currently useful in bitcode. cu.disable_opt |= (1 << kBranchFusing); } @@ -170,6 +198,7 @@ static CompiledMethod* CompileMethod(CompilerDriver& compiler, (1 << kPromoteCompilerTemps)); } + cu.StartTimingSplit("BuildMIRGraph"); cu.mir_graph.reset(new MIRGraph(&cu, &cu.arena)); /* Gathering opcode stats? 
*/ @@ -187,22 +216,28 @@ static CompiledMethod* CompileMethod(CompilerDriver& compiler, } #endif + cu.NewTimingSplit("MIROpt:CodeLayout"); + /* Do a code layout pass */ cu.mir_graph->CodeLayout(); /* Perform SSA transformation for the whole method */ + cu.NewTimingSplit("MIROpt:SSATransform"); cu.mir_graph->SSATransformation(); /* Do constant propagation */ + cu.NewTimingSplit("MIROpt:ConstantProp"); cu.mir_graph->PropagateConstants(); /* Count uses */ cu.mir_graph->MethodUseCount(); /* Perform null check elimination */ + cu.NewTimingSplit("MIROpt:NullCheckElimination"); cu.mir_graph->NullCheckElimination(); /* Combine basic blocks where possible */ + cu.NewTimingSplit("MIROpt:BBOpt"); cu.mir_graph->BasicBlockCombine(); /* Do some basic block optimizations */ @@ -245,6 +280,7 @@ static CompiledMethod* CompileMethod(CompilerDriver& compiler, cu.cg->Materialize(); + cu.NewTimingSplit("Cleanup"); result = cu.cg->GetCompiledMethod(); if (result) { @@ -265,6 +301,7 @@ static CompiledMethod* CompileMethod(CompilerDriver& compiler, << " " << PrettyMethod(method_idx, dex_file); } + cu.EndTiming(); return result; } diff --git a/compiler/dex/frontend.h b/compiler/dex/frontend.h index 6c33d109e3..43f68554b5 100644 --- a/compiler/dex/frontend.h +++ b/compiler/dex/frontend.h @@ -78,6 +78,7 @@ enum debugControlVector { kDebugVerifyBitcode, kDebugShowSummaryMemoryUsage, kDebugShowFilterStats, + kDebugTimings }; class LLVMInfo { diff --git a/compiler/dex/growable_array.h b/compiler/dex/growable_array.h index 8e2abfbaf1..639120a2ba 100644 --- a/compiler/dex/growable_array.h +++ b/compiler/dex/growable_array.h @@ -131,6 +131,11 @@ class GrowableArray { elem_list_[index]++; } + /* + * Remove an existing element from list. If there are more than one copy + * of the element, only the first one encountered will be deleted. + */ + // TODO: consider renaming this. 
void Delete(T element) { bool found = false; for (size_t i = 0; i < num_used_ - 1; i++) { @@ -150,6 +155,11 @@ class GrowableArray { size_t Size() const { return num_used_; } + void SetSize(size_t new_size) { + Resize(new_size); + num_used_ = new_size; + } + T* GetRawStorage() const { return elem_list_; } static void* operator new(size_t size, ArenaAllocator* arena) { diff --git a/compiler/dex/mir_analysis.cc b/compiler/dex/mir_analysis.cc index d7a4136a01..89af06e085 100644 --- a/compiler/dex/mir_analysis.cc +++ b/compiler/dex/mir_analysis.cc @@ -864,7 +864,7 @@ void MIRGraph::AnalyzeBlock(BasicBlock* bb, MethodStats* stats) { if (ending_bb->last_mir_insn != NULL) { uint32_t ending_flags = analysis_attributes_[ending_bb->last_mir_insn->dalvikInsn.opcode]; while ((ending_flags & AN_BRANCH) == 0) { - ending_bb = ending_bb->fall_through; + ending_bb = GetBasicBlock(ending_bb->fall_through); ending_flags = analysis_attributes_[ending_bb->last_mir_insn->dalvikInsn.opcode]; } } @@ -876,13 +876,14 @@ void MIRGraph::AnalyzeBlock(BasicBlock* bb, MethodStats* stats) { */ int loop_scale_factor = 1; // Simple for and while loops - if ((ending_bb->taken != NULL) && (ending_bb->fall_through == NULL)) { - if ((ending_bb->taken->taken == bb) || (ending_bb->taken->fall_through == bb)) { + if ((ending_bb->taken != NullBasicBlockId) && (ending_bb->fall_through == NullBasicBlockId)) { + if ((GetBasicBlock(ending_bb->taken)->taken == bb->id) || + (GetBasicBlock(ending_bb->taken)->fall_through == bb->id)) { loop_scale_factor = 25; } } // Simple do-while loop - if ((ending_bb->taken != NULL) && (ending_bb->taken == bb)) { + if ((ending_bb->taken != NullBasicBlockId) && (ending_bb->taken == bb->id)) { loop_scale_factor = 25; } @@ -922,7 +923,7 @@ void MIRGraph::AnalyzeBlock(BasicBlock* bb, MethodStats* stats) { if (tbb == ending_bb) { done = true; } else { - tbb = tbb->fall_through; + tbb = GetBasicBlock(tbb->fall_through); } } if (has_math && computational_block && (loop_scale_factor > 1)) { @@ -1032,6 +1033,14 @@ bool MIRGraph::SkipCompilation(Runtime::CompilerFilter compiler_filter) { */ if (GetNumDalvikInsns() > Runtime::Current()->GetHugeMethodThreshold()) { skip_compilation = true; + // If we're got a huge number of basic blocks, don't bother with further analysis. + if (static_cast<size_t>(num_blocks_) > (Runtime::Current()->GetHugeMethodThreshold() / 2)) { + return true; + } + } else if (GetNumDalvikInsns() > Runtime::Current()->GetLargeMethodThreshold() && + /* If it's large and contains no branches, it's likely to be machine generated initialization */ + (GetBranchCount() == 0)) { + return true; } else if (compiler_filter == Runtime::kSpeed) { // If not huge, compile. 
return false; @@ -1061,7 +1070,7 @@ bool MIRGraph::SkipCompilation(Runtime::CompilerFilter compiler_filter) { memset(&stats, 0, sizeof(stats)); ClearAllVisitedFlags(); - AllNodesIterator iter(this, false /* not iterative */); + AllNodesIterator iter(this); for (BasicBlock* bb = iter.Next(); bb != NULL; bb = iter.Next()) { AnalyzeBlock(bb, &stats); } diff --git a/compiler/dex/mir_dataflow.cc b/compiler/dex/mir_dataflow.cc index 3a73717a7b..11e19dc43f 100644 --- a/compiler/dex/mir_dataflow.cc +++ b/compiler/dex/mir_dataflow.cc @@ -1221,10 +1221,10 @@ bool MIRGraph::InvokeUsesMethodStar(MIR* mir) { uint32_t current_offset = static_cast<uint32_t>(current_offset_); bool fast_path = cu_->compiler_driver->ComputeInvokeInfo(&m_unit, current_offset, - type, target_method, - vtable_idx, - direct_code, direct_method, - false) && + false, true, + &type, &target_method, + &vtable_idx, + &direct_code, &direct_method) && !(cu_->enable_debug & (1 << kDebugSlowInvokePath)); return (((type == kDirect) || (type == kStatic)) && fast_path && ((direct_code == 0) || (direct_method == 0))); @@ -1243,7 +1243,8 @@ bool MIRGraph::CountUses(struct BasicBlock* bb) { if (mir->ssa_rep == NULL) { continue; } - uint32_t weight = std::min(16U, static_cast<uint32_t>(bb->nesting_depth)); + // Each level of nesting adds *16 to count, up to 3 levels deep. + uint32_t weight = std::min(3U, static_cast<uint32_t>(bb->nesting_depth) * 4); for (int i = 0; i < mir->ssa_rep->num_uses; i++) { int s_reg = mir->ssa_rep->uses[i]; raw_use_counts_.Increment(s_reg); @@ -1287,7 +1288,7 @@ void MIRGraph::MethodUseCount() { if (cu_->disable_opt & (1 << kPromoteRegs)) { return; } - AllNodesIterator iter(this, false /* not iterative */); + AllNodesIterator iter(this); for (BasicBlock* bb = iter.Next(); bb != NULL; bb = iter.Next()) { CountUses(bb); } @@ -1295,23 +1296,23 @@ void MIRGraph::MethodUseCount() { /* Verify if all the successor is connected with all the claimed predecessors */ bool MIRGraph::VerifyPredInfo(BasicBlock* bb) { - GrowableArray<BasicBlock*>::Iterator iter(bb->predecessors); + GrowableArray<BasicBlockId>::Iterator iter(bb->predecessors); while (true) { - BasicBlock *pred_bb = iter.Next(); + BasicBlock *pred_bb = GetBasicBlock(iter.Next()); if (!pred_bb) break; bool found = false; - if (pred_bb->taken == bb) { + if (pred_bb->taken == bb->id) { found = true; - } else if (pred_bb->fall_through == bb) { + } else if (pred_bb->fall_through == bb->id) { found = true; - } else if (pred_bb->successor_block_list.block_list_type != kNotUsed) { - GrowableArray<SuccessorBlockInfo*>::Iterator iterator(pred_bb->successor_block_list.blocks); + } else if (pred_bb->successor_block_list_type != kNotUsed) { + GrowableArray<SuccessorBlockInfo*>::Iterator iterator(pred_bb->successor_blocks); while (true) { SuccessorBlockInfo *successor_block_info = iterator.Next(); if (successor_block_info == NULL) break; - BasicBlock *succ_bb = successor_block_info->block; - if (succ_bb == bb) { + BasicBlockId succ_bb = successor_block_info->block; + if (succ_bb == bb->id) { found = true; break; } @@ -1331,7 +1332,7 @@ bool MIRGraph::VerifyPredInfo(BasicBlock* bb) { void MIRGraph::VerifyDataflow() { /* Verify if all blocks are connected as claimed */ - AllNodesIterator iter(this, false /* not iterative */); + AllNodesIterator iter(this); for (BasicBlock* bb = iter.Next(); bb != NULL; bb = iter.Next()) { VerifyPredInfo(bb); } diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc index a12bf39e64..cf758fc5da 100644 --- a/compiler/dex/mir_graph.cc +++ 
b/compiler/dex/mir_graph.cc @@ -96,9 +96,9 @@ MIRGraph::MIRGraph(CompilationUnit* cu, ArenaAllocator* arena) try_block_addr_(NULL), entry_block_(NULL), exit_block_(NULL), - cur_block_(NULL), num_blocks_(0), current_code_item_(NULL), + dex_pc_to_block_map_(arena, 0, kGrowableArrayMisc), current_method_(kInvalidEntry), current_offset_(kInvalidEntry), def_count_(0), @@ -108,7 +108,9 @@ MIRGraph::MIRGraph(CompilationUnit* cu, ArenaAllocator* arena) attributes_(METHOD_IS_LEAF), // Start with leaf assumption, change on encountering invoke. checkstats_(NULL), special_case_(kNoHandler), - arena_(arena) { + arena_(arena), + backward_branches_(0), + forward_branches_(0) { try_block_addr_ = new (arena_) ArenaBitVector(arena_, 0, true /* expandable */); } @@ -128,11 +130,14 @@ int MIRGraph::ParseInsn(const uint16_t* code_ptr, DecodedInstruction* decoded_in /* Split an existing block from the specified code offset into two */ -BasicBlock* MIRGraph::SplitBlock(unsigned int code_offset, +BasicBlock* MIRGraph::SplitBlock(DexOffset code_offset, BasicBlock* orig_block, BasicBlock** immed_pred_block_p) { + DCHECK_GT(code_offset, orig_block->start_offset); MIR* insn = orig_block->first_mir_insn; + MIR* prev = NULL; while (insn) { if (insn->offset == code_offset) break; + prev = insn; insn = insn->next; } if (insn == NULL) { @@ -150,43 +155,46 @@ BasicBlock* MIRGraph::SplitBlock(unsigned int code_offset, orig_block->terminated_by_return = false; /* Add it to the quick lookup cache */ - block_map_.Put(bottom_block->start_offset, bottom_block); + dex_pc_to_block_map_.Put(bottom_block->start_offset, bottom_block->id); /* Handle the taken path */ bottom_block->taken = orig_block->taken; - if (bottom_block->taken) { - orig_block->taken = NULL; - bottom_block->taken->predecessors->Delete(orig_block); - bottom_block->taken->predecessors->Insert(bottom_block); + if (bottom_block->taken != NullBasicBlockId) { + orig_block->taken = NullBasicBlockId; + BasicBlock* bb_taken = GetBasicBlock(bottom_block->taken); + bb_taken->predecessors->Delete(orig_block->id); + bb_taken->predecessors->Insert(bottom_block->id); } /* Handle the fallthrough path */ bottom_block->fall_through = orig_block->fall_through; - orig_block->fall_through = bottom_block; - bottom_block->predecessors->Insert(orig_block); - if (bottom_block->fall_through) { - bottom_block->fall_through->predecessors->Delete(orig_block); - bottom_block->fall_through->predecessors->Insert(bottom_block); + orig_block->fall_through = bottom_block->id; + bottom_block->predecessors->Insert(orig_block->id); + if (bottom_block->fall_through != NullBasicBlockId) { + BasicBlock* bb_fall_through = GetBasicBlock(bottom_block->fall_through); + bb_fall_through->predecessors->Delete(orig_block->id); + bb_fall_through->predecessors->Insert(bottom_block->id); } /* Handle the successor list */ - if (orig_block->successor_block_list.block_list_type != kNotUsed) { - bottom_block->successor_block_list = orig_block->successor_block_list; - orig_block->successor_block_list.block_list_type = kNotUsed; - GrowableArray<SuccessorBlockInfo*>::Iterator iterator(bottom_block->successor_block_list.blocks); + if (orig_block->successor_block_list_type != kNotUsed) { + bottom_block->successor_block_list_type = orig_block->successor_block_list_type; + bottom_block->successor_blocks = orig_block->successor_blocks; + orig_block->successor_block_list_type = kNotUsed; + orig_block->successor_blocks = NULL; + GrowableArray<SuccessorBlockInfo*>::Iterator iterator(bottom_block->successor_blocks); while (true) 
{ SuccessorBlockInfo *successor_block_info = iterator.Next(); if (successor_block_info == NULL) break; - BasicBlock *bb = successor_block_info->block; - bb->predecessors->Delete(orig_block); - bb->predecessors->Insert(bottom_block); + BasicBlock *bb = GetBasicBlock(successor_block_info->block); + bb->predecessors->Delete(orig_block->id); + bb->predecessors->Insert(bottom_block->id); } } - orig_block->last_mir_insn = insn->prev; + orig_block->last_mir_insn = prev; + prev->next = NULL; - insn->prev->next = NULL; - insn->prev = NULL; /* * Update the immediate predecessor block pointer so that outgoing edges * can be applied to the proper block. @@ -195,6 +203,23 @@ BasicBlock* MIRGraph::SplitBlock(unsigned int code_offset, DCHECK_EQ(*immed_pred_block_p, orig_block); *immed_pred_block_p = bottom_block; } + + // Associate dex instructions in the bottom block with the new container. + MIR* p = bottom_block->first_mir_insn; + while (p != NULL) { + int opcode = p->dalvikInsn.opcode; + /* + * Some messiness here to ensure that we only enter real opcodes and only the + * first half of a potentially throwing instruction that has been split into + * CHECK and work portions. The 2nd half of a split operation will have a non-null + * throw_insn pointer that refers to the 1st half. + */ + if ((opcode == kMirOpCheck) || (!IsPseudoMirOp(opcode) && (p->meta.throw_insn == NULL))) { + dex_pc_to_block_map_.Put(p->offset, bottom_block->id); + } + p = (p == bottom_block->last_mir_insn) ? NULL : p->next; + } + return bottom_block; } @@ -206,45 +231,43 @@ BasicBlock* MIRGraph::SplitBlock(unsigned int code_offset, * (by the caller) * Utilizes a map for fast lookup of the typical cases. */ -BasicBlock* MIRGraph::FindBlock(unsigned int code_offset, bool split, bool create, +BasicBlock* MIRGraph::FindBlock(DexOffset code_offset, bool split, bool create, BasicBlock** immed_pred_block_p) { - BasicBlock* bb; - unsigned int i; - SafeMap<unsigned int, BasicBlock*>::iterator it; + if (code_offset >= cu_->code_item->insns_size_in_code_units_) { + return NULL; + } + + int block_id = dex_pc_to_block_map_.Get(code_offset); + BasicBlock* bb = (block_id == 0) ? NULL : block_list_.Get(block_id); - it = block_map_.find(code_offset); - if (it != block_map_.end()) { - return it->second; - } else if (!create) { + if ((bb != NULL) && (bb->start_offset == code_offset)) { + // Does this containing block start with the desired instruction? + return bb; + } + + // No direct hit. + if (!create) { return NULL; } - if (split) { - for (i = 0; i < block_list_.Size(); i++) { - bb = block_list_.Get(i); - if (bb->block_type != kDalvikByteCode) continue; - /* Check if a branch jumps into the middle of an existing block */ - if ((code_offset > bb->start_offset) && (bb->last_mir_insn != NULL) && - (code_offset <= bb->last_mir_insn->offset)) { - BasicBlock *new_bb = SplitBlock(code_offset, bb, bb == *immed_pred_block_p ? - immed_pred_block_p : NULL); - return new_bb; - } - } + if (bb != NULL) { + // The target exists somewhere in an existing block. + return SplitBlock(code_offset, bb, bb == *immed_pred_block_p ? immed_pred_block_p : NULL); } - /* Create a new one */ + // Create a new block. 
bb = NewMemBB(kDalvikByteCode, num_blocks_++); block_list_.Insert(bb); bb->start_offset = code_offset; - block_map_.Put(bb->start_offset, bb); + dex_pc_to_block_map_.Put(bb->start_offset, bb->id); return bb; } + /* Identify code range in try blocks and set up the empty catch blocks */ void MIRGraph::ProcessTryCatchBlocks() { int tries_size = current_code_item_->tries_size_; - int offset; + DexOffset offset; if (tries_size == 0) { return; @@ -253,8 +276,8 @@ void MIRGraph::ProcessTryCatchBlocks() { for (int i = 0; i < tries_size; i++) { const DexFile::TryItem* pTry = DexFile::GetTryItems(*current_code_item_, i); - int start_offset = pTry->start_addr_; - int end_offset = start_offset + pTry->insn_count_; + DexOffset start_offset = pTry->start_addr_; + DexOffset end_offset = start_offset + pTry->insn_count_; for (offset = start_offset; offset < end_offset; offset++) { try_block_addr_->SetBit(offset); } @@ -275,10 +298,10 @@ void MIRGraph::ProcessTryCatchBlocks() { } /* Process instructions with the kBranch flag */ -BasicBlock* MIRGraph::ProcessCanBranch(BasicBlock* cur_block, MIR* insn, int cur_offset, int width, - int flags, const uint16_t* code_ptr, +BasicBlock* MIRGraph::ProcessCanBranch(BasicBlock* cur_block, MIR* insn, DexOffset cur_offset, + int width, int flags, const uint16_t* code_ptr, const uint16_t* code_end) { - int target = cur_offset; + DexOffset target = cur_offset; switch (insn->dalvikInsn.opcode) { case Instruction::GOTO: case Instruction::GOTO_16: @@ -306,10 +329,11 @@ BasicBlock* MIRGraph::ProcessCanBranch(BasicBlock* cur_block, MIR* insn, int cur default: LOG(FATAL) << "Unexpected opcode(" << insn->dalvikInsn.opcode << ") with kBranch set"; } + CountBranch(target); BasicBlock *taken_block = FindBlock(target, /* split */ true, /* create */ true, /* immed_pred_block_p */ &cur_block); - cur_block->taken = taken_block; - taken_block->predecessors->Insert(cur_block); + cur_block->taken = taken_block->id; + taken_block->predecessors->Insert(cur_block->id); /* Always terminate the current block for conditional branches */ if (flags & Instruction::kContinue) { @@ -331,8 +355,8 @@ BasicBlock* MIRGraph::ProcessCanBranch(BasicBlock* cur_block, MIR* insn, int cur true, /* immed_pred_block_p */ &cur_block); - cur_block->fall_through = fallthrough_block; - fallthrough_block->predecessors->Insert(cur_block); + cur_block->fall_through = fallthrough_block->id; + fallthrough_block->predecessors->Insert(cur_block->id); } else if (code_ptr < code_end) { FindBlock(cur_offset + width, /* split */ false, /* create */ true, /* immed_pred_block_p */ NULL); @@ -341,7 +365,7 @@ BasicBlock* MIRGraph::ProcessCanBranch(BasicBlock* cur_block, MIR* insn, int cur } /* Process instructions with the kSwitch flag */ -void MIRGraph::ProcessCanSwitch(BasicBlock* cur_block, MIR* insn, int cur_offset, int width, +void MIRGraph::ProcessCanSwitch(BasicBlock* cur_block, MIR* insn, DexOffset cur_offset, int width, int flags) { const uint16_t* switch_data = reinterpret_cast<const uint16_t*>(GetCurrentInsns() + cur_offset + insn->dalvikInsn.vB); @@ -385,14 +409,13 @@ void MIRGraph::ProcessCanSwitch(BasicBlock* cur_block, MIR* insn, int cur_offset first_key = 0; // To make the compiler happy } - if (cur_block->successor_block_list.block_list_type != kNotUsed) { + if (cur_block->successor_block_list_type != kNotUsed) { LOG(FATAL) << "Successor block list already in use: " - << static_cast<int>(cur_block->successor_block_list.block_list_type); + << static_cast<int>(cur_block->successor_block_list_type); } - 
cur_block->successor_block_list.block_list_type = - (insn->dalvikInsn.opcode == Instruction::PACKED_SWITCH) ? - kPackedSwitch : kSparseSwitch; - cur_block->successor_block_list.blocks = + cur_block->successor_block_list_type = + (insn->dalvikInsn.opcode == Instruction::PACKED_SWITCH) ? kPackedSwitch : kSparseSwitch; + cur_block->successor_blocks = new (arena_) GrowableArray<SuccessorBlockInfo*>(arena_, size, kGrowableArraySuccessorBlocks); for (i = 0; i < size; i++) { @@ -401,24 +424,24 @@ void MIRGraph::ProcessCanSwitch(BasicBlock* cur_block, MIR* insn, int cur_offset SuccessorBlockInfo *successor_block_info = static_cast<SuccessorBlockInfo*>(arena_->Alloc(sizeof(SuccessorBlockInfo), ArenaAllocator::kAllocSuccessor)); - successor_block_info->block = case_block; + successor_block_info->block = case_block->id; successor_block_info->key = (insn->dalvikInsn.opcode == Instruction::PACKED_SWITCH) ? first_key + i : keyTable[i]; - cur_block->successor_block_list.blocks->Insert(successor_block_info); - case_block->predecessors->Insert(cur_block); + cur_block->successor_blocks->Insert(successor_block_info); + case_block->predecessors->Insert(cur_block->id); } /* Fall-through case */ BasicBlock* fallthrough_block = FindBlock(cur_offset + width, /* split */ false, /* create */ true, /* immed_pred_block_p */ NULL); - cur_block->fall_through = fallthrough_block; - fallthrough_block->predecessors->Insert(cur_block); + cur_block->fall_through = fallthrough_block->id; + fallthrough_block->predecessors->Insert(cur_block->id); } /* Process instructions with the kThrow flag */ -BasicBlock* MIRGraph::ProcessCanThrow(BasicBlock* cur_block, MIR* insn, int cur_offset, int width, - int flags, ArenaBitVector* try_block_addr, +BasicBlock* MIRGraph::ProcessCanThrow(BasicBlock* cur_block, MIR* insn, DexOffset cur_offset, + int width, int flags, ArenaBitVector* try_block_addr, const uint16_t* code_ptr, const uint16_t* code_end) { bool in_try_block = try_block_addr->IsBitSet(cur_offset); @@ -426,14 +449,14 @@ BasicBlock* MIRGraph::ProcessCanThrow(BasicBlock* cur_block, MIR* insn, int cur_ if (in_try_block) { CatchHandlerIterator iterator(*current_code_item_, cur_offset); - if (cur_block->successor_block_list.block_list_type != kNotUsed) { + if (cur_block->successor_block_list_type != kNotUsed) { LOG(INFO) << PrettyMethod(cu_->method_idx, *cu_->dex_file); LOG(FATAL) << "Successor block list already in use: " - << static_cast<int>(cur_block->successor_block_list.block_list_type); + << static_cast<int>(cur_block->successor_block_list_type); } - cur_block->successor_block_list.block_list_type = kCatch; - cur_block->successor_block_list.blocks = + cur_block->successor_block_list_type = kCatch; + cur_block->successor_blocks = new (arena_) GrowableArray<SuccessorBlockInfo*>(arena_, 2, kGrowableArraySuccessorBlocks); for (; iterator.HasNext(); iterator.Next()) { @@ -445,17 +468,17 @@ BasicBlock* MIRGraph::ProcessCanThrow(BasicBlock* cur_block, MIR* insn, int cur_ } SuccessorBlockInfo *successor_block_info = reinterpret_cast<SuccessorBlockInfo*> (arena_->Alloc(sizeof(SuccessorBlockInfo), ArenaAllocator::kAllocSuccessor)); - successor_block_info->block = catch_block; + successor_block_info->block = catch_block->id; successor_block_info->key = iterator.GetHandlerTypeIndex(); - cur_block->successor_block_list.blocks->Insert(successor_block_info); - catch_block->predecessors->Insert(cur_block); + cur_block->successor_blocks->Insert(successor_block_info); + catch_block->predecessors->Insert(cur_block->id); } } else { BasicBlock 
*eh_block = NewMemBB(kExceptionHandling, num_blocks_++); - cur_block->taken = eh_block; + cur_block->taken = eh_block->id; block_list_.Insert(eh_block); eh_block->start_offset = cur_offset; - eh_block->predecessors->Insert(cur_block); + eh_block->predecessors->Insert(cur_block->id); } if (insn->dalvikInsn.opcode == Instruction::THROW) { @@ -484,12 +507,15 @@ BasicBlock* MIRGraph::ProcessCanThrow(BasicBlock* cur_block, MIR* insn, int cur_ * pseudo exception edge MIR. Note also that this new block is * not automatically terminated after the work portion, and may * contain following instructions. + * + * Note also that the dex_pc_to_block_map_ entry for the potentially + * throwing instruction will refer to the original basic block. */ BasicBlock *new_block = NewMemBB(kDalvikByteCode, num_blocks_++); block_list_.Insert(new_block); new_block->start_offset = insn->offset; - cur_block->fall_through = new_block; - new_block->predecessors->Insert(cur_block); + cur_block->fall_through = new_block->id; + new_block->predecessors->Insert(cur_block->id); MIR* new_insn = static_cast<MIR*>(arena_->Alloc(sizeof(MIR), ArenaAllocator::kAllocMIR)); *new_insn = *insn; insn->dalvikInsn.opcode = @@ -517,7 +543,10 @@ void MIRGraph::InlineMethod(const DexFile::CodeItem* code_item, uint32_t access_ current_code_item_->insns_ + current_code_item_->insns_size_in_code_units_; // TODO: need to rework expansion of block list & try_block_addr when inlining activated. + // TUNING: use better estimate of basic blocks for following resize. block_list_.Resize(block_list_.Size() + current_code_item_->insns_size_in_code_units_); + dex_pc_to_block_map_.SetSize(dex_pc_to_block_map_.Size() + current_code_item_->insns_size_in_code_units_); + // TODO: replace with explicit resize routine. Using automatic extension side effect for now. try_block_addr_->SetBit(current_code_item_->insns_size_in_code_units_); try_block_addr_->ClearBit(current_code_item_->insns_size_in_code_units_); @@ -527,9 +556,14 @@ void MIRGraph::InlineMethod(const DexFile::CodeItem* code_item, uint32_t access_ DCHECK(entry_block_ == NULL); DCHECK(exit_block_ == NULL); DCHECK_EQ(num_blocks_, 0); + // Use id 0 to represent a null block. + BasicBlock* null_block = NewMemBB(kNullBlock, num_blocks_++); + DCHECK_EQ(null_block->id, NullBasicBlockId); + null_block->hidden = true; + block_list_.Insert(null_block); entry_block_ = NewMemBB(kEntryBlock, num_blocks_++); - exit_block_ = NewMemBB(kExitBlock, num_blocks_++); block_list_.Insert(entry_block_); + exit_block_ = NewMemBB(kExitBlock, num_blocks_++); block_list_.Insert(exit_block_); // TODO: deprecate all "cu->" fields; move what's left to wherever CompilationUnit is allocated. cu_->dex_file = &dex_file; @@ -554,15 +588,12 @@ void MIRGraph::InlineMethod(const DexFile::CodeItem* code_item, uint32_t access_ /* Current block to record parsed instructions */ BasicBlock *cur_block = NewMemBB(kDalvikByteCode, num_blocks_++); - DCHECK_EQ(current_offset_, 0); + DCHECK_EQ(current_offset_, 0U); cur_block->start_offset = current_offset_; block_list_.Insert(cur_block); - /* Add first block to the fast lookup cache */ -// FIXME: block map needs association with offset/method pair rather than just offset - block_map_.Put(cur_block->start_offset, cur_block); -// FIXME: this needs to insert at the insert point rather than entry block. - entry_block_->fall_through = cur_block; - cur_block->predecessors->Insert(entry_block_); + // TODO: for inlining support, insert at the insert point rather than entry block. 
+ entry_block_->fall_through = cur_block->id; + cur_block->predecessors->Insert(entry_block_->id); /* Identify code range in try blocks and set up the empty catch blocks */ ProcessTryCatchBlocks(); @@ -586,7 +617,6 @@ void MIRGraph::InlineMethod(const DexFile::CodeItem* code_item, uint32_t access_ opcode_count_[static_cast<int>(opcode)]++; } - /* Possible simple method? */ if (live_pattern) { live_pattern = false; @@ -628,8 +658,8 @@ void MIRGraph::InlineMethod(const DexFile::CodeItem* code_item, uint32_t access_ // It is a simple nop - treat normally. AppendMIR(cur_block, insn); } else { - DCHECK(cur_block->fall_through == NULL); - DCHECK(cur_block->taken == NULL); + DCHECK(cur_block->fall_through == NullBasicBlockId); + DCHECK(cur_block->taken == NullBasicBlockId); // Unreachable instruction, mark for no continuation. flags &= ~Instruction::kContinue; } @@ -637,6 +667,9 @@ void MIRGraph::InlineMethod(const DexFile::CodeItem* code_item, uint32_t access_ AppendMIR(cur_block, insn); } + // Associate the starting dex_pc for this opcode with its containing basic block. + dex_pc_to_block_map_.Put(insn->offset, cur_block->id); + code_ptr += width; if (flags & Instruction::kBranch) { @@ -644,8 +677,8 @@ void MIRGraph::InlineMethod(const DexFile::CodeItem* code_item, uint32_t access_ width, flags, code_ptr, code_end); } else if (flags & Instruction::kReturn) { cur_block->terminated_by_return = true; - cur_block->fall_through = exit_block_; - exit_block_->predecessors->Insert(cur_block); + cur_block->fall_through = exit_block_->id; + exit_block_->predecessors->Insert(cur_block->id); /* * Terminate the current block if there are instructions * afterwards. @@ -674,13 +707,13 @@ void MIRGraph::InlineMethod(const DexFile::CodeItem* code_item, uint32_t access_ * instruction is not an unconditional branch, connect them through * the fall-through link. 
*/ - DCHECK(cur_block->fall_through == NULL || - cur_block->fall_through == next_block || - cur_block->fall_through == exit_block_); + DCHECK(cur_block->fall_through == NullBasicBlockId || + GetBasicBlock(cur_block->fall_through) == next_block || + GetBasicBlock(cur_block->fall_through) == exit_block_); - if ((cur_block->fall_through == NULL) && (flags & Instruction::kContinue)) { - cur_block->fall_through = next_block; - next_block->predecessors->Insert(cur_block); + if ((cur_block->fall_through == NullBasicBlockId) && (flags & Instruction::kContinue)) { + cur_block->fall_through = next_block->id; + next_block->predecessors->Insert(cur_block->id); } cur_block = next_block; } @@ -712,7 +745,7 @@ void MIRGraph::DumpCFG(const char* dir_prefix, bool all_blocks) { std::string fname(PrettyMethod(cu_->method_idx, *cu_->dex_file)); ReplaceSpecialChars(fname); fname = StringPrintf("%s%s%x.dot", dir_prefix, fname.c_str(), - GetEntryBlock()->fall_through->start_offset); + GetBasicBlock(GetEntryBlock()->fall_through)->start_offset); file = fopen(fname.c_str(), "w"); if (file == NULL) { return; @@ -759,31 +792,30 @@ void MIRGraph::DumpCFG(const char* dir_prefix, bool all_blocks) { char block_name1[BLOCK_NAME_LEN], block_name2[BLOCK_NAME_LEN]; - if (bb->taken) { + if (bb->taken != NullBasicBlockId) { GetBlockName(bb, block_name1); - GetBlockName(bb->taken, block_name2); + GetBlockName(GetBasicBlock(bb->taken), block_name2); fprintf(file, " %s:s -> %s:n [style=dotted]\n", block_name1, block_name2); } - if (bb->fall_through) { + if (bb->fall_through != NullBasicBlockId) { GetBlockName(bb, block_name1); - GetBlockName(bb->fall_through, block_name2); + GetBlockName(GetBasicBlock(bb->fall_through), block_name2); fprintf(file, " %s:s -> %s:n\n", block_name1, block_name2); } - if (bb->successor_block_list.block_list_type != kNotUsed) { + if (bb->successor_block_list_type != kNotUsed) { fprintf(file, " succ%04x_%d [shape=%s,label = \"{ \\\n", bb->start_offset, bb->id, - (bb->successor_block_list.block_list_type == kCatch) ? - "Mrecord" : "record"); - GrowableArray<SuccessorBlockInfo*>::Iterator iterator(bb->successor_block_list.blocks); + (bb->successor_block_list_type == kCatch) ? 
"Mrecord" : "record"); + GrowableArray<SuccessorBlockInfo*>::Iterator iterator(bb->successor_blocks); SuccessorBlockInfo *successor_block_info = iterator.Next(); int succ_id = 0; while (true) { if (successor_block_info == NULL) break; - BasicBlock *dest_block = successor_block_info->block; + BasicBlock *dest_block = GetBasicBlock(successor_block_info->block); SuccessorBlockInfo *next_successor_block_info = iterator.Next(); fprintf(file, " {<f%d> %04x: %04x\\l}%s\\\n", @@ -800,16 +832,16 @@ void MIRGraph::DumpCFG(const char* dir_prefix, bool all_blocks) { fprintf(file, " %s:s -> succ%04x_%d:n [style=dashed]\n", block_name1, bb->start_offset, bb->id); - if (bb->successor_block_list.block_list_type == kPackedSwitch || - bb->successor_block_list.block_list_type == kSparseSwitch) { - GrowableArray<SuccessorBlockInfo*>::Iterator iter(bb->successor_block_list.blocks); + if (bb->successor_block_list_type == kPackedSwitch || + bb->successor_block_list_type == kSparseSwitch) { + GrowableArray<SuccessorBlockInfo*>::Iterator iter(bb->successor_blocks); succ_id = 0; while (true) { SuccessorBlockInfo *successor_block_info = iter.Next(); if (successor_block_info == NULL) break; - BasicBlock *dest_block = successor_block_info->block; + BasicBlock* dest_block = GetBasicBlock(successor_block_info->block); GetBlockName(dest_block, block_name2); fprintf(file, " succ%04x_%d:f%d:e -> %s:n\n", bb->start_offset, @@ -825,7 +857,7 @@ void MIRGraph::DumpCFG(const char* dir_prefix, bool all_blocks) { fprintf(file, " cfg%s [label=\"%s\", shape=none];\n", block_name1, block_name1); if (bb->i_dom) { - GetBlockName(bb->i_dom, block_name2); + GetBlockName(GetBasicBlock(bb->i_dom), block_name2); fprintf(file, " cfg%s:s -> cfg%s:n\n\n", block_name2, block_name1); } } @@ -839,10 +871,9 @@ void MIRGraph::AppendMIR(BasicBlock* bb, MIR* mir) { if (bb->first_mir_insn == NULL) { DCHECK(bb->last_mir_insn == NULL); bb->last_mir_insn = bb->first_mir_insn = mir; - mir->prev = mir->next = NULL; + mir->next = NULL; } else { bb->last_mir_insn->next = mir; - mir->prev = bb->last_mir_insn; mir->next = NULL; bb->last_mir_insn = mir; } @@ -853,25 +884,19 @@ void MIRGraph::PrependMIR(BasicBlock* bb, MIR* mir) { if (bb->first_mir_insn == NULL) { DCHECK(bb->last_mir_insn == NULL); bb->last_mir_insn = bb->first_mir_insn = mir; - mir->prev = mir->next = NULL; + mir->next = NULL; } else { - bb->first_mir_insn->prev = mir; mir->next = bb->first_mir_insn; - mir->prev = NULL; bb->first_mir_insn = mir; } } /* Insert a MIR instruction after the specified MIR */ void MIRGraph::InsertMIRAfter(BasicBlock* bb, MIR* current_mir, MIR* new_mir) { - new_mir->prev = current_mir; new_mir->next = current_mir->next; current_mir->next = new_mir; - if (new_mir->next) { - /* Is not the last MIR in the block */ - new_mir->next->prev = new_mir; - } else { + if (bb->last_mir_insn == current_mir) { /* Is the last MIR in the block */ bb->last_mir_insn = new_mir; } @@ -901,8 +926,9 @@ char* MIRGraph::GetDalvikDisassembly(const MIR* mir) { opcode = insn.opcode; } else if (opcode == kMirOpNop) { str.append("["); - insn.opcode = mir->meta.original_opcode; - opcode = mir->meta.original_opcode; + // Recover original opcode. 
+ insn.opcode = Instruction::At(current_code_item_->insns_ + mir->offset)->Opcode(); + opcode = insn.opcode; nop = true; } @@ -915,7 +941,7 @@ char* MIRGraph::GetDalvikDisassembly(const MIR* mir) { } if (opcode == kMirOpPhi) { - int* incoming = reinterpret_cast<int*>(insn.vB); + BasicBlockId* incoming = mir->meta.phi_incoming; str.append(StringPrintf(" %s = (%s", GetSSANameWithConst(ssa_rep->defs[0], true).c_str(), GetSSANameWithConst(ssa_rep->uses[0], true).c_str())); @@ -1065,7 +1091,7 @@ void MIRGraph::GetBlockName(BasicBlock* bb, char* name) { } const char* MIRGraph::GetShortyFromTargetIdx(int target_idx) { - // FIXME: use current code unit for inline support. + // TODO: for inlining support, use current code unit. const DexFile::MethodId& method_id = cu_->dex_file->GetMethodId(target_idx); return cu_->dex_file->GetShorty(method_id.proto_idx_); } @@ -1095,13 +1121,13 @@ void MIRGraph::DumpMIRGraph() { bb->start_offset, bb->last_mir_insn ? bb->last_mir_insn->offset : bb->start_offset, bb->last_mir_insn ? "" : " empty"); - if (bb->taken) { - LOG(INFO) << " Taken branch: block " << bb->taken->id - << "(0x" << std::hex << bb->taken->start_offset << ")"; + if (bb->taken != NullBasicBlockId) { + LOG(INFO) << " Taken branch: block " << bb->taken + << "(0x" << std::hex << GetBasicBlock(bb->taken)->start_offset << ")"; } - if (bb->fall_through) { - LOG(INFO) << " Fallthrough : block " << bb->fall_through->id - << " (0x" << std::hex << bb->fall_through->start_offset << ")"; + if (bb->fall_through != NullBasicBlockId) { + LOG(INFO) << " Fallthrough : block " << bb->fall_through + << " (0x" << std::hex << GetBasicBlock(bb->fall_through)->start_offset << ")"; } } } @@ -1121,7 +1147,6 @@ CallInfo* MIRGraph::NewMemCallInfo(BasicBlock* bb, MIR* mir, InvokeType type, info->result.location = kLocInvalid; } else { info->result = GetRawDest(move_result_mir); - move_result_mir->meta.original_opcode = move_result_mir->dalvikInsn.opcode; move_result_mir->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpNop); } info->num_arg_words = mir->ssa_rep->num_uses; @@ -1145,10 +1170,10 @@ BasicBlock* MIRGraph::NewMemBB(BBType block_type, int block_id) { bb->block_type = block_type; bb->id = block_id; // TUNING: better estimate of the exit block predecessors? - bb->predecessors = new (arena_) GrowableArray<BasicBlock*>(arena_, + bb->predecessors = new (arena_) GrowableArray<BasicBlockId>(arena_, (block_type == kExitBlock) ? 2048 : 2, kGrowableArrayPredecessors); - bb->successor_block_list.block_list_type = kNotUsed; + bb->successor_block_list_type = kNotUsed; block_id_map_.Put(block_id, block_id); return bb; } diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h index 6f8bd85630..a69dde0da3 100644 --- a/compiler/dex/mir_graph.h +++ b/compiler/dex/mir_graph.h @@ -149,7 +149,7 @@ enum DataFlowAttributePos { #define DF_C_IS_REG (DF_UC) #define DF_IS_GETTER_OR_SETTER (DF_IS_GETTER | DF_IS_SETTER) #define DF_USES_FP (DF_FP_A | DF_FP_B | DF_FP_C) - +#define DF_NULL_TRANSFER (DF_NULL_TRANSFER_0 | DF_NULL_TRANSFER_N) enum OatMethodAttributes { kIsLeaf, // Method is leaf. kHasLoop, // Method contains simple loop. @@ -183,6 +183,9 @@ enum OatMethodAttributes { #define BLOCK_NAME_LEN 80 +typedef uint16_t BasicBlockId; +static const BasicBlockId NullBasicBlockId = 0; + /* * In general, vreg/sreg describe Dalvik registers that originated with dx. 
However, * it is useful to have compiler-generated temporary registers and have them treated @@ -190,15 +193,15 @@ enum OatMethodAttributes { * name of compiler-introduced temporaries. */ struct CompilerTemp { - int s_reg; + int32_t s_reg; }; // When debug option enabled, records effectiveness of null and range check elimination. struct Checkstats { - int null_checks; - int null_checks_eliminated; - int range_checks; - int range_checks_eliminated; + int32_t null_checks; + int32_t null_checks_eliminated; + int32_t range_checks; + int32_t range_checks_eliminated; }; // Dataflow attributes of a basic block. @@ -207,7 +210,7 @@ struct BasicBlockDataFlow { ArenaBitVector* def_v; ArenaBitVector* live_in_v; ArenaBitVector* phi_v; - int* vreg_to_ssa_map; + int32_t* vreg_to_ssa_map; ArenaBitVector* ending_null_check_v; }; @@ -220,11 +223,11 @@ struct BasicBlockDataFlow { * we may want to revisit in the future. */ struct SSARepresentation { - int num_uses; - int* uses; + int16_t num_uses; + int16_t num_defs; + int32_t* uses; bool* fp_use; - int num_defs; - int* defs; + int32_t* defs; bool* fp_def; }; @@ -233,51 +236,53 @@ struct SSARepresentation { * wrapper around a Dalvik byte code. */ struct MIR { + /* + * TODO: remove embedded DecodedInstruction to save space, keeping only opcode. Recover + * additional fields on as-needed basis. Question: how to support MIR Pseudo-ops; probably + * need to carry aux data pointer. + */ DecodedInstruction dalvikInsn; - uint32_t width; // NOTE: only need 16 bits for width. - unsigned int offset; - int m_unit_index; // From which method was this MIR included - MIR* prev; + uint16_t width; // Note: width can include switch table or fill array data. + NarrowDexOffset offset; // Offset of the instruction in code units. + uint16_t optimization_flags; + int16_t m_unit_index; // From which method was this MIR included MIR* next; SSARepresentation* ssa_rep; - int optimization_flags; union { + // Incoming edges for phi node. + BasicBlockId* phi_incoming; // Establish link between two halves of throwing instructions. MIR* throw_insn; - // Saved opcode for NOP'd MIRs - Instruction::Code original_opcode; } meta; }; struct SuccessorBlockInfo; struct BasicBlock { - int id; - int dfs_id; - bool visited; - bool hidden; - bool catch_entry; - bool explicit_throw; - bool conditional_branch; - bool terminated_by_return; // Block ends with a Dalvik return opcode. - bool dominates_return; // Is a member of return extended basic block. - uint16_t start_offset; + BasicBlockId id; + BasicBlockId dfs_id; + NarrowDexOffset start_offset; // Offset in code units. + BasicBlockId fall_through; + BasicBlockId taken; + BasicBlockId i_dom; // Immediate dominator. uint16_t nesting_depth; - BBType block_type; + BBType block_type:4; + BlockListType successor_block_list_type:4; + bool visited:1; + bool hidden:1; + bool catch_entry:1; + bool explicit_throw:1; + bool conditional_branch:1; + bool terminated_by_return:1; // Block ends with a Dalvik return opcode. + bool dominates_return:1; // Is a member of return extended basic block. MIR* first_mir_insn; MIR* last_mir_insn; - BasicBlock* fall_through; - BasicBlock* taken; - BasicBlock* i_dom; // Immediate dominator. BasicBlockDataFlow* data_flow_info; - GrowableArray<BasicBlock*>* predecessors; ArenaBitVector* dominators; ArenaBitVector* i_dominated; // Set nodes being immediately dominated. ArenaBitVector* dom_frontier; // Dominance frontier. - struct { // For one-to-many successors like. 
- BlockListType block_list_type; // switch and exception handling. - GrowableArray<SuccessorBlockInfo*>* blocks; - } successor_block_list; + GrowableArray<BasicBlockId>* predecessors; + GrowableArray<SuccessorBlockInfo*>* successor_blocks; }; /* @@ -285,9 +290,8 @@ struct BasicBlock { * "SuccessorBlockInfo". For catch blocks, key is type index for the exception. For swtich * blocks, key is the case value. */ -// TODO: make class with placement new. struct SuccessorBlockInfo { - BasicBlock* block; + BasicBlockId block; int key; }; @@ -296,6 +300,15 @@ struct SuccessorBlockInfo { * the type of an SSA name (and, can also be used by code generators to record where the * value is located (i.e. - physical register, frame, spill, etc.). For each SSA name (SReg) * there is a RegLocation. + * A note on SSA names: + * o SSA names for Dalvik vRegs v0..vN will be assigned 0..N. These represent the "vN_0" + * names. Negative SSA names represent special values not present in the Dalvik byte code. + * For example, SSA name -1 represents an invalid SSA name, and SSA name -2 represents the + * the Method pointer. SSA names < -2 are reserved for future use. + * o The vN_0 names for non-argument Dalvik should in practice never be used (as they would + * represent the read of an undefined local variable). The first definition of the + * underlying Dalvik vReg will result in a vN_1 name. + * * FIXME: The orig_sreg field was added as a workaround for llvm bitcode generation. With * the latest restructuring, we should be able to remove it and rely on s_reg_low throughout. */ @@ -311,9 +324,9 @@ struct RegLocation { unsigned home:1; // Does this represent the home location? uint8_t low_reg; // First physical register. uint8_t high_reg; // 2nd physical register (if wide). - int32_t s_reg_low; // SSA name for low Dalvik word. - int32_t orig_sreg; // TODO: remove after Bitcode gen complete - // and consolodate usage w/ s_reg_low. + int16_t s_reg_low; // SSA name for low Dalvik word. + int16_t orig_sreg; // TODO: remove after Bitcode gen complete + // and consolidate usage w/ s_reg_low. }; /* @@ -334,7 +347,7 @@ struct CallInfo { RegLocation target; // Target of following move_result. bool skip_this; bool is_range; - int offset; // Dalvik offset. + DexOffset offset; // Offset in code units. }; @@ -361,7 +374,7 @@ class MIRGraph { uint32_t method_idx, jobject class_loader, const DexFile& dex_file); /* Find existing block */ - BasicBlock* FindBlock(unsigned int code_offset) { + BasicBlock* FindBlock(DexOffset code_offset) { return FindBlock(code_offset, false, false, NULL); } @@ -394,7 +407,7 @@ class MIRGraph { } BasicBlock* GetBasicBlock(int block_id) const { - return block_list_.Get(block_id); + return (block_id == NullBasicBlockId) ? NULL : block_list_.Get(block_id); } size_t GetBasicBlockListCount() const { @@ -405,15 +418,15 @@ class MIRGraph { return &block_list_; } - GrowableArray<int>* GetDfsOrder() { + GrowableArray<BasicBlockId>* GetDfsOrder() { return dfs_order_; } - GrowableArray<int>* GetDfsPostOrder() { + GrowableArray<BasicBlockId>* GetDfsPostOrder() { return dfs_post_order_; } - GrowableArray<int>* GetDomPostOrder() { + GrowableArray<BasicBlockId>* GetDomPostOrder() { return dom_post_order_traversal_; } @@ -477,6 +490,12 @@ class MIRGraph { } void SetNumSSARegs(int new_num) { + /* + * TODO: It's theoretically possible to exceed 32767, though any cases which did + * would be filtered out with current settings. 
When orig_sreg field is removed + * from RegLocation, expand s_reg_low to handle all possible cases and remove DCHECK(). + */ + DCHECK_EQ(new_num, static_cast<int16_t>(new_num)); num_ssa_regs_ = new_num; } @@ -561,14 +580,35 @@ class MIRGraph { return special_case_; } - bool IsBackedge(BasicBlock* branch_bb, BasicBlock* target_bb) { - return ((target_bb != NULL) && (target_bb->start_offset <= branch_bb->start_offset)); + bool IsBackedge(BasicBlock* branch_bb, BasicBlockId target_bb_id) { + return ((target_bb_id != NullBasicBlockId) && + (GetBasicBlock(target_bb_id)->start_offset <= branch_bb->start_offset)); } bool IsBackwardsBranch(BasicBlock* branch_bb) { return IsBackedge(branch_bb, branch_bb->taken) || IsBackedge(branch_bb, branch_bb->fall_through); } + void CountBranch(DexOffset target_offset) { + if (target_offset <= current_offset_) { + backward_branches_++; + } else { + forward_branches_++; + } + } + + int GetBranchCount() { + return backward_branches_ + forward_branches_; + } + + bool IsPseudoMirOp(Instruction::Code opcode) { + return static_cast<int>(opcode) >= static_cast<int>(kMirOpFirst); + } + + bool IsPseudoMirOp(int opcode) { + return opcode >= static_cast<int>(kMirOpFirst); + } + void BasicBlockCombine(); void CodeLayout(); void DumpCheckStats(); @@ -580,11 +620,34 @@ class MIRGraph { void SSATransformation(); void CheckForDominanceFrontier(BasicBlock* dom_bb, const BasicBlock* succ_bb); void NullCheckElimination(); + /* + * Type inference handling helpers. Because Dalvik's bytecode is not fully typed, + * we have to do some work to figure out the sreg type. For some operations it is + * clear based on the opcode (i.e. ADD_FLOAT v0, v1, v2), but for others (MOVE), we + * may never know the "real" type. + * + * We perform the type inference operation by using an iterative walk over + * the graph, propagating types "defined" by typed opcodes to uses and defs in + * non-typed opcodes (such as MOVE). The Setxx(index) helpers are used to set defined + * types on typed opcodes (such as ADD_INT). The Setxx(index, is_xx) form is used to + * propagate types through non-typed opcodes such as PHI and MOVE. The is_xx flag + * tells whether our guess of the type is based on a previously typed definition. + * If so, the defined type takes precedence. Note that it's possible to have the same sreg + * show multiple defined types because dx treats constants as untyped bit patterns. + * The return value of the Setxx() helpers says whether or not the Setxx() action changed + * the current guess, and is used to know when to terminate the iterative walk. 
+ */ bool SetFp(int index, bool is_fp); + bool SetFp(int index); bool SetCore(int index, bool is_core); + bool SetCore(int index); bool SetRef(int index, bool is_ref); + bool SetRef(int index); bool SetWide(int index, bool is_wide); + bool SetWide(int index); bool SetHigh(int index, bool is_high); + bool SetHigh(int index); + void AppendMIR(BasicBlock* bb, MIR* mir); void PrependMIR(BasicBlock* bb, MIR* mir); void InsertMIRAfter(BasicBlock* bb, MIR* current_mir, MIR* new_mir); @@ -597,6 +660,9 @@ class MIRGraph { void DumpMIRGraph(); CallInfo* NewMemCallInfo(BasicBlock* bb, MIR* mir, InvokeType type, bool is_range); BasicBlock* NewMemBB(BBType block_type, int block_id); + MIR* AdvanceMIR(BasicBlock** p_bb, MIR* mir); + BasicBlock* NextDominatedBlock(BasicBlock* bb); + bool LayoutBlocks(BasicBlock* bb); /* * IsDebugBuild sanity check: keep track of the Dex PCs for catch entries so that later on @@ -625,15 +691,16 @@ class MIRGraph { bool InvokeUsesMethodStar(MIR* mir); int ParseInsn(const uint16_t* code_ptr, DecodedInstruction* decoded_instruction); bool ContentIsInsn(const uint16_t* code_ptr); - BasicBlock* SplitBlock(unsigned int code_offset, BasicBlock* orig_block, + BasicBlock* SplitBlock(DexOffset code_offset, BasicBlock* orig_block, BasicBlock** immed_pred_block_p); - BasicBlock* FindBlock(unsigned int code_offset, bool split, bool create, + BasicBlock* FindBlock(DexOffset code_offset, bool split, bool create, BasicBlock** immed_pred_block_p); void ProcessTryCatchBlocks(); - BasicBlock* ProcessCanBranch(BasicBlock* cur_block, MIR* insn, int cur_offset, int width, + BasicBlock* ProcessCanBranch(BasicBlock* cur_block, MIR* insn, DexOffset cur_offset, int width, int flags, const uint16_t* code_ptr, const uint16_t* code_end); - void ProcessCanSwitch(BasicBlock* cur_block, MIR* insn, int cur_offset, int width, int flags); - BasicBlock* ProcessCanThrow(BasicBlock* cur_block, MIR* insn, int cur_offset, int width, + void ProcessCanSwitch(BasicBlock* cur_block, MIR* insn, DexOffset cur_offset, int width, + int flags); + BasicBlock* ProcessCanThrow(BasicBlock* cur_block, MIR* insn, DexOffset cur_offset, int width, int flags, ArenaBitVector* try_block_addr, const uint16_t* code_ptr, const uint16_t* code_end); int AddNewSReg(int v_reg); @@ -689,9 +756,9 @@ class MIRGraph { GrowableArray<uint32_t> use_counts_; // Weighted by nesting depth GrowableArray<uint32_t> raw_use_counts_; // Not weighted unsigned int num_reachable_blocks_; - GrowableArray<int>* dfs_order_; - GrowableArray<int>* dfs_post_order_; - GrowableArray<int>* dom_post_order_traversal_; + GrowableArray<BasicBlockId>* dfs_order_; + GrowableArray<BasicBlockId>* dfs_post_order_; + GrowableArray<BasicBlockId>* dom_post_order_traversal_; int* i_dom_list_; ArenaBitVector** def_block_matrix_; // num_dalvik_register x num_blocks. ArenaBitVector* temp_block_v_; @@ -702,24 +769,25 @@ class MIRGraph { ArenaBitVector* try_block_addr_; BasicBlock* entry_block_; BasicBlock* exit_block_; - BasicBlock* cur_block_; int num_blocks_; const DexFile::CodeItem* current_code_item_; - SafeMap<unsigned int, BasicBlock*> block_map_; // FindBlock lookup cache. + GrowableArray<uint16_t> dex_pc_to_block_map_; // FindBlock lookup cache. 
std::vector<DexCompilationUnit*> m_units_; // List of methods included in this graph typedef std::pair<int, int> MIRLocation; // Insert point, (m_unit_ index, offset) std::vector<MIRLocation> method_stack_; // Include stack int current_method_; - int current_offset_; + DexOffset current_offset_; // Offset in code units int def_count_; // Used to estimate size of ssa name storage. int* opcode_count_; // Dex opcode coverage stats. int num_ssa_regs_; // Number of names following SSA transformation. - std::vector<BasicBlock*> extended_basic_blocks_; // Heads of block "traces". + std::vector<BasicBlockId> extended_basic_blocks_; // Heads of block "traces". int method_sreg_; unsigned int attributes_; Checkstats* checkstats_; SpecialCaseHandler special_case_; ArenaAllocator* arena_; + int backward_branches_; + int forward_branches_; }; } // namespace art diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc index b7611f8f5b..f5913a5ad4 100644 --- a/compiler/dex/mir_optimization.cc +++ b/compiler/dex/mir_optimization.cc @@ -96,19 +96,19 @@ void MIRGraph::PropagateConstants() { is_constant_v_ = new (arena_) ArenaBitVector(arena_, GetNumSSARegs(), false); constant_values_ = static_cast<int*>(arena_->Alloc(sizeof(int) * GetNumSSARegs(), ArenaAllocator::kAllocDFInfo)); - AllNodesIterator iter(this, false /* not iterative */); + AllNodesIterator iter(this); for (BasicBlock* bb = iter.Next(); bb != NULL; bb = iter.Next()) { DoConstantPropogation(bb); } } /* Advance to next strictly dominated MIR node in an extended basic block */ -static MIR* AdvanceMIR(BasicBlock** p_bb, MIR* mir) { +MIR* MIRGraph::AdvanceMIR(BasicBlock** p_bb, MIR* mir) { BasicBlock* bb = *p_bb; if (mir != NULL) { mir = mir->next; if (mir == NULL) { - bb = bb->fall_through; + bb = GetBasicBlock(bb->fall_through); if ((bb == NULL) || Predecessors(bb) != 1) { mir = NULL; } else { @@ -147,19 +147,21 @@ MIR* MIRGraph::FindMoveResult(BasicBlock* bb, MIR* mir) { return mir; } -static BasicBlock* NextDominatedBlock(BasicBlock* bb) { +BasicBlock* MIRGraph::NextDominatedBlock(BasicBlock* bb) { if (bb->block_type == kDead) { return NULL; } DCHECK((bb->block_type == kEntryBlock) || (bb->block_type == kDalvikByteCode) || (bb->block_type == kExitBlock)); - if (((bb->taken != NULL) && (bb->fall_through == NULL)) && - ((bb->taken->block_type == kDalvikByteCode) || (bb->taken->block_type == kExitBlock))) { + BasicBlock* bb_taken = GetBasicBlock(bb->taken); + BasicBlock* bb_fall_through = GetBasicBlock(bb->fall_through); + if (((bb_taken != NULL) && (bb_fall_through == NULL)) && + ((bb_taken->block_type == kDalvikByteCode) || (bb_taken->block_type == kExitBlock))) { // Follow simple unconditional branches. - bb = bb->taken; + bb = bb_taken; } else { // Follow simple fallthrough - bb = (bb->taken != NULL) ? NULL : bb->fall_through; + bb = (bb_taken != NULL) ? NULL : bb_fall_through; } if (bb == NULL || (Predecessors(bb) != 1)) { return NULL; @@ -311,11 +313,13 @@ bool MIRGraph::BasicBlockOpt(BasicBlock* bb) { case Instruction::IF_GTZ: case Instruction::IF_LEZ: // If we've got a backwards branch to return, no need to suspend check. 
- if ((IsBackedge(bb, bb->taken) && bb->taken->dominates_return) || - (IsBackedge(bb, bb->fall_through) && bb->fall_through->dominates_return)) { + if ((IsBackedge(bb, bb->taken) && GetBasicBlock(bb->taken)->dominates_return) || + (IsBackedge(bb, bb->fall_through) && + GetBasicBlock(bb->fall_through)->dominates_return)) { mir->optimization_flags |= MIR_IGNORE_SUSPEND_CHECK; if (cu_->verbose) { - LOG(INFO) << "Suppressed suspend check on branch to return at 0x" << std::hex << mir->offset; + LOG(INFO) << "Suppressed suspend check on branch to return at 0x" << std::hex + << mir->offset; } } break; @@ -328,15 +332,15 @@ bool MIRGraph::BasicBlockOpt(BasicBlock* bb) { if (!(cu_->compiler_backend == kPortable) && (cu_->instruction_set == kThumb2) && ((mir->dalvikInsn.opcode == Instruction::IF_EQZ) || (mir->dalvikInsn.opcode == Instruction::IF_NEZ))) { - BasicBlock* ft = bb->fall_through; + BasicBlock* ft = GetBasicBlock(bb->fall_through); DCHECK(ft != NULL); - BasicBlock* ft_ft = ft->fall_through; - BasicBlock* ft_tk = ft->taken; + BasicBlock* ft_ft = GetBasicBlock(ft->fall_through); + BasicBlock* ft_tk = GetBasicBlock(ft->taken); - BasicBlock* tk = bb->taken; + BasicBlock* tk = GetBasicBlock(bb->taken); DCHECK(tk != NULL); - BasicBlock* tk_ft = tk->fall_through; - BasicBlock* tk_tk = tk->taken; + BasicBlock* tk_ft = GetBasicBlock(tk->fall_through); + BasicBlock* tk_tk = GetBasicBlock(tk->taken); /* * In the select pattern, the taken edge goes to a block that unconditionally @@ -434,7 +438,7 @@ bool MIRGraph::BasicBlockOpt(BasicBlock* bb) { int dead_def = if_false->ssa_rep->defs[0]; int live_def = if_true->ssa_rep->defs[0]; mir->ssa_rep->defs[0] = live_def; - int* incoming = reinterpret_cast<int*>(phi->dalvikInsn.vB); + BasicBlockId* incoming = phi->meta.phi_incoming; for (int i = 0; i < phi->ssa_rep->num_uses; i++) { if (phi->ssa_rep->uses[i] == live_def) { incoming[i] = bb->id; @@ -449,7 +453,7 @@ bool MIRGraph::BasicBlockOpt(BasicBlock* bb) { } } phi->ssa_rep->num_uses--; - bb->taken = NULL; + bb->taken = NullBasicBlockId; tk->block_type = kDead; for (MIR* tmir = ft->first_mir_insn; tmir != NULL; tmir = tmir->next) { tmir->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpNop); @@ -500,7 +504,7 @@ void MIRGraph::CountChecks(struct BasicBlock* bb) { } /* Try to make common case the fallthrough path */ -static bool LayoutBlocks(struct BasicBlock* bb) { +bool MIRGraph::LayoutBlocks(BasicBlock* bb) { // TODO: For now, just looking for direct throws. Consider generalizing for profile feedback if (!bb->explicit_throw) { return false; @@ -511,13 +515,13 @@ static bool LayoutBlocks(struct BasicBlock* bb) { if ((walker->block_type == kEntryBlock) || (Predecessors(walker) != 1)) { break; } - BasicBlock* prev = walker->predecessors->Get(0); + BasicBlock* prev = GetBasicBlock(walker->predecessors->Get(0)); if (prev->conditional_branch) { - if (prev->fall_through == walker) { + if (GetBasicBlock(prev->fall_through) == walker) { // Already done - return break; } - DCHECK_EQ(walker, prev->taken); + DCHECK_EQ(walker, GetBasicBlock(prev->taken)); // Got one. 
Flip it and exit Instruction::Code opcode = prev->last_mir_insn->dalvikInsn.opcode; switch (opcode) { @@ -536,7 +540,7 @@ static bool LayoutBlocks(struct BasicBlock* bb) { default: LOG(FATAL) << "Unexpected opcode " << opcode; } prev->last_mir_insn->dalvikInsn.opcode = opcode; - BasicBlock* t_bb = prev->taken; + BasicBlockId t_bb = prev->taken; prev->taken = prev->fall_through; prev->fall_through = t_bb; break; @@ -556,8 +560,9 @@ bool MIRGraph::CombineBlocks(struct BasicBlock* bb) { || (bb->block_type == kExceptionHandling) || (bb->block_type == kExitBlock) || (bb->block_type == kDead) - || ((bb->taken == NULL) || (bb->taken->block_type != kExceptionHandling)) - || (bb->successor_block_list.block_list_type != kNotUsed) + || (bb->taken == NullBasicBlockId) + || (GetBasicBlock(bb->taken)->block_type != kExceptionHandling) + || (bb->successor_block_list_type != kNotUsed) || (static_cast<int>(bb->last_mir_insn->dalvikInsn.opcode) != kMirOpCheck)) { break; } @@ -578,19 +583,18 @@ bool MIRGraph::CombineBlocks(struct BasicBlock* bb) { break; } // OK - got one. Combine - BasicBlock* bb_next = bb->fall_through; + BasicBlock* bb_next = GetBasicBlock(bb->fall_through); DCHECK(!bb_next->catch_entry); DCHECK_EQ(Predecessors(bb_next), 1U); - MIR* t_mir = bb->last_mir_insn->prev; // Overwrite the kOpCheck insn with the paired opcode DCHECK_EQ(bb_next->first_mir_insn, throw_insn); *bb->last_mir_insn = *throw_insn; - bb->last_mir_insn->prev = t_mir; // Use the successor info from the next block - bb->successor_block_list = bb_next->successor_block_list; + bb->successor_block_list_type = bb_next->successor_block_list_type; + bb->successor_blocks = bb_next->successor_blocks; // Use the ending block linkage from the next block bb->fall_through = bb_next->fall_through; - bb->taken->block_type = kDead; // Kill the unused exception block + GetBasicBlock(bb->taken)->block_type = kDead; // Kill the unused exception block bb->taken = bb_next->taken; // Include the rest of the instructions bb->last_mir_insn = bb_next->last_mir_insn; @@ -625,49 +629,57 @@ bool MIRGraph::EliminateNullChecks(struct BasicBlock* bb) { */ if ((bb->block_type == kEntryBlock) | bb->catch_entry) { temp_ssa_register_v_->ClearAllBits(); + // Assume all ins are objects. + for (uint16_t in_reg = cu_->num_dalvik_registers - cu_->num_ins; + in_reg < cu_->num_dalvik_registers; in_reg++) { + temp_ssa_register_v_->SetBit(in_reg); + } if ((cu_->access_flags & kAccStatic) == 0) { // If non-static method, mark "this" as non-null int this_reg = cu_->num_dalvik_registers - cu_->num_ins; - temp_ssa_register_v_->SetBit(this_reg); + temp_ssa_register_v_->ClearBit(this_reg); } } else if (bb->predecessors->Size() == 1) { - BasicBlock* pred_bb = bb->predecessors->Get(0); + BasicBlock* pred_bb = GetBasicBlock(bb->predecessors->Get(0)); temp_ssa_register_v_->Copy(pred_bb->data_flow_info->ending_null_check_v); if (pred_bb->block_type == kDalvikByteCode) { // Check to see if predecessor had an explicit null-check. MIR* last_insn = pred_bb->last_mir_insn; Instruction::Code last_opcode = last_insn->dalvikInsn.opcode; if (last_opcode == Instruction::IF_EQZ) { - if (pred_bb->fall_through == bb) { + if (pred_bb->fall_through == bb->id) { // The fall-through of a block following a IF_EQZ, set the vA of the IF_EQZ to show that // it can't be null. 
- temp_ssa_register_v_->SetBit(last_insn->ssa_rep->uses[0]); + temp_ssa_register_v_->ClearBit(last_insn->ssa_rep->uses[0]); } } else if (last_opcode == Instruction::IF_NEZ) { - if (pred_bb->taken == bb) { + if (pred_bb->taken == bb->id) { // The taken block following a IF_NEZ, set the vA of the IF_NEZ to show that it can't be // null. - temp_ssa_register_v_->SetBit(last_insn->ssa_rep->uses[0]); + temp_ssa_register_v_->ClearBit(last_insn->ssa_rep->uses[0]); } } } } else { - // Starting state is intersection of all incoming arcs - GrowableArray<BasicBlock*>::Iterator iter(bb->predecessors); - BasicBlock* pred_bb = iter.Next(); + // Starting state is union of all incoming arcs + GrowableArray<BasicBlockId>::Iterator iter(bb->predecessors); + BasicBlock* pred_bb = GetBasicBlock(iter.Next()); DCHECK(pred_bb != NULL); temp_ssa_register_v_->Copy(pred_bb->data_flow_info->ending_null_check_v); while (true) { - pred_bb = iter.Next(); + pred_bb = GetBasicBlock(iter.Next()); if (!pred_bb) break; if ((pred_bb->data_flow_info == NULL) || (pred_bb->data_flow_info->ending_null_check_v == NULL)) { continue; } - temp_ssa_register_v_->Intersect(pred_bb->data_flow_info->ending_null_check_v); + temp_ssa_register_v_->Union(pred_bb->data_flow_info->ending_null_check_v); } } + // At this point, temp_ssa_register_v_ shows which sregs have an object definition with + // no intervening uses. + // Walk through the instruction in the block, updating as necessary for (MIR* mir = bb->first_mir_insn; mir != NULL; mir = mir->next) { if (mir->ssa_rep == NULL) { @@ -675,11 +687,49 @@ bool MIRGraph::EliminateNullChecks(struct BasicBlock* bb) { } int df_attributes = oat_data_flow_attributes_[mir->dalvikInsn.opcode]; - // Mark target of NEW* as non-null - if (df_attributes & DF_NON_NULL_DST) { + // Might need a null check? + if (df_attributes & DF_HAS_NULL_CHKS) { + int src_idx; + if (df_attributes & DF_NULL_CHK_1) { + src_idx = 1; + } else if (df_attributes & DF_NULL_CHK_2) { + src_idx = 2; + } else { + src_idx = 0; + } + int src_sreg = mir->ssa_rep->uses[src_idx]; + if (!temp_ssa_register_v_->IsBitSet(src_sreg)) { + // Eliminate the null check. + mir->optimization_flags |= MIR_IGNORE_NULL_CHECK; + } else { + // Do the null check. + mir->optimization_flags &= ~MIR_IGNORE_NULL_CHECK; + // Mark s_reg as null-checked + temp_ssa_register_v_->ClearBit(src_sreg); + } + } + + if ((df_attributes & DF_A_WIDE) || + (df_attributes & (DF_REF_A | DF_SETS_CONST | DF_NULL_TRANSFER)) == 0) { + continue; + } + + /* + * First, mark all object definitions as requiring null check. + * Note: we can't tell if a CONST definition might be used as an object, so treat + * them all as object definitions. + */ + if (((df_attributes & (DF_DA | DF_REF_A)) == (DF_DA | DF_REF_A)) || + (df_attributes & DF_SETS_CONST)) { temp_ssa_register_v_->SetBit(mir->ssa_rep->defs[0]); } + // Now, remove mark from all object definitions we know are non-null. 
+ if (df_attributes & DF_NON_NULL_DST) { + // Mark target of NEW* as non-null + temp_ssa_register_v_->ClearBit(mir->ssa_rep->defs[0]); + } + // Mark non-null returns from invoke-style NEW* if (df_attributes & DF_NON_NULL_RET) { MIR* next_mir = mir->next; @@ -687,13 +737,13 @@ bool MIRGraph::EliminateNullChecks(struct BasicBlock* bb) { if (next_mir && next_mir->dalvikInsn.opcode == Instruction::MOVE_RESULT_OBJECT) { // Mark as null checked - temp_ssa_register_v_->SetBit(next_mir->ssa_rep->defs[0]); + temp_ssa_register_v_->ClearBit(next_mir->ssa_rep->defs[0]); } else { if (next_mir) { LOG(WARNING) << "Unexpected opcode following new: " << next_mir->dalvikInsn.opcode; - } else if (bb->fall_through) { + } else if (bb->fall_through != NullBasicBlockId) { // Look in next basic block - struct BasicBlock* next_bb = bb->fall_through; + struct BasicBlock* next_bb = GetBasicBlock(bb->fall_through); for (MIR* tmir = next_bb->first_mir_insn; tmir != NULL; tmir =tmir->next) { if (static_cast<int>(tmir->dalvikInsn.opcode) >= static_cast<int>(kMirOpFirst)) { @@ -702,7 +752,7 @@ bool MIRGraph::EliminateNullChecks(struct BasicBlock* bb) { // First non-pseudo should be MOVE_RESULT_OBJECT if (tmir->dalvikInsn.opcode == Instruction::MOVE_RESULT_OBJECT) { // Mark as null checked - temp_ssa_register_v_->SetBit(tmir->ssa_rep->defs[0]); + temp_ssa_register_v_->ClearBit(tmir->ssa_rep->defs[0]); } else { LOG(WARNING) << "Unexpected op after new: " << tmir->dalvikInsn.opcode; } @@ -715,40 +765,22 @@ bool MIRGraph::EliminateNullChecks(struct BasicBlock* bb) { /* * Propagate nullcheck state on register copies (including * Phi pseudo copies. For the latter, nullcheck state is - * the "and" of all the Phi's operands. + * the "or" of all the Phi's operands. */ if (df_attributes & (DF_NULL_TRANSFER_0 | DF_NULL_TRANSFER_N)) { int tgt_sreg = mir->ssa_rep->defs[0]; int operands = (df_attributes & DF_NULL_TRANSFER_0) ? 1 : mir->ssa_rep->num_uses; - bool null_checked = true; + bool needs_null_check = false; for (int i = 0; i < operands; i++) { - null_checked &= temp_ssa_register_v_->IsBitSet(mir->ssa_rep->uses[i]); + needs_null_check |= temp_ssa_register_v_->IsBitSet(mir->ssa_rep->uses[i]); } - if (null_checked) { + if (needs_null_check) { temp_ssa_register_v_->SetBit(tgt_sreg); - } - } - - // Already nullchecked? - if ((df_attributes & DF_HAS_NULL_CHKS) && !(mir->optimization_flags & MIR_IGNORE_NULL_CHECK)) { - int src_idx; - if (df_attributes & DF_NULL_CHK_1) { - src_idx = 1; - } else if (df_attributes & DF_NULL_CHK_2) { - src_idx = 2; } else { - src_idx = 0; + temp_ssa_register_v_->ClearBit(tgt_sreg); } - int src_sreg = mir->ssa_rep->uses[src_idx]; - if (temp_ssa_register_v_->IsBitSet(src_sreg)) { - // Eliminate the null check - mir->optimization_flags |= MIR_IGNORE_NULL_CHECK; - } else { - // Mark s_reg as null-checked - temp_ssa_register_v_->SetBit(src_sreg); - } - } + } } // Did anything change? 
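[Editorial aside, not part of the change above.] The reworked EliminateNullChecks hunks invert the meaning of temp_ssa_register_v_: a set bit now means "this SSA name may still need a null check", so block-entry state becomes the union of the predecessors' ending vectors rather than the intersection of "already checked" sets, and a successful check (or a NEW* definition) clears the bit instead of setting it. The sketch below restates that dataflow in isolation; NeedsCheckSet, SimpleInsn, MergePredecessors and Transfer are illustrative stand-ins, not ART/MIRGraph APIs.

// Illustrative sketch only: hypothetical names, not the ArenaBitVector/MIR machinery.
#include <cstddef>
#include <vector>

using NeedsCheckSet = std::vector<bool>;  // one bit per SSA name: "may still need a null check"

// Block-entry state is the union of the predecessors' block-exit states
// (the old pass intersected "already null-checked" sets instead).
NeedsCheckSet MergePredecessors(const std::vector<NeedsCheckSet>& preds, size_t num_ssa_names) {
  NeedsCheckSet merged(num_ssa_names, false);
  for (const NeedsCheckSet& pred : preds) {
    for (size_t i = 0; i < num_ssa_names && i < pred.size(); ++i) {
      merged[i] = merged[i] || pred[i];
    }
  }
  return merged;
}

// Deliberately simplified stand-in for a MIR plus its dataflow attributes.
struct SimpleInsn {
  int checked_use = -1;           // SSA name dereferenced by an implicit null check, or -1
  int def = -1;                   // SSA name defined, or -1
  bool defines_object = false;    // object (or const) definition: starts out unchecked
  bool defines_new = false;       // NEW*-style result: known non-null
  bool elide_null_check = false;  // plays the role of MIR_IGNORE_NULL_CHECK
};

// Forward transfer within a block: elide a check when its operand's bit is already
// clear, clear the bit once the check has happened, set it on fresh object
// definitions, and clear it again for NEW* results.
void Transfer(NeedsCheckSet* live, SimpleInsn* insn) {
  if (insn->checked_use >= 0) {
    insn->elide_null_check = !(*live)[insn->checked_use];
    (*live)[insn->checked_use] = false;  // checked (or proven non-null) from here on
  }
  if (insn->def >= 0) {
    if (insn->defines_object) (*live)[insn->def] = true;
    if (insn->defines_new) (*live)[insn->def] = false;
  }
}

Tracking "needs a check" rather than "known checked" keeps the merge conservative when a predecessor contributes unknown or unchecked values, which is why the hunks above switch Intersect() to Union() and flip each SetBit/ClearBit pair.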
@@ -762,11 +794,11 @@ bool MIRGraph::EliminateNullChecks(struct BasicBlock* bb) { void MIRGraph::NullCheckElimination() { if (!(cu_->disable_opt & (1 << kNullCheckElimination))) { DCHECK(temp_ssa_register_v_ != NULL); - AllNodesIterator iter(this, false /* not iterative */); + AllNodesIterator iter(this); for (BasicBlock* bb = iter.Next(); bb != NULL; bb = iter.Next()) { NullCheckEliminationInit(bb); } - PreOrderDfsIterator iter2(this, true /* iterative */); + RepeatingPreOrderDfsIterator iter2(this); bool change = false; for (BasicBlock* bb = iter2.Next(change); bb != NULL; bb = iter2.Next(change)) { change = EliminateNullChecks(bb); @@ -778,7 +810,7 @@ void MIRGraph::NullCheckElimination() { } void MIRGraph::BasicBlockCombine() { - PreOrderDfsIterator iter(this, false /* not iterative */); + PreOrderDfsIterator iter(this); for (BasicBlock* bb = iter.Next(); bb != NULL; bb = iter.Next()) { CombineBlocks(bb); } @@ -791,7 +823,7 @@ void MIRGraph::CodeLayout() { if (cu_->enable_debug & (1 << kDebugVerifyDataflow)) { VerifyDataflow(); } - AllNodesIterator iter(this, false /* not iterative */); + AllNodesIterator iter(this); for (BasicBlock* bb = iter.Next(); bb != NULL; bb = iter.Next()) { LayoutBlocks(bb); } @@ -804,7 +836,7 @@ void MIRGraph::DumpCheckStats() { Checkstats* stats = static_cast<Checkstats*>(arena_->Alloc(sizeof(Checkstats), ArenaAllocator::kAllocDFInfo)); checkstats_ = stats; - AllNodesIterator iter(this, false /* not iterative */); + AllNodesIterator iter(this); for (BasicBlock* bb = iter.Next(); bb != NULL; bb = iter.Next()) { CountChecks(bb); } @@ -834,7 +866,7 @@ bool MIRGraph::BuildExtendedBBList(struct BasicBlock* bb) { } // Must be head of extended basic block. BasicBlock* start_bb = bb; - extended_basic_blocks_.push_back(bb); + extended_basic_blocks_.push_back(bb->id); bool terminated_by_return = false; // Visit blocks strictly dominated by this head. while (bb != NULL) { @@ -858,13 +890,13 @@ void MIRGraph::BasicBlockOptimization() { if (!(cu_->disable_opt & (1 << kBBOpt))) { DCHECK_EQ(cu_->num_compiler_temps, 0); ClearAllVisitedFlags(); - PreOrderDfsIterator iter2(this, false /* not iterative */); + PreOrderDfsIterator iter2(this); for (BasicBlock* bb = iter2.Next(); bb != NULL; bb = iter2.Next()) { BuildExtendedBBList(bb); } // Perform extended basic block optimizations. 
for (unsigned int i = 0; i < extended_basic_blocks_.size(); i++) { - BasicBlockOpt(extended_basic_blocks_[i]); + BasicBlockOpt(GetBasicBlock(extended_basic_blocks_[i])); } } if (cu_->enable_debug & (1 << kDebugDumpCFG)) { diff --git a/compiler/dex/portable/mir_to_gbc.cc b/compiler/dex/portable/mir_to_gbc.cc index 90cec75039..963cbeb1d1 100644 --- a/compiler/dex/portable/mir_to_gbc.cc +++ b/compiler/dex/portable/mir_to_gbc.cc @@ -30,10 +30,10 @@ #include "dex/compiler_internals.h" #include "dex/dataflow_iterator-inl.h" #include "dex/frontend.h" -#include "mir_to_gbc.h" - #include "llvm/llvm_compilation_unit.h" #include "llvm/utils_llvm.h" +#include "mir_to_gbc.h" +#include "thread-inl.h" const char* kLabelFormat = "%c0x%x_%d"; const char kInvalidBlock = 0xff; @@ -132,7 +132,7 @@ void MirConverter::ConvertPackedSwitch(BasicBlock* bb, ::llvm::Value* value = GetLLVMValue(rl_src.orig_sreg); ::llvm::SwitchInst* sw = - irb_->CreateSwitch(value, GetLLVMBlock(bb->fall_through->id), + irb_->CreateSwitch(value, GetLLVMBlock(bb->fall_through), payload->case_count); for (uint16_t i = 0; i < payload->case_count; ++i) { @@ -143,8 +143,8 @@ void MirConverter::ConvertPackedSwitch(BasicBlock* bb, ::llvm::MDNode* switch_node = ::llvm::MDNode::get(*context_, irb_->getInt32(table_offset)); sw->setMetadata("SwitchTable", switch_node); - bb->taken = NULL; - bb->fall_through = NULL; + bb->taken = NullBasicBlockId; + bb->fall_through = NullBasicBlockId; } void MirConverter::ConvertSparseSwitch(BasicBlock* bb, @@ -159,7 +159,7 @@ void MirConverter::ConvertSparseSwitch(BasicBlock* bb, ::llvm::Value* value = GetLLVMValue(rl_src.orig_sreg); ::llvm::SwitchInst* sw = - irb_->CreateSwitch(value, GetLLVMBlock(bb->fall_through->id), + irb_->CreateSwitch(value, GetLLVMBlock(bb->fall_through), payload->case_count); for (size_t i = 0; i < payload->case_count; ++i) { @@ -170,8 +170,8 @@ void MirConverter::ConvertSparseSwitch(BasicBlock* bb, ::llvm::MDNode* switch_node = ::llvm::MDNode::get(*context_, irb_->getInt32(table_offset)); sw->setMetadata("SwitchTable", switch_node); - bb->taken = NULL; - bb->fall_through = NULL; + bb->taken = NullBasicBlockId; + bb->fall_through = NullBasicBlockId; } void MirConverter::ConvertSget(int32_t field_index, @@ -311,22 +311,22 @@ void MirConverter::EmitSuspendCheck() { void MirConverter::ConvertCompareAndBranch(BasicBlock* bb, MIR* mir, ConditionCode cc, RegLocation rl_src1, RegLocation rl_src2) { - if (bb->taken->start_offset <= mir->offset) { + if (mir_graph_->GetBasicBlock(bb->taken)->start_offset <= mir->offset) { EmitSuspendCheck(); } ::llvm::Value* src1 = GetLLVMValue(rl_src1.orig_sreg); ::llvm::Value* src2 = GetLLVMValue(rl_src2.orig_sreg); ::llvm::Value* cond_value = ConvertCompare(cc, src1, src2); cond_value->setName(StringPrintf("t%d", temp_name_++)); - irb_->CreateCondBr(cond_value, GetLLVMBlock(bb->taken->id), - GetLLVMBlock(bb->fall_through->id)); + irb_->CreateCondBr(cond_value, GetLLVMBlock(bb->taken), + GetLLVMBlock(bb->fall_through)); // Don't redo the fallthrough branch in the BB driver - bb->fall_through = NULL; + bb->fall_through = NullBasicBlockId; } void MirConverter::ConvertCompareZeroAndBranch(BasicBlock* bb, MIR* mir, ConditionCode cc, RegLocation rl_src1) { - if (bb->taken->start_offset <= mir->offset) { + if (mir_graph_->GetBasicBlock(bb->taken)->start_offset <= mir->offset) { EmitSuspendCheck(); } ::llvm::Value* src1 = GetLLVMValue(rl_src1.orig_sreg); @@ -337,10 +337,10 @@ void MirConverter::ConvertCompareZeroAndBranch(BasicBlock* bb, src2 = irb_->getInt32(0); } 
::llvm::Value* cond_value = ConvertCompare(cc, src1, src2); - irb_->CreateCondBr(cond_value, GetLLVMBlock(bb->taken->id), - GetLLVMBlock(bb->fall_through->id)); + irb_->CreateCondBr(cond_value, GetLLVMBlock(bb->taken), + GetLLVMBlock(bb->fall_through)); // Don't redo the fallthrough branch in the BB driver - bb->fall_through = NULL; + bb->fall_through = NullBasicBlockId; } ::llvm::Value* MirConverter::GenDivModOp(bool is_div, bool is_long, @@ -941,10 +941,10 @@ bool MirConverter::ConvertMIRNode(MIR* mir, BasicBlock* bb, case Instruction::GOTO: case Instruction::GOTO_16: case Instruction::GOTO_32: { - if (bb->taken->start_offset <= bb->start_offset) { + if (mir_graph_->GetBasicBlock(bb->taken)->start_offset <= bb->start_offset) { EmitSuspendCheck(); } - irb_->CreateBr(GetLLVMBlock(bb->taken->id)); + irb_->CreateBr(GetLLVMBlock(bb->taken)); } break; @@ -1190,11 +1190,11 @@ bool MirConverter::ConvertMIRNode(MIR* mir, BasicBlock* bb, * If it might rethrow, force termination * of the following block. */ - if (bb->fall_through == NULL) { + if (bb->fall_through == NullBasicBlockId) { irb_->CreateUnreachable(); } else { - bb->fall_through->fall_through = NULL; - bb->fall_through->taken = NULL; + mir_graph_->GetBasicBlock(bb->fall_through)->fall_through = NullBasicBlockId; + mir_graph_->GetBasicBlock(bb->fall_through)->taken = NullBasicBlockId; } break; @@ -1552,7 +1552,7 @@ void MirConverter::HandlePhiNodes(BasicBlock* bb, ::llvm::BasicBlock* llvm_bb) { if (rl_dest.high_word) { continue; // No Phi node - handled via low word } - int* incoming = reinterpret_cast<int*>(mir->dalvikInsn.vB); + BasicBlockId* incoming = mir->meta.phi_incoming; ::llvm::Type* phi_type = LlvmTypeFromLocRec(rl_dest); ::llvm::PHINode* phi = irb_->CreatePHI(phi_type, mir->ssa_rep->num_uses); @@ -1597,8 +1597,8 @@ void MirConverter::ConvertExtendedMIR(BasicBlock* bb, MIR* mir, break; } case kMirOpNop: - if ((mir == bb->last_mir_insn) && (bb->taken == NULL) && - (bb->fall_through == NULL)) { + if ((mir == bb->last_mir_insn) && (bb->taken == NullBasicBlockId) && + (bb->fall_through == NullBasicBlockId)) { irb_->CreateUnreachable(); } break; @@ -1718,25 +1718,23 @@ bool MirConverter::BlockBitcodeConversion(BasicBlock* bb) { SSARepresentation* ssa_rep = work_half->ssa_rep; work_half->ssa_rep = mir->ssa_rep; mir->ssa_rep = ssa_rep; - work_half->meta.original_opcode = work_half->dalvikInsn.opcode; work_half->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpNop); - if (bb->successor_block_list.block_list_type == kCatch) { + if (bb->successor_block_list_type == kCatch) { ::llvm::Function* intr = intrinsic_helper_->GetIntrinsicFunction( art::llvm::IntrinsicHelper::CatchTargets); ::llvm::Value* switch_key = irb_->CreateCall(intr, irb_->getInt32(mir->offset)); - GrowableArray<SuccessorBlockInfo*>::Iterator iter(bb->successor_block_list.blocks); + GrowableArray<SuccessorBlockInfo*>::Iterator iter(bb->successor_blocks); // New basic block to use for work half ::llvm::BasicBlock* work_bb = ::llvm::BasicBlock::Create(*context_, "", func_); ::llvm::SwitchInst* sw = - irb_->CreateSwitch(switch_key, work_bb, - bb->successor_block_list.blocks->Size()); + irb_->CreateSwitch(switch_key, work_bb, bb->successor_blocks->Size()); while (true) { SuccessorBlockInfo *successor_block_info = iter.Next(); if (successor_block_info == NULL) break; ::llvm::BasicBlock *target = - GetLLVMBlock(successor_block_info->block->id); + GetLLVMBlock(successor_block_info->block); int type_index = successor_block_info->key; sw->addCase(irb_->getInt32(type_index), 
target); } @@ -1761,9 +1759,9 @@ bool MirConverter::BlockBitcodeConversion(BasicBlock* bb) { } if (bb->block_type == kEntryBlock) { - entry_target_bb_ = GetLLVMBlock(bb->fall_through->id); - } else if ((bb->fall_through != NULL) && !bb->terminated_by_return) { - irb_->CreateBr(GetLLVMBlock(bb->fall_through->id)); + entry_target_bb_ = GetLLVMBlock(bb->fall_through); + } else if ((bb->fall_through != NullBasicBlockId) && !bb->terminated_by_return) { + irb_->CreateBr(GetLLVMBlock(bb->fall_through)); } return false; @@ -1877,7 +1875,7 @@ void MirConverter::MethodMIR2Bitcode() { CreateFunction(); // Create an LLVM basic block for each MIR block in dfs preorder - PreOrderDfsIterator iter(mir_graph_, false /* not iterative */); + PreOrderDfsIterator iter(mir_graph_); for (BasicBlock* bb = iter.Next(); bb != NULL; bb = iter.Next()) { CreateLLVMBasicBlock(bb); } @@ -1909,7 +1907,7 @@ void MirConverter::MethodMIR2Bitcode() { } } - PreOrderDfsIterator iter2(mir_graph_, false /* not iterative */); + PreOrderDfsIterator iter2(mir_graph_); for (BasicBlock* bb = iter2.Next(); bb != NULL; bb = iter2.Next()) { BlockBitcodeConversion(bb); } @@ -1972,7 +1970,7 @@ void MirConverter::MethodMIR2Bitcode() { ::llvm::OwningPtr< ::llvm::tool_output_file> out_file( new ::llvm::tool_output_file(fname.c_str(), errmsg, - ::llvm::sys::fs::F_Binary)); + ::llvm::raw_fd_ostream::F_Binary)); if (!errmsg.empty()) { LOG(ERROR) << "Failed to create bitcode output file: " << errmsg; diff --git a/compiler/dex/quick/arm/arm_lir.h b/compiler/dex/quick/arm/arm_lir.h index 2f54190ae7..ffaaf84503 100644 --- a/compiler/dex/quick/arm/arm_lir.h +++ b/compiler/dex/quick/arm/arm_lir.h @@ -296,6 +296,8 @@ enum ArmOpcode { kThumbOrr, // orr [0100001100] rm[5..3] rd[2..0]. kThumbPop, // pop [1011110] r[8..8] rl[7..0]. kThumbPush, // push [1011010] r[8..8] rl[7..0]. + kThumbRev, // rev [1011101000] rm[5..3] rd[2..0] + kThumbRevsh, // revsh [1011101011] rm[5..3] rd[2..0] kThumbRorRR, // ror [0100000111] rs[5..3] rd[2..0]. kThumbSbc, // sbc [0100000110] rm[5..3] rd[2..0]. kThumbStmia, // stmia [11000] rn[10..8] reglist [7.. 0]. @@ -378,6 +380,8 @@ enum ArmOpcode { kThumb2CmnRR, // cmn [111010110001] rn[19..16] [0000] [1111] [0000] rm[3..0]. kThumb2EorRRR, // eor [111010101000] rn[19..16] [0000] rd[11..8] [0000] rm[3..0]. kThumb2MulRRR, // mul [111110110000] rn[19..16] [1111] rd[11..8] [0000] rm[3..0]. + kThumb2SdivRRR, // sdiv [111110111001] rn[19..16] [1111] rd[11..8] [1111] rm[3..0]. + kThumb2UdivRRR, // udiv [111110111011] rn[19..16] [1111] rd[11..8] [1111] rm[3..0]. kThumb2MnvRR, // mvn [11101010011011110] rd[11-8] [0000] rm[3..0]. kThumb2RsubRRI8, // rsub [111100011100] rn[19..16] [0000] rd[11..8] imm8[7..0]. kThumb2NegRR, // actually rsub rd, rn, #0. @@ -399,6 +403,8 @@ enum ArmOpcode { kThumb2AdcRRI8, // adc [111100010101] rn[19..16] [0] imm3 rd[11..8] imm8. kThumb2SubRRI8, // sub [111100011011] rn[19..16] [0] imm3 rd[11..8] imm8. kThumb2SbcRRI8, // sbc [111100010111] rn[19..16] [0] imm3 rd[11..8] imm8. + kThumb2RevRR, // rev [111110101001] rm[19..16] [1111] rd[11..8] 1000 rm[3..0] + kThumb2RevshRR, // rev [111110101001] rm[19..16] [1111] rd[11..8] 1011 rm[3..0] kThumb2It, // it [10111111] firstcond[7-4] mask[3-0]. kThumb2Fmstat, // fmstat [11101110111100011111101000010000]. kThumb2Vcmpd, // vcmp [111011101] D [11011] rd[15-12] [1011] E [1] M [0] rm[3-0]. @@ -462,7 +468,7 @@ enum ArmOpDmbOptions { // Instruction assembly field_loc kind. enum ArmEncodingKind { - kFmtUnused, + kFmtUnused, // Unused field and marks end of formats. 
kFmtBitBlt, // Bit string using end/start. kFmtDfp, // Double FP reg. kFmtSfp, // Single FP reg. @@ -477,6 +483,7 @@ enum ArmEncodingKind { kFmtBrOffset, // Signed extended [26,11,13,21-16,10-0]:0. kFmtFPImm, // Encoded floating point immediate. kFmtOff24, // 24-bit Thumb2 unconditional branch encoding. + kFmtSkip, // Unused field, but continue to next. }; // Struct used to define the snippet positions for each Thumb opcode. @@ -492,6 +499,7 @@ struct ArmEncodingMap { const char* name; const char* fmt; int size; // Note: size is in bytes. + FixupKind fixup; }; } // namespace art diff --git a/compiler/dex/quick/arm/assemble_arm.cc b/compiler/dex/quick/arm/assemble_arm.cc index 0649c9f319..3d0f263fad 100644 --- a/compiler/dex/quick/arm/assemble_arm.cc +++ b/compiler/dex/quick/arm/assemble_arm.cc @@ -37,9 +37,9 @@ namespace art { * fmt: for pretty-printing */ #define ENCODING_MAP(opcode, skeleton, k0, ds, de, k1, s1s, s1e, k2, s2s, s2e, \ - k3, k3s, k3e, flags, name, fmt, size) \ + k3, k3s, k3e, flags, name, fmt, size, fixup) \ {skeleton, {{k0, ds, de}, {k1, s1s, s1e}, {k2, s2s, s2e}, \ - {k3, k3s, k3e}}, opcode, flags, name, fmt, size} + {k3, k3s, k3e}}, opcode, flags, name, fmt, size, fixup} /* Instruction dump string format keys: !pf, where "!" is the start * of the key, "p" is which numeric operand to use and "f" is the @@ -79,916 +79,966 @@ namespace art { const ArmEncodingMap ArmMir2Lir::EncodingMap[kArmLast] = { ENCODING_MAP(kArm16BitData, 0x0000, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, IS_UNARY_OP, "data", "0x!0h(!0d)", 2), + kFmtUnused, -1, -1, IS_UNARY_OP, "data", "0x!0h(!0d)", 2, kFixupNone), ENCODING_MAP(kThumbAdcRR, 0x4140, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES | USES_CCODES, - "adcs", "!0C, !1C", 2), + "adcs", "!0C, !1C", 2, kFixupNone), ENCODING_MAP(kThumbAddRRI3, 0x1c00, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, - "adds", "!0C, !1C, #!2d", 2), + "adds", "!0C, !1C, #!2d", 2, kFixupNone), ENCODING_MAP(kThumbAddRI8, 0x3000, kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE0 | SETS_CCODES, - "adds", "!0C, !0C, #!1d", 2), + "adds", "!0C, !0C, #!1d", 2, kFixupNone), ENCODING_MAP(kThumbAddRRR, 0x1800, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | SETS_CCODES, - "adds", "!0C, !1C, !2C", 2), + "adds", "!0C, !1C, !2C", 2, kFixupNone), ENCODING_MAP(kThumbAddRRLH, 0x4440, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01, - "add", "!0C, !1C", 2), + "add", "!0C, !1C", 2, kFixupNone), ENCODING_MAP(kThumbAddRRHL, 0x4480, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01, - "add", "!0C, !1C", 2), + "add", "!0C, !1C", 2, kFixupNone), ENCODING_MAP(kThumbAddRRHH, 0x44c0, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01, - "add", "!0C, !1C", 2), + "add", "!0C, !1C", 2, kFixupNone), ENCODING_MAP(kThumbAddPcRel, 0xa000, kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | IS_BRANCH | NEEDS_FIXUP, - "add", "!0C, pc, #!1E", 2), + "add", "!0C, pc, #!1E", 2, kFixupLoad), ENCODING_MAP(kThumbAddSpRel, 0xa800, - kFmtBitBlt, 10, 8, kFmtUnused, -1, -1, kFmtBitBlt, 7, 0, + kFmtBitBlt, 10, 
8, kFmtSkip, -1, -1, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF_SP | REG_USE_SP, - "add", "!0C, sp, #!2E", 2), + "add", "!0C, sp, #!2E", 2, kFixupNone), ENCODING_MAP(kThumbAddSpI7, 0xb000, kFmtBitBlt, 6, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_UNARY_OP | REG_DEF_SP | REG_USE_SP, - "add", "sp, #!0d*4", 2), + "add", "sp, #!0d*4", 2, kFixupNone), ENCODING_MAP(kThumbAndRR, 0x4000, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES, - "ands", "!0C, !1C", 2), + "ands", "!0C, !1C", 2, kFixupNone), ENCODING_MAP(kThumbAsrRRI5, 0x1000, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, - "asrs", "!0C, !1C, #!2d", 2), + "asrs", "!0C, !1C, #!2d", 2, kFixupNone), ENCODING_MAP(kThumbAsrRR, 0x4100, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES, - "asrs", "!0C, !1C", 2), + "asrs", "!0C, !1C", 2, kFixupNone), ENCODING_MAP(kThumbBCond, 0xd000, kFmtBitBlt, 7, 0, kFmtBitBlt, 11, 8, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | USES_CCODES | - NEEDS_FIXUP, "b!1c", "!0t", 2), + NEEDS_FIXUP, "b!1c", "!0t", 2, kFixupCondBranch), ENCODING_MAP(kThumbBUncond, 0xe000, kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | NEEDS_FIXUP, - "b", "!0t", 2), + "b", "!0t", 2, kFixupT1Branch), ENCODING_MAP(kThumbBicRR, 0x4380, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES, - "bics", "!0C, !1C", 2), + "bics", "!0C, !1C", 2, kFixupNone), ENCODING_MAP(kThumbBkpt, 0xbe00, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH, - "bkpt", "!0d", 2), + "bkpt", "!0d", 2, kFixupNone), ENCODING_MAP(kThumbBlx1, 0xf000, kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | REG_DEF_LR | - NEEDS_FIXUP, "blx_1", "!0u", 2), + NEEDS_FIXUP, "blx_1", "!0u", 2, kFixupBlx1), ENCODING_MAP(kThumbBlx2, 0xe800, kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | REG_DEF_LR | - NEEDS_FIXUP, "blx_2", "!0v", 2), + NEEDS_FIXUP, "blx_2", "!0v", 2, kFixupLabel), ENCODING_MAP(kThumbBl1, 0xf000, kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_DEF_LR, - "bl_1", "!0u", 2), + kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_DEF_LR | NEEDS_FIXUP, + "bl_1", "!0u", 2, kFixupBl1), ENCODING_MAP(kThumbBl2, 0xf800, kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, - kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_DEF_LR, - "bl_2", "!0v", 2), + kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_DEF_LR | NEEDS_FIXUP, + "bl_2", "!0v", 2, kFixupLabel), ENCODING_MAP(kThumbBlxR, 0x4780, kFmtBitBlt, 6, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_UNARY_OP | REG_USE0 | IS_BRANCH | REG_DEF_LR, - "blx", "!0C", 2), + "blx", "!0C", 2, kFixupNone), ENCODING_MAP(kThumbBx, 0x4700, kFmtBitBlt, 6, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH, - "bx", "!0C", 2), + "bx", "!0C", 2, kFixupNone), ENCODING_MAP(kThumbCmnRR, 0x42c0, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES, - "cmn", "!0C, !1C", 2), + "cmn", "!0C, !1C", 2, kFixupNone), 
ENCODING_MAP(kThumbCmpRI8, 0x2800, kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | SETS_CCODES, - "cmp", "!0C, #!1d", 2), + "cmp", "!0C, #!1d", 2, kFixupNone), ENCODING_MAP(kThumbCmpRR, 0x4280, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES, - "cmp", "!0C, !1C", 2), + "cmp", "!0C, !1C", 2, kFixupNone), ENCODING_MAP(kThumbCmpLH, 0x4540, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES, - "cmp", "!0C, !1C", 2), + "cmp", "!0C, !1C", 2, kFixupNone), ENCODING_MAP(kThumbCmpHL, 0x4580, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES, - "cmp", "!0C, !1C", 2), + "cmp", "!0C, !1C", 2, kFixupNone), ENCODING_MAP(kThumbCmpHH, 0x45c0, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES, - "cmp", "!0C, !1C", 2), + "cmp", "!0C, !1C", 2, kFixupNone), ENCODING_MAP(kThumbEorRR, 0x4040, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES, - "eors", "!0C, !1C", 2), + "eors", "!0C, !1C", 2, kFixupNone), ENCODING_MAP(kThumbLdmia, 0xc800, kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE0 | REG_DEF_LIST1 | IS_LOAD, - "ldmia", "!0C!!, <!1R>", 2), + "ldmia", "!0C!!, <!1R>", 2, kFixupNone), ENCODING_MAP(kThumbLdrRRI5, 0x6800, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, - "ldr", "!0C, [!1C, #!2E]", 2), + "ldr", "!0C, [!1C, #!2E]", 2, kFixupNone), ENCODING_MAP(kThumbLdrRRR, 0x5800, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD, - "ldr", "!0C, [!1C, !2C]", 2), + "ldr", "!0C, [!1C, !2C]", 2, kFixupNone), ENCODING_MAP(kThumbLdrPcRel, 0x4800, kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | REG_USE_PC - | IS_LOAD | NEEDS_FIXUP, "ldr", "!0C, [pc, #!1E]", 2), + | IS_LOAD | NEEDS_FIXUP, "ldr", "!0C, [pc, #!1E]", 2, kFixupLoad), ENCODING_MAP(kThumbLdrSpRel, 0x9800, - kFmtBitBlt, 10, 8, kFmtUnused, -1, -1, kFmtBitBlt, 7, 0, + kFmtBitBlt, 10, 8, kFmtSkip, -1, -1, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | REG_USE_SP - | IS_LOAD, "ldr", "!0C, [sp, #!2E]", 2), + | IS_LOAD, "ldr", "!0C, [sp, #!2E]", 2, kFixupNone), ENCODING_MAP(kThumbLdrbRRI5, 0x7800, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, - "ldrb", "!0C, [!1C, #2d]", 2), + "ldrb", "!0C, [!1C, #2d]", 2, kFixupNone), ENCODING_MAP(kThumbLdrbRRR, 0x5c00, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD, - "ldrb", "!0C, [!1C, !2C]", 2), + "ldrb", "!0C, [!1C, !2C]", 2, kFixupNone), ENCODING_MAP(kThumbLdrhRRI5, 0x8800, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, - "ldrh", "!0C, [!1C, #!2F]", 2), + "ldrh", "!0C, [!1C, #!2F]", 2, kFixupNone), ENCODING_MAP(kThumbLdrhRRR, 0x5a00, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD, - "ldrh", "!0C, [!1C, !2C]", 2), + "ldrh", "!0C, [!1C, !2C]", 2, kFixupNone), ENCODING_MAP(kThumbLdrsbRRR, 0x5600, kFmtBitBlt, 2, 0, 
kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD, - "ldrsb", "!0C, [!1C, !2C]", 2), + "ldrsb", "!0C, [!1C, !2C]", 2, kFixupNone), ENCODING_MAP(kThumbLdrshRRR, 0x5e00, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD, - "ldrsh", "!0C, [!1C, !2C]", 2), + "ldrsh", "!0C, [!1C, !2C]", 2, kFixupNone), ENCODING_MAP(kThumbLslRRI5, 0x0000, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, - "lsls", "!0C, !1C, #!2d", 2), + "lsls", "!0C, !1C, #!2d", 2, kFixupNone), ENCODING_MAP(kThumbLslRR, 0x4080, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES, - "lsls", "!0C, !1C", 2), + "lsls", "!0C, !1C", 2, kFixupNone), ENCODING_MAP(kThumbLsrRRI5, 0x0800, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, - "lsrs", "!0C, !1C, #!2d", 2), + "lsrs", "!0C, !1C, #!2d", 2, kFixupNone), ENCODING_MAP(kThumbLsrRR, 0x40c0, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES, - "lsrs", "!0C, !1C", 2), + "lsrs", "!0C, !1C", 2, kFixupNone), ENCODING_MAP(kThumbMovImm, 0x2000, kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | SETS_CCODES, - "movs", "!0C, #!1d", 2), + "movs", "!0C, #!1d", 2, kFixupNone), ENCODING_MAP(kThumbMovRR, 0x1c00, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | SETS_CCODES, - "movs", "!0C, !1C", 2), + "movs", "!0C, !1C", 2, kFixupNone), ENCODING_MAP(kThumbMovRR_H2H, 0x46c0, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "mov", "!0C, !1C", 2), + "mov", "!0C, !1C", 2, kFixupNone), ENCODING_MAP(kThumbMovRR_H2L, 0x4640, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "mov", "!0C, !1C", 2), + "mov", "!0C, !1C", 2, kFixupNone), ENCODING_MAP(kThumbMovRR_L2H, 0x4680, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "mov", "!0C, !1C", 2), + "mov", "!0C, !1C", 2, kFixupNone), ENCODING_MAP(kThumbMul, 0x4340, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES, - "muls", "!0C, !1C", 2), + "muls", "!0C, !1C", 2, kFixupNone), ENCODING_MAP(kThumbMvn, 0x43c0, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | SETS_CCODES, - "mvns", "!0C, !1C", 2), + "mvns", "!0C, !1C", 2, kFixupNone), ENCODING_MAP(kThumbNeg, 0x4240, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | SETS_CCODES, - "negs", "!0C, !1C", 2), + "negs", "!0C, !1C", 2, kFixupNone), ENCODING_MAP(kThumbOrr, 0x4300, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES, - "orrs", "!0C, !1C", 2), + "orrs", "!0C, !1C", 2, kFixupNone), ENCODING_MAP(kThumbPop, 0xbc00, kFmtBitBlt, 8, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF_LIST0 - | IS_LOAD, "pop", "<!0R>", 2), + | IS_LOAD, "pop", "<!0R>", 2, kFixupNone), ENCODING_MAP(kThumbPush, 0xb400, kFmtBitBlt, 8, 0, kFmtUnused, -1, -1, 
kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE_LIST0 - | IS_STORE, "push", "<!0R>", 2), + | IS_STORE, "push", "<!0R>", 2, kFixupNone), + ENCODING_MAP(kThumbRev, 0xba00, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0_USE1, + "rev", "!0C, !1C", 2, kFixupNone), + ENCODING_MAP(kThumbRevsh, 0xbac0, + kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, + kFmtUnused, -1, -1, + IS_BINARY_OP | REG_DEF0_USE1, + "rev", "!0C, !1C", 2, kFixupNone), ENCODING_MAP(kThumbRorRR, 0x41c0, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES, - "rors", "!0C, !1C", 2), + "rors", "!0C, !1C", 2, kFixupNone), ENCODING_MAP(kThumbSbc, 0x4180, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01 | USES_CCODES | SETS_CCODES, - "sbcs", "!0C, !1C", 2), + "sbcs", "!0C, !1C", 2, kFixupNone), ENCODING_MAP(kThumbStmia, 0xc000, kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | REG_USE0 | REG_USE_LIST1 | IS_STORE, - "stmia", "!0C!!, <!1R>", 2), + "stmia", "!0C!!, <!1R>", 2, kFixupNone), ENCODING_MAP(kThumbStrRRI5, 0x6000, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, - "str", "!0C, [!1C, #!2E]", 2), + "str", "!0C, [!1C, #!2E]", 2, kFixupNone), ENCODING_MAP(kThumbStrRRR, 0x5000, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE012 | IS_STORE, - "str", "!0C, [!1C, !2C]", 2), + "str", "!0C, [!1C, !2C]", 2, kFixupNone), ENCODING_MAP(kThumbStrSpRel, 0x9000, - kFmtBitBlt, 10, 8, kFmtUnused, -1, -1, kFmtBitBlt, 7, 0, + kFmtBitBlt, 10, 8, kFmtSkip, -1, -1, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE0 | REG_USE_SP - | IS_STORE, "str", "!0C, [sp, #!2E]", 2), + | IS_STORE, "str", "!0C, [sp, #!2E]", 2, kFixupNone), ENCODING_MAP(kThumbStrbRRI5, 0x7000, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, - "strb", "!0C, [!1C, #!2d]", 2), + "strb", "!0C, [!1C, #!2d]", 2, kFixupNone), ENCODING_MAP(kThumbStrbRRR, 0x5400, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE012 | IS_STORE, - "strb", "!0C, [!1C, !2C]", 2), + "strb", "!0C, [!1C, !2C]", 2, kFixupNone), ENCODING_MAP(kThumbStrhRRI5, 0x8000, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, - "strh", "!0C, [!1C, #!2F]", 2), + "strh", "!0C, [!1C, #!2F]", 2, kFixupNone), ENCODING_MAP(kThumbStrhRRR, 0x5200, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE012 | IS_STORE, - "strh", "!0C, [!1C, !2C]", 2), + "strh", "!0C, [!1C, !2C]", 2, kFixupNone), ENCODING_MAP(kThumbSubRRI3, 0x1e00, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, - "subs", "!0C, !1C, #!2d", 2), + "subs", "!0C, !1C, #!2d", 2, kFixupNone), ENCODING_MAP(kThumbSubRI8, 0x3800, kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE0 | SETS_CCODES, - "subs", "!0C, #!1d", 2), + "subs", "!0C, #!1d", 2, kFixupNone), ENCODING_MAP(kThumbSubRRR, 0x1a00, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | SETS_CCODES, - "subs", "!0C, 
!1C, !2C", 2), + "subs", "!0C, !1C, !2C", 2, kFixupNone), ENCODING_MAP(kThumbSubSpI7, 0xb080, kFmtBitBlt, 6, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_UNARY_OP | REG_DEF_SP | REG_USE_SP, - "sub", "sp, #!0d*4", 2), + "sub", "sp, #!0d*4", 2, kFixupNone), ENCODING_MAP(kThumbSwi, 0xdf00, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH, - "swi", "!0d", 2), + "swi", "!0d", 2, kFixupNone), ENCODING_MAP(kThumbTst, 0x4200, kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_UNARY_OP | REG_USE01 | SETS_CCODES, - "tst", "!0C, !1C", 2), + "tst", "!0C, !1C", 2, kFixupNone), ENCODING_MAP(kThumb2Vldrs, 0xed900a00, kFmtSfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD | - REG_DEF_LR | NEEDS_FIXUP, "vldr", "!0s, [!1C, #!2E]", 4), + REG_DEF_LR | NEEDS_FIXUP, "vldr", "!0s, [!1C, #!2E]", 4, kFixupVLoad), ENCODING_MAP(kThumb2Vldrd, 0xed900b00, kFmtDfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD | - REG_DEF_LR | NEEDS_FIXUP, "vldr", "!0S, [!1C, #!2E]", 4), + REG_DEF_LR | NEEDS_FIXUP, "vldr", "!0S, [!1C, #!2E]", 4, kFixupVLoad), ENCODING_MAP(kThumb2Vmuls, 0xee200a00, kFmtSfp, 22, 12, kFmtSfp, 7, 16, kFmtSfp, 5, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, - "vmuls", "!0s, !1s, !2s", 4), + "vmuls", "!0s, !1s, !2s", 4, kFixupNone), ENCODING_MAP(kThumb2Vmuld, 0xee200b00, kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, - "vmuld", "!0S, !1S, !2S", 4), + "vmuld", "!0S, !1S, !2S", 4, kFixupNone), ENCODING_MAP(kThumb2Vstrs, 0xed800a00, kFmtSfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, - "vstr", "!0s, [!1C, #!2E]", 4), + "vstr", "!0s, [!1C, #!2E]", 4, kFixupNone), ENCODING_MAP(kThumb2Vstrd, 0xed800b00, kFmtDfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, - "vstr", "!0S, [!1C, #!2E]", 4), + "vstr", "!0S, [!1C, #!2E]", 4, kFixupNone), ENCODING_MAP(kThumb2Vsubs, 0xee300a40, kFmtSfp, 22, 12, kFmtSfp, 7, 16, kFmtSfp, 5, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, - "vsub", "!0s, !1s, !2s", 4), + "vsub", "!0s, !1s, !2s", 4, kFixupNone), ENCODING_MAP(kThumb2Vsubd, 0xee300b40, kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, - "vsub", "!0S, !1S, !2S", 4), + "vsub", "!0S, !1S, !2S", 4, kFixupNone), ENCODING_MAP(kThumb2Vadds, 0xee300a00, kFmtSfp, 22, 12, kFmtSfp, 7, 16, kFmtSfp, 5, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, - "vadd", "!0s, !1s, !2s", 4), + "vadd", "!0s, !1s, !2s", 4, kFixupNone), ENCODING_MAP(kThumb2Vaddd, 0xee300b00, kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, - "vadd", "!0S, !1S, !2S", 4), + "vadd", "!0S, !1S, !2S", 4, kFixupNone), ENCODING_MAP(kThumb2Vdivs, 0xee800a00, kFmtSfp, 22, 12, kFmtSfp, 7, 16, kFmtSfp, 5, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, - "vdivs", "!0s, !1s, !2s", 4), + "vdivs", "!0s, !1s, !2s", 4, kFixupNone), ENCODING_MAP(kThumb2Vdivd, 0xee800b00, kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, - "vdivd", "!0S, !1S, !2S", 4), + "vdivd", "!0S, !1S, !2S", 4, kFixupNone), ENCODING_MAP(kThumb2VcvtIF, 0xeeb80ac0, kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, 
IS_BINARY_OP | REG_DEF0_USE1, - "vcvt.f32", "!0s, !1s", 4), + "vcvt.f32", "!0s, !1s", 4, kFixupNone), ENCODING_MAP(kThumb2VcvtID, 0xeeb80bc0, kFmtDfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "vcvt.f64", "!0S, !1s", 4), + "vcvt.f64", "!0S, !1s", 4, kFixupNone), ENCODING_MAP(kThumb2VcvtFI, 0xeebd0ac0, kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "vcvt.s32.f32 ", "!0s, !1s", 4), + "vcvt.s32.f32 ", "!0s, !1s", 4, kFixupNone), ENCODING_MAP(kThumb2VcvtDI, 0xeebd0bc0, kFmtSfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "vcvt.s32.f64 ", "!0s, !1S", 4), + "vcvt.s32.f64 ", "!0s, !1S", 4, kFixupNone), ENCODING_MAP(kThumb2VcvtFd, 0xeeb70ac0, kFmtDfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "vcvt.f64.f32 ", "!0S, !1s", 4), + "vcvt.f64.f32 ", "!0S, !1s", 4, kFixupNone), ENCODING_MAP(kThumb2VcvtDF, 0xeeb70bc0, kFmtSfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "vcvt.f32.f64 ", "!0s, !1S", 4), + "vcvt.f32.f64 ", "!0s, !1S", 4, kFixupNone), ENCODING_MAP(kThumb2Vsqrts, 0xeeb10ac0, kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "vsqrt.f32 ", "!0s, !1s", 4), + "vsqrt.f32 ", "!0s, !1s", 4, kFixupNone), ENCODING_MAP(kThumb2Vsqrtd, 0xeeb10bc0, kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "vsqrt.f64 ", "!0S, !1S", 4), + "vsqrt.f64 ", "!0S, !1S", 4, kFixupNone), ENCODING_MAP(kThumb2MovImmShift, 0xf04f0000, /* no setflags encoding */ kFmtBitBlt, 11, 8, kFmtModImm, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0, - "mov", "!0C, #!1m", 4), + "mov", "!0C, #!1m", 4, kFixupNone), ENCODING_MAP(kThumb2MovImm16, 0xf2400000, kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0, - "mov", "!0C, #!1M", 4), + "mov", "!0C, #!1M", 4, kFixupNone), ENCODING_MAP(kThumb2StrRRI12, 0xf8c00000, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, - "str", "!0C, [!1C, #!2d]", 4), + "str", "!0C, [!1C, #!2d]", 4, kFixupNone), ENCODING_MAP(kThumb2LdrRRI12, 0xf8d00000, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, - "ldr", "!0C, [!1C, #!2d]", 4), + "ldr", "!0C, [!1C, #!2d]", 4, kFixupNone), ENCODING_MAP(kThumb2StrRRI8Predec, 0xf8400c00, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 8, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, - "str", "!0C, [!1C, #-!2d]", 4), + "str", "!0C, [!1C, #-!2d]", 4, kFixupNone), ENCODING_MAP(kThumb2LdrRRI8Predec, 0xf8500c00, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 8, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, - "ldr", "!0C, [!1C, #-!2d]", 4), + "ldr", "!0C, [!1C, #-!2d]", 4, kFixupNone), ENCODING_MAP(kThumb2Cbnz, 0xb900, /* Note: does not affect flags */ kFmtBitBlt, 2, 0, kFmtImm6, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | IS_BRANCH | - NEEDS_FIXUP, "cbnz", "!0C,!1t", 2), + NEEDS_FIXUP, "cbnz", "!0C,!1t", 2, kFixupCBxZ), ENCODING_MAP(kThumb2Cbz, 0xb100, /* Note: does not affect flags */ kFmtBitBlt, 2, 0, kFmtImm6, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | IS_BRANCH | - NEEDS_FIXUP, "cbz", "!0C,!1t", 2), 
+ NEEDS_FIXUP, "cbz", "!0C,!1t", 2, kFixupCBxZ), ENCODING_MAP(kThumb2AddRRI12, 0xf2000000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtImm12, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,/* Note: doesn't affect flags */ - "add", "!0C,!1C,#!2d", 4), + "add", "!0C,!1C,#!2d", 4, kFixupNone), ENCODING_MAP(kThumb2MovRR, 0xea4f0000, /* no setflags encoding */ kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "mov", "!0C, !1C", 4), + "mov", "!0C, !1C", 4, kFixupNone), ENCODING_MAP(kThumb2Vmovs, 0xeeb00a40, kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "vmov.f32 ", " !0s, !1s", 4), + "vmov.f32 ", " !0s, !1s", 4, kFixupNone), ENCODING_MAP(kThumb2Vmovd, 0xeeb00b40, kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "vmov.f64 ", " !0S, !1S", 4), + "vmov.f64 ", " !0S, !1S", 4, kFixupNone), ENCODING_MAP(kThumb2Ldmia, 0xe8900000, kFmtBitBlt, 19, 16, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE0 | REG_DEF_LIST1 | IS_LOAD, - "ldmia", "!0C!!, <!1R>", 4), + "ldmia", "!0C!!, <!1R>", 4, kFixupNone), ENCODING_MAP(kThumb2Stmia, 0xe8800000, kFmtBitBlt, 19, 16, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE0 | REG_USE_LIST1 | IS_STORE, - "stmia", "!0C!!, <!1R>", 4), + "stmia", "!0C!!, <!1R>", 4, kFixupNone), ENCODING_MAP(kThumb2AddRRR, 0xeb100000, /* setflags encoding */ kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES, - "adds", "!0C, !1C, !2C!3H", 4), + "adds", "!0C, !1C, !2C!3H", 4, kFixupNone), ENCODING_MAP(kThumb2SubRRR, 0xebb00000, /* setflags enconding */ kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES, - "subs", "!0C, !1C, !2C!3H", 4), + "subs", "!0C, !1C, !2C!3H", 4, kFixupNone), ENCODING_MAP(kThumb2SbcRRR, 0xeb700000, /* setflags encoding */ kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12 | USES_CCODES | SETS_CCODES, - "sbcs", "!0C, !1C, !2C!3H", 4), + "sbcs", "!0C, !1C, !2C!3H", 4, kFixupNone), ENCODING_MAP(kThumb2CmpRR, 0xebb00f00, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | SETS_CCODES, - "cmp", "!0C, !1C", 4), + "cmp", "!0C, !1C", 4, kFixupNone), ENCODING_MAP(kThumb2SubRRI12, 0xf2a00000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtImm12, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,/* Note: doesn't affect flags */ - "sub", "!0C,!1C,#!2d", 4), + "sub", "!0C,!1C,#!2d", 4, kFixupNone), ENCODING_MAP(kThumb2MvnImm12, 0xf06f0000, /* no setflags encoding */ kFmtBitBlt, 11, 8, kFmtImm12, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0, - "mvn", "!0C, #!1n", 4), + "mvn", "!0C, #!1n", 4, kFixupNone), ENCODING_MAP(kThumb2Sel, 0xfaa0f080, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | USES_CCODES, - "sel", "!0C, !1C, !2C", 4), + "sel", "!0C, !1C, !2C", 4, kFixupNone), ENCODING_MAP(kThumb2Ubfx, 0xf3c00000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtLsb, -1, -1, kFmtBWidth, 4, 0, IS_QUAD_OP | REG_DEF0_USE1, - "ubfx", "!0C, !1C, #!2d, #!3d", 4), + "ubfx", "!0C, !1C, #!2d, #!3d", 4, kFixupNone), ENCODING_MAP(kThumb2Sbfx, 0xf3400000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtLsb, -1, -1, kFmtBWidth, 4, 
0, IS_QUAD_OP | REG_DEF0_USE1, - "sbfx", "!0C, !1C, #!2d, #!3d", 4), + "sbfx", "!0C, !1C, #!2d, #!3d", 4, kFixupNone), ENCODING_MAP(kThumb2LdrRRR, 0xf8500000, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD, - "ldr", "!0C, [!1C, !2C, LSL #!3d]", 4), + "ldr", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone), ENCODING_MAP(kThumb2LdrhRRR, 0xf8300000, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD, - "ldrh", "!0C, [!1C, !2C, LSL #!3d]", 4), + "ldrh", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone), ENCODING_MAP(kThumb2LdrshRRR, 0xf9300000, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD, - "ldrsh", "!0C, [!1C, !2C, LSL #!3d]", 4), + "ldrsh", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone), ENCODING_MAP(kThumb2LdrbRRR, 0xf8100000, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD, - "ldrb", "!0C, [!1C, !2C, LSL #!3d]", 4), + "ldrb", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone), ENCODING_MAP(kThumb2LdrsbRRR, 0xf9100000, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD, - "ldrsb", "!0C, [!1C, !2C, LSL #!3d]", 4), + "ldrsb", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone), ENCODING_MAP(kThumb2StrRRR, 0xf8400000, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_USE012 | IS_STORE, - "str", "!0C, [!1C, !2C, LSL #!3d]", 4), + "str", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone), ENCODING_MAP(kThumb2StrhRRR, 0xf8200000, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_USE012 | IS_STORE, - "strh", "!0C, [!1C, !2C, LSL #!3d]", 4), + "strh", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone), ENCODING_MAP(kThumb2StrbRRR, 0xf8000000, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_USE012 | IS_STORE, - "strb", "!0C, [!1C, !2C, LSL #!3d]", 4), + "strb", "!0C, [!1C, !2C, LSL #!3d]", 4, kFixupNone), ENCODING_MAP(kThumb2LdrhRRI12, 0xf8b00000, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, - "ldrh", "!0C, [!1C, #!2d]", 4), + "ldrh", "!0C, [!1C, #!2d]", 4, kFixupNone), ENCODING_MAP(kThumb2LdrshRRI12, 0xf9b00000, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, - "ldrsh", "!0C, [!1C, #!2d]", 4), + "ldrsh", "!0C, [!1C, #!2d]", 4, kFixupNone), ENCODING_MAP(kThumb2LdrbRRI12, 0xf8900000, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, - "ldrb", "!0C, [!1C, #!2d]", 4), + "ldrb", "!0C, [!1C, #!2d]", 4, kFixupNone), ENCODING_MAP(kThumb2LdrsbRRI12, 0xf9900000, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, - "ldrsb", "!0C, [!1C, #!2d]", 4), + "ldrsb", "!0C, [!1C, #!2d]", 4, kFixupNone), ENCODING_MAP(kThumb2StrhRRI12, 0xf8a00000, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, - "strh", "!0C, [!1C, #!2d]", 4), + "strh", "!0C, [!1C, #!2d]", 4, kFixupNone), ENCODING_MAP(kThumb2StrbRRI12, 0xf8800000, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE, - "strb", "!0C, [!1C, 
#!2d]", 4), + "strb", "!0C, [!1C, #!2d]", 4, kFixupNone), ENCODING_MAP(kThumb2Pop, 0xe8bd0000, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF_LIST0 - | IS_LOAD | NEEDS_FIXUP, "pop", "<!0R>", 4), + | IS_LOAD | NEEDS_FIXUP, "pop", "<!0R>", 4, kFixupPushPop), ENCODING_MAP(kThumb2Push, 0xe92d0000, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE_LIST0 - | IS_STORE | NEEDS_FIXUP, "push", "<!0R>", 4), + | IS_STORE | NEEDS_FIXUP, "push", "<!0R>", 4, kFixupPushPop), ENCODING_MAP(kThumb2CmpRI12, 0xf1b00f00, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | SETS_CCODES, - "cmp", "!0C, #!1m", 4), + "cmp", "!0C, #!1m", 4, kFixupNone), ENCODING_MAP(kThumb2AdcRRR, 0xeb500000, /* setflags encoding */ kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES, - "adcs", "!0C, !1C, !2C!3H", 4), + "adcs", "!0C, !1C, !2C!3H", 4, kFixupNone), ENCODING_MAP(kThumb2AndRRR, 0xea000000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12, - "and", "!0C, !1C, !2C!3H", 4), + "and", "!0C, !1C, !2C!3H", 4, kFixupNone), ENCODING_MAP(kThumb2BicRRR, 0xea200000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12, - "bic", "!0C, !1C, !2C!3H", 4), + "bic", "!0C, !1C, !2C!3H", 4, kFixupNone), ENCODING_MAP(kThumb2CmnRR, 0xeb000000, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, - "cmn", "!0C, !1C, shift !2d", 4), + "cmn", "!0C, !1C, shift !2d", 4, kFixupNone), ENCODING_MAP(kThumb2EorRRR, 0xea800000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12, - "eor", "!0C, !1C, !2C!3H", 4), + "eor", "!0C, !1C, !2C!3H", 4, kFixupNone), ENCODING_MAP(kThumb2MulRRR, 0xfb00f000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, - "mul", "!0C, !1C, !2C", 4), + "mul", "!0C, !1C, !2C", 4, kFixupNone), + ENCODING_MAP(kThumb2SdivRRR, 0xfb90f0f0, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "sdiv", "!0C, !1C, !2C", 4, kFixupNone), + ENCODING_MAP(kThumb2UdivRRR, 0xfbb0f0f0, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, + "udiv", "!0C, !1C, !2C", 4, kFixupNone), ENCODING_MAP(kThumb2MnvRR, 0xea6f0000, kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, - "mvn", "!0C, !1C, shift !2d", 4), + "mvn", "!0C, !1C, shift !2d", 4, kFixupNone), ENCODING_MAP(kThumb2RsubRRI8, 0xf1d00000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, - "rsb", "!0C,!1C,#!2m", 4), + "rsb", "!0C,!1C,#!2m", 4, kFixupNone), ENCODING_MAP(kThumb2NegRR, 0xf1d00000, /* instance of rsub */ kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1 | SETS_CCODES, - "neg", "!0C,!1C", 4), + "neg", "!0C,!1C", 4, kFixupNone), ENCODING_MAP(kThumb2OrrRRR, 0xea400000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12, - "orr", "!0C, !1C, !2C!3H", 4), + "orr", "!0C, 
!1C, !2C!3H", 4, kFixupNone), ENCODING_MAP(kThumb2TstRR, 0xea100f00, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | SETS_CCODES, - "tst", "!0C, !1C, shift !2d", 4), + "tst", "!0C, !1C, shift !2d", 4, kFixupNone), ENCODING_MAP(kThumb2LslRRR, 0xfa00f000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, - "lsl", "!0C, !1C, !2C", 4), + "lsl", "!0C, !1C, !2C", 4, kFixupNone), ENCODING_MAP(kThumb2LsrRRR, 0xfa20f000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, - "lsr", "!0C, !1C, !2C", 4), + "lsr", "!0C, !1C, !2C", 4, kFixupNone), ENCODING_MAP(kThumb2AsrRRR, 0xfa40f000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, - "asr", "!0C, !1C, !2C", 4), + "asr", "!0C, !1C, !2C", 4, kFixupNone), ENCODING_MAP(kThumb2RorRRR, 0xfa60f000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, - "ror", "!0C, !1C, !2C", 4), + "ror", "!0C, !1C, !2C", 4, kFixupNone), ENCODING_MAP(kThumb2LslRRI5, 0xea4f0000, kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift5, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, - "lsl", "!0C, !1C, #!2d", 4), + "lsl", "!0C, !1C, #!2d", 4, kFixupNone), ENCODING_MAP(kThumb2LsrRRI5, 0xea4f0010, kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift5, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, - "lsr", "!0C, !1C, #!2d", 4), + "lsr", "!0C, !1C, #!2d", 4, kFixupNone), ENCODING_MAP(kThumb2AsrRRI5, 0xea4f0020, kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift5, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, - "asr", "!0C, !1C, #!2d", 4), + "asr", "!0C, !1C, #!2d", 4, kFixupNone), ENCODING_MAP(kThumb2RorRRI5, 0xea4f0030, kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift5, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, - "ror", "!0C, !1C, #!2d", 4), + "ror", "!0C, !1C, #!2d", 4, kFixupNone), ENCODING_MAP(kThumb2BicRRI8, 0xf0200000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, - "bic", "!0C, !1C, #!2m", 4), + "bic", "!0C, !1C, #!2m", 4, kFixupNone), ENCODING_MAP(kThumb2AndRRI8, 0xf0000000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, - "and", "!0C, !1C, #!2m", 4), + "and", "!0C, !1C, #!2m", 4, kFixupNone), ENCODING_MAP(kThumb2OrrRRI8, 0xf0400000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, - "orr", "!0C, !1C, #!2m", 4), + "orr", "!0C, !1C, #!2m", 4, kFixupNone), ENCODING_MAP(kThumb2EorRRI8, 0xf0800000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1, - "eor", "!0C, !1C, #!2m", 4), + "eor", "!0C, !1C, #!2m", 4, kFixupNone), ENCODING_MAP(kThumb2AddRRI8, 0xf1100000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, - "adds", "!0C, !1C, #!2m", 4), + "adds", "!0C, !1C, #!2m", 4, kFixupNone), ENCODING_MAP(kThumb2AdcRRI8, 0xf1500000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES | USES_CCODES, - "adcs", "!0C, !1C, #!2m", 4), + "adcs", "!0C, !1C, #!2m", 4, kFixupNone), ENCODING_MAP(kThumb2SubRRI8, 0xf1b00000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, 
-1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, - "subs", "!0C, !1C, #!2m", 4), + "subs", "!0C, !1C, #!2m", 4, kFixupNone), ENCODING_MAP(kThumb2SbcRRI8, 0xf1700000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES | USES_CCODES, - "sbcs", "!0C, !1C, #!2m", 4), + "sbcs", "!0C, !1C, #!2m", 4, kFixupNone), + ENCODING_MAP(kThumb2RevRR, 0xfa90f080, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtUnused, -1, -1, + IS_TERTIARY_OP | REG_DEF0_USE12, // Binary, but rm is stored twice. + "rev", "!0C, !1C", 4, kFixupNone), + ENCODING_MAP(kThumb2RevshRR, 0xfa90f0b0, + kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, + kFmtUnused, -1, -1, + IS_TERTIARY_OP | REG_DEF0_USE12, // Binary, but rm is stored twice. + "revsh", "!0C, !1C", 4, kFixupNone), ENCODING_MAP(kThumb2It, 0xbf00, kFmtBitBlt, 7, 4, kFmtBitBlt, 3, 0, kFmtModImm, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | IS_IT | USES_CCODES, - "it:!1b", "!0c", 2), + "it:!1b", "!0c", 2, kFixupNone), ENCODING_MAP(kThumb2Fmstat, 0xeef1fa10, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, NO_OPERAND | SETS_CCODES, - "fmstat", "", 4), + "fmstat", "", 4, kFixupNone), ENCODING_MAP(kThumb2Vcmpd, 0xeeb40b40, kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01, - "vcmp.f64", "!0S, !1S", 4), + "vcmp.f64", "!0S, !1S", 4, kFixupNone), ENCODING_MAP(kThumb2Vcmps, 0xeeb40a40, kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01, - "vcmp.f32", "!0s, !1s", 4), + "vcmp.f32", "!0s, !1s", 4, kFixupNone), ENCODING_MAP(kThumb2LdrPcRel12, 0xf8df0000, kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD | NEEDS_FIXUP, - "ldr", "!0C, [r15pc, #!1d]", 4), + "ldr", "!0C, [r15pc, #!1d]", 4, kFixupLoad), ENCODING_MAP(kThumb2BCond, 0xf0008000, kFmtBrOffset, -1, -1, kFmtBitBlt, 25, 22, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | USES_CCODES | NEEDS_FIXUP, - "b!1c", "!0t", 4), + "b!1c", "!0t", 4, kFixupCondBranch), ENCODING_MAP(kThumb2Vmovd_RR, 0xeeb00b40, kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "vmov.f64", "!0S, !1S", 4), + "vmov.f64", "!0S, !1S", 4, kFixupNone), ENCODING_MAP(kThumb2Vmovs_RR, 0xeeb00a40, kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "vmov.f32", "!0s, !1s", 4), + "vmov.f32", "!0s, !1s", 4, kFixupNone), ENCODING_MAP(kThumb2Fmrs, 0xee100a10, kFmtBitBlt, 15, 12, kFmtSfp, 7, 16, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "fmrs", "!0C, !1s", 4), + "fmrs", "!0C, !1s", 4, kFixupNone), ENCODING_MAP(kThumb2Fmsr, 0xee000a10, kFmtSfp, 7, 16, kFmtBitBlt, 15, 12, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "fmsr", "!0s, !1C", 4), + "fmsr", "!0s, !1C", 4, kFixupNone), ENCODING_MAP(kThumb2Fmrrd, 0xec500b10, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtDfp, 5, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF01_USE2, - "fmrrd", "!0C, !1C, !2S", 4), + "fmrrd", "!0C, !1C, !2S", 4, kFixupNone), ENCODING_MAP(kThumb2Fmdrr, 0xec400b10, kFmtDfp, 5, 0, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12, - "fmdrr", "!0S, !1C, !2C", 4), + "fmdrr", "!0S, !1C, !2C", 4, kFixupNone), ENCODING_MAP(kThumb2Vabsd, 0xeeb00bc0, kFmtDfp, 22, 12, kFmtDfp, 5, 0, 
kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "vabs.f64", "!0S, !1S", 4), + "vabs.f64", "!0S, !1S", 4, kFixupNone), ENCODING_MAP(kThumb2Vabss, 0xeeb00ac0, kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "vabs.f32", "!0s, !1s", 4), + "vabs.f32", "!0s, !1s", 4, kFixupNone), ENCODING_MAP(kThumb2Vnegd, 0xeeb10b40, kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "vneg.f64", "!0S, !1S", 4), + "vneg.f64", "!0S, !1S", 4, kFixupNone), ENCODING_MAP(kThumb2Vnegs, 0xeeb10a40, kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, - "vneg.f32", "!0s, !1s", 4), + "vneg.f32", "!0s, !1s", 4, kFixupNone), ENCODING_MAP(kThumb2Vmovs_IMM8, 0xeeb00a00, kFmtSfp, 22, 12, kFmtFPImm, 16, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0, - "vmov.f32", "!0s, #0x!1h", 4), + "vmov.f32", "!0s, #0x!1h", 4, kFixupNone), ENCODING_MAP(kThumb2Vmovd_IMM8, 0xeeb00b00, kFmtDfp, 22, 12, kFmtFPImm, 16, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0, - "vmov.f64", "!0S, #0x!1h", 4), + "vmov.f64", "!0S, #0x!1h", 4, kFixupNone), ENCODING_MAP(kThumb2Mla, 0xfb000000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtBitBlt, 15, 12, IS_QUAD_OP | REG_DEF0 | REG_USE1 | REG_USE2 | REG_USE3, - "mla", "!0C, !1C, !2C, !3C", 4), + "mla", "!0C, !1C, !2C, !3C", 4, kFixupNone), ENCODING_MAP(kThumb2Umull, 0xfba00000, kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, IS_QUAD_OP | REG_DEF0 | REG_DEF1 | REG_USE2 | REG_USE3, - "umull", "!0C, !1C, !2C, !3C", 4), + "umull", "!0C, !1C, !2C, !3C", 4, kFixupNone), ENCODING_MAP(kThumb2Ldrex, 0xe8500f00, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, - "ldrex", "!0C, [!1C, #!2E]", 4), + "ldrex", "!0C, [!1C, #!2E]", 4, kFixupNone), ENCODING_MAP(kThumb2Strex, 0xe8400000, kFmtBitBlt, 11, 8, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0, IS_QUAD_OP | REG_DEF0_USE12 | IS_STORE, - "strex", "!0C,!1C, [!2C, #!2E]", 4), + "strex", "!0C,!1C, [!2C, #!2E]", 4, kFixupNone), ENCODING_MAP(kThumb2Clrex, 0xf3bf8f2f, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, NO_OPERAND, - "clrex", "", 4), + "clrex", "", 4, kFixupNone), ENCODING_MAP(kThumb2Bfi, 0xf3600000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtShift5, -1, -1, kFmtBitBlt, 4, 0, IS_QUAD_OP | REG_DEF0_USE1, - "bfi", "!0C,!1C,#!2d,#!3d", 4), + "bfi", "!0C,!1C,#!2d,#!3d", 4, kFixupNone), ENCODING_MAP(kThumb2Bfc, 0xf36f0000, kFmtBitBlt, 11, 8, kFmtShift5, -1, -1, kFmtBitBlt, 4, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0, - "bfc", "!0C,#!1d,#!2d", 4), + "bfc", "!0C,#!1d,#!2d", 4, kFixupNone), ENCODING_MAP(kThumb2Dmb, 0xf3bf8f50, kFmtBitBlt, 3, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_UNARY_OP, - "dmb", "#!0B", 4), + "dmb", "#!0B", 4, kFixupNone), ENCODING_MAP(kThumb2LdrPcReln12, 0xf85f0000, kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD, - "ldr", "!0C, [r15pc, -#!1d]", 4), + "ldr", "!0C, [r15pc, -#!1d]", 4, kFixupNone), ENCODING_MAP(kThumb2Stm, 0xe9000000, kFmtBitBlt, 19, 16, kFmtBitBlt, 12, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | REG_USE_LIST1 | IS_STORE, - "stm", "!0C, <!1R>", 4), + "stm", "!0C, <!1R>", 4, kFixupNone), ENCODING_MAP(kThumbUndefined, 0xde00, 
kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, NO_OPERAND, - "undefined", "", 2), + "undefined", "", 2, kFixupNone), // NOTE: vpop, vpush hard-encoded for s16+ reg list ENCODING_MAP(kThumb2VPopCS, 0xecbd8a00, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF_FPCS_LIST0 - | IS_LOAD, "vpop", "<!0P>", 4), + | IS_LOAD, "vpop", "<!0P>", 4, kFixupNone), ENCODING_MAP(kThumb2VPushCS, 0xed2d8a00, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE_FPCS_LIST0 - | IS_STORE, "vpush", "<!0P>", 4), + | IS_STORE, "vpush", "<!0P>", 4, kFixupNone), ENCODING_MAP(kThumb2Vldms, 0xec900a00, kFmtBitBlt, 19, 16, kFmtSfp, 22, 12, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE0 | REG_DEF_FPCS_LIST2 - | IS_LOAD, "vldms", "!0C, <!2Q>", 4), + | IS_LOAD, "vldms", "!0C, <!2Q>", 4, kFixupNone), ENCODING_MAP(kThumb2Vstms, 0xec800a00, kFmtBitBlt, 19, 16, kFmtSfp, 22, 12, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE0 | REG_USE_FPCS_LIST2 - | IS_STORE, "vstms", "!0C, <!2Q>", 4), + | IS_STORE, "vstms", "!0C, <!2Q>", 4, kFixupNone), ENCODING_MAP(kThumb2BUncond, 0xf0009000, kFmtOff24, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, NO_OPERAND | IS_BRANCH, - "b", "!0t", 4), + "b", "!0t", 4, kFixupT2Branch), ENCODING_MAP(kThumb2MovImm16H, 0xf2c00000, kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | REG_USE0, - "movt", "!0C, #!1M", 4), + "movt", "!0C, #!1M", 4, kFixupNone), ENCODING_MAP(kThumb2AddPCR, 0x4487, kFmtBitBlt, 6, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, - IS_UNARY_OP | REG_USE0 | IS_BRANCH, - "add", "rPC, !0C", 2), + IS_UNARY_OP | REG_USE0 | IS_BRANCH | NEEDS_FIXUP, + "add", "rPC, !0C", 2, kFixupLabel), ENCODING_MAP(kThumb2Adr, 0xf20f0000, kFmtBitBlt, 11, 8, kFmtImm12, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, /* Note: doesn't affect flags */ IS_TERTIARY_OP | REG_DEF0 | NEEDS_FIXUP, - "adr", "!0C,#!1d", 4), + "adr", "!0C,#!1d", 4, kFixupAdr), ENCODING_MAP(kThumb2MovImm16LST, 0xf2400000, kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | NEEDS_FIXUP, - "mov", "!0C, #!1M", 4), + "mov", "!0C, #!1M", 4, kFixupMovImmLST), ENCODING_MAP(kThumb2MovImm16HST, 0xf2c00000, kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | REG_USE0 | NEEDS_FIXUP, - "movt", "!0C, #!1M", 4), + "movt", "!0C, #!1M", 4, kFixupMovImmHST), ENCODING_MAP(kThumb2LdmiaWB, 0xe8b00000, kFmtBitBlt, 19, 16, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE0 | REG_DEF_LIST1 | IS_LOAD, - "ldmia", "!0C!!, <!1R>", 4), + "ldmia", "!0C!!, <!1R>", 4, kFixupNone), ENCODING_MAP(kThumb2SubsRRI12, 0xf1b00000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtImm12, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, - "subs", "!0C,!1C,#!2d", 4), + "subs", "!0C,!1C,#!2d", 4, kFixupNone), ENCODING_MAP(kThumb2OrrRRRs, 0xea500000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES, - "orrs", "!0C, !1C, !2C!3H", 4), + "orrs", "!0C, !1C, !2C!3H", 4, kFixupNone), ENCODING_MAP(kThumb2Push1, 0xf84d0d04, kFmtBitBlt, 15, 12, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE0 - | IS_STORE, 
"push1", "!0C", 4), + | IS_STORE, "push1", "!0C", 4, kFixupNone), ENCODING_MAP(kThumb2Pop1, 0xf85d0b04, kFmtBitBlt, 15, 12, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF0 - | IS_LOAD, "pop1", "!0C", 4), + | IS_LOAD, "pop1", "!0C", 4, kFixupNone), ENCODING_MAP(kThumb2RsubRRR, 0xebd00000, /* setflags encoding */ kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES, - "rsbs", "!0C, !1C, !2C!3H", 4), + "rsbs", "!0C, !1C, !2C!3H", 4, kFixupNone), ENCODING_MAP(kThumb2Smull, 0xfb800000, kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, IS_QUAD_OP | REG_DEF0 | REG_DEF1 | REG_USE2 | REG_USE3, - "smull", "!0C, !1C, !2C, !3C", 4), + "smull", "!0C, !1C, !2C, !3C", 4, kFixupNone), ENCODING_MAP(kThumb2LdrdPcRel8, 0xe9df0000, kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | REG_DEF1 | REG_USE_PC | IS_LOAD | NEEDS_FIXUP, - "ldrd", "!0C, !1C, [pc, #!2E]", 4), + "ldrd", "!0C, !1C, [pc, #!2E]", 4, kFixupLoad), ENCODING_MAP(kThumb2LdrdI8, 0xe9d00000, kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0, IS_QUAD_OP | REG_DEF0 | REG_DEF1 | REG_USE2 | IS_LOAD, - "ldrd", "!0C, !1C, [!2C, #!3E]", 4), + "ldrd", "!0C, !1C, [!2C, #!3E]", 4, kFixupNone), ENCODING_MAP(kThumb2StrdI8, 0xe9c00000, kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0, IS_QUAD_OP | REG_USE0 | REG_USE1 | REG_USE2 | IS_STORE, - "strd", "!0C, !1C, [!2C, #!3E]", 4), + "strd", "!0C, !1C, [!2C, #!3E]", 4, kFixupNone), }; +// new_lir replaces orig_lir in the pcrel_fixup list. +void ArmMir2Lir::ReplaceFixup(LIR* prev_lir, LIR* orig_lir, LIR* new_lir) { + new_lir->u.a.pcrel_next = orig_lir->u.a.pcrel_next; + if (UNLIKELY(prev_lir == NULL)) { + first_fixup_ = new_lir; + } else { + prev_lir->u.a.pcrel_next = new_lir; + } + orig_lir->flags.fixup = kFixupNone; +} + +// new_lir is inserted before orig_lir in the pcrel_fixup list. +void ArmMir2Lir::InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir) { + new_lir->u.a.pcrel_next = orig_lir; + if (UNLIKELY(prev_lir == NULL)) { + first_fixup_ = new_lir; + } else { + DCHECK(prev_lir->u.a.pcrel_next == orig_lir); + prev_lir->u.a.pcrel_next = new_lir; + } +} + /* * The fake NOP of moving r0 to r0 actually will incur data stalls if r0 is * not ready. Since r5FP is not updated often, it is less likely to @@ -997,398 +1047,641 @@ const ArmEncodingMap ArmMir2Lir::EncodingMap[kArmLast] = { */ #define PADDING_MOV_R5_R5 0x1C2D -/* - * Assemble the LIR into binary instruction format. Note that we may - * discover that pc-relative displacements may not fit the selected - * instruction. - */ -AssemblerStatus ArmMir2Lir::AssembleInstructions(uintptr_t start_addr) { - LIR* lir; - AssemblerStatus res = kSuccess; // Assume success - - for (lir = first_lir_insn_; lir != NULL; lir = NEXT_LIR(lir)) { - if (lir->opcode < 0) { - /* 1 means padding is needed */ - if ((lir->opcode == kPseudoPseudoAlign4) && (lir->operands[0] == 1)) { - code_buffer_.push_back(PADDING_MOV_R5_R5 & 0xFF); - code_buffer_.push_back((PADDING_MOV_R5_R5 >> 8) & 0xFF); +void ArmMir2Lir::EncodeLIR(LIR* lir) { + int opcode = lir->opcode; + if (IsPseudoLirOp(opcode)) { + if (UNLIKELY(opcode == kPseudoPseudoAlign4)) { + // Note: size for this opcode will be either 0 or 2 depending on final alignment. 
+ lir->u.a.bytes[0] = (PADDING_MOV_R5_R5 & 0xff); + lir->u.a.bytes[1] = ((PADDING_MOV_R5_R5 >> 8) & 0xff); + lir->flags.size = (lir->offset & 0x2); + } + } else if (LIKELY(!lir->flags.is_nop)) { + const ArmEncodingMap *encoder = &EncodingMap[lir->opcode]; + uint32_t bits = encoder->skeleton; + for (int i = 0; i < 4; i++) { + uint32_t operand; + uint32_t value; + operand = lir->operands[i]; + ArmEncodingKind kind = encoder->field_loc[i].kind; + if (LIKELY(kind == kFmtBitBlt)) { + value = (operand << encoder->field_loc[i].start) & + ((1 << (encoder->field_loc[i].end + 1)) - 1); + bits |= value; + } else { + switch (encoder->field_loc[i].kind) { + case kFmtSkip: + break; // Nothing to do, but continue to next. + case kFmtUnused: + i = 4; // Done, break out of the enclosing loop. + break; + case kFmtFPImm: + value = ((operand & 0xF0) >> 4) << encoder->field_loc[i].end; + value |= (operand & 0x0F) << encoder->field_loc[i].start; + bits |= value; + break; + case kFmtBrOffset: + value = ((operand & 0x80000) >> 19) << 26; + value |= ((operand & 0x40000) >> 18) << 11; + value |= ((operand & 0x20000) >> 17) << 13; + value |= ((operand & 0x1f800) >> 11) << 16; + value |= (operand & 0x007ff); + bits |= value; + break; + case kFmtShift5: + value = ((operand & 0x1c) >> 2) << 12; + value |= (operand & 0x03) << 6; + bits |= value; + break; + case kFmtShift: + value = ((operand & 0x70) >> 4) << 12; + value |= (operand & 0x0f) << 4; + bits |= value; + break; + case kFmtBWidth: + value = operand - 1; + bits |= value; + break; + case kFmtLsb: + value = ((operand & 0x1c) >> 2) << 12; + value |= (operand & 0x03) << 6; + bits |= value; + break; + case kFmtImm6: + value = ((operand & 0x20) >> 5) << 9; + value |= (operand & 0x1f) << 3; + bits |= value; + break; + case kFmtDfp: { + DCHECK(ARM_DOUBLEREG(operand)); + DCHECK_EQ((operand & 0x1), 0U); + uint32_t reg_name = (operand & ARM_FP_REG_MASK) >> 1; + /* Snag the 1-bit slice and position it */ + value = ((reg_name & 0x10) >> 4) << encoder->field_loc[i].end; + /* Extract and position the 4-bit slice */ + value |= (reg_name & 0x0f) << encoder->field_loc[i].start; + bits |= value; + break; + } + case kFmtSfp: + DCHECK(ARM_SINGLEREG(operand)); + /* Snag the 1-bit slice and position it */ + value = (operand & 0x1) << encoder->field_loc[i].end; + /* Extract and position the 4-bit slice */ + value |= ((operand & 0x1e) >> 1) << encoder->field_loc[i].start; + bits |= value; + break; + case kFmtImm12: + case kFmtModImm: + value = ((operand & 0x800) >> 11) << 26; + value |= ((operand & 0x700) >> 8) << 12; + value |= operand & 0x0ff; + bits |= value; + break; + case kFmtImm16: + value = ((operand & 0x0800) >> 11) << 26; + value |= ((operand & 0xf000) >> 12) << 16; + value |= ((operand & 0x0700) >> 8) << 12; + value |= operand & 0x0ff; + bits |= value; + break; + case kFmtOff24: { + uint32_t signbit = (operand >> 31) & 0x1; + uint32_t i1 = (operand >> 22) & 0x1; + uint32_t i2 = (operand >> 21) & 0x1; + uint32_t imm10 = (operand >> 11) & 0x03ff; + uint32_t imm11 = operand & 0x07ff; + uint32_t j1 = (i1 ^ signbit) ? 0 : 1; + uint32_t j2 = (i2 ^ signbit) ? 
0 : 1; + value = (signbit << 26) | (j1 << 13) | (j2 << 11) | (imm10 << 16) | + imm11; + bits |= value; + } + break; + default: + LOG(FATAL) << "Bad fmt:" << encoder->field_loc[i].kind; + } } - continue; } - - if (lir->flags.is_nop) { - continue; + if (encoder->size == 4) { + lir->u.a.bytes[0] = ((bits >> 16) & 0xff); + lir->u.a.bytes[1] = ((bits >> 24) & 0xff); + lir->u.a.bytes[2] = (bits & 0xff); + lir->u.a.bytes[3] = ((bits >> 8) & 0xff); + } else { + DCHECK_EQ(encoder->size, 2); + lir->u.a.bytes[0] = (bits & 0xff); + lir->u.a.bytes[1] = ((bits >> 8) & 0xff); } + lir->flags.size = encoder->size; + } +} - /* - * For PC-relative displacements we won't know if the - * selected instruction will work until late (i.e. - now). - * If something doesn't fit, we must replace the short-form - * operation with a longer-form one. Note, though, that this - * can change code we've already processed, so we'll need to - * re-calculate offsets and restart. To limit the number of - * restarts, the entire list will be scanned and patched. - * Of course, the patching itself may cause new overflows so this - * is an iterative process. - */ - if (lir->flags.pcRelFixup) { - if (lir->opcode == kThumbLdrPcRel || - lir->opcode == kThumb2LdrPcRel12 || - lir->opcode == kThumbAddPcRel || - lir->opcode == kThumb2LdrdPcRel8 || - ((lir->opcode == kThumb2Vldrd) && (lir->operands[1] == r15pc)) || - ((lir->opcode == kThumb2Vldrs) && (lir->operands[1] == r15pc))) { - /* - * PC-relative loads are mostly used to load immediates - * that are too large to materialize directly in one shot. - * However, if the load displacement exceeds the limit, - * we revert to a multiple-instruction materialization sequence. - */ - LIR *lir_target = lir->target; - uintptr_t pc = (lir->offset + 4) & ~3; - uintptr_t target = lir_target->offset; - int delta = target - pc; - if (delta & 0x3) { - LOG(FATAL) << "PC-rel offset not multiple of 4: " << delta; - } - // First, a sanity check for cases we shouldn't see now - if (((lir->opcode == kThumbAddPcRel) && (delta > 1020)) || - ((lir->opcode == kThumbLdrPcRel) && (delta > 1020))) { - // Shouldn't happen in current codegen. - LOG(FATAL) << "Unexpected pc-rel offset " << delta; - } - // Now, check for the difficult cases - if (((lir->opcode == kThumb2LdrPcRel12) && (delta > 4091)) || - ((lir->opcode == kThumb2LdrdPcRel8) && (delta > 1020)) || - ((lir->opcode == kThumb2Vldrs) && (delta > 1020)) || - ((lir->opcode == kThumb2Vldrd) && (delta > 1020))) { +// Assemble the LIR into binary instruction format. +void ArmMir2Lir::AssembleLIR() { + LIR* lir; + LIR* prev_lir; + cu_->NewTimingSplit("Assemble"); + int assembler_retries = 0; + CodeOffset starting_offset = EncodeRange(first_lir_insn_, last_lir_insn_, 0); + data_offset_ = (starting_offset + 0x3) & ~0x3; + int32_t offset_adjustment; + AssignDataOffsets(); + + /* + * Note: generation must be 1 on first pass (to distinguish from initialized state of 0 for non-visited nodes). + * Start at zero here, and bit will be flipped to 1 on entry to the loop. + */ + int generation = 0; + while (true) { + offset_adjustment = 0; + AssemblerStatus res = kSuccess; // Assume success + generation ^= 1; + // Note: nodes requring possible fixup linked in ascending order. + lir = first_fixup_; + prev_lir = NULL; + while (lir != NULL) { + /* + * NOTE: the lir being considered here will be encoded following the switch (so long as + * we're not in a retry situation). 
However, any new non-pc_rel instructions inserted + * due to retry must be explicitly encoded at the time of insertion. Note that + * inserted instructions don't need use/def flags, but do need size and pc-rel status + * properly updated. + */ + lir->offset += offset_adjustment; + // During pass, allows us to tell whether a node has been updated with offset_adjustment yet. + lir->flags.generation = generation; + switch (static_cast<FixupKind>(lir->flags.fixup)) { + case kFixupLabel: + case kFixupNone: + break; + case kFixupVLoad: + if (lir->operands[1] != r15pc) { + break; + } + // NOTE: intentional fallthrough. + case kFixupLoad: { /* - * Note: because rARM_LR may be used to fix up out-of-range - * vldrs/vldrd we include REG_DEF_LR in the resource - * masks for these instructions. + * PC-relative loads are mostly used to load immediates + * that are too large to materialize directly in one shot. + * However, if the load displacement exceeds the limit, + * we revert to a multiple-instruction materialization sequence. */ - int base_reg = ((lir->opcode == kThumb2LdrdPcRel8) || (lir->opcode == kThumb2LdrPcRel12)) - ? lir->operands[0] : rARM_LR; + LIR *lir_target = lir->target; + CodeOffset pc = (lir->offset + 4) & ~3; + CodeOffset target = lir_target->offset + + ((lir_target->flags.generation == lir->flags.generation) ? 0 : offset_adjustment); + int32_t delta = target - pc; + if (res != kSuccess) { + /* + * In this case, we're just estimating and will do it again for real. Ensure offset + * is legal. + */ + delta &= ~0x3; + } + DCHECK_EQ((delta & 0x3), 0); + // First, a sanity check for cases we shouldn't see now + if (kIsDebugBuild && (((lir->opcode == kThumbAddPcRel) && (delta > 1020)) || + ((lir->opcode == kThumbLdrPcRel) && (delta > 1020)))) { + // Shouldn't happen in current codegen. + LOG(FATAL) << "Unexpected pc-rel offset " << delta; + } + // Now, check for the difficult cases + if (((lir->opcode == kThumb2LdrPcRel12) && (delta > 4091)) || + ((lir->opcode == kThumb2LdrdPcRel8) && (delta > 1020)) || + ((lir->opcode == kThumb2Vldrs) && (delta > 1020)) || + ((lir->opcode == kThumb2Vldrd) && (delta > 1020))) { + /* + * Note: The reason vldrs/vldrd include rARM_LR in their use/def masks is that we + * sometimes have to use it to fix up out-of-range accesses. This is where that + * happens. + */ + int base_reg = ((lir->opcode == kThumb2LdrdPcRel8) || + (lir->opcode == kThumb2LdrPcRel12)) ? lir->operands[0] : rARM_LR; - // Add new Adr to generate the address. - LIR* new_adr = RawLIR(lir->dalvik_offset, kThumb2Adr, - base_reg, 0, 0, 0, 0, lir->target); - InsertLIRBefore(lir, new_adr); + // Add new Adr to generate the address. + LIR* new_adr = RawLIR(lir->dalvik_offset, kThumb2Adr, + base_reg, 0, 0, 0, 0, lir->target); + new_adr->offset = lir->offset; + new_adr->flags.fixup = kFixupAdr; + new_adr->flags.size = EncodingMap[kThumb2Adr].size; + InsertLIRBefore(lir, new_adr); + lir->offset += new_adr->flags.size; + offset_adjustment += new_adr->flags.size; - // Convert to normal load. - if (lir->opcode == kThumb2LdrPcRel12) { - lir->opcode = kThumb2LdrRRI12; - } else if (lir->opcode == kThumb2LdrdPcRel8) { - lir->opcode = kThumb2LdrdI8; - } - // Change the load to be relative to the new Adr base. - if (lir->opcode == kThumb2LdrdI8) { - lir->operands[3] = 0; - lir->operands[2] = base_reg; + // lir no longer pcrel, unlink and link in new_adr. + ReplaceFixup(prev_lir, lir, new_adr); + + // Convert to normal load. 
+ offset_adjustment -= lir->flags.size; + if (lir->opcode == kThumb2LdrPcRel12) { + lir->opcode = kThumb2LdrRRI12; + } else if (lir->opcode == kThumb2LdrdPcRel8) { + lir->opcode = kThumb2LdrdI8; + } + lir->flags.size = EncodingMap[lir->opcode].size; + offset_adjustment += lir->flags.size; + // Change the load to be relative to the new Adr base. + if (lir->opcode == kThumb2LdrdI8) { + lir->operands[3] = 0; + lir->operands[2] = base_reg; + } else { + lir->operands[2] = 0; + lir->operands[1] = base_reg; + } + // Must redo encoding here - won't ever revisit this node. + EncodeLIR(lir); + prev_lir = new_adr; // Continue scan with new_adr; + lir = new_adr->u.a.pcrel_next; + res = kRetryAll; + continue; } else { - lir->operands[2] = 0; - lir->operands[1] = base_reg; + if ((lir->opcode == kThumb2Vldrs) || + (lir->opcode == kThumb2Vldrd) || + (lir->opcode == kThumb2LdrdPcRel8)) { + lir->operands[2] = delta >> 2; + } else { + lir->operands[1] = (lir->opcode == kThumb2LdrPcRel12) ? delta : + delta >> 2; + } } - SetupResourceMasks(lir); - res = kRetryAll; - } else { - if ((lir->opcode == kThumb2Vldrs) || - (lir->opcode == kThumb2Vldrd) || - (lir->opcode == kThumb2LdrdPcRel8)) { - lir->operands[2] = delta >> 2; + break; + } + case kFixupCBxZ: { + LIR *target_lir = lir->target; + CodeOffset pc = lir->offset + 4; + CodeOffset target = target_lir->offset + + ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment); + int32_t delta = target - pc; + if (delta > 126 || delta < 0) { + /* + * Convert to cmp rx,#0 / b[eq/ne] tgt pair + * Make new branch instruction and insert after + */ + LIR* new_inst = + RawLIR(lir->dalvik_offset, kThumbBCond, 0, + (lir->opcode == kThumb2Cbz) ? kArmCondEq : kArmCondNe, + 0, 0, 0, lir->target); + InsertLIRAfter(lir, new_inst); + + /* Convert the cb[n]z to a cmp rx, #0 ] */ + // Subtract the old size. + offset_adjustment -= lir->flags.size; + lir->opcode = kThumbCmpRI8; + /* operand[0] is src1 in both cb[n]z & CmpRI8 */ + lir->operands[1] = 0; + lir->target = 0; + EncodeLIR(lir); // NOTE: sets flags.size. + // Add back the new size. + DCHECK_EQ(lir->flags.size, static_cast<uint32_t>(EncodingMap[lir->opcode].size)); + offset_adjustment += lir->flags.size; + // Set up the new following inst. + new_inst->offset = lir->offset + lir->flags.size; + new_inst->flags.fixup = kFixupCondBranch; + new_inst->flags.size = EncodingMap[new_inst->opcode].size; + offset_adjustment += new_inst->flags.size; + + // lir no longer pcrel, unlink and link in new_inst. + ReplaceFixup(prev_lir, lir, new_inst); + prev_lir = new_inst; // Continue with the new instruction. + lir = new_inst->u.a.pcrel_next; + res = kRetryAll; + continue; } else { - lir->operands[1] = (lir->opcode == kThumb2LdrPcRel12) ? delta : - delta >> 2; + lir->operands[1] = delta >> 1; } + break; } - } else if (lir->opcode == kThumb2Cbnz || lir->opcode == kThumb2Cbz) { - LIR *target_lir = lir->target; - uintptr_t pc = lir->offset + 4; - uintptr_t target = target_lir->offset; - int delta = target - pc; - if (delta > 126 || delta < 0) { - /* - * Convert to cmp rx,#0 / b[eq/ne] tgt pair - * Make new branch instruction and insert after - */ - LIR* new_inst = - RawLIR(lir->dalvik_offset, kThumbBCond, 0, - (lir->opcode == kThumb2Cbz) ? 
kArmCondEq : kArmCondNe, - 0, 0, 0, lir->target); - InsertLIRAfter(lir, new_inst); - /* Convert the cb[n]z to a cmp rx, #0 ] */ - lir->opcode = kThumbCmpRI8; - /* operand[0] is src1 in both cb[n]z & CmpRI8 */ - lir->operands[1] = 0; - lir->target = 0; - SetupResourceMasks(lir); - res = kRetryAll; - } else { - lir->operands[1] = delta >> 1; - } - } else if (lir->opcode == kThumb2Push || lir->opcode == kThumb2Pop) { - if (__builtin_popcount(lir->operands[0]) == 1) { - /* - * The standard push/pop multiple instruction - * requires at least two registers in the list. - * If we've got just one, switch to the single-reg - * encoding. - */ - lir->opcode = (lir->opcode == kThumb2Push) ? kThumb2Push1 : - kThumb2Pop1; - int reg = 0; - while (lir->operands[0]) { - if (lir->operands[0] & 0x1) { - break; - } else { - reg++; - lir->operands[0] >>= 1; + case kFixupPushPop: { + if (__builtin_popcount(lir->operands[0]) == 1) { + /* + * The standard push/pop multiple instruction + * requires at least two registers in the list. + * If we've got just one, switch to the single-reg + * encoding. + */ + lir->opcode = (lir->opcode == kThumb2Push) ? kThumb2Push1 : + kThumb2Pop1; + int reg = 0; + while (lir->operands[0]) { + if (lir->operands[0] & 0x1) { + break; + } else { + reg++; + lir->operands[0] >>= 1; + } } + lir->operands[0] = reg; + // This won't change again, don't bother unlinking, just reset fixup kind + lir->flags.fixup = kFixupNone; } - lir->operands[0] = reg; - SetupResourceMasks(lir); - res = kRetryAll; - } - } else if (lir->opcode == kThumbBCond || lir->opcode == kThumb2BCond) { - LIR *target_lir = lir->target; - int delta = 0; - DCHECK(target_lir); - uintptr_t pc = lir->offset + 4; - uintptr_t target = target_lir->offset; - delta = target - pc; - if ((lir->opcode == kThumbBCond) && (delta > 254 || delta < -256)) { - lir->opcode = kThumb2BCond; - SetupResourceMasks(lir); - res = kRetryAll; + break; } - lir->operands[0] = delta >> 1; - } else if (lir->opcode == kThumb2BUncond) { - LIR *target_lir = lir->target; - uintptr_t pc = lir->offset + 4; - uintptr_t target = target_lir->offset; - int delta = target - pc; - lir->operands[0] = delta >> 1; - if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && - lir->operands[0] == 0) { // Useless branch - lir->flags.is_nop = true; - res = kRetryAll; + case kFixupCondBranch: { + LIR *target_lir = lir->target; + int32_t delta = 0; + DCHECK(target_lir); + CodeOffset pc = lir->offset + 4; + CodeOffset target = target_lir->offset + + ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment); + delta = target - pc; + if ((lir->opcode == kThumbBCond) && (delta > 254 || delta < -256)) { + offset_adjustment -= lir->flags.size; + lir->opcode = kThumb2BCond; + lir->flags.size = EncodingMap[lir->opcode].size; + // Fixup kind remains the same. + offset_adjustment += lir->flags.size; + res = kRetryAll; + } + lir->operands[0] = delta >> 1; + break; } - } else if (lir->opcode == kThumbBUncond) { - LIR *target_lir = lir->target; - uintptr_t pc = lir->offset + 4; - uintptr_t target = target_lir->offset; - int delta = target - pc; - if (delta > 2046 || delta < -2048) { - // Convert to Thumb2BCond w/ kArmCondAl - lir->opcode = kThumb2BUncond; - lir->operands[0] = 0; - SetupResourceMasks(lir); - res = kRetryAll; - } else { + case kFixupT2Branch: { + LIR *target_lir = lir->target; + CodeOffset pc = lir->offset + 4; + CodeOffset target = target_lir->offset + + ((target_lir->flags.generation == lir->flags.generation) ? 
0 : offset_adjustment); + int32_t delta = target - pc; lir->operands[0] = delta >> 1; - if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && - lir->operands[0] == -1) { // Useless branch + if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && lir->operands[0] == 0) { + // Useless branch + offset_adjustment -= lir->flags.size; lir->flags.is_nop = true; + // Don't unlink - just set to do-nothing. + lir->flags.fixup = kFixupNone; + res = kRetryAll; + } + break; + } + case kFixupT1Branch: { + LIR *target_lir = lir->target; + CodeOffset pc = lir->offset + 4; + CodeOffset target = target_lir->offset + + ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment); + int32_t delta = target - pc; + if (delta > 2046 || delta < -2048) { + // Convert to Thumb2BCond w/ kArmCondAl + offset_adjustment -= lir->flags.size; + lir->opcode = kThumb2BUncond; + lir->operands[0] = 0; + lir->flags.size = EncodingMap[lir->opcode].size; + lir->flags.fixup = kFixupT2Branch; + offset_adjustment += lir->flags.size; res = kRetryAll; + } else { + lir->operands[0] = delta >> 1; + if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && lir->operands[0] == -1) { + // Useless branch + offset_adjustment -= lir->flags.size; + lir->flags.is_nop = true; + // Don't unlink - just set to do-nothing. + lir->flags.fixup = kFixupNone; + res = kRetryAll; + } } + break; } - } else if (lir->opcode == kThumbBlx1) { - DCHECK(NEXT_LIR(lir)->opcode == kThumbBlx2); - /* cur_pc is Thumb */ - uintptr_t cur_pc = (start_addr + lir->offset + 4) & ~3; - uintptr_t target = lir->operands[1]; + case kFixupBlx1: { + DCHECK(NEXT_LIR(lir)->opcode == kThumbBlx2); + /* cur_pc is Thumb */ + CodeOffset cur_pc = (lir->offset + 4) & ~3; + CodeOffset target = lir->operands[1]; - /* Match bit[1] in target with base */ - if (cur_pc & 0x2) { - target |= 0x2; - } - int delta = target - cur_pc; - DCHECK((delta >= -(1<<22)) && (delta <= ((1<<22)-2))); + /* Match bit[1] in target with base */ + if (cur_pc & 0x2) { + target |= 0x2; + } + int32_t delta = target - cur_pc; + DCHECK((delta >= -(1<<22)) && (delta <= ((1<<22)-2))); - lir->operands[0] = (delta >> 12) & 0x7ff; - NEXT_LIR(lir)->operands[0] = (delta>> 1) & 0x7ff; - } else if (lir->opcode == kThumbBl1) { - DCHECK(NEXT_LIR(lir)->opcode == kThumbBl2); - /* Both cur_pc and target are Thumb */ - uintptr_t cur_pc = start_addr + lir->offset + 4; - uintptr_t target = lir->operands[1]; + lir->operands[0] = (delta >> 12) & 0x7ff; + NEXT_LIR(lir)->operands[0] = (delta>> 1) & 0x7ff; + break; + } + case kFixupBl1: { + DCHECK(NEXT_LIR(lir)->opcode == kThumbBl2); + /* Both cur_pc and target are Thumb */ + CodeOffset cur_pc = lir->offset + 4; + CodeOffset target = lir->operands[1]; - int delta = target - cur_pc; - DCHECK((delta >= -(1<<22)) && (delta <= ((1<<22)-2))); + int32_t delta = target - cur_pc; + DCHECK((delta >= -(1<<22)) && (delta <= ((1<<22)-2))); - lir->operands[0] = (delta >> 12) & 0x7ff; - NEXT_LIR(lir)->operands[0] = (delta>> 1) & 0x7ff; - } else if (lir->opcode == kThumb2Adr) { - SwitchTable *tab_rec = reinterpret_cast<SwitchTable*>(lir->operands[2]); - LIR* target = lir->target; - int target_disp = tab_rec ? tab_rec->offset - : target->offset; - int disp = target_disp - ((lir->offset + 4) & ~3); - if (disp < 4096) { - lir->operands[1] = disp; - } else { - // convert to ldimm16l, ldimm16h, add tgt, pc, operands[0] - // TUNING: if this case fires often, it can be improved. Not expected to be common. 
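A quick note on the target->offset + ((target->flags.generation == lir->flags.generation) ? 0 : offset_adjustment) pattern used throughout these fixup cases: nodes already rewritten in the current pass carry offset_adjustment folded into their stored offset, while nodes not yet reached do not, and the generation tag tells the two apart. A minimal sketch of that bookkeeping, with the struct and helper names invented purely for illustration:

  #include <cstdint>

  struct Node {
    uint32_t offset;      // Code offset as currently stored on the node.
    uint32_t generation;  // Pass in which 'offset' was last rewritten.
  };

  // Offset of 'target' as seen while 'offset_adjustment' bytes of growth are still
  // pending for nodes that the current pass ('cur_generation') has not visited yet.
  static uint32_t EffectiveOffset(const Node& target, uint32_t cur_generation,
                                  uint32_t offset_adjustment) {
    return target.offset +
        ((target.generation == cur_generation) ? 0u : offset_adjustment);
  }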
- LIR *new_mov16L = - RawLIR(lir->dalvik_offset, kThumb2MovImm16LST, - lir->operands[0], 0, reinterpret_cast<uintptr_t>(lir), - reinterpret_cast<uintptr_t>(tab_rec), 0, lir->target); - InsertLIRBefore(lir, new_mov16L); - LIR *new_mov16H = - RawLIR(lir->dalvik_offset, kThumb2MovImm16HST, - lir->operands[0], 0, reinterpret_cast<uintptr_t>(lir), - reinterpret_cast<uintptr_t>(tab_rec), 0, lir->target); - InsertLIRBefore(lir, new_mov16H); - if (ARM_LOWREG(lir->operands[0])) { - lir->opcode = kThumbAddRRLH; + lir->operands[0] = (delta >> 12) & 0x7ff; + NEXT_LIR(lir)->operands[0] = (delta>> 1) & 0x7ff; + break; + } + case kFixupAdr: { + EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[2])); + LIR* target = lir->target; + int32_t target_disp = (tab_rec != NULL) ? tab_rec->offset + offset_adjustment + : target->offset + ((target->flags.generation == lir->flags.generation) ? 0 : offset_adjustment); + int32_t disp = target_disp - ((lir->offset + 4) & ~3); + if (disp < 4096) { + lir->operands[1] = disp; } else { - lir->opcode = kThumbAddRRHH; + // convert to ldimm16l, ldimm16h, add tgt, pc, operands[0] + // TUNING: if this case fires often, it can be improved. Not expected to be common. + LIR *new_mov16L = + RawLIR(lir->dalvik_offset, kThumb2MovImm16LST, lir->operands[0], 0, + WrapPointer(lir), WrapPointer(tab_rec), 0, lir->target); + new_mov16L->flags.size = EncodingMap[new_mov16L->opcode].size; + new_mov16L->flags.fixup = kFixupMovImmLST; + new_mov16L->offset = lir->offset; + // Link the new instruction, retaining lir. + InsertLIRBefore(lir, new_mov16L); + lir->offset += new_mov16L->flags.size; + offset_adjustment += new_mov16L->flags.size; + InsertFixupBefore(prev_lir, lir, new_mov16L); + prev_lir = new_mov16L; // Now we've got a new prev. + LIR *new_mov16H = + RawLIR(lir->dalvik_offset, kThumb2MovImm16HST, lir->operands[0], 0, + WrapPointer(lir), WrapPointer(tab_rec), 0, lir->target); + new_mov16H->flags.size = EncodingMap[new_mov16H->opcode].size; + new_mov16H->flags.fixup = kFixupMovImmHST; + new_mov16H->offset = lir->offset; + // Link the new instruction, retaining lir. + InsertLIRBefore(lir, new_mov16H); + lir->offset += new_mov16H->flags.size; + offset_adjustment += new_mov16H->flags.size; + InsertFixupBefore(prev_lir, lir, new_mov16H); + prev_lir = new_mov16H; // Now we've got a new prev. + + offset_adjustment -= lir->flags.size; + if (ARM_LOWREG(lir->operands[0])) { + lir->opcode = kThumbAddRRLH; + } else { + lir->opcode = kThumbAddRRHH; + } + lir->operands[1] = rARM_PC; + lir->flags.size = EncodingMap[lir->opcode].size; + offset_adjustment += lir->flags.size; + // Must stay in fixup list and have offset updated; will be used by LST/HSP pair. + lir->flags.fixup = kFixupNone; + res = kRetryAll; } - lir->operands[1] = rARM_PC; - SetupResourceMasks(lir); - res = kRetryAll; - } - } else if (lir->opcode == kThumb2MovImm16LST) { - // operands[1] should hold disp, [2] has add, [3] has tab_rec - LIR *addPCInst = reinterpret_cast<LIR*>(lir->operands[2]); - SwitchTable *tab_rec = reinterpret_cast<SwitchTable*>(lir->operands[3]); - // If tab_rec is null, this is a literal load. Use target - LIR* target = lir->target; - int target_disp = tab_rec ? 
tab_rec->offset : target->offset; - lir->operands[1] = (target_disp - (addPCInst->offset + 4)) & 0xffff; - } else if (lir->opcode == kThumb2MovImm16HST) { - // operands[1] should hold disp, [2] has add, [3] has tab_rec - LIR *addPCInst = reinterpret_cast<LIR*>(lir->operands[2]); - SwitchTable *tab_rec = reinterpret_cast<SwitchTable*>(lir->operands[3]); - // If tab_rec is null, this is a literal load. Use target - LIR* target = lir->target; - int target_disp = tab_rec ? tab_rec->offset : target->offset; - lir->operands[1] = - ((target_disp - (addPCInst->offset + 4)) >> 16) & 0xffff; - } - } - /* - * If one of the pc-relative instructions expanded we'll have - * to make another pass. Don't bother to fully assemble the - * instruction. - */ - if (res != kSuccess) { - continue; - } - const ArmEncodingMap *encoder = &EncodingMap[lir->opcode]; - uint32_t bits = encoder->skeleton; - int i; - for (i = 0; i < 4; i++) { - uint32_t operand; - uint32_t value; - operand = lir->operands[i]; - switch (encoder->field_loc[i].kind) { - case kFmtUnused: - break; - case kFmtFPImm: - value = ((operand & 0xF0) >> 4) << encoder->field_loc[i].end; - value |= (operand & 0x0F) << encoder->field_loc[i].start; - bits |= value; - break; - case kFmtBrOffset: - value = ((operand & 0x80000) >> 19) << 26; - value |= ((operand & 0x40000) >> 18) << 11; - value |= ((operand & 0x20000) >> 17) << 13; - value |= ((operand & 0x1f800) >> 11) << 16; - value |= (operand & 0x007ff); - bits |= value; - break; - case kFmtShift5: - value = ((operand & 0x1c) >> 2) << 12; - value |= (operand & 0x03) << 6; - bits |= value; - break; - case kFmtShift: - value = ((operand & 0x70) >> 4) << 12; - value |= (operand & 0x0f) << 4; - bits |= value; - break; - case kFmtBWidth: - value = operand - 1; - bits |= value; - break; - case kFmtLsb: - value = ((operand & 0x1c) >> 2) << 12; - value |= (operand & 0x03) << 6; - bits |= value; - break; - case kFmtImm6: - value = ((operand & 0x20) >> 5) << 9; - value |= (operand & 0x1f) << 3; - bits |= value; - break; - case kFmtBitBlt: - value = (operand << encoder->field_loc[i].start) & - ((1 << (encoder->field_loc[i].end + 1)) - 1); - bits |= value; - break; - case kFmtDfp: { - DCHECK(ARM_DOUBLEREG(operand)); - DCHECK_EQ((operand & 0x1), 0U); - int reg_name = (operand & ARM_FP_REG_MASK) >> 1; - /* Snag the 1-bit slice and position it */ - value = ((reg_name & 0x10) >> 4) << encoder->field_loc[i].end; - /* Extract and position the 4-bit slice */ - value |= (reg_name & 0x0f) << encoder->field_loc[i].start; - bits |= value; break; } - case kFmtSfp: - DCHECK(ARM_SINGLEREG(operand)); - /* Snag the 1-bit slice and position it */ - value = (operand & 0x1) << encoder->field_loc[i].end; - /* Extract and position the 4-bit slice */ - value |= ((operand & 0x1e) >> 1) << encoder->field_loc[i].start; - bits |= value; + case kFixupMovImmLST: { + // operands[1] should hold disp, [2] has add, [3] has tab_rec + LIR *addPCInst = reinterpret_cast<LIR*>(UnwrapPointer(lir->operands[2])); + EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[3])); + // If tab_rec is null, this is a literal load. Use target + LIR* target = lir->target; + int32_t target_disp = tab_rec ? 
tab_rec->offset : target->offset; + lir->operands[1] = (target_disp - (addPCInst->offset + 4)) & 0xffff; break; - case kFmtImm12: - case kFmtModImm: - value = ((operand & 0x800) >> 11) << 26; - value |= ((operand & 0x700) >> 8) << 12; - value |= operand & 0x0ff; - bits |= value; - break; - case kFmtImm16: - value = ((operand & 0x0800) >> 11) << 26; - value |= ((operand & 0xf000) >> 12) << 16; - value |= ((operand & 0x0700) >> 8) << 12; - value |= operand & 0x0ff; - bits |= value; + } + case kFixupMovImmHST: { + // operands[1] should hold disp, [2] has add, [3] has tab_rec + LIR *addPCInst = reinterpret_cast<LIR*>(UnwrapPointer(lir->operands[2])); + EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[3])); + // If tab_rec is null, this is a literal load. Use target + LIR* target = lir->target; + int32_t target_disp = tab_rec ? tab_rec->offset : target->offset; + lir->operands[1] = + ((target_disp - (addPCInst->offset + 4)) >> 16) & 0xffff; break; - case kFmtOff24: { - uint32_t signbit = (operand >> 31) & 0x1; - uint32_t i1 = (operand >> 22) & 0x1; - uint32_t i2 = (operand >> 21) & 0x1; - uint32_t imm10 = (operand >> 11) & 0x03ff; - uint32_t imm11 = operand & 0x07ff; - uint32_t j1 = (i1 ^ signbit) ? 0 : 1; - uint32_t j2 = (i2 ^ signbit) ? 0 : 1; - value = (signbit << 26) | (j1 << 13) | (j2 << 11) | (imm10 << 16) | - imm11; - bits |= value; + } + case kFixupAlign4: { + int32_t required_size = lir->offset & 0x2; + if (lir->flags.size != required_size) { + offset_adjustment += required_size - lir->flags.size; + lir->flags.size = required_size; + res = kRetryAll; } break; + } default: - LOG(FATAL) << "Bad fmt:" << encoder->field_loc[i].kind; + LOG(FATAL) << "Unexpected case " << lir->flags.fixup; } + /* + * If one of the pc-relative instructions expanded we'll have + * to make another pass. Don't bother to fully assemble the + * instruction. + */ + if (res == kSuccess) { + EncodeLIR(lir); + if (assembler_retries == 0) { + // Go ahead and fix up the code buffer image. + for (int i = 0; i < lir->flags.size; i++) { + code_buffer_[lir->offset + i] = lir->u.a.bytes[i]; + } + } + } + prev_lir = lir; + lir = lir->u.a.pcrel_next; } - if (encoder->size == 4) { - code_buffer_.push_back((bits >> 16) & 0xff); - code_buffer_.push_back((bits >> 24) & 0xff); + + if (res == kSuccess) { + break; + } else { + assembler_retries++; + if (assembler_retries > MAX_ASSEMBLER_RETRIES) { + CodegenDump(); + LOG(FATAL) << "Assembler error - too many retries"; + } + starting_offset += offset_adjustment; + data_offset_ = (starting_offset + 0x3) & ~0x3; + AssignDataOffsets(); } - code_buffer_.push_back(bits & 0xff); - code_buffer_.push_back((bits >> 8) & 0xff); } - return res; + + // Rebuild the CodeBuffer if we had to retry; otherwise it should be good as-is. + if (assembler_retries != 0) { + code_buffer_.clear(); + for (LIR* lir = first_lir_insn_; lir != NULL; lir = NEXT_LIR(lir)) { + if (lir->flags.is_nop) { + continue; + } else { + for (int i = 0; i < lir->flags.size; i++) { + code_buffer_.push_back(lir->u.a.bytes[i]); + } + } + } + } + + data_offset_ = (code_buffer_.size() + 0x3) & ~0x3; + + cu_->NewTimingSplit("LiteralData"); + // Install literals + InstallLiteralPools(); + + // Install switch tables + InstallSwitchTables(); + + // Install fill array data + InstallFillArrayData(); + + // Create the mapping table and native offset to reference map. 
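As an aside on the kFixupMovImmLST and kFixupMovImmHST cases above: the long-form address materialization splits one 32-bit displacement across a movw/movt pair, so each fixup only rewrites its own 16-bit half. A scalar sketch of that split (helper names are illustrative; the masking mirrors the operand computations above):

  #include <cstdint>

  // Low half feeds the movw (kThumb2MovImm16LST) operand...
  static inline uint16_t LowHalf(int32_t disp) {
    return static_cast<uint16_t>(disp & 0xffff);
  }

  // ...and the high half feeds the movt (kThumb2MovImm16HST) operand.
  static inline uint16_t HighHalf(int32_t disp) {
    return static_cast<uint16_t>((disp >> 16) & 0xffff);
  }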
+ cu_->NewTimingSplit("PcMappingTable"); + CreateMappingTables(); + + cu_->NewTimingSplit("GcMap"); + CreateNativeGcMap(); } int ArmMir2Lir::GetInsnSize(LIR* lir) { + DCHECK(!IsPseudoLirOp(lir->opcode)); return EncodingMap[lir->opcode].size; } +// Encode instruction bit pattern and assign offsets. +uint32_t ArmMir2Lir::EncodeRange(LIR* head_lir, LIR* tail_lir, uint32_t offset) { + LIR* end_lir = tail_lir->next; + + /* + * A significant percentage of methods can be assembled in a single pass. We'll + * go ahead and build the code image here, leaving holes for pc-relative fixup + * codes. If the code size changes during that pass, we'll have to throw away + * this work - but if not, we're ready to go. + */ + code_buffer_.reserve(estimated_native_code_size_ + 256); // Add a little slop. + LIR* last_fixup = NULL; + for (LIR* lir = head_lir; lir != end_lir; lir = NEXT_LIR(lir)) { + lir->offset = offset; + if (!lir->flags.is_nop) { + if (lir->flags.fixup != kFixupNone) { + if (!IsPseudoLirOp(lir->opcode)) { + lir->flags.size = EncodingMap[lir->opcode].size; + lir->flags.fixup = EncodingMap[lir->opcode].fixup; + } else if (UNLIKELY(lir->opcode == kPseudoPseudoAlign4)) { + lir->flags.size = (offset & 0x2); + lir->flags.fixup = kFixupAlign4; + } else { + lir->flags.size = 0; + lir->flags.fixup = kFixupLabel; + } + // Link into the fixup chain. + lir->flags.use_def_invalid = true; + lir->u.a.pcrel_next = NULL; + if (first_fixup_ == NULL) { + first_fixup_ = lir; + } else { + last_fixup->u.a.pcrel_next = lir; + } + last_fixup = lir; + } else { + EncodeLIR(lir); + } + for (int i = 0; i < lir->flags.size; i++) { + code_buffer_.push_back(lir->u.a.bytes[i]); + } + offset += lir->flags.size; + } + } + return offset; +} + +void ArmMir2Lir::AssignDataOffsets() { + /* Set up offsets for literals */ + CodeOffset offset = data_offset_; + + offset = AssignLiteralOffset(offset); + + offset = AssignSwitchTablesOffset(offset); + + total_size_ = AssignFillArrayDataOffset(offset); +} + } // namespace art diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc index 2dbe5f5c36..51aca8540c 100644 --- a/compiler/dex/quick/arm/call_arm.cc +++ b/compiler/dex/quick/arm/call_arm.cc @@ -92,7 +92,7 @@ void ArmMir2Lir::LockLiveArgs(MIR* mir) { } /* Find the next MIR, which may be in a following basic block */ -// TODO: should this be a utility in mir_graph? +// TODO: make this a utility in mir_graph. MIR* ArmMir2Lir::GetNextMir(BasicBlock** p_bb, MIR* mir) { BasicBlock* bb = *p_bb; MIR* orig_mir = mir; @@ -103,7 +103,7 @@ MIR* ArmMir2Lir::GetNextMir(BasicBlock** p_bb, MIR* mir) { if (mir != NULL) { return mir; } else { - bb = bb->fall_through; + bb = mir_graph_->GetBasicBlock(bb->fall_through); *p_bb = bb; if (bb) { mir = bb->first_mir_insn; @@ -120,17 +120,18 @@ MIR* ArmMir2Lir::GetNextMir(BasicBlock** p_bb, MIR* mir) { // TODO: move to common code void ArmMir2Lir::GenPrintLabel(MIR* mir) { /* Mark the beginning of a Dalvik instruction for line tracking */ - char* inst_str = cu_->verbose ? 
- mir_graph_->GetDalvikDisassembly(mir) : NULL; - MarkBoundary(mir->offset, inst_str); + if (cu_->verbose) { + char* inst_str = mir_graph_->GetDalvikDisassembly(mir); + MarkBoundary(mir->offset, inst_str); + } } MIR* ArmMir2Lir::SpecialIGet(BasicBlock** bb, MIR* mir, OpSize size, bool long_or_double, bool is_object) { - int field_offset; + int32_t field_offset; bool is_volatile; uint32_t field_idx = mir->dalvikInsn.vC; - bool fast_path = FastInstance(field_idx, field_offset, is_volatile, false); + bool fast_path = FastInstance(field_idx, false, &field_offset, &is_volatile); if (!fast_path || !(mir->optimization_flags & MIR_IGNORE_NULL_CHECK)) { return NULL; } @@ -152,10 +153,10 @@ MIR* ArmMir2Lir::SpecialIGet(BasicBlock** bb, MIR* mir, MIR* ArmMir2Lir::SpecialIPut(BasicBlock** bb, MIR* mir, OpSize size, bool long_or_double, bool is_object) { - int field_offset; + int32_t field_offset; bool is_volatile; uint32_t field_idx = mir->dalvikInsn.vC; - bool fast_path = FastInstance(field_idx, field_offset, is_volatile, false); + bool fast_path = FastInstance(field_idx, false, &field_offset, &is_volatile); if (!fast_path || !(mir->optimization_flags & MIR_IGNORE_NULL_CHECK)) { return NULL; } @@ -319,9 +320,9 @@ void ArmMir2Lir::GenSparseSwitch(MIR* mir, uint32_t table_offset, static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), ArenaAllocator::kAllocData)); tab_rec->table = table; tab_rec->vaddr = current_dalvik_offset_; - int size = table[1]; + uint32_t size = table[1]; tab_rec->targets = static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), - ArenaAllocator::kAllocLIR)); + ArenaAllocator::kAllocLIR)); switch_tables_.Insert(tab_rec); // Get the switch value @@ -337,7 +338,7 @@ void ArmMir2Lir::GenSparseSwitch(MIR* mir, uint32_t table_offset, r_key = tmp; } // Materialize a pointer to the switch table - NewLIR3(kThumb2Adr, rBase, 0, reinterpret_cast<uintptr_t>(tab_rec)); + NewLIR3(kThumb2Adr, rBase, 0, WrapPointer(tab_rec)); // Set up r_idx int r_idx = AllocTemp(); LoadConstant(r_idx, size); @@ -367,7 +368,7 @@ void ArmMir2Lir::GenPackedSwitch(MIR* mir, uint32_t table_offset, static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), ArenaAllocator::kAllocData)); tab_rec->table = table; tab_rec->vaddr = current_dalvik_offset_; - int size = table[1]; + uint32_t size = table[1]; tab_rec->targets = static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), ArenaAllocator::kAllocLIR)); switch_tables_.Insert(tab_rec); @@ -376,7 +377,7 @@ void ArmMir2Lir::GenPackedSwitch(MIR* mir, uint32_t table_offset, rl_src = LoadValue(rl_src, kCoreReg); int table_base = AllocTemp(); // Materialize a pointer to the switch table - NewLIR3(kThumb2Adr, table_base, 0, reinterpret_cast<uintptr_t>(tab_rec)); + NewLIR3(kThumb2Adr, table_base, 0, WrapPointer(tab_rec)); int low_key = s4FromSwitchData(&table[2]); int keyReg; // Remove the bias, if necessary @@ -432,95 +433,127 @@ void ArmMir2Lir::GenFillArrayData(uint32_t table_offset, RegLocation rl_src) { LoadWordDisp(rARM_SELF, QUICK_ENTRYPOINT_OFFSET(pHandleFillArrayData).Int32Value(), rARM_LR); // Materialize a pointer to the fill data image - NewLIR3(kThumb2Adr, r1, 0, reinterpret_cast<uintptr_t>(tab_rec)); + NewLIR3(kThumb2Adr, r1, 0, WrapPointer(tab_rec)); ClobberCalleeSave(); LIR* call_inst = OpReg(kOpBlx, rARM_LR); MarkSafepointPC(call_inst); } /* - * Handle simple case (thin lock) inline. If it's complicated, bail - * out to the heavyweight lock/unlock routines. 
We'll use dedicated - * registers here in order to be in the right position in case we - * to bail to oat[Lock/Unlock]Object(self, object) - * - * r0 -> self pointer [arg0 for oat[Lock/Unlock]Object - * r1 -> object [arg1 for oat[Lock/Unlock]Object - * r2 -> intial contents of object->lock, later result of strex - * r3 -> self->thread_id - * r12 -> allow to be used by utilities as general temp - * - * The result of the strex is 0 if we acquire the lock. - * - * See comments in monitor.cc for the layout of the lock word. - * Of particular interest to this code is the test for the - * simple case - which we handle inline. For monitor enter, the - * simple case is thin lock, held by no-one. For monitor exit, - * the simple case is thin lock, held by the unlocking thread with - * a recurse count of 0. - * - * A minor complication is that there is a field in the lock word - * unrelated to locking: the hash state. This field must be ignored, but - * preserved. - * + * Handle unlocked -> thin locked transition inline or else call out to quick entrypoint. For more + * details see monitor.cc. */ void ArmMir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) { FlushAllRegs(); - DCHECK_EQ(LW_SHAPE_THIN, 0); LoadValueDirectFixed(rl_src, r0); // Get obj LockCallTemps(); // Prepare for explicit register usage - GenNullCheck(rl_src.s_reg_low, r0, opt_flags); - LoadWordDisp(rARM_SELF, Thread::ThinLockIdOffset().Int32Value(), r2); - NewLIR3(kThumb2Ldrex, r1, r0, - mirror::Object::MonitorOffset().Int32Value() >> 2); // Get object->lock - // Align owner - OpRegImm(kOpLsl, r2, LW_LOCK_OWNER_SHIFT); - // Is lock unheld on lock or held by us (==thread_id) on unlock? - NewLIR4(kThumb2Bfi, r2, r1, 0, LW_LOCK_OWNER_SHIFT - 1); - NewLIR3(kThumb2Bfc, r1, LW_HASH_STATE_SHIFT, LW_LOCK_OWNER_SHIFT - 1); - OpRegImm(kOpCmp, r1, 0); - OpIT(kCondEq, ""); - NewLIR4(kThumb2Strex, r1, r2, r0, - mirror::Object::MonitorOffset().Int32Value() >> 2); - OpRegImm(kOpCmp, r1, 0); - OpIT(kCondNe, "T"); - // Go expensive route - artLockObjectFromCode(self, obj); - LoadWordDisp(rARM_SELF, QUICK_ENTRYPOINT_OFFSET(pLockObject).Int32Value(), rARM_LR); - ClobberCalleeSave(); - LIR* call_inst = OpReg(kOpBlx, rARM_LR); - MarkSafepointPC(call_inst); - GenMemBarrier(kLoadLoad); + constexpr bool kArchVariantHasGoodBranchPredictor = false; // TODO: true if cortex-A15. + if (kArchVariantHasGoodBranchPredictor) { + LIR* null_check_branch; + if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) { + null_check_branch = nullptr; // No null check. + } else { + // If the null-check fails its handled by the slow-path to reduce exception related meta-data. + null_check_branch = OpCmpImmBranch(kCondEq, r0, 0, NULL); + } + LoadWordDisp(rARM_SELF, Thread::ThinLockIdOffset().Int32Value(), r2); + NewLIR3(kThumb2Ldrex, r1, r0, mirror::Object::MonitorOffset().Int32Value() >> 2); + LIR* not_unlocked_branch = OpCmpImmBranch(kCondNe, r1, 0, NULL); + NewLIR4(kThumb2Strex, r1, r2, r0, mirror::Object::MonitorOffset().Int32Value() >> 2); + LIR* lock_success_branch = OpCmpImmBranch(kCondEq, r1, 0, NULL); + + + LIR* slow_path_target = NewLIR0(kPseudoTargetLabel); + not_unlocked_branch->target = slow_path_target; + if (null_check_branch != nullptr) { + null_check_branch->target = slow_path_target; + } + // TODO: move to a slow path. 
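The inline path emitted above is essentially a compare-and-swap on the lock word: load-exclusive, check that it reads as unlocked, store-exclusive the current thread id, and branch to the slow path on any other outcome. A rough C++ analogue, assuming a deliberately simplified lock word in which zero means unlocked (the real encoding is described in monitor.cc and is not modeled here):

  #include <atomic>
  #include <cstdint>

  // Returns true if the thin lock was acquired inline; false means take the slow path.
  static bool TryThinLock(std::atomic<uint32_t>& lock_word, uint32_t thread_id) {
    uint32_t expected = 0u;  // Simplified: zero == unlocked, no other state bits modeled.
    // On ARM this compiles down to the same ldrex / cmp / strex shape as the code above.
    return lock_word.compare_exchange_strong(expected, thread_id,
                                             std::memory_order_acquire);
  }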
+ // Go expensive route - artLockObjectFromCode(obj); + LoadWordDisp(rARM_SELF, QUICK_ENTRYPOINT_OFFSET(pLockObject).Int32Value(), rARM_LR); + ClobberCalleeSave(); + LIR* call_inst = OpReg(kOpBlx, rARM_LR); + MarkSafepointPC(call_inst); + + LIR* success_target = NewLIR0(kPseudoTargetLabel); + lock_success_branch->target = success_target; + GenMemBarrier(kLoadLoad); + } else { + // Explicit null-check as slow-path is entered using an IT. + GenNullCheck(rl_src.s_reg_low, r0, opt_flags); + LoadWordDisp(rARM_SELF, Thread::ThinLockIdOffset().Int32Value(), r2); + NewLIR3(kThumb2Ldrex, r1, r0, mirror::Object::MonitorOffset().Int32Value() >> 2); + OpRegImm(kOpCmp, r1, 0); + OpIT(kCondEq, ""); + NewLIR4(kThumb2Strex/*eq*/, r1, r2, r0, mirror::Object::MonitorOffset().Int32Value() >> 2); + OpRegImm(kOpCmp, r1, 0); + OpIT(kCondNe, "T"); + // Go expensive route - artLockObjectFromCode(self, obj); + LoadWordDisp/*ne*/(rARM_SELF, QUICK_ENTRYPOINT_OFFSET(pLockObject).Int32Value(), rARM_LR); + ClobberCalleeSave(); + LIR* call_inst = OpReg(kOpBlx/*ne*/, rARM_LR); + MarkSafepointPC(call_inst); + GenMemBarrier(kLoadLoad); + } } /* - * For monitor unlock, we don't have to use ldrex/strex. Once - * we've determined that the lock is thin and that we own it with - * a zero recursion count, it's safe to punch it back to the - * initial, unlock thin state with a store word. + * Handle thin locked -> unlocked transition inline or else call out to quick entrypoint. For more + * details see monitor.cc. Note the code below doesn't use ldrex/strex as the code holds the lock + * and can only give away ownership if its suspended. */ void ArmMir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) { - DCHECK_EQ(LW_SHAPE_THIN, 0); FlushAllRegs(); LoadValueDirectFixed(rl_src, r0); // Get obj LockCallTemps(); // Prepare for explicit register usage - GenNullCheck(rl_src.s_reg_low, r0, opt_flags); - LoadWordDisp(r0, mirror::Object::MonitorOffset().Int32Value(), r1); // Get lock + LIR* null_check_branch; LoadWordDisp(rARM_SELF, Thread::ThinLockIdOffset().Int32Value(), r2); - // Is lock unheld on lock or held by us (==thread_id) on unlock? - OpRegRegImm(kOpAnd, r3, r1, - (LW_HASH_STATE_MASK << LW_HASH_STATE_SHIFT)); - // Align owner - OpRegImm(kOpLsl, r2, LW_LOCK_OWNER_SHIFT); - NewLIR3(kThumb2Bfc, r1, LW_HASH_STATE_SHIFT, LW_LOCK_OWNER_SHIFT - 1); - OpRegReg(kOpSub, r1, r2); - OpIT(kCondEq, "EE"); - StoreWordDisp(r0, mirror::Object::MonitorOffset().Int32Value(), r3); - // Go expensive route - UnlockObjectFromCode(obj); - LoadWordDisp(rARM_SELF, QUICK_ENTRYPOINT_OFFSET(pUnlockObject).Int32Value(), rARM_LR); - ClobberCalleeSave(); - LIR* call_inst = OpReg(kOpBlx, rARM_LR); - MarkSafepointPC(call_inst); - GenMemBarrier(kStoreLoad); + constexpr bool kArchVariantHasGoodBranchPredictor = false; // TODO: true if cortex-A15. + if (kArchVariantHasGoodBranchPredictor) { + if ((opt_flags & MIR_IGNORE_NULL_CHECK) && !(cu_->disable_opt & (1 << kNullCheckElimination))) { + null_check_branch = nullptr; // No null check. + } else { + // If the null-check fails its handled by the slow-path to reduce exception related meta-data. 
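The matching release path that follows needs no ldrex/strex: only the owning thread can reach it, so after confirming the lock word still holds this thread's id it can simply store zero. Continuing the simplified sketch from above, with the same caveat that the real lock word carries more state than a bare thread id:

  #include <atomic>
  #include <cstdint>

  // Returns true if the thin lock was released inline; any mismatch (different owner,
  // extra state in the word) falls back to the slow path.
  static bool TryThinUnlock(std::atomic<uint32_t>& lock_word, uint32_t thread_id) {
    if (lock_word.load(std::memory_order_relaxed) != thread_id) {
      return false;
    }
    lock_word.store(0u, std::memory_order_release);  // A plain store is sufficient.
    return true;
  }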
+ null_check_branch = OpCmpImmBranch(kCondEq, r0, 0, NULL); + } + LoadWordDisp(r0, mirror::Object::MonitorOffset().Int32Value(), r1); + LoadConstantNoClobber(r3, 0); + LIR* slow_unlock_branch = OpCmpBranch(kCondNe, r1, r2, NULL); + StoreWordDisp(r0, mirror::Object::MonitorOffset().Int32Value(), r3); + LIR* unlock_success_branch = OpUnconditionalBranch(NULL); + + LIR* slow_path_target = NewLIR0(kPseudoTargetLabel); + slow_unlock_branch->target = slow_path_target; + if (null_check_branch != nullptr) { + null_check_branch->target = slow_path_target; + } + // TODO: move to a slow path. + // Go expensive route - artUnlockObjectFromCode(obj); + LoadWordDisp(rARM_SELF, QUICK_ENTRYPOINT_OFFSET(pUnlockObject).Int32Value(), rARM_LR); + ClobberCalleeSave(); + LIR* call_inst = OpReg(kOpBlx, rARM_LR); + MarkSafepointPC(call_inst); + + LIR* success_target = NewLIR0(kPseudoTargetLabel); + unlock_success_branch->target = success_target; + GenMemBarrier(kStoreLoad); + } else { + // Explicit null-check as slow-path is entered using an IT. + GenNullCheck(rl_src.s_reg_low, r0, opt_flags); + LoadWordDisp(r0, mirror::Object::MonitorOffset().Int32Value(), r1); // Get lock + LoadWordDisp(rARM_SELF, Thread::ThinLockIdOffset().Int32Value(), r2); + LoadConstantNoClobber(r3, 0); + // Is lock unheld on lock or held by us (==thread_id) on unlock? + OpRegReg(kOpCmp, r1, r2); + OpIT(kCondEq, "EE"); + StoreWordDisp/*eq*/(r0, mirror::Object::MonitorOffset().Int32Value(), r3); + // Go expensive route - UnlockObjectFromCode(obj); + LoadWordDisp/*ne*/(rARM_SELF, QUICK_ENTRYPOINT_OFFSET(pUnlockObject).Int32Value(), rARM_LR); + ClobberCalleeSave(); + LIR* call_inst = OpReg(kOpBlx/*ne*/, rARM_LR); + MarkSafepointPC(call_inst); + GenMemBarrier(kStoreLoad); + } } void ArmMir2Lir::GenMoveException(RegLocation rl_dest) { diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h index 291319f258..15355be9d7 100644 --- a/compiler/dex/quick/arm/codegen_arm.h +++ b/compiler/dex/quick/arm/codegen_arm.h @@ -51,7 +51,6 @@ class ArmMir2Lir : public Mir2Lir { int AllocTypedTempPair(bool fp_hint, int reg_class); int S2d(int low_reg, int high_reg); int TargetReg(SpecialTargetRegister reg); - RegisterInfo* GetRegInfo(int reg); RegLocation GetReturnAlt(); RegLocation GetReturnWideAlt(); RegLocation LocCReturn(); @@ -71,9 +70,13 @@ class ArmMir2Lir : public Mir2Lir { void CompilerInitializeRegAlloc(); // Required for target - miscellaneous. - AssemblerStatus AssembleInstructions(uintptr_t start_addr); + void AssembleLIR(); + uint32_t EncodeRange(LIR* head_lir, LIR* tail_lir, uint32_t starting_offset); + int AssignInsnOffsets(); + void AssignOffsets(); + void EncodeLIR(LIR* lir); void DumpResourceMask(LIR* lir, uint64_t mask, const char* prefix); - void SetupTargetResourceMasks(LIR* lir); + void SetupTargetResourceMasks(LIR* lir, uint64_t flags); const char* GetTargetInstFmt(int opcode); const char* GetTargetInstName(int opcode); std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr); @@ -85,12 +88,10 @@ class ArmMir2Lir : public Mir2Lir { // Required for target - Dalvik-level generators. 
void GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); - void GenArrayObjPut(int opt_flags, RegLocation rl_array, RegLocation rl_index, - RegLocation rl_src, int scale); void GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, RegLocation rl_index, RegLocation rl_dest, int scale); - void GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, - RegLocation rl_index, RegLocation rl_src, int scale); + void GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, RegLocation rl_index, + RegLocation rl_src, int scale, bool card_mark); void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_shift); void GenMulLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); @@ -106,6 +107,8 @@ class ArmMir2Lir : public Mir2Lir { bool GenInlinedCas32(CallInfo* info, bool need_write_barrier); bool GenInlinedMinMaxInt(CallInfo* info, bool is_min); bool GenInlinedSqrt(CallInfo* info); + bool GenInlinedPeek(CallInfo* info, OpSize size); + bool GenInlinedPoke(CallInfo* info, OpSize size); void GenNegLong(RegLocation rl_dest, RegLocation rl_src); void GenOrLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); void GenSubLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); @@ -118,7 +121,7 @@ class ArmMir2Lir : public Mir2Lir { void GenDivZeroCheck(int reg_lo, int reg_hi); void GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method); void GenExitSequence(); - void GenFillArrayData(uint32_t table_offset, RegLocation rl_src); + void GenFillArrayData(DexOffset table_offset, RegLocation rl_src); void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double); void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir); void GenSelect(BasicBlock* bb, MIR* mir); @@ -130,8 +133,8 @@ class ArmMir2Lir : public Mir2Lir { int first_bit, int second_bit); void GenNegDouble(RegLocation rl_dest, RegLocation rl_src); void GenNegFloat(RegLocation rl_dest, RegLocation rl_src); - void GenPackedSwitch(MIR* mir, uint32_t table_offset, RegLocation rl_src); - void GenSparseSwitch(MIR* mir, uint32_t table_offset, RegLocation rl_src); + void GenPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src); + void GenSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src); void GenSpecialCase(BasicBlock* bb, MIR* mir, SpecialCaseHandler special_case); // Required for target - single operation generators. 
@@ -188,6 +191,9 @@ class ArmMir2Lir : public Mir2Lir { MIR* SpecialIdentity(MIR* mir); LIR* LoadFPConstantValue(int r_dest, int value); bool BadOverlap(RegLocation rl_src, RegLocation rl_dest); + void ReplaceFixup(LIR* prev_lir, LIR* orig_lir, LIR* new_lir); + void InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir); + void AssignDataOffsets(); }; } // namespace art diff --git a/compiler/dex/quick/arm/fp_arm.cc b/compiler/dex/quick/arm/fp_arm.cc index 08d6778129..480e0218d5 100644 --- a/compiler/dex/quick/arm/fp_arm.cc +++ b/compiler/dex/quick/arm/fp_arm.cc @@ -176,7 +176,7 @@ void ArmMir2Lir::GenConversion(Instruction::Code opcode, void ArmMir2Lir::GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double) { - LIR* target = &block_label_list_[bb->taken->id]; + LIR* target = &block_label_list_[bb->taken]; RegLocation rl_src1; RegLocation rl_src2; if (is_double) { diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc index 6fbdd2fd49..42bf3d4d00 100644 --- a/compiler/dex/quick/arm/int_arm.cc +++ b/compiler/dex/quick/arm/int_arm.cc @@ -24,8 +24,7 @@ namespace art { -LIR* ArmMir2Lir::OpCmpBranch(ConditionCode cond, int src1, - int src2, LIR* target) { +LIR* ArmMir2Lir::OpCmpBranch(ConditionCode cond, int src1, int src2, LIR* target) { OpRegReg(kOpCmp, src1, src2); return OpCondBranch(cond, target); } @@ -123,8 +122,8 @@ void ArmMir2Lir::GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1, int32_t val_hi = High32Bits(val); DCHECK_GE(ModifiedImmediate(val_lo), 0); DCHECK_GE(ModifiedImmediate(val_hi), 0); - LIR* taken = &block_label_list_[bb->taken->id]; - LIR* not_taken = &block_label_list_[bb->fall_through->id]; + LIR* taken = &block_label_list_[bb->taken]; + LIR* not_taken = &block_label_list_[bb->fall_through]; rl_src1 = LoadValueWide(rl_src1, kCoreReg); int32_t low_reg = rl_src1.low_reg; int32_t high_reg = rl_src1.high_reg; @@ -179,23 +178,6 @@ void ArmMir2Lir::GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1, void ArmMir2Lir::GenSelect(BasicBlock* bb, MIR* mir) { RegLocation rl_result; RegLocation rl_src = mir_graph_->GetSrc(mir, 0); - // Temporary debugging code - int dest_sreg = mir->ssa_rep->defs[0]; - if ((dest_sreg < 0) || (dest_sreg >= mir_graph_->GetNumSSARegs())) { - LOG(INFO) << "Bad target sreg: " << dest_sreg << ", in " - << PrettyMethod(cu_->method_idx, *cu_->dex_file); - LOG(INFO) << "at dex offset 0x" << std::hex << mir->offset; - LOG(INFO) << "vreg = " << mir_graph_->SRegToVReg(dest_sreg); - LOG(INFO) << "num uses = " << mir->ssa_rep->num_uses; - if (mir->ssa_rep->num_uses == 1) { - LOG(INFO) << "CONST case, vals = " << mir->dalvikInsn.vB << ", " << mir->dalvikInsn.vC; - } else { - LOG(INFO) << "MOVE case, operands = " << mir->ssa_rep->uses[1] << ", " - << mir->ssa_rep->uses[2]; - } - CHECK(false) << "Invalid target sreg on Select."; - } - // End temporary debugging code RegLocation rl_dest = mir_graph_->GetDest(mir); rl_src = LoadValue(rl_src, kCoreReg); if (mir->ssa_rep->num_uses == 1) { @@ -234,11 +216,17 @@ void ArmMir2Lir::GenSelect(BasicBlock* bb, MIR* mir) { rl_false = LoadValue(rl_false, kCoreReg); rl_result = EvalLoc(rl_dest, kCoreReg, true); OpRegImm(kOpCmp, rl_src.low_reg, 0); - OpIT(kCondEq, "E"); - LIR* l1 = OpRegCopy(rl_result.low_reg, rl_true.low_reg); - l1->flags.is_nop = false; // Make sure this instruction isn't optimized away - LIR* l2 = OpRegCopy(rl_result.low_reg, rl_false.low_reg); - l2->flags.is_nop = false; // Make sure this instruction isn't optimized away + if 
(rl_result.low_reg == rl_true.low_reg) { // Is the "true" case already in place? + OpIT(kCondNe, ""); + OpRegCopy(rl_result.low_reg, rl_false.low_reg); + } else if (rl_result.low_reg == rl_false.low_reg) { // False case in place? + OpIT(kCondEq, ""); + OpRegCopy(rl_result.low_reg, rl_true.low_reg); + } else { // Normal - select between the two. + OpIT(kCondEq, "E"); + OpRegCopy(rl_result.low_reg, rl_true.low_reg); + OpRegCopy(rl_result.low_reg, rl_false.low_reg); + } GenBarrier(); // Add a scheduling barrier to keep the IT shadow intact } StoreValue(rl_dest, rl_result); @@ -265,8 +253,8 @@ void ArmMir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) { return; } } - LIR* taken = &block_label_list_[bb->taken->id]; - LIR* not_taken = &block_label_list_[bb->fall_through->id]; + LIR* taken = &block_label_list_[bb->taken]; + LIR* not_taken = &block_label_list_[bb->fall_through]; rl_src1 = LoadValueWide(rl_src1, kCoreReg); rl_src2 = LoadValueWide(rl_src2, kCoreReg); OpRegReg(kOpCmp, rl_src1.high_reg, rl_src2.high_reg); @@ -313,7 +301,18 @@ LIR* ArmMir2Lir::OpCmpImmBranch(ConditionCode cond, int reg, int check_value, LIR* branch; int mod_imm; ArmConditionCode arm_cond = ArmConditionEncoding(cond); - if ((ARM_LOWREG(reg)) && (check_value == 0) && + /* + * A common use of OpCmpImmBranch is for null checks, and using the Thumb 16-bit + * compare-and-branch if zero is ideal if it will reach. However, because null checks + * branch forward to a launch pad, they will frequently not reach - and thus have to + * be converted to a long form during assembly (which will trigger another assembly + * pass). Here we estimate the branch distance for checks, and if large directly + * generate the long form in an attempt to avoid an extra assembly pass. + * TODO: consider interspersing launchpads in code following unconditional branches. + */ + bool skip = ((target != NULL) && (target->opcode == kPseudoThrowTarget)); + skip &= ((cu_->code_item->insns_size_in_code_units_ - current_dalvik_offset_) > 64); + if (!skip && (ARM_LOWREG(reg)) && (check_value == 0) && ((arm_cond == kArmCondEq) || (arm_cond == kArmCondNe))) { branch = NewLIR2((arm_cond == kArmCondEq) ? kThumb2Cbz : kThumb2Cbnz, reg, 0); @@ -467,14 +466,39 @@ LIR* ArmMir2Lir::GenRegMemCheck(ConditionCode c_code, RegLocation ArmMir2Lir::GenDivRemLit(RegLocation rl_dest, int reg1, int lit, bool is_div) { - LOG(FATAL) << "Unexpected use of GenDivRemLit for Arm"; - return rl_dest; + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + + // Put the literal in a temp. + int lit_temp = AllocTemp(); + LoadConstant(lit_temp, lit); + // Use the generic case for div/rem with arg2 in a register. + // TODO: The literal temp can be freed earlier during a modulus to reduce reg pressure. + rl_result = GenDivRem(rl_result, reg1, lit_temp, is_div); + FreeTemp(lit_temp); + + return rl_result; } RegLocation ArmMir2Lir::GenDivRem(RegLocation rl_dest, int reg1, int reg2, bool is_div) { - LOG(FATAL) << "Unexpected use of GenDivRem for Arm"; - return rl_dest; + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + if (is_div) { + // Simple case, use sdiv instruction. 
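Before the sdiv call that the comment above introduces, a note on the other branch: there is no hardware remainder instruction, so the modulus below is recovered as a - (a / b) * b, exactly the temp = reg1 / reg2; temp = temp * reg2; dest = reg1 - temp sequence spelled out in the comment that follows. A scalar sketch, assuming a non-zero divisor:

  #include <cstdint>

  static int32_t RemViaSdiv(int32_t a, int32_t b) {
    int32_t q = a / b;   // sdiv  q, a, b
    return a - q * b;    // mul + sub, as in the generated sequence
  }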
+ OpRegRegReg(kOpDiv, rl_result.low_reg, reg1, reg2); + } else { + // Remainder case, use the following code: + // temp = reg1 / reg2 - integer division + // temp = temp * reg2 + // dest = reg1 - temp + + int temp = AllocTemp(); + OpRegRegReg(kOpDiv, temp, reg1, reg2); + OpRegReg(kOpMul, temp, reg2); + OpRegRegReg(kOpSub, rl_result.low_reg, reg1, temp); + FreeTemp(temp); + } + + return rl_result; } bool ArmMir2Lir::GenInlinedMinMaxInt(CallInfo* info, bool is_min) { @@ -494,6 +518,50 @@ bool ArmMir2Lir::GenInlinedMinMaxInt(CallInfo* info, bool is_min) { return true; } +bool ArmMir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) { + RegLocation rl_src_address = info->args[0]; // long address + rl_src_address.wide = 0; // ignore high half in info->args[1] + RegLocation rl_dest = InlineTarget(info); + RegLocation rl_address = LoadValue(rl_src_address, kCoreReg); + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + if (size == kLong) { + // Fake unaligned LDRD by two unaligned LDR instructions on ARMv7 with SCTLR.A set to 0. + if (rl_address.low_reg != rl_result.low_reg) { + LoadBaseDisp(rl_address.low_reg, 0, rl_result.low_reg, kWord, INVALID_SREG); + LoadBaseDisp(rl_address.low_reg, 4, rl_result.high_reg, kWord, INVALID_SREG); + } else { + LoadBaseDisp(rl_address.low_reg, 4, rl_result.high_reg, kWord, INVALID_SREG); + LoadBaseDisp(rl_address.low_reg, 0, rl_result.low_reg, kWord, INVALID_SREG); + } + StoreValueWide(rl_dest, rl_result); + } else { + DCHECK(size == kSignedByte || size == kSignedHalf || size == kWord); + // Unaligned load with LDR and LDRSH is allowed on ARMv7 with SCTLR.A set to 0. + LoadBaseDisp(rl_address.low_reg, 0, rl_result.low_reg, size, INVALID_SREG); + StoreValue(rl_dest, rl_result); + } + return true; +} + +bool ArmMir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) { + RegLocation rl_src_address = info->args[0]; // long address + rl_src_address.wide = 0; // ignore high half in info->args[1] + RegLocation rl_src_value = info->args[2]; // [size] value + RegLocation rl_address = LoadValue(rl_src_address, kCoreReg); + if (size == kLong) { + // Fake unaligned STRD by two unaligned STR instructions on ARMv7 with SCTLR.A set to 0. + RegLocation rl_value = LoadValueWide(rl_src_value, kCoreReg); + StoreBaseDisp(rl_address.low_reg, 0, rl_value.low_reg, kWord); + StoreBaseDisp(rl_address.low_reg, 4, rl_value.high_reg, kWord); + } else { + DCHECK(size == kSignedByte || size == kSignedHalf || size == kWord); + // Unaligned store with STR and STRSH is allowed on ARMv7 with SCTLR.A set to 0. 
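As context for the two 32-bit loads in GenInlinedPeek above: the unaligned LDRD is faked with a pair of LDRs, and the load order is swapped when the address register doubles as the low-word destination so the base is not clobbered before the second load. A plain C++ analogue of the access pattern (memcpy stands in for the unaligned loads; the register-aliasing concern itself only shows up in the comments):

  #include <cstdint>
  #include <cstring>

  static uint64_t Peek64(const uint8_t* addr) {
    uint32_t lo, hi;
    std::memcpy(&lo, addr, sizeof(lo));      // ldr lo, [addr, #0]
    std::memcpy(&hi, addr + 4, sizeof(hi));  // ldr hi, [addr, #4]
    return (static_cast<uint64_t>(hi) << 32) | lo;
  }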
+ RegLocation rl_value = LoadValue(rl_src_value, kCoreReg); + StoreBaseDisp(rl_address.low_reg, 0, rl_value.low_reg, size); + } + return true; +} + void ArmMir2Lir::OpLea(int rBase, int reg1, int reg2, int scale, int offset) { LOG(FATAL) << "Unexpected use of OpLea for Arm"; } @@ -618,7 +686,7 @@ void ArmMir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) { break; } LIR* dmb = NewLIR1(kThumb2Dmb, dmb_flavor); - dmb->def_mask = ENCODE_ALL; + dmb->u.m.def_mask = ENCODE_ALL; #endif } @@ -755,7 +823,7 @@ void ArmMir2Lir::GenXorLong(RegLocation rl_dest, RegLocation rl_src1, * Generate array load */ void ArmMir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, - RegLocation rl_index, RegLocation rl_dest, int scale) { + RegLocation rl_index, RegLocation rl_dest, int scale) { RegisterClass reg_class = oat_reg_class_by_size(size); int len_offset = mirror::Array::LengthOffset().Int32Value(); int data_offset; @@ -845,13 +913,13 @@ void ArmMir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, * */ void ArmMir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, - RegLocation rl_index, RegLocation rl_src, int scale) { + RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) { RegisterClass reg_class = oat_reg_class_by_size(size); int len_offset = mirror::Array::LengthOffset().Int32Value(); - int data_offset; bool constant_index = rl_index.is_const; - if (rl_src.wide) { + int data_offset; + if (size == kLong || size == kDouble) { data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value(); } else { data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value(); @@ -868,12 +936,14 @@ void ArmMir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, } int reg_ptr; + bool allocated_reg_ptr_temp = false; if (constant_index) { reg_ptr = rl_array.low_reg; - } else if (IsTemp(rl_array.low_reg)) { + } else if (IsTemp(rl_array.low_reg) && !card_mark) { Clobber(rl_array.low_reg); reg_ptr = rl_array.low_reg; } else { + allocated_reg_ptr_temp = true; reg_ptr = AllocTemp(); } @@ -924,71 +994,15 @@ void ArmMir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, StoreBaseIndexed(reg_ptr, rl_index.low_reg, rl_src.low_reg, scale, size); } - if (!constant_index) { + if (allocated_reg_ptr_temp) { FreeTemp(reg_ptr); } -} - -/* - * Generate array store - * - */ -void ArmMir2Lir::GenArrayObjPut(int opt_flags, RegLocation rl_array, - RegLocation rl_index, RegLocation rl_src, int scale) { - int len_offset = mirror::Array::LengthOffset().Int32Value(); - int data_offset = mirror::Array::DataOffset(sizeof(mirror::Object*)).Int32Value(); - - FlushAllRegs(); // Use explicit registers - LockCallTemps(); - - int r_value = TargetReg(kArg0); // Register holding value - int r_array_class = TargetReg(kArg1); // Register holding array's Class - int r_array = TargetReg(kArg2); // Register holding array - int r_index = TargetReg(kArg3); // Register holding index into array - - LoadValueDirectFixed(rl_array, r_array); // Grab array - LoadValueDirectFixed(rl_src, r_value); // Grab value - LoadValueDirectFixed(rl_index, r_index); // Grab index - - GenNullCheck(rl_array.s_reg_low, r_array, opt_flags); // NPE? - - // Store of null? - LIR* null_value_check = OpCmpImmBranch(kCondEq, r_value, 0, NULL); - - // Get the array's class. 
- LoadWordDisp(r_array, mirror::Object::ClassOffset().Int32Value(), r_array_class); - CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(pCanPutArrayElement), r_value, - r_array_class, true); - // Redo LoadValues in case they didn't survive the call. - LoadValueDirectFixed(rl_array, r_array); // Reload array - LoadValueDirectFixed(rl_index, r_index); // Reload index - LoadValueDirectFixed(rl_src, r_value); // Reload value - r_array_class = INVALID_REG; - - // Branch here if value to be stored == null - LIR* target = NewLIR0(kPseudoTargetLabel); - null_value_check->target = target; - - bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK)); - int reg_len = INVALID_REG; - if (needs_range_check) { - reg_len = TargetReg(kArg1); - LoadWordDisp(r_array, len_offset, reg_len); // Get len - } - /* r_ptr -> array data */ - int r_ptr = AllocTemp(); - OpRegRegImm(kOpAdd, r_ptr, r_array, data_offset); - if (needs_range_check) { - GenRegRegCheck(kCondCs, r_index, reg_len, kThrowArrayBounds); - } - StoreBaseIndexed(r_ptr, r_index, r_value, scale, kWord); - FreeTemp(r_ptr); - FreeTemp(r_index); - if (!mir_graph_->IsConstantNullRef(rl_src)) { - MarkGCCard(r_value, r_array); + if (card_mark) { + MarkGCCard(rl_src.low_reg, rl_array.low_reg); } } + void ArmMir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src, RegLocation rl_shift) { rl_src = LoadValueWide(rl_src, kCoreReg); diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc index 6cc3052da1..52aba9b4df 100644 --- a/compiler/dex/quick/arm/target_arm.cc +++ b/compiler/dex/quick/arm/target_arm.cc @@ -74,6 +74,8 @@ int ArmMir2Lir::TargetReg(SpecialTargetRegister reg) { case kRet0: res = rARM_RET0; break; case kRet1: res = rARM_RET1; break; case kInvokeTgt: res = rARM_INVOKE_TGT; break; + case kHiddenArg: res = r12; break; + case kHiddenFpArg: res = INVALID_REG; break; case kCount: res = rARM_COUNT; break; } return res; @@ -118,78 +120,83 @@ uint64_t ArmMir2Lir::GetPCUseDefEncoding() { return ENCODE_ARM_REG_PC; } -void ArmMir2Lir::SetupTargetResourceMasks(LIR* lir) { +// Thumb2 specific setup. TODO: inline?: +void ArmMir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags) { DCHECK_EQ(cu_->instruction_set, kThumb2); + DCHECK(!lir->flags.use_def_invalid); - // Thumb2 specific setup - uint64_t flags = ArmMir2Lir::EncodingMap[lir->opcode].flags; int opcode = lir->opcode; - if (flags & REG_DEF_SP) { - lir->def_mask |= ENCODE_ARM_REG_SP; - } + // These flags are somewhat uncommon - bypass if we can. 
+ if ((flags & (REG_DEF_SP | REG_USE_SP | REG_DEF_LIST0 | REG_DEF_LIST1 | + REG_DEF_FPCS_LIST0 | REG_DEF_FPCS_LIST2 | REG_USE_PC | IS_IT | REG_USE_LIST0 | + REG_USE_LIST1 | REG_USE_FPCS_LIST0 | REG_USE_FPCS_LIST2 | REG_DEF_LR)) != 0) { + if (flags & REG_DEF_SP) { + lir->u.m.def_mask |= ENCODE_ARM_REG_SP; + } - if (flags & REG_USE_SP) { - lir->use_mask |= ENCODE_ARM_REG_SP; - } + if (flags & REG_USE_SP) { + lir->u.m.use_mask |= ENCODE_ARM_REG_SP; + } - if (flags & REG_DEF_LIST0) { - lir->def_mask |= ENCODE_ARM_REG_LIST(lir->operands[0]); - } + if (flags & REG_DEF_LIST0) { + lir->u.m.def_mask |= ENCODE_ARM_REG_LIST(lir->operands[0]); + } - if (flags & REG_DEF_LIST1) { - lir->def_mask |= ENCODE_ARM_REG_LIST(lir->operands[1]); - } + if (flags & REG_DEF_LIST1) { + lir->u.m.def_mask |= ENCODE_ARM_REG_LIST(lir->operands[1]); + } - if (flags & REG_DEF_FPCS_LIST0) { - lir->def_mask |= ENCODE_ARM_REG_FPCS_LIST(lir->operands[0]); - } + if (flags & REG_DEF_FPCS_LIST0) { + lir->u.m.def_mask |= ENCODE_ARM_REG_FPCS_LIST(lir->operands[0]); + } - if (flags & REG_DEF_FPCS_LIST2) { - for (int i = 0; i < lir->operands[2]; i++) { - SetupRegMask(&lir->def_mask, lir->operands[1] + i); + if (flags & REG_DEF_FPCS_LIST2) { + for (int i = 0; i < lir->operands[2]; i++) { + SetupRegMask(&lir->u.m.def_mask, lir->operands[1] + i); + } } - } - if (flags & REG_USE_PC) { - lir->use_mask |= ENCODE_ARM_REG_PC; - } + if (flags & REG_USE_PC) { + lir->u.m.use_mask |= ENCODE_ARM_REG_PC; + } - /* Conservatively treat the IT block */ - if (flags & IS_IT) { - lir->def_mask = ENCODE_ALL; - } + /* Conservatively treat the IT block */ + if (flags & IS_IT) { + lir->u.m.def_mask = ENCODE_ALL; + } - if (flags & REG_USE_LIST0) { - lir->use_mask |= ENCODE_ARM_REG_LIST(lir->operands[0]); - } + if (flags & REG_USE_LIST0) { + lir->u.m.use_mask |= ENCODE_ARM_REG_LIST(lir->operands[0]); + } - if (flags & REG_USE_LIST1) { - lir->use_mask |= ENCODE_ARM_REG_LIST(lir->operands[1]); - } + if (flags & REG_USE_LIST1) { + lir->u.m.use_mask |= ENCODE_ARM_REG_LIST(lir->operands[1]); + } - if (flags & REG_USE_FPCS_LIST0) { - lir->use_mask |= ENCODE_ARM_REG_FPCS_LIST(lir->operands[0]); - } + if (flags & REG_USE_FPCS_LIST0) { + lir->u.m.use_mask |= ENCODE_ARM_REG_FPCS_LIST(lir->operands[0]); + } - if (flags & REG_USE_FPCS_LIST2) { - for (int i = 0; i < lir->operands[2]; i++) { - SetupRegMask(&lir->use_mask, lir->operands[1] + i); + if (flags & REG_USE_FPCS_LIST2) { + for (int i = 0; i < lir->operands[2]; i++) { + SetupRegMask(&lir->u.m.use_mask, lir->operands[1] + i); + } } - } - /* Fixup for kThumbPush/lr and kThumbPop/pc */ - if (opcode == kThumbPush || opcode == kThumbPop) { - uint64_t r8Mask = GetRegMaskCommon(r8); - if ((opcode == kThumbPush) && (lir->use_mask & r8Mask)) { - lir->use_mask &= ~r8Mask; - lir->use_mask |= ENCODE_ARM_REG_LR; - } else if ((opcode == kThumbPop) && (lir->def_mask & r8Mask)) { - lir->def_mask &= ~r8Mask; - lir->def_mask |= ENCODE_ARM_REG_PC; + /* Fixup for kThumbPush/lr and kThumbPop/pc */ + if (opcode == kThumbPush || opcode == kThumbPop) { + uint64_t r8Mask = GetRegMaskCommon(r8); + if ((opcode == kThumbPush) && (lir->u.m.use_mask & r8Mask)) { + lir->u.m.use_mask &= ~r8Mask; + lir->u.m.use_mask |= ENCODE_ARM_REG_LR; + } else if ((opcode == kThumbPop) && (lir->u.m.def_mask & r8Mask)) { + lir->u.m.def_mask &= ~r8Mask; + lir->u.m.def_mask |= ENCODE_ARM_REG_PC; + } + } + if (flags & REG_DEF_LR) { + lir->u.m.def_mask |= ENCODE_ARM_REG_LR; } - } - if (flags & REG_DEF_LR) { - lir->def_mask |= ENCODE_ARM_REG_LR; } } @@ -277,8 +284,8 
@@ static char* DecodeFPCSRegList(int count, int base, char* buf) { return buf; } -static int ExpandImmediate(int value) { - int mode = (value & 0xf00) >> 8; +static int32_t ExpandImmediate(int value) { + int32_t mode = (value & 0xf00) >> 8; uint32_t bits = value & 0xff; switch (mode) { case 0: @@ -466,8 +473,8 @@ void ArmMir2Lir::DumpResourceMask(LIR* arm_lir, uint64_t mask, const char* prefi /* Memory bits */ if (arm_lir && (mask & ENCODE_DALVIK_REG)) { - sprintf(buf + strlen(buf), "dr%d%s", arm_lir->alias_info & 0xffff, - (arm_lir->alias_info & 0x80000000) ? "(+1)" : ""); + sprintf(buf + strlen(buf), "dr%d%s", DECODE_ALIAS_INFO_REG(arm_lir->flags.alias_info), + DECODE_ALIAS_INFO_WIDE(arm_lir->flags.alias_info) ? "(+1)" : ""); } if (mask & ENCODE_LITERAL) { strcat(buf, "lit "); @@ -691,11 +698,6 @@ RegLocation ArmMir2Lir::GetReturnAlt() { return res; } -ArmMir2Lir::RegisterInfo* ArmMir2Lir::GetRegInfo(int reg) { - return ARM_FPREG(reg) ? ®_pool_->FPRegs[reg & ARM_FP_REG_MASK] - : ®_pool_->core_regs[reg]; -} - /* To be used when explicitly managing register use */ void ArmMir2Lir::LockCallTemps() { LockTemp(r0); @@ -718,14 +720,17 @@ int ArmMir2Lir::LoadHelper(ThreadOffset offset) { } uint64_t ArmMir2Lir::GetTargetInstFlags(int opcode) { + DCHECK(!IsPseudoLirOp(opcode)); return ArmMir2Lir::EncodingMap[opcode].flags; } const char* ArmMir2Lir::GetTargetInstName(int opcode) { + DCHECK(!IsPseudoLirOp(opcode)); return ArmMir2Lir::EncodingMap[opcode].name; } const char* ArmMir2Lir::GetTargetInstFmt(int opcode) { + DCHECK(!IsPseudoLirOp(opcode)); return ArmMir2Lir::EncodingMap[opcode].fmt; } diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc index c63de69284..d631cf7047 100644 --- a/compiler/dex/quick/arm/utility_arm.cc +++ b/compiler/dex/quick/arm/utility_arm.cc @@ -22,14 +22,14 @@ namespace art { /* This file contains codegen for the Thumb ISA. */ -static int EncodeImmSingle(int value) { - int res; - int bit_a = (value & 0x80000000) >> 31; - int not_bit_b = (value & 0x40000000) >> 30; - int bit_b = (value & 0x20000000) >> 29; - int b_smear = (value & 0x3e000000) >> 25; - int slice = (value & 0x01f80000) >> 19; - int zeroes = (value & 0x0007ffff); +static int32_t EncodeImmSingle(int32_t value) { + int32_t res; + int32_t bit_a = (value & 0x80000000) >> 31; + int32_t not_bit_b = (value & 0x40000000) >> 30; + int32_t bit_b = (value & 0x20000000) >> 29; + int32_t b_smear = (value & 0x3e000000) >> 25; + int32_t slice = (value & 0x01f80000) >> 19; + int32_t zeroes = (value & 0x0007ffff); if (zeroes != 0) return -1; if (bit_b) { @@ -47,15 +47,15 @@ static int EncodeImmSingle(int value) { * Determine whether value can be encoded as a Thumb2 floating point * immediate. If not, return -1. If so return encoded 8-bit value. 
*/ -static int EncodeImmDouble(int64_t value) { - int res; - int bit_a = (value & 0x8000000000000000ll) >> 63; - int not_bit_b = (value & 0x4000000000000000ll) >> 62; - int bit_b = (value & 0x2000000000000000ll) >> 61; - int b_smear = (value & 0x3fc0000000000000ll) >> 54; - int slice = (value & 0x003f000000000000ll) >> 48; +static int32_t EncodeImmDouble(int64_t value) { + int32_t res; + int32_t bit_a = (value & 0x8000000000000000ll) >> 63; + int32_t not_bit_b = (value & 0x4000000000000000ll) >> 62; + int32_t bit_b = (value & 0x2000000000000000ll) >> 61; + int32_t b_smear = (value & 0x3fc0000000000000ll) >> 54; + int32_t slice = (value & 0x003f000000000000ll) >> 48; uint64_t zeroes = (value & 0x0000ffffffffffffll); - if (zeroes != 0) + if (zeroes != 0ull) return -1; if (bit_b) { if ((not_bit_b != 0) || (b_smear != 0xff)) @@ -90,15 +90,14 @@ LIR* ArmMir2Lir::LoadFPConstantValue(int r_dest, int value) { LIR* load_pc_rel = RawLIR(current_dalvik_offset_, kThumb2Vldrs, r_dest, r15pc, 0, 0, 0, data_target); SetMemRefType(load_pc_rel, true, kLiteral); - load_pc_rel->alias_info = reinterpret_cast<uintptr_t>(data_target); AppendLIR(load_pc_rel); return load_pc_rel; } static int LeadingZeros(uint32_t val) { uint32_t alt; - int n; - int count; + int32_t n; + int32_t count; count = 16; n = 32; @@ -118,8 +117,8 @@ static int LeadingZeros(uint32_t val) { * immediate. If not, return -1. If so, return i:imm3:a:bcdefgh form. */ int ArmMir2Lir::ModifiedImmediate(uint32_t value) { - int z_leading; - int z_trailing; + int32_t z_leading; + int32_t z_trailing; uint32_t b0 = value & 0xff; /* Note: case of value==0 must use 0:000:0:0000000 encoding */ @@ -315,6 +314,22 @@ LIR* ArmMir2Lir::OpRegRegShift(OpKind op, int r_dest_src1, int r_src2, case kOpSub: opcode = (thumb_form) ? kThumbSubRRR : kThumb2SubRRR; break; + case kOpRev: + DCHECK_EQ(shift, 0); + if (!thumb_form) { + // Binary, but rm is encoded twice. + return NewLIR3(kThumb2RevRR, r_dest_src1, r_src2, r_src2); + } + opcode = kThumbRev; + break; + case kOpRevsh: + DCHECK_EQ(shift, 0); + if (!thumb_form) { + // Binary, but rm is encoded twice. + return NewLIR3(kThumb2RevshRR, r_dest_src1, r_src2, r_src2); + } + opcode = kThumbRevsh; + break; case kOp2Byte: DCHECK_EQ(shift, 0); return NewLIR4(kThumb2Sbfx, r_dest_src1, r_src2, 0, 8); @@ -328,7 +343,7 @@ LIR* ArmMir2Lir::OpRegRegShift(OpKind op, int r_dest_src1, int r_src2, LOG(FATAL) << "Bad opcode: " << op; break; } - DCHECK_GE(static_cast<int>(opcode), 0); + DCHECK(!IsPseudoLirOp(opcode)); if (EncodingMap[opcode].flags & IS_BINARY_OP) { return NewLIR2(opcode, r_dest_src1, r_src2); } else if (EncodingMap[opcode].flags & IS_TERTIARY_OP) { @@ -380,6 +395,10 @@ LIR* ArmMir2Lir::OpRegRegRegShift(OpKind op, int r_dest, int r_src1, DCHECK_EQ(shift, 0); opcode = kThumb2MulRRR; break; + case kOpDiv: + DCHECK_EQ(shift, 0); + opcode = kThumb2SdivRRR; + break; case kOpOr: opcode = kThumb2OrrRRR; break; @@ -406,7 +425,7 @@ LIR* ArmMir2Lir::OpRegRegRegShift(OpKind op, int r_dest, int r_src1, LOG(FATAL) << "Bad opcode: " << op; break; } - DCHECK_GE(static_cast<int>(opcode), 0); + DCHECK(!IsPseudoLirOp(opcode)); if (EncodingMap[opcode].flags & IS_QUAD_OP) { return NewLIR4(opcode, r_dest, r_src1, r_src2, shift); } else { @@ -422,12 +441,12 @@ LIR* ArmMir2Lir::OpRegRegReg(OpKind op, int r_dest, int r_src1, int r_src2) { LIR* ArmMir2Lir::OpRegRegImm(OpKind op, int r_dest, int r_src1, int value) { LIR* res; bool neg = (value < 0); - int abs_value = (neg) ? -value : value; + int32_t abs_value = (neg) ? 
-value : value; ArmOpcode opcode = kThumbBkpt; ArmOpcode alt_opcode = kThumbBkpt; bool all_low_regs = (ARM_LOWREG(r_dest) && ARM_LOWREG(r_src1)); - int mod_imm = ModifiedImmediate(value); - int mod_imm_neg = ModifiedImmediate(-value); + int32_t mod_imm = ModifiedImmediate(value); + int32_t mod_imm_neg = ModifiedImmediate(-value); switch (op) { case kOpLsl: @@ -545,7 +564,7 @@ LIR* ArmMir2Lir::OpRegRegImm(OpKind op, int r_dest, int r_src1, int value) { /* Handle Thumb-only variants here - otherwise punt to OpRegRegImm */ LIR* ArmMir2Lir::OpRegImm(OpKind op, int r_dest_src1, int value) { bool neg = (value < 0); - int abs_value = (neg) ? -value : value; + int32_t abs_value = (neg) ? -value : value; bool short_form = (((abs_value & 0xff) == abs_value) && ARM_LOWREG(r_dest_src1)); ArmOpcode opcode = kThumbBkpt; switch (op) { @@ -626,7 +645,6 @@ LIR* ArmMir2Lir::LoadConstantWide(int r_dest_lo, int r_dest_hi, int64_t value) { r_dest_lo, r_dest_hi, r15pc, 0, 0, data_target); } SetMemRefType(res, true, kLiteral); - res->alias_info = reinterpret_cast<uintptr_t>(data_target); AppendLIR(res); } return res; diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc index a49fa7b44d..dfbc887299 100644 --- a/compiler/dex/quick/codegen_util.cc +++ b/compiler/dex/quick/codegen_util.cc @@ -45,29 +45,54 @@ bool Mir2Lir::IsInexpensiveConstant(RegLocation rl_src) { } void Mir2Lir::MarkSafepointPC(LIR* inst) { - inst->def_mask = ENCODE_ALL; + DCHECK(!inst->flags.use_def_invalid); + inst->u.m.def_mask = ENCODE_ALL; LIR* safepoint_pc = NewLIR0(kPseudoSafepointPC); - DCHECK_EQ(safepoint_pc->def_mask, ENCODE_ALL); + DCHECK_EQ(safepoint_pc->u.m.def_mask, ENCODE_ALL); } -bool Mir2Lir::FastInstance(uint32_t field_idx, int& field_offset, bool& is_volatile, bool is_put) { +bool Mir2Lir::FastInstance(uint32_t field_idx, bool is_put, int* field_offset, bool* is_volatile) { return cu_->compiler_driver->ComputeInstanceFieldInfo( - field_idx, mir_graph_->GetCurrentDexCompilationUnit(), field_offset, is_volatile, is_put); + field_idx, mir_graph_->GetCurrentDexCompilationUnit(), is_put, field_offset, is_volatile); +} + +/* Remove a LIR from the list. */ +void Mir2Lir::UnlinkLIR(LIR* lir) { + if (UNLIKELY(lir == first_lir_insn_)) { + first_lir_insn_ = lir->next; + if (lir->next != NULL) { + lir->next->prev = NULL; + } else { + DCHECK(lir->next == NULL); + DCHECK(lir == last_lir_insn_); + last_lir_insn_ = NULL; + } + } else if (lir == last_lir_insn_) { + last_lir_insn_ = lir->prev; + lir->prev->next = NULL; + } else if ((lir->prev != NULL) && (lir->next != NULL)) { + lir->prev->next = lir->next; + lir->next->prev = lir->prev; + } } /* Convert an instruction to a NOP */ void Mir2Lir::NopLIR(LIR* lir) { lir->flags.is_nop = true; + if (!cu_->verbose) { + UnlinkLIR(lir); + } } void Mir2Lir::SetMemRefType(LIR* lir, bool is_load, int mem_type) { uint64_t *mask_ptr; uint64_t mask = ENCODE_MEM; DCHECK(GetTargetInstFlags(lir->opcode) & (IS_LOAD | IS_STORE)); + DCHECK(!lir->flags.use_def_invalid); if (is_load) { - mask_ptr = &lir->use_mask; + mask_ptr = &lir->u.m.use_mask; } else { - mask_ptr = &lir->def_mask; + mask_ptr = &lir->u.m.def_mask; } /* Clear out the memref flags */ *mask_ptr &= ~mask; @@ -104,7 +129,7 @@ void Mir2Lir::AnnotateDalvikRegAccess(LIR* lir, int reg_id, bool is_load, * Store the Dalvik register id in alias_info. Mark the MSB if it is a 64-bit * access. 
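A compact model of the alias_info packing that comment describes; these are illustrative stand-ins, not ART's ENCODE_ALIAS_INFO / DECODE_ALIAS_INFO_* macros, but the masks match what the old DumpResourceMask code earlier in this diff decoded by hand (0xffff and 0x80000000):

// Dalvik register id in the low 16 bits, MSB set when the access is 64-bit (wide).
#include <cstdint>

inline uint32_t EncodeAliasInfo(uint16_t dalvik_reg, bool is_wide) {
  return static_cast<uint32_t>(dalvik_reg) | (is_wide ? 0x80000000u : 0u);
}
inline uint16_t DecodeAliasInfoReg(uint32_t info)  { return info & 0xffffu; }
inline bool     DecodeAliasInfoWide(uint32_t info) { return (info & 0x80000000u) != 0u; }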
*/ - lir->alias_info = ENCODE_ALIAS_INFO(reg_id, is64bit); + lir->flags.alias_info = ENCODE_ALIAS_INFO(reg_id, is64bit); } /* @@ -135,10 +160,12 @@ void Mir2Lir::DumpLIRInsn(LIR* lir, unsigned char* base_addr) { break; case kPseudoDalvikByteCodeBoundary: if (lir->operands[0] == 0) { - lir->operands[0] = reinterpret_cast<uintptr_t>("No instruction string"); + // NOTE: only used for debug listings. + lir->operands[0] = WrapPointer(ArenaStrdup("No instruction string")); } LOG(INFO) << "-------- dalvik offset: 0x" << std::hex - << lir->dalvik_offset << " @ " << reinterpret_cast<char*>(lir->operands[0]); + << lir->dalvik_offset << " @ " + << reinterpret_cast<char*>(UnwrapPointer(lir->operands[0])); break; case kPseudoExitBlock: LOG(INFO) << "-------- exit offset: 0x" << std::hex << dest; @@ -190,11 +217,11 @@ void Mir2Lir::DumpLIRInsn(LIR* lir, unsigned char* base_addr) { break; } - if (lir->use_mask && (!lir->flags.is_nop || dump_nop)) { - DUMP_RESOURCE_MASK(DumpResourceMask(lir, lir->use_mask, "use")); + if (lir->u.m.use_mask && (!lir->flags.is_nop || dump_nop)) { + DUMP_RESOURCE_MASK(DumpResourceMask(lir, lir->u.m.use_mask, "use")); } - if (lir->def_mask && (!lir->flags.is_nop || dump_nop)) { - DUMP_RESOURCE_MASK(DumpResourceMask(lir, lir->def_mask, "def")); + if (lir->u.m.def_mask && (!lir->flags.is_nop || dump_nop)) { + DUMP_RESOURCE_MASK(DumpResourceMask(lir, lir->u.m.def_mask, "def")); } } @@ -225,12 +252,12 @@ void Mir2Lir::DumpPromotionMap() { } /* Dump a mapping table */ -void Mir2Lir::DumpMappingTable(const char* table_name, const std::string& descriptor, - const std::string& name, const std::string& signature, +void Mir2Lir::DumpMappingTable(const char* table_name, const char* descriptor, + const char* name, const Signature& signature, const std::vector<uint32_t>& v) { if (v.size() > 0) { std::string line(StringPrintf("\n %s %s%s_%s_table[%zu] = {", table_name, - descriptor.c_str(), name.c_str(), signature.c_str(), v.size())); + descriptor, name, signature.ToString().c_str(), v.size())); std::replace(line.begin(), line.end(), ';', '_'); LOG(INFO) << line; for (uint32_t i = 0; i < v.size(); i+=2) { @@ -270,9 +297,9 @@ void Mir2Lir::CodegenDump() { const DexFile::MethodId& method_id = cu_->dex_file->GetMethodId(cu_->method_idx); - std::string signature(cu_->dex_file->GetMethodSignature(method_id)); - std::string name(cu_->dex_file->GetMethodName(method_id)); - std::string descriptor(cu_->dex_file->GetMethodDeclaringClassDescriptor(method_id)); + const Signature signature = cu_->dex_file->GetMethodSignature(method_id); + const char* name = cu_->dex_file->GetMethodName(method_id); + const char* descriptor(cu_->dex_file->GetMethodDeclaringClassDescriptor(method_id)); // Dump mapping tables DumpMappingTable("PC2Dex_MappingTable", descriptor, name, signature, pc2dex_mapping_table_); @@ -325,6 +352,7 @@ LIR* Mir2Lir::AddWordData(LIR* *constant_list_p, int value) { new_value->operands[0] = value; new_value->next = *constant_list_p; *constant_list_p = new_value; + estimated_native_code_size_ += sizeof(value); return new_value; } return NULL; @@ -343,6 +371,17 @@ static void PushWord(std::vector<uint8_t>&buf, int data) { buf.push_back((data >> 24) & 0xff); } +// Push 8 bytes on 64-bit systems; 4 on 32-bit systems. 
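The DumpLIRInsn hunk above (and several later ones) stops storing raw pointers in 32-bit LIR operands and goes through WrapPointer()/UnwrapPointer() instead. A minimal sketch of that idea, assuming — as the pointer_storage_ member and the "reserve pointer id 0 for NULL" note later in this diff suggest — that wrapped pointers are kept in a growable table and addressed by index:

// Illustrative pointer-wrapping table; not ART's GrowableArray-based implementation.
#include <cstddef>
#include <vector>

class PointerTable {
 public:
  // Returns a small index that fits in a 32-bit operand on any host.
  size_t Wrap(const void* p) {
    storage_.push_back(p);
    return storage_.size() - 1;   // wrapping nullptr first reserves index 0 for it
  }
  const void* Unwrap(size_t idx) const { return storage_[idx]; }

 private:
  std::vector<const void*> storage_;
};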
+static void PushPointer(std::vector<uint8_t>&buf, void const* pointer) { + uintptr_t data = reinterpret_cast<uintptr_t>(pointer); + if (sizeof(void*) == sizeof(uint64_t)) { + PushWord(buf, (data >> (sizeof(void*) * 4)) & 0xFFFFFFFF); + PushWord(buf, data & 0xFFFFFFFF); + } else { + PushWord(buf, data); + } +} + static void AlignBuffer(std::vector<uint8_t>&buf, size_t offset) { while (buf.size() < offset) { buf.push_back(0); @@ -369,9 +408,8 @@ void Mir2Lir::InstallLiteralPools() { static_cast<InvokeType>(data_lir->operands[1]), code_buffer_.size()); const DexFile::MethodId& id = cu_->dex_file->GetMethodId(target); - // unique based on target to ensure code deduplication works - uint32_t unique_patch_value = reinterpret_cast<uint32_t>(&id); - PushWord(code_buffer_, unique_patch_value); + // unique value based on target to ensure code deduplication works + PushPointer(code_buffer_, &id); data_lir = NEXT_LIR(data_lir); } data_lir = method_literal_list_; @@ -385,9 +423,8 @@ void Mir2Lir::InstallLiteralPools() { static_cast<InvokeType>(data_lir->operands[1]), code_buffer_.size()); const DexFile::MethodId& id = cu_->dex_file->GetMethodId(target); - // unique based on target to ensure code deduplication works - uint32_t unique_patch_value = reinterpret_cast<uint32_t>(&id); - PushWord(code_buffer_, unique_patch_value); + // unique value based on target to ensure code deduplication works + PushPointer(code_buffer_, &id); data_lir = NEXT_LIR(data_lir); } } @@ -408,6 +445,7 @@ void Mir2Lir::InstallSwitchTables() { int bx_offset = INVALID_OFFSET; switch (cu_->instruction_set) { case kThumb2: + DCHECK(tab_rec->anchor->flags.fixup != kFixupNone); bx_offset = tab_rec->anchor->offset + 4; break; case kX86: @@ -422,7 +460,7 @@ void Mir2Lir::InstallSwitchTables() { LOG(INFO) << "Switch table for offset 0x" << std::hex << bx_offset; } if (tab_rec->table[0] == Instruction::kSparseSwitchSignature) { - const int* keys = reinterpret_cast<const int*>(&(tab_rec->table[2])); + const int32_t* keys = reinterpret_cast<const int32_t*>(&(tab_rec->table[2])); for (int elems = 0; elems < tab_rec->table[1]; elems++) { int disp = tab_rec->targets[elems]->offset - bx_offset; if (cu_->verbose) { @@ -463,7 +501,7 @@ void Mir2Lir::InstallFillArrayData() { } } -static int AssignLiteralOffsetCommon(LIR* lir, int offset) { +static int AssignLiteralOffsetCommon(LIR* lir, CodeOffset offset) { for (; lir != NULL; lir = lir->next) { lir->offset = offset; offset += 4; @@ -471,6 +509,17 @@ static int AssignLiteralOffsetCommon(LIR* lir, int offset) { return offset; } +static int AssignLiteralPointerOffsetCommon(LIR* lir, CodeOffset offset) { + unsigned int element_size = sizeof(void*); + // Align to natural pointer size. 
+ offset = (offset + (element_size - 1)) & ~(element_size - 1); + for (; lir != NULL; lir = lir->next) { + lir->offset = offset; + offset += element_size; + } + return offset; +} + // Make sure we have a code address for every declared catch entry bool Mir2Lir::VerifyCatchEntries() { bool success = true; @@ -580,8 +629,8 @@ class NativePcToReferenceMapBuilder { table_index = (table_index + 1) % entries_; } in_use_[table_index] = true; - SetNativeOffset(table_index, native_offset); - DCHECK_EQ(native_offset, GetNativeOffset(table_index)); + SetCodeOffset(table_index, native_offset); + DCHECK_EQ(native_offset, GetCodeOffset(table_index)); SetReferences(table_index, references); } @@ -590,7 +639,7 @@ class NativePcToReferenceMapBuilder { return NativePcOffsetToReferenceMap::Hash(native_offset) % entries_; } - uint32_t GetNativeOffset(size_t table_index) { + uint32_t GetCodeOffset(size_t table_index) { uint32_t native_offset = 0; size_t table_offset = (table_index * EntryWidth()) + sizeof(uint32_t); for (size_t i = 0; i < native_offset_width_; i++) { @@ -599,7 +648,7 @@ class NativePcToReferenceMapBuilder { return native_offset; } - void SetNativeOffset(size_t table_index, uint32_t native_offset) { + void SetCodeOffset(size_t table_index, uint32_t native_offset) { size_t table_offset = (table_index * EntryWidth()) + sizeof(uint32_t); for (size_t i = 0; i < native_offset_width_; i++) { (*table_)[table_offset + i] = (native_offset >> (i * 8)) & 0xFF; @@ -654,17 +703,17 @@ void Mir2Lir::CreateNativeGcMap() { } /* Determine the offset of each literal field */ -int Mir2Lir::AssignLiteralOffset(int offset) { +int Mir2Lir::AssignLiteralOffset(CodeOffset offset) { offset = AssignLiteralOffsetCommon(literal_list_, offset); - offset = AssignLiteralOffsetCommon(code_literal_list_, offset); - offset = AssignLiteralOffsetCommon(method_literal_list_, offset); + offset = AssignLiteralPointerOffsetCommon(code_literal_list_, offset); + offset = AssignLiteralPointerOffsetCommon(method_literal_list_, offset); return offset; } -int Mir2Lir::AssignSwitchTablesOffset(int offset) { +int Mir2Lir::AssignSwitchTablesOffset(CodeOffset offset) { GrowableArray<SwitchTable*>::Iterator iterator(&switch_tables_); while (true) { - Mir2Lir::SwitchTable *tab_rec = iterator.Next(); + Mir2Lir::SwitchTable* tab_rec = iterator.Next(); if (tab_rec == NULL) break; tab_rec->offset = offset; if (tab_rec->table[0] == Instruction::kSparseSwitchSignature) { @@ -678,7 +727,7 @@ int Mir2Lir::AssignSwitchTablesOffset(int offset) { return offset; } -int Mir2Lir::AssignFillArrayDataOffset(int offset) { +int Mir2Lir::AssignFillArrayDataOffset(CodeOffset offset) { GrowableArray<FillArrayData*>::Iterator iterator(&fill_array_data_); while (true) { Mir2Lir::FillArrayData *tab_rec = iterator.Next(); @@ -691,122 +740,35 @@ int Mir2Lir::AssignFillArrayDataOffset(int offset) { return offset; } -// LIR offset assignment. -int Mir2Lir::AssignInsnOffsets() { - LIR* lir; - int offset = 0; - - for (lir = first_lir_insn_; lir != NULL; lir = NEXT_LIR(lir)) { - lir->offset = offset; - if (lir->opcode >= 0) { - if (!lir->flags.is_nop) { - offset += lir->flags.size; - } - } else if (lir->opcode == kPseudoPseudoAlign4) { - if (offset & 0x2) { - offset += 2; - lir->operands[0] = 1; - } else { - lir->operands[0] = 0; - } - } - /* Pseudo opcodes don't consume space */ - } - - return offset; -} - -/* - * Walk the compilation unit and assign offsets to instructions - * and literals and compute the total size of the compiled unit. 
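The align-up expression at the top of AssignLiteralPointerOffsetCommon above is easy to mis-read; a quick standalone check of what it does (illustrative only):

// Rounds offset up to the next multiple of element_size (element_size must be a power of two).
#include <cassert>
#include <cstddef>

static size_t AlignUp(size_t offset, size_t element_size) {
  return (offset + (element_size - 1)) & ~(element_size - 1);
}

int main() {
  assert(AlignUp(20, 8) == 24);  // 64-bit host: pointer literals land on 8-byte boundaries
  assert(AlignUp(24, 8) == 24);  // already aligned offsets are unchanged
  assert(AlignUp(21, 4) == 24);  // 32-bit host: 4-byte alignment
  return 0;
}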
- */ -void Mir2Lir::AssignOffsets() { - int offset = AssignInsnOffsets(); - - /* Const values have to be word aligned */ - offset = (offset + 3) & ~3; - - /* Set up offsets for literals */ - data_offset_ = offset; - - offset = AssignLiteralOffset(offset); - - offset = AssignSwitchTablesOffset(offset); - - offset = AssignFillArrayDataOffset(offset); - - total_size_ = offset; -} - -/* - * Go over each instruction in the list and calculate the offset from the top - * before sending them off to the assembler. If out-of-range branch distance is - * seen rearrange the instructions a bit to correct it. - */ -void Mir2Lir::AssembleLIR() { - AssignOffsets(); - int assembler_retries = 0; - /* - * Assemble here. Note that we generate code with optimistic assumptions - * and if found now to work, we'll have to redo the sequence and retry. - */ - - while (true) { - AssemblerStatus res = AssembleInstructions(0); - if (res == kSuccess) { - break; - } else { - assembler_retries++; - if (assembler_retries > MAX_ASSEMBLER_RETRIES) { - CodegenDump(); - LOG(FATAL) << "Assembler error - too many retries"; - } - // Redo offsets and try again - AssignOffsets(); - code_buffer_.clear(); - } - } - - // Install literals - InstallLiteralPools(); - - // Install switch tables - InstallSwitchTables(); - - // Install fill array data - InstallFillArrayData(); - - // Create the mapping table and native offset to reference map. - CreateMappingTables(); - - CreateNativeGcMap(); -} - /* * Insert a kPseudoCaseLabel at the beginning of the Dalvik - * offset vaddr. This label will be used to fix up the case - * branch table during the assembly phase. Be sure to set - * all resource flags on this to prevent code motion across - * target boundaries. KeyVal is just there for debugging. + * offset vaddr if pretty-printing, otherwise use the standard block + * label. The selected label will be used to fix up the case + * branch table during the assembly phase. All resource flags + * are set to prevent code motion. KeyVal is just there for debugging. */ -LIR* Mir2Lir::InsertCaseLabel(int vaddr, int keyVal) { - SafeMap<unsigned int, LIR*>::iterator it; - it = boundary_map_.find(vaddr); - if (it == boundary_map_.end()) { - LOG(FATAL) << "Error: didn't find vaddr 0x" << std::hex << vaddr; +LIR* Mir2Lir::InsertCaseLabel(DexOffset vaddr, int keyVal) { + LIR* boundary_lir = &block_label_list_[mir_graph_->FindBlock(vaddr)->id]; + LIR* res = boundary_lir; + if (cu_->verbose) { + // Only pay the expense if we're pretty-printing.
+ LIR* new_label = static_cast<LIR*>(arena_->Alloc(sizeof(LIR), ArenaAllocator::kAllocLIR)); + new_label->dalvik_offset = vaddr; + new_label->opcode = kPseudoCaseLabel; + new_label->operands[0] = keyVal; + new_label->flags.fixup = kFixupLabel; + DCHECK(!new_label->flags.use_def_invalid); + new_label->u.m.def_mask = ENCODE_ALL; + InsertLIRAfter(boundary_lir, new_label); + res = new_label; } - LIR* new_label = static_cast<LIR*>(arena_->Alloc(sizeof(LIR), ArenaAllocator::kAllocLIR)); - new_label->dalvik_offset = vaddr; - new_label->opcode = kPseudoCaseLabel; - new_label->operands[0] = keyVal; - InsertLIRAfter(it->second, new_label); - return new_label; + return res; } -void Mir2Lir::MarkPackedCaseLabels(Mir2Lir::SwitchTable *tab_rec) { +void Mir2Lir::MarkPackedCaseLabels(Mir2Lir::SwitchTable* tab_rec) { const uint16_t* table = tab_rec->table; - int base_vaddr = tab_rec->vaddr; - const int *targets = reinterpret_cast<const int*>(&table[4]); + DexOffset base_vaddr = tab_rec->vaddr; + const int32_t *targets = reinterpret_cast<const int32_t*>(&table[4]); int entries = table[1]; int low_key = s4FromSwitchData(&table[2]); for (int i = 0; i < entries; i++) { @@ -814,12 +776,12 @@ void Mir2Lir::MarkPackedCaseLabels(Mir2Lir::SwitchTable *tab_rec) { } } -void Mir2Lir::MarkSparseCaseLabels(Mir2Lir::SwitchTable *tab_rec) { +void Mir2Lir::MarkSparseCaseLabels(Mir2Lir::SwitchTable* tab_rec) { const uint16_t* table = tab_rec->table; - int base_vaddr = tab_rec->vaddr; + DexOffset base_vaddr = tab_rec->vaddr; int entries = table[1]; - const int* keys = reinterpret_cast<const int*>(&table[2]); - const int* targets = &keys[entries]; + const int32_t* keys = reinterpret_cast<const int32_t*>(&table[2]); + const int32_t* targets = &keys[entries]; for (int i = 0; i < entries; i++) { tab_rec->targets[i] = InsertCaseLabel(base_vaddr + targets[i], keys[i]); } @@ -852,8 +814,8 @@ void Mir2Lir::DumpSparseSwitchTable(const uint16_t* table) { */ uint16_t ident = table[0]; int entries = table[1]; - const int* keys = reinterpret_cast<const int*>(&table[2]); - const int* targets = &keys[entries]; + const int32_t* keys = reinterpret_cast<const int32_t*>(&table[2]); + const int32_t* targets = &keys[entries]; LOG(INFO) << "Sparse switch table - ident:0x" << std::hex << ident << ", entries: " << std::dec << entries; for (int i = 0; i < entries; i++) { @@ -872,7 +834,7 @@ void Mir2Lir::DumpPackedSwitchTable(const uint16_t* table) { * Total size is (4+size*2) 16-bit code units. */ uint16_t ident = table[0]; - const int* targets = reinterpret_cast<const int*>(&table[4]); + const int32_t* targets = reinterpret_cast<const int32_t*>(&table[4]); int entries = table[1]; int low_key = s4FromSwitchData(&table[2]); LOG(INFO) << "Packed switch table - ident:0x" << std::hex << ident @@ -883,18 +845,10 @@ void Mir2Lir::DumpPackedSwitchTable(const uint16_t* table) { } } -/* - * Set up special LIR to mark a Dalvik byte-code instruction start and - * record it in the boundary_map. NOTE: in cases such as kMirOpCheck in - * which we split a single Dalvik instruction, only the first MIR op - * associated with a Dalvik PC should be entered into the map. 
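MarkPackedCaseLabels above walks a Dalvik packed-switch payload; a small sketch of the same walk in isolation. Layout follows the dex bytecode format (ident, entry count, 32-bit low key stored as two code units, then one 32-bit relative target per case); the helper below is illustrative, not ART's s4FromSwitchData:

// Reads a packed-switch payload the way MarkPackedCaseLabels does.
#include <cstdint>
#include <vector>

struct PackedCase { int32_t key; int32_t target_offset; };

static std::vector<PackedCase> ReadPackedSwitch(const uint16_t* table) {
  int entries = table[1];
  // Low key is stored little-endian as two 16-bit code units (what s4FromSwitchData reads).
  int32_t low_key = static_cast<int32_t>(table[2] | (static_cast<uint32_t>(table[3]) << 16));
  const int32_t* targets = reinterpret_cast<const int32_t*>(&table[4]);
  std::vector<PackedCase> cases;
  for (int i = 0; i < entries; ++i) {
    cases.push_back({low_key + i, targets[i]});  // case key -> branch target relative to the switch
  }
  return cases;
}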
- */ -LIR* Mir2Lir::MarkBoundary(int offset, const char* inst_str) { - LIR* res = NewLIR1(kPseudoDalvikByteCodeBoundary, reinterpret_cast<uintptr_t>(inst_str)); - if (boundary_map_.find(offset) == boundary_map_.end()) { - boundary_map_.Put(offset, res); - } - return res; +/* Set up special LIR to mark a Dalvik byte-code instruction start for pretty printing */ +void Mir2Lir::MarkBoundary(DexOffset offset, const char* inst_str) { + // NOTE: only used for debug listings. + NewLIR1(kPseudoDalvikByteCodeBoundary, WrapPointer(ArenaStrdup(inst_str))); } bool Mir2Lir::EvaluateBranch(Instruction::Code opcode, int32_t src1, int32_t src2) { @@ -942,6 +896,7 @@ Mir2Lir::Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena literal_list_(NULL), method_literal_list_(NULL), code_literal_list_(NULL), + first_fixup_(NULL), cu_(cu), mir_graph_(mir_graph), switch_tables_(arena, 4, kGrowableArraySwitchTables), @@ -949,10 +904,14 @@ Mir2Lir::Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena throw_launchpads_(arena, 2048, kGrowableArrayThrowLaunchPads), suspend_launchpads_(arena, 4, kGrowableArraySuspendLaunchPads), intrinsic_launchpads_(arena, 2048, kGrowableArrayMisc), + tempreg_info_(arena, 20, kGrowableArrayMisc), + reginfo_map_(arena, 64, kGrowableArrayMisc), + pointer_storage_(arena, 128, kGrowableArrayMisc), data_offset_(0), total_size_(0), block_label_list_(NULL), current_dalvik_offset_(0), + estimated_native_code_size_(0), reg_pool_(NULL), live_sreg_(0), num_core_spills_(0), @@ -965,9 +924,13 @@ Mir2Lir::Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena promotion_map_ = static_cast<PromotionMap*> (arena_->Alloc((cu_->num_dalvik_registers + cu_->num_compiler_temps + 1) * sizeof(promotion_map_[0]), ArenaAllocator::kAllocRegAlloc)); + // Reserve pointer id 0 for NULL. + size_t null_idx = WrapPointer(NULL); + DCHECK_EQ(null_idx, 0U); } void Mir2Lir::Materialize() { + cu_->NewTimingSplit("RegisterAllocation"); CompilerInitializeRegAlloc(); // Needs to happen after SSA naming /* Allocate Registers using simple local allocation scheme */ @@ -979,6 +942,7 @@ void Mir2Lir::Materialize() { * special codegen doesn't succeed, first_lir_insn_ will * set to NULL; */ + cu_->NewTimingSplit("SpecialMIR2LIR"); SpecialMIR2LIR(mir_graph_->GetSpecialCase()); } @@ -1091,5 +1055,4 @@ void Mir2Lir::InsertLIRAfter(LIR* current_lir, LIR* new_lir) { new_lir->next->prev = new_lir; } - } // namespace art diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc index f018c61819..df6493dc77 100644 --- a/compiler/dex/quick/gen_common.cc +++ b/compiler/dex/quick/gen_common.cc @@ -30,16 +30,17 @@ namespace art { */ /* - * Generate an kPseudoBarrier marker to indicate the boundary of special + * Generate a kPseudoBarrier marker to indicate the boundary of special * blocks. */ void Mir2Lir::GenBarrier() { LIR* barrier = NewLIR0(kPseudoBarrier); /* Mark all resources as being clobbered */ - barrier->def_mask = -1; + DCHECK(!barrier->flags.use_def_invalid); + barrier->u.m.def_mask = ENCODE_ALL; } -// FIXME: need to do some work to split out targets with +// TODO: need to do some work to split out targets with // condition codes and those without LIR* Mir2Lir::GenCheck(ConditionCode c_code, ThrowKind kind) { DCHECK_NE(cu_->instruction_set, kMips); @@ -65,8 +66,7 @@ LIR* Mir2Lir::GenImmedCheck(ConditionCode c_code, int reg, int imm_val, ThrowKin /* Perform null-check on a register. 
*/ LIR* Mir2Lir::GenNullCheck(int s_reg, int m_reg, int opt_flags) { - if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && - opt_flags & MIR_IGNORE_NULL_CHECK) { + if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && (opt_flags & MIR_IGNORE_NULL_CHECK)) { return NULL; } return GenImmedCheck(kCondEq, m_reg, 0, kThrowNullPointer); @@ -127,13 +127,11 @@ void Mir2Lir::GenCompareAndBranch(Instruction::Code opcode, RegLocation rl_src1, InexpensiveConstantInt(mir_graph_->ConstantValue(rl_src2))) { // OK - convert this to a compare immediate and branch OpCmpImmBranch(cond, rl_src1.low_reg, mir_graph_->ConstantValue(rl_src2), taken); - OpUnconditionalBranch(fall_through); return; } } rl_src2 = LoadValue(rl_src2, kCoreReg); OpCmpBranch(cond, rl_src1.low_reg, rl_src2.low_reg, taken); - OpUnconditionalBranch(fall_through); } void Mir2Lir::GenCompareZeroAndBranch(Instruction::Code opcode, RegLocation rl_src, LIR* taken, @@ -164,7 +162,6 @@ void Mir2Lir::GenCompareZeroAndBranch(Instruction::Code opcode, RegLocation rl_s LOG(FATAL) << "Unexpected opcode " << opcode; } OpCmpImmBranch(cond, rl_src.low_reg, 0, taken); - OpUnconditionalBranch(fall_through); } void Mir2Lir::GenIntToLong(RegLocation rl_dest, RegLocation rl_src) { @@ -337,8 +334,8 @@ void Mir2Lir::GenSput(uint32_t field_idx, RegLocation rl_src, bool is_long_or_do bool is_volatile; bool is_referrers_class; bool fast_path = cu_->compiler_driver->ComputeStaticFieldInfo( - field_idx, mir_graph_->GetCurrentDexCompilationUnit(), field_offset, ssb_index, - is_referrers_class, is_volatile, true); + field_idx, mir_graph_->GetCurrentDexCompilationUnit(), true, + &field_offset, &ssb_index, &is_referrers_class, &is_volatile); if (fast_path && !SLOW_FIELD_PATH) { DCHECK_GE(field_offset, 0); int rBase; @@ -423,8 +420,8 @@ void Mir2Lir::GenSget(uint32_t field_idx, RegLocation rl_dest, bool is_volatile; bool is_referrers_class; bool fast_path = cu_->compiler_driver->ComputeStaticFieldInfo( - field_idx, mir_graph_->GetCurrentDexCompilationUnit(), field_offset, ssb_index, - is_referrers_class, is_volatile, false); + field_idx, mir_graph_->GetCurrentDexCompilationUnit(), false, + &field_offset, &ssb_index, &is_referrers_class, &is_volatile); if (fast_path && !SLOW_FIELD_PATH) { DCHECK_GE(field_offset, 0); int rBase; @@ -506,7 +503,7 @@ void Mir2Lir::HandleSuspendLaunchPads() { ResetRegPool(); ResetDefTracking(); LIR* lab = suspend_launchpads_.Get(i); - LIR* resume_lab = reinterpret_cast<LIR*>(lab->operands[0]); + LIR* resume_lab = reinterpret_cast<LIR*>(UnwrapPointer(lab->operands[0])); current_dalvik_offset_ = lab->operands[1]; AppendLIR(lab); int r_tgt = CallHelperSetup(helper_offset); @@ -521,12 +518,12 @@ void Mir2Lir::HandleIntrinsicLaunchPads() { ResetRegPool(); ResetDefTracking(); LIR* lab = intrinsic_launchpads_.Get(i); - CallInfo* info = reinterpret_cast<CallInfo*>(lab->operands[0]); + CallInfo* info = reinterpret_cast<CallInfo*>(UnwrapPointer(lab->operands[0])); current_dalvik_offset_ = info->offset; AppendLIR(lab); // NOTE: GenInvoke handles MarkSafepointPC GenInvoke(info); - LIR* resume_lab = reinterpret_cast<LIR*>(lab->operands[2]); + LIR* resume_lab = reinterpret_cast<LIR*>(UnwrapPointer(lab->operands[2])); if (resume_lab != NULL) { OpUnconditionalBranch(resume_lab); } @@ -626,7 +623,7 @@ void Mir2Lir::GenIGet(uint32_t field_idx, int opt_flags, OpSize size, int field_offset; bool is_volatile; - bool fast_path = FastInstance(field_idx, field_offset, is_volatile, false); + bool fast_path = FastInstance(field_idx, false, &field_offset, 
&is_volatile); if (fast_path && !SLOW_FIELD_PATH) { RegLocation rl_result; @@ -687,8 +684,7 @@ void Mir2Lir::GenIPut(uint32_t field_idx, int opt_flags, OpSize size, int field_offset; bool is_volatile; - bool fast_path = FastInstance(field_idx, field_offset, is_volatile, - true); + bool fast_path = FastInstance(field_idx, true, &field_offset, &is_volatile); if (fast_path && !SLOW_FIELD_PATH) { RegisterClass reg_class = oat_reg_class_by_size(size); DCHECK_GE(field_offset, 0); @@ -730,6 +726,18 @@ void Mir2Lir::GenIPut(uint32_t field_idx, int opt_flags, OpSize size, } } +void Mir2Lir::GenArrayObjPut(int opt_flags, RegLocation rl_array, RegLocation rl_index, + RegLocation rl_src) { + bool needs_range_check = !(opt_flags & MIR_IGNORE_RANGE_CHECK); + bool needs_null_check = !((cu_->disable_opt & (1 << kNullCheckElimination)) && + (opt_flags & MIR_IGNORE_NULL_CHECK)); + ThreadOffset helper = needs_range_check + ? (needs_null_check ? QUICK_ENTRYPOINT_OFFSET(pAputObjectWithNullAndBoundCheck) + : QUICK_ENTRYPOINT_OFFSET(pAputObjectWithBoundCheck)) + : QUICK_ENTRYPOINT_OFFSET(pAputObject); + CallRuntimeHelperRegLocationRegLocationRegLocation(helper, rl_array, rl_index, rl_src, true); +} + void Mir2Lir::GenConstClass(uint32_t type_idx, RegLocation rl_dest) { RegLocation rl_method = LoadCurrMethod(); int res_reg = AllocTemp(); @@ -1113,8 +1121,8 @@ void Mir2Lir::GenCheckCast(uint32_t insn_idx, uint32_t type_idx, RegLocation rl_ if (!type_known_abstract) { branch2 = OpCmpBranch(kCondEq, TargetReg(kArg1), class_reg, NULL); } - CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(pCheckCast), TargetReg(kArg1), - TargetReg(kArg2), true); + CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(pCheckCast), TargetReg(kArg2), + TargetReg(kArg1), true); /* branch target here */ LIR* target = NewLIR0(kPseudoTargetLabel); branch1->target = target; @@ -1299,6 +1307,7 @@ void Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest, } StoreValue(rl_dest, rl_result); } else { + bool done = false; // Set to true if we happen to find a way to use a real instruction. if (cu_->instruction_set == kMips) { rl_src1 = LoadValue(rl_src1, kCoreReg); rl_src2 = LoadValue(rl_src2, kCoreReg); @@ -1306,7 +1315,23 @@ void Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest, GenImmedCheck(kCondEq, rl_src2.low_reg, 0, kThrowDivZero); } rl_result = GenDivRem(rl_dest, rl_src1.low_reg, rl_src2.low_reg, op == kOpDiv); - } else { + done = true; + } else if (cu_->instruction_set == kThumb2) { + if (cu_->GetInstructionSetFeatures().HasDivideInstruction()) { + // Use ARM SDIV instruction for division. For remainder we also need to + // calculate using a MUL and subtract. + rl_src1 = LoadValue(rl_src1, kCoreReg); + rl_src2 = LoadValue(rl_src2, kCoreReg); + if (check_zero) { + GenImmedCheck(kCondEq, rl_src2.low_reg, 0, kThrowDivZero); + } + rl_result = GenDivRem(rl_dest, rl_src1.low_reg, rl_src2.low_reg, op == kOpDiv); + done = true; + } + } + + // If we haven't already generated the code use the callout function. + if (!done) { ThreadOffset func_offset = QUICK_ENTRYPOINT_OFFSET(pIdivmod); FlushAllRegs(); /* Send everything to home location */ LoadValueDirectFixed(rl_src2, TargetReg(kArg1)); @@ -1315,7 +1340,7 @@ void Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest, if (check_zero) { GenImmedCheck(kCondEq, TargetReg(kArg1), 0, kThrowDivZero); } - // NOTE: callout here is not a safepoint + // NOTE: callout here is not a safepoint. 
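The new kThumb2 fast path above uses SDIV and notes that remainder "also need[s] to calculate using a MUL and subtract"; that is the usual identity rem = dividend - (dividend / divisor) * divisor, sketched here for reference:

// C-level model of the SDIV + multiply-subtract sequence (divisor != 0;
// ignores the INT_MIN / -1 corner case).
#include <cstdint>

inline int32_t RemFromDiv(int32_t dividend, int32_t divisor) {
  int32_t quot = dividend / divisor;  // SDIV
  return dividend - quot * divisor;   // MLS: multiply and subtract
}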
CallHelper(r_tgt, func_offset, false /* not a safepoint */); if (op == kOpDiv) rl_result = GetReturn(false); @@ -1343,7 +1368,7 @@ static bool IsPopCountLE2(unsigned int x) { } // Returns the index of the lowest set bit in 'x'. -static int LowestSetBit(unsigned int x) { +static int32_t LowestSetBit(uint32_t x) { int bit_posn = 0; while ((x & 0xf) == 0) { bit_posn += 4; @@ -1553,11 +1578,24 @@ void Mir2Lir::GenArithOpIntLit(Instruction::Code opcode, RegLocation rl_dest, Re if (HandleEasyDivRem(opcode, is_div, rl_src, rl_dest, lit)) { return; } + + bool done = false; if (cu_->instruction_set == kMips) { rl_src = LoadValue(rl_src, kCoreReg); rl_result = GenDivRemLit(rl_dest, rl_src.low_reg, lit, is_div); - } else { - FlushAllRegs(); /* Everything to home location */ + done = true; + } else if (cu_->instruction_set == kThumb2) { + if (cu_->GetInstructionSetFeatures().HasDivideInstruction()) { + // Use ARM SDIV instruction for division. For remainder we also need to + // calculate using a MUL and subtract. + rl_src = LoadValue(rl_src, kCoreReg); + rl_result = GenDivRemLit(rl_dest, rl_src.low_reg, lit, is_div); + done = true; + } + } + + if (!done) { + FlushAllRegs(); /* Everything to home location. */ LoadValueDirectFixed(rl_src, TargetReg(kArg0)); Clobber(TargetReg(kArg0)); ThreadOffset func_offset = QUICK_ENTRYPOINT_OFFSET(pIdivmod); @@ -1575,7 +1613,7 @@ void Mir2Lir::GenArithOpIntLit(Instruction::Code opcode, RegLocation rl_dest, Re } rl_src = LoadValue(rl_src, kCoreReg); rl_result = EvalLoc(rl_dest, kCoreReg, true); - // Avoid shifts by literal 0 - no support in Thumb. Change to copy + // Avoid shifts by literal 0 - no support in Thumb. Change to copy. if (shift_op && (lit == 0)) { OpRegCopy(rl_result.low_reg, rl_src.low_reg); } else { @@ -1651,7 +1689,7 @@ void Mir2Lir::GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest, case Instruction::REM_LONG_2ADDR: call_out = true; check_zero = true; - func_offset = QUICK_ENTRYPOINT_OFFSET(pLdivmod); + func_offset = QUICK_ENTRYPOINT_OFFSET(pLmod); /* NOTE - for Arm, result is in kArg2/kArg3 instead of kRet0/kRet1 */ ret_reg = (cu_->instruction_set == kThumb2) ? TargetReg(kArg2) : TargetReg(kRet0); break; @@ -1744,8 +1782,8 @@ void Mir2Lir::GenSuspendTest(int opt_flags) { FlushAllRegs(); LIR* branch = OpTestSuspend(NULL); LIR* ret_lab = NewLIR0(kPseudoTargetLabel); - LIR* target = RawLIR(current_dalvik_offset_, kPseudoSuspendTarget, - reinterpret_cast<uintptr_t>(ret_lab), current_dalvik_offset_); + LIR* target = RawLIR(current_dalvik_offset_, kPseudoSuspendTarget, WrapPointer(ret_lab), + current_dalvik_offset_); branch->target = target; suspend_launchpads_.Insert(target); } @@ -1758,11 +1796,23 @@ void Mir2Lir::GenSuspendTestAndBranch(int opt_flags, LIR* target) { } OpTestSuspend(target); LIR* launch_pad = - RawLIR(current_dalvik_offset_, kPseudoSuspendTarget, - reinterpret_cast<uintptr_t>(target), current_dalvik_offset_); + RawLIR(current_dalvik_offset_, kPseudoSuspendTarget, WrapPointer(target), + current_dalvik_offset_); FlushAllRegs(); OpUnconditionalBranch(launch_pad); suspend_launchpads_.Insert(launch_pad); } +/* Call out to helper assembly routine that will null check obj and then lock it. */ +void Mir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) { + FlushAllRegs(); + CallRuntimeHelperRegLocation(QUICK_ENTRYPOINT_OFFSET(pLockObject), rl_src, true); +} + +/* Call out to helper assembly routine that will null check obj and then unlock it. 
*/ +void Mir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) { + FlushAllRegs(); + CallRuntimeHelperRegLocation(QUICK_ENTRYPOINT_OFFSET(pUnlockObject), rl_src, true); +} + } // namespace art diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc index 2a0a23c7cd..d1a9a132bc 100644 --- a/compiler/dex/quick/gen_invoke.cc +++ b/compiler/dex/quick/gen_invoke.cc @@ -214,6 +214,7 @@ void Mir2Lir::CallRuntimeHelperImmRegLocationRegLocation(ThreadOffset helper_off int arg0, RegLocation arg1, RegLocation arg2, bool safepoint_pc) { int r_tgt = CallHelperSetup(helper_offset); + DCHECK_EQ(arg1.wide, 0U); LoadValueDirectFixed(arg1, TargetReg(kArg1)); if (arg2.wide == 0) { LoadValueDirectFixed(arg2, TargetReg(kArg2)); @@ -225,6 +226,21 @@ void Mir2Lir::CallRuntimeHelperImmRegLocationRegLocation(ThreadOffset helper_off CallHelper(r_tgt, helper_offset, safepoint_pc); } +void Mir2Lir::CallRuntimeHelperRegLocationRegLocationRegLocation(ThreadOffset helper_offset, + RegLocation arg0, RegLocation arg1, + RegLocation arg2, + bool safepoint_pc) { + int r_tgt = CallHelperSetup(helper_offset); + DCHECK_EQ(arg0.wide, 0U); + LoadValueDirectFixed(arg0, TargetReg(kArg0)); + DCHECK_EQ(arg1.wide, 0U); + LoadValueDirectFixed(arg1, TargetReg(kArg1)); + DCHECK_EQ(arg1.wide, 0U); + LoadValueDirectFixed(arg2, TargetReg(kArg2)); + ClobberCalleeSave(); + CallHelper(r_tgt, helper_offset, safepoint_pc); +} + /* * If there are any ins passed in registers that have not been promoted * to a callee-save register, flush them to the frame. Perform intial @@ -334,16 +350,13 @@ static int NextSDCallInsn(CompilationUnit* cu, CallInfo* info, uintptr_t direct_code, uintptr_t direct_method, InvokeType type) { Mir2Lir* cg = static_cast<Mir2Lir*>(cu->cg.get()); - if (cu->instruction_set != kThumb2) { - // Disable sharpening - direct_code = 0; - direct_method = 0; - } if (direct_code != 0 && direct_method != 0) { switch (state) { case 0: // Get the current Method* [sets kArg0] if (direct_code != static_cast<unsigned int>(-1)) { - cg->LoadConstant(cg->TargetReg(kInvokeTgt), direct_code); + if (cu->instruction_set != kX86) { + cg->LoadConstant(cg->TargetReg(kInvokeTgt), direct_code); + } } else { CHECK_EQ(cu->dex_file, target_method.dex_file); LIR* data_target = cg->ScanLiteralPool(cg->code_literal_list_, @@ -389,6 +402,7 @@ static int NextSDCallInsn(CompilationUnit* cu, CallInfo* info, cg->LoadConstant(cg->TargetReg(kInvokeTgt), direct_code); } else { CHECK_EQ(cu->dex_file, target_method.dex_file); + CHECK_LT(target_method.dex_method_index, target_method.dex_file->NumMethodIds()); LIR* data_target = cg->ScanLiteralPool(cg->code_literal_list_, target_method.dex_method_index, 0); if (data_target == NULL) { @@ -477,73 +491,56 @@ static int NextVCallInsn(CompilationUnit* cu, CallInfo* info, } /* - * All invoke-interface calls bounce off of art_quick_invoke_interface_trampoline, - * which will locate the target and continue on via a tail call. + * Emit the next instruction in an invoke interface sequence. This will do a lookup in the + * class's IMT, calling either the actual method or art_quick_imt_conflict_trampoline if + * more than one interface method map to the same index. Note also that we'll load the first + * argument ("this") into kArg1 here rather than the standard LoadArgRegs. 
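The comment above describes the IMT-based interface dispatch emitted by the NextInterfaceCallInsn sequence that follows. A rough C++ model of the lookup; the struct layout, field names, and table size are placeholders for this sketch, not ART's real mirror:: classes:

// Rough model of IMT dispatch: the interface method's dex index selects a slot in the
// receiver class's imtable; on a conflict the slot holds a trampoline that re-dispatches
// using the index passed in kHiddenArg.
#include <cstddef>
#include <cstdint>

constexpr size_t kImtSizeForSketch = 64;  // assumption; ART uses ClassLinker::kImtSize

struct MethodModel { const void* entry_point_from_compiled_code; };
struct ClassModel  { MethodModel* imtable[kImtSizeForSketch]; };
struct ObjectModel { ClassModel* klass; };

inline const void* ResolveInterfaceCall(const ObjectModel* receiver, uint32_t dex_method_index) {
  MethodModel* target = receiver->klass->imtable[dex_method_index % kImtSizeForSketch];
  return target->entry_point_from_compiled_code;  // loaded into kInvokeTgt by the sequence below
}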
*/ static int NextInterfaceCallInsn(CompilationUnit* cu, CallInfo* info, int state, const MethodReference& target_method, - uint32_t unused, uintptr_t unused2, - uintptr_t direct_method, InvokeType unused4) { + uint32_t method_idx, uintptr_t unused, + uintptr_t direct_method, InvokeType unused2) { Mir2Lir* cg = static_cast<Mir2Lir*>(cu->cg.get()); - if (cu->instruction_set != kThumb2) { - // Disable sharpening - direct_method = 0; - } - ThreadOffset trampoline = QUICK_ENTRYPOINT_OFFSET(pInvokeInterfaceTrampoline); - if (direct_method != 0) { - switch (state) { - case 0: // Load the trampoline target [sets kInvokeTgt]. - if (cu->instruction_set != kX86) { - cg->LoadWordDisp(cg->TargetReg(kSelf), trampoline.Int32Value(), - cg->TargetReg(kInvokeTgt)); - } - // Get the interface Method* [sets kArg0] - if (direct_method != static_cast<unsigned int>(-1)) { - cg->LoadConstant(cg->TargetReg(kArg0), direct_method); - } else { - CHECK_EQ(cu->dex_file, target_method.dex_file); - LIR* data_target = cg->ScanLiteralPool(cg->method_literal_list_, - target_method.dex_method_index, 0); - if (data_target == NULL) { - data_target = cg->AddWordData(&cg->method_literal_list_, - target_method.dex_method_index); - data_target->operands[1] = kInterface; - } - LIR* load_pc_rel = cg->OpPcRelLoad(cg->TargetReg(kArg0), data_target); - cg->AppendLIR(load_pc_rel); - DCHECK_EQ(cu->instruction_set, kThumb2) << reinterpret_cast<void*>(data_target); - } - break; - default: - return -1; + switch (state) { + case 0: // Set target method index in case of conflict [set kHiddenArg, kHiddenFpArg (x86)] + CHECK_EQ(cu->dex_file, target_method.dex_file); + CHECK_LT(target_method.dex_method_index, target_method.dex_file->NumMethodIds()); + cg->LoadConstant(cg->TargetReg(kHiddenArg), target_method.dex_method_index); + if (cu->instruction_set == kX86) { + cg->OpRegCopy(cg->TargetReg(kHiddenFpArg), cg->TargetReg(kHiddenArg)); + } + break; + case 1: { // Get "this" [set kArg1] + RegLocation rl_arg = info->args[0]; + cg->LoadValueDirectFixed(rl_arg, cg->TargetReg(kArg1)); + break; } - } else { - switch (state) { - case 0: - // Get the current Method* [sets kArg0] - TUNING: remove copy of method if it is promoted. - cg->LoadCurrMethodDirect(cg->TargetReg(kArg0)); - // Load the trampoline target [sets kInvokeTgt]. - if (cu->instruction_set != kX86) { - cg->LoadWordDisp(cg->TargetReg(kSelf), trampoline.Int32Value(), - cg->TargetReg(kInvokeTgt)); - } - break; - case 1: // Get method->dex_cache_resolved_methods_ [set/use kArg0] - cg->LoadWordDisp(cg->TargetReg(kArg0), - mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value(), - cg->TargetReg(kArg0)); + case 2: // Is "this" null? 
[use kArg1] + cg->GenNullCheck(info->args[0].s_reg_low, cg->TargetReg(kArg1), info->opt_flags); + // Get this->klass_ [use kArg1, set kInvokeTgt] + cg->LoadWordDisp(cg->TargetReg(kArg1), mirror::Object::ClassOffset().Int32Value(), + cg->TargetReg(kInvokeTgt)); break; - case 2: // Grab target method* [set/use kArg0] - CHECK_EQ(cu->dex_file, target_method.dex_file); - cg->LoadWordDisp(cg->TargetReg(kArg0), - mirror::Array::DataOffset(sizeof(mirror::Object*)).Int32Value() + - (target_method.dex_method_index * 4), + case 3: // Get this->klass_->imtable [use kInvokeTgt, set kInvokeTgt] + cg->LoadWordDisp(cg->TargetReg(kInvokeTgt), mirror::Class::ImTableOffset().Int32Value(), + cg->TargetReg(kInvokeTgt)); + break; + case 4: // Get target method [use kInvokeTgt, set kArg0] + cg->LoadWordDisp(cg->TargetReg(kInvokeTgt), ((method_idx % ClassLinker::kImtSize) * 4) + + mirror::Array::DataOffset(sizeof(mirror::Object*)).Int32Value(), cg->TargetReg(kArg0)); break; + case 5: // Get the compiled code address [use kArg0, set kInvokeTgt] + if (cu->instruction_set != kX86) { + cg->LoadWordDisp(cg->TargetReg(kArg0), + mirror::ArtMethod::GetEntryPointFromCompiledCodeOffset().Int32Value(), + cg->TargetReg(kInvokeTgt)); + break; + } + // Intentional fallthrough for X86 default: return -1; - } } return state + 1; } @@ -810,7 +807,7 @@ int Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, OpRegRegImm(kOpAdd, TargetReg(kArg3), TargetReg(kSp), start_offset); LIR* ld = OpVldm(TargetReg(kArg3), regs_left); // TUNING: loosen barrier - ld->def_mask = ENCODE_ALL; + ld->u.m.def_mask = ENCODE_ALL; SetMemRefType(ld, true /* is_load */, kDalvikReg); call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, direct_code, direct_method, type); @@ -819,7 +816,7 @@ int Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, direct_code, direct_method, type); LIR* st = OpVstm(TargetReg(kArg3), regs_left); SetMemRefType(st, false /* is_load */, kDalvikReg); - st->def_mask = ENCODE_ALL; + st->u.m.def_mask = ENCODE_ALL; call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, direct_code, direct_method, type); } @@ -892,7 +889,7 @@ bool Mir2Lir::GenInlinedCharAt(CallInfo* info) { LoadWordDisp(rl_obj.low_reg, value_offset, reg_ptr); if (range_check) { // Set up a launch pad to allow retry in case of bounds violation */ - launch_pad = RawLIR(0, kPseudoIntrinsicRetry, reinterpret_cast<uintptr_t>(info)); + launch_pad = RawLIR(0, kPseudoIntrinsicRetry, WrapPointer(info)); intrinsic_launchpads_.Insert(launch_pad); OpRegReg(kOpCmp, rl_idx.low_reg, reg_max); FreeTemp(reg_max); @@ -903,7 +900,7 @@ bool Mir2Lir::GenInlinedCharAt(CallInfo* info) { reg_max = AllocTemp(); LoadWordDisp(rl_obj.low_reg, count_offset, reg_max); // Set up a launch pad to allow retry in case of bounds violation */ - launch_pad = RawLIR(0, kPseudoIntrinsicRetry, reinterpret_cast<uintptr_t>(info)); + launch_pad = RawLIR(0, kPseudoIntrinsicRetry, WrapPointer(info)); intrinsic_launchpads_.Insert(launch_pad); OpRegReg(kOpCmp, rl_idx.low_reg, reg_max); FreeTemp(reg_max); @@ -961,6 +958,31 @@ bool Mir2Lir::GenInlinedStringIsEmptyOrLength(CallInfo* info, bool is_empty) { return true; } +bool Mir2Lir::GenInlinedReverseBytes(CallInfo* info, OpSize size) { + if (cu_->instruction_set == kMips) { + // TODO - add Mips implementation + return false; + } + RegLocation rl_src_i = info->args[0]; + RegLocation rl_dest = InlineTarget(info); // result reg + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + if (size == kLong) 
{ + RegLocation rl_i = LoadValueWide(rl_src_i, kCoreReg); + int reg_tmp = AllocTemp(); + OpRegCopy(reg_tmp, rl_result.low_reg); + OpRegReg(kOpRev, rl_result.low_reg, rl_i.high_reg); + OpRegReg(kOpRev, rl_result.high_reg, reg_tmp); + StoreValueWide(rl_dest, rl_result); + } else { + DCHECK(size == kWord || size == kSignedHalf); + OpKind op = (size == kWord) ? kOpRev : kOpRevsh; + RegLocation rl_i = LoadValue(rl_src_i, kCoreReg); + OpRegReg(op, rl_result.low_reg, rl_i.low_reg); + StoreValue(rl_dest, rl_result); + } + return true; +} + bool Mir2Lir::GenInlinedAbsInt(CallInfo* info) { if (cu_->instruction_set == kMips) { // TODO - add Mips implementation @@ -1069,7 +1091,7 @@ bool Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) { } int r_tgt = (cu_->instruction_set != kX86) ? LoadHelper(QUICK_ENTRYPOINT_OFFSET(pIndexOf)) : 0; GenNullCheck(rl_obj.s_reg_low, reg_ptr, info->opt_flags); - LIR* launch_pad = RawLIR(0, kPseudoIntrinsicRetry, reinterpret_cast<uintptr_t>(info)); + LIR* launch_pad = RawLIR(0, kPseudoIntrinsicRetry, WrapPointer(info)); intrinsic_launchpads_.Insert(launch_pad); OpCmpImmBranch(kCondGt, reg_char, 0xFFFF, launch_pad); // NOTE: not a safepoint @@ -1079,7 +1101,7 @@ bool Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) { OpThreadMem(kOpBlx, QUICK_ENTRYPOINT_OFFSET(pIndexOf)); } LIR* resume_tgt = NewLIR0(kPseudoTargetLabel); - launch_pad->operands[2] = reinterpret_cast<uintptr_t>(resume_tgt); + launch_pad->operands[2] = WrapPointer(resume_tgt); // Record that we've already inlined & null checked info->opt_flags |= (MIR_INLINED | MIR_IGNORE_NULL_CHECK); RegLocation rl_return = GetReturn(false); @@ -1107,7 +1129,7 @@ bool Mir2Lir::GenInlinedStringCompareTo(CallInfo* info) { LoadHelper(QUICK_ENTRYPOINT_OFFSET(pStringCompareTo)) : 0; GenNullCheck(rl_this.s_reg_low, reg_this, info->opt_flags); // TUNING: check if rl_cmp.s_reg_low is already null checked - LIR* launch_pad = RawLIR(0, kPseudoIntrinsicRetry, reinterpret_cast<uintptr_t>(info)); + LIR* launch_pad = RawLIR(0, kPseudoIntrinsicRetry, WrapPointer(info)); intrinsic_launchpads_.Insert(launch_pad); OpCmpImmBranch(kCondEq, reg_cmp, 0, launch_pad); // NOTE: not a safepoint @@ -1219,71 +1241,117 @@ bool Mir2Lir::GenIntrinsic(CallInfo* info) { * method. By doing this during basic block construction, we can also * take advantage of/generate new useful dataflow info. 
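The kLong path of GenInlinedReverseBytes above reverses a 64-bit value by REV-ing each 32-bit half and swapping the halves; a plain C++ model of that net effect (a sketch, not the register-allocation-aware lowering itself):

// 64-bit byteswap = byteswap each half, then exchange the halves.
#include <cstdint>

inline uint32_t Rev32(uint32_t v) {  // what a single ARM REV does
  return (v >> 24) | ((v >> 8) & 0x0000ff00u) | ((v << 8) & 0x00ff0000u) | (v << 24);
}

inline uint64_t ReverseBytes64(uint64_t v) {
  uint32_t lo = static_cast<uint32_t>(v);
  uint32_t hi = static_cast<uint32_t>(v >> 32);
  return (static_cast<uint64_t>(Rev32(lo)) << 32) | Rev32(hi);
}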
*/ + const DexFile::MethodId& target_mid = cu_->dex_file->GetMethodId(info->index); + const DexFile::TypeId& declaring_type = cu_->dex_file->GetTypeId(target_mid.class_idx_); StringPiece tgt_methods_declaring_class( - cu_->dex_file->GetMethodDeclaringClassDescriptor(cu_->dex_file->GetMethodId(info->index))); - if (tgt_methods_declaring_class.starts_with("Ljava/lang/Double;")) { - std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file)); - if (tgt_method == "long java.lang.Double.doubleToRawLongBits(double)") { - return GenInlinedDoubleCvt(info); - } - if (tgt_method == "double java.lang.Double.longBitsToDouble(long)") { - return GenInlinedDoubleCvt(info); - } - } else if (tgt_methods_declaring_class.starts_with("Ljava/lang/Float;")) { - std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file)); - if (tgt_method == "int java.lang.Float.float_to_raw_int_bits(float)") { - return GenInlinedFloatCvt(info); - } - if (tgt_method == "float java.lang.Float.intBitsToFloat(int)") { - return GenInlinedFloatCvt(info); + cu_->dex_file->StringDataByIdx(declaring_type.descriptor_idx_)); + if (tgt_methods_declaring_class.starts_with("Ljava/lang/")) { + tgt_methods_declaring_class.remove_prefix(sizeof("Ljava/lang/") - 1); + if (tgt_methods_declaring_class.starts_with("Double;")) { + std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file)); + if (tgt_method == "long java.lang.Double.doubleToRawLongBits(double)") { + return GenInlinedDoubleCvt(info); + } + if (tgt_method == "double java.lang.Double.longBitsToDouble(long)") { + return GenInlinedDoubleCvt(info); + } + } else if (tgt_methods_declaring_class.starts_with("Float;")) { + std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file)); + if (tgt_method == "int java.lang.Float.floatToRawIntBits(float)") { + return GenInlinedFloatCvt(info); + } + if (tgt_method == "float java.lang.Float.intBitsToFloat(int)") { + return GenInlinedFloatCvt(info); + } + } else if (tgt_methods_declaring_class.starts_with("Integer;")) { + std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file)); + if (tgt_method == "int java.lang.Integer.reverseBytes(int)") { + return GenInlinedReverseBytes(info, kWord); + } + } else if (tgt_methods_declaring_class.starts_with("Long;")) { + std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file)); + if (tgt_method == "long java.lang.Long.reverseBytes(long)") { + return GenInlinedReverseBytes(info, kLong); + } + } else if (tgt_methods_declaring_class.starts_with("Math;") || + tgt_methods_declaring_class.starts_with("StrictMath;")) { + std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file)); + if (tgt_method == "int java.lang.Math.abs(int)" || + tgt_method == "int java.lang.StrictMath.abs(int)") { + return GenInlinedAbsInt(info); + } + if (tgt_method == "long java.lang.Math.abs(long)" || + tgt_method == "long java.lang.StrictMath.abs(long)") { + return GenInlinedAbsLong(info); + } + if (tgt_method == "int java.lang.Math.max(int, int)" || + tgt_method == "int java.lang.StrictMath.max(int, int)") { + return GenInlinedMinMaxInt(info, false /* is_min */); + } + if (tgt_method == "int java.lang.Math.min(int, int)" || + tgt_method == "int java.lang.StrictMath.min(int, int)") { + return GenInlinedMinMaxInt(info, true /* is_min */); + } + if (tgt_method == "double java.lang.Math.sqrt(double)" || + tgt_method == "double java.lang.StrictMath.sqrt(double)") { + return GenInlinedSqrt(info); + } + } else if (tgt_methods_declaring_class.starts_with("Short;")) { + std::string 
tgt_method(PrettyMethod(info->index, *cu_->dex_file)); + if (tgt_method == "short java.lang.Short.reverseBytes(short)") { + return GenInlinedReverseBytes(info, kSignedHalf); + } + } else if (tgt_methods_declaring_class.starts_with("String;")) { + std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file)); + if (tgt_method == "char java.lang.String.charAt(int)") { + return GenInlinedCharAt(info); + } + if (tgt_method == "int java.lang.String.compareTo(java.lang.String)") { + return GenInlinedStringCompareTo(info); + } + if (tgt_method == "boolean java.lang.String.is_empty()") { + return GenInlinedStringIsEmptyOrLength(info, true /* is_empty */); + } + if (tgt_method == "int java.lang.String.index_of(int, int)") { + return GenInlinedIndexOf(info, false /* base 0 */); + } + if (tgt_method == "int java.lang.String.index_of(int)") { + return GenInlinedIndexOf(info, true /* base 0 */); + } + if (tgt_method == "int java.lang.String.length()") { + return GenInlinedStringIsEmptyOrLength(info, false /* is_empty */); + } + } else if (tgt_methods_declaring_class.starts_with("Thread;")) { + std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file)); + if (tgt_method == "java.lang.Thread java.lang.Thread.currentThread()") { + return GenInlinedCurrentThread(info); + } } - } else if (tgt_methods_declaring_class.starts_with("Ljava/lang/Math;") || - tgt_methods_declaring_class.starts_with("Ljava/lang/StrictMath;")) { + } else if (tgt_methods_declaring_class.starts_with("Llibcore/io/Memory;")) { std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file)); - if (tgt_method == "int java.lang.Math.abs(int)" || - tgt_method == "int java.lang.StrictMath.abs(int)") { - return GenInlinedAbsInt(info); - } - if (tgt_method == "long java.lang.Math.abs(long)" || - tgt_method == "long java.lang.StrictMath.abs(long)") { - return GenInlinedAbsLong(info); + if (tgt_method == "byte libcore.io.Memory.peekByte(long)") { + return GenInlinedPeek(info, kSignedByte); } - if (tgt_method == "int java.lang.Math.max(int, int)" || - tgt_method == "int java.lang.StrictMath.max(int, int)") { - return GenInlinedMinMaxInt(info, false /* is_min */); + if (tgt_method == "int libcore.io.Memory.peekIntNative(long)") { + return GenInlinedPeek(info, kWord); } - if (tgt_method == "int java.lang.Math.min(int, int)" || - tgt_method == "int java.lang.StrictMath.min(int, int)") { - return GenInlinedMinMaxInt(info, true /* is_min */); + if (tgt_method == "long libcore.io.Memory.peekLongNative(long)") { + return GenInlinedPeek(info, kLong); } - if (tgt_method == "double java.lang.Math.sqrt(double)" || - tgt_method == "double java.lang.StrictMath.sqrt(double)") { - return GenInlinedSqrt(info); - } - } else if (tgt_methods_declaring_class.starts_with("Ljava/lang/String;")) { - std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file)); - if (tgt_method == "char java.lang.String.charAt(int)") { - return GenInlinedCharAt(info); + if (tgt_method == "short libcore.io.Memory.peekShortNative(long)") { + return GenInlinedPeek(info, kSignedHalf); } - if (tgt_method == "int java.lang.String.compareTo(java.lang.String)") { - return GenInlinedStringCompareTo(info); + if (tgt_method == "void libcore.io.Memory.pokeByte(long, byte)") { + return GenInlinedPoke(info, kSignedByte); } - if (tgt_method == "boolean java.lang.String.is_empty()") { - return GenInlinedStringIsEmptyOrLength(info, true /* is_empty */); + if (tgt_method == "void libcore.io.Memory.pokeIntNative(long, int)") { + return GenInlinedPoke(info, kWord); } - if (tgt_method == 
"int java.lang.String.index_of(int, int)") { - return GenInlinedIndexOf(info, false /* base 0 */); + if (tgt_method == "void libcore.io.Memory.pokeLongNative(long, long)") { + return GenInlinedPoke(info, kLong); } - if (tgt_method == "int java.lang.String.index_of(int)") { - return GenInlinedIndexOf(info, true /* base 0 */); - } - if (tgt_method == "int java.lang.String.length()") { - return GenInlinedStringIsEmptyOrLength(info, false /* is_empty */); - } - } else if (tgt_methods_declaring_class.starts_with("Ljava/lang/Thread;")) { - std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file)); - if (tgt_method == "java.lang.Thread java.lang.Thread.currentThread()") { - return GenInlinedCurrentThread(info); + if (tgt_method == "void libcore.io.Memory.pokeShortNative(long, short)") { + return GenInlinedPoke(info, kSignedHalf); } } else if (tgt_methods_declaring_class.starts_with("Lsun/misc/Unsafe;")) { std::string tgt_method(PrettyMethod(info->index, *cu_->dex_file)); @@ -1373,16 +1441,13 @@ void Mir2Lir::GenInvoke(CallInfo* info) { bool fast_path = cu_->compiler_driver->ComputeInvokeInfo(mir_graph_->GetCurrentDexCompilationUnit(), current_dalvik_offset_, - info->type, target_method, - vtable_idx, - direct_code, direct_method, - true) && !SLOW_INVOKE_PATH; + true, true, + &info->type, &target_method, + &vtable_idx, + &direct_code, &direct_method) && !SLOW_INVOKE_PATH; if (info->type == kInterface) { - if (fast_path) { - p_null_ck = &null_ck; - } next_call_insn = fast_path ? NextInterfaceCallInsn : NextInterfaceCallInsnWithAccessCheck; - skip_this = false; + skip_this = fast_path; } else if (info->type == kDirect) { if (fast_path) { p_null_ck = &null_ck; @@ -1422,15 +1487,14 @@ void Mir2Lir::GenInvoke(CallInfo* info) { if (cu_->instruction_set != kX86) { call_inst = OpReg(kOpBlx, TargetReg(kInvokeTgt)); } else { - if (fast_path && info->type != kInterface) { + if (fast_path) { call_inst = OpMem(kOpBlx, TargetReg(kArg0), mirror::ArtMethod::GetEntryPointFromCompiledCodeOffset().Int32Value()); } else { ThreadOffset trampoline(-1); switch (info->type) { case kInterface: - trampoline = fast_path ? 
QUICK_ENTRYPOINT_OFFSET(pInvokeInterfaceTrampoline) - : QUICK_ENTRYPOINT_OFFSET(pInvokeInterfaceTrampolineWithAccessCheck); + trampoline = QUICK_ENTRYPOINT_OFFSET(pInvokeInterfaceTrampolineWithAccessCheck); break; case kDirect: trampoline = QUICK_ENTRYPOINT_OFFSET(pInvokeDirectTrampolineWithAccessCheck); diff --git a/compiler/dex/quick/local_optimizations.cc b/compiler/dex/quick/local_optimizations.cc index 630e990733..0f29578c4e 100644 --- a/compiler/dex/quick/local_optimizations.cc +++ b/compiler/dex/quick/local_optimizations.cc @@ -21,8 +21,8 @@ namespace art { #define DEBUG_OPT(X) /* Check RAW, WAR, and RAW dependency on the register operands */ -#define CHECK_REG_DEP(use, def, check) ((def & check->use_mask) || \ - ((use | def) & check->def_mask)) +#define CHECK_REG_DEP(use, def, check) ((def & check->u.m.use_mask) || \ + ((use | def) & check->u.m.def_mask)) /* Scheduler heuristics */ #define MAX_HOIST_DISTANCE 20 @@ -30,10 +30,10 @@ namespace art { #define LD_LATENCY 2 static bool IsDalvikRegisterClobbered(LIR* lir1, LIR* lir2) { - int reg1Lo = DECODE_ALIAS_INFO_REG(lir1->alias_info); - int reg1Hi = reg1Lo + DECODE_ALIAS_INFO_WIDE(lir1->alias_info); - int reg2Lo = DECODE_ALIAS_INFO_REG(lir2->alias_info); - int reg2Hi = reg2Lo + DECODE_ALIAS_INFO_WIDE(lir2->alias_info); + int reg1Lo = DECODE_ALIAS_INFO_REG(lir1->flags.alias_info); + int reg1Hi = reg1Lo + DECODE_ALIAS_INFO_WIDE(lir1->flags.alias_info); + int reg2Lo = DECODE_ALIAS_INFO_REG(lir2->flags.alias_info); + int reg2Hi = reg2Lo + DECODE_ALIAS_INFO_WIDE(lir2->flags.alias_info); return (reg1Lo == reg2Lo) || (reg1Lo == reg2Hi) || (reg1Hi == reg2Lo); } @@ -78,7 +78,7 @@ void Mir2Lir::ApplyLoadStoreElimination(LIR* head_lir, LIR* tail_lir) { } for (this_lir = PREV_LIR(tail_lir); this_lir != head_lir; this_lir = PREV_LIR(this_lir)) { - if (is_pseudo_opcode(this_lir->opcode)) { + if (IsPseudoLirOp(this_lir->opcode)) { continue; } @@ -99,15 +99,14 @@ void Mir2Lir::ApplyLoadStoreElimination(LIR* head_lir, LIR* tail_lir) { int native_reg_id; if (cu_->instruction_set == kX86) { // If x86, location differs depending on whether memory/reg operation. - native_reg_id = (GetTargetInstFlags(this_lir->opcode) & IS_STORE) ? this_lir->operands[2] - : this_lir->operands[0]; + native_reg_id = (target_flags & IS_STORE) ? this_lir->operands[2] : this_lir->operands[0]; } else { native_reg_id = this_lir->operands[0]; } - bool is_this_lir_load = GetTargetInstFlags(this_lir->opcode) & IS_LOAD; + bool is_this_lir_load = target_flags & IS_LOAD; LIR* check_lir; /* Use the mem mask to determine the rough memory location */ - uint64_t this_mem_mask = (this_lir->use_mask | this_lir->def_mask) & ENCODE_MEM; + uint64_t this_mem_mask = (this_lir->u.m.use_mask | this_lir->u.m.def_mask) & ENCODE_MEM; /* * Currently only eliminate redundant ld/st for constant and Dalvik @@ -117,10 +116,10 @@ void Mir2Lir::ApplyLoadStoreElimination(LIR* head_lir, LIR* tail_lir) { continue; } - uint64_t stop_def_reg_mask = this_lir->def_mask & ~ENCODE_MEM; + uint64_t stop_def_reg_mask = this_lir->u.m.def_mask & ~ENCODE_MEM; uint64_t stop_use_reg_mask; if (cu_->instruction_set == kX86) { - stop_use_reg_mask = (IS_BRANCH | this_lir->use_mask) & ~ENCODE_MEM; + stop_use_reg_mask = (IS_BRANCH | this_lir->u.m.use_mask) & ~ENCODE_MEM; } else { /* * Add pc to the resource mask to prevent this instruction @@ -128,7 +127,7 @@ void Mir2Lir::ApplyLoadStoreElimination(LIR* head_lir, LIR* tail_lir) { * region bits since stop_mask is used to check data/control * dependencies. 
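(Editor's note — an illustrative sketch, not part of this change; the function name is hypothetical.) The CHECK_REG_DEP macro in the hunk above folds the three hazard tests (RAW, WAR, WAW) into two mask intersections against the new u.m use/def fields. Spelled out as a standalone predicate it is roughly:

    // Does the later instruction depend on the current one?
    static bool HasHazard(uint64_t use_mask, uint64_t def_mask, const LIR* later) {
      // RAW: the later instruction reads a resource this one writes.
      if ((def_mask & later->u.m.use_mask) != 0) {
        return true;
      }
      // WAR and WAW: the later instruction writes a resource this one reads or writes.
      return ((use_mask | def_mask) & later->u.m.def_mask) != 0;
    }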
*/ - stop_use_reg_mask = (GetPCUseDefEncoding() | this_lir->use_mask) & ~ENCODE_MEM; + stop_use_reg_mask = (GetPCUseDefEncoding() | this_lir->u.m.use_mask) & ~ENCODE_MEM; } for (check_lir = NEXT_LIR(this_lir); check_lir != tail_lir; check_lir = NEXT_LIR(check_lir)) { @@ -136,11 +135,11 @@ void Mir2Lir::ApplyLoadStoreElimination(LIR* head_lir, LIR* tail_lir) { * Skip already dead instructions (whose dataflow information is * outdated and misleading). */ - if (check_lir->flags.is_nop || is_pseudo_opcode(check_lir->opcode)) { + if (check_lir->flags.is_nop || IsPseudoLirOp(check_lir->opcode)) { continue; } - uint64_t check_mem_mask = (check_lir->use_mask | check_lir->def_mask) & ENCODE_MEM; + uint64_t check_mem_mask = (check_lir->u.m.use_mask | check_lir->u.m.def_mask) & ENCODE_MEM; uint64_t alias_condition = this_mem_mask & check_mem_mask; bool stop_here = false; @@ -160,7 +159,7 @@ void Mir2Lir::ApplyLoadStoreElimination(LIR* head_lir, LIR* tail_lir) { */ DCHECK(!(check_flags & IS_STORE)); /* Same value && same register type */ - if (check_lir->alias_info == this_lir->alias_info && + if (check_lir->flags.alias_info == this_lir->flags.alias_info && SameRegType(check_lir->operands[0], native_reg_id)) { /* * Different destination register - insert @@ -169,11 +168,11 @@ void Mir2Lir::ApplyLoadStoreElimination(LIR* head_lir, LIR* tail_lir) { if (check_lir->operands[0] != native_reg_id) { ConvertMemOpIntoMove(check_lir, check_lir->operands[0], native_reg_id); } - check_lir->flags.is_nop = true; + NopLIR(check_lir); } } else if (alias_condition == ENCODE_DALVIK_REG) { /* Must alias */ - if (check_lir->alias_info == this_lir->alias_info) { + if (check_lir->flags.alias_info == this_lir->flags.alias_info) { /* Only optimize compatible registers */ bool reg_compatible = SameRegType(check_lir->operands[0], native_reg_id); if ((is_this_lir_load && is_check_lir_load) || @@ -188,7 +187,7 @@ void Mir2Lir::ApplyLoadStoreElimination(LIR* head_lir, LIR* tail_lir) { native_reg_id) { ConvertMemOpIntoMove(check_lir, check_lir->operands[0], native_reg_id); } - check_lir->flags.is_nop = true; + NopLIR(check_lir); } else { /* * Destinaions are of different types - @@ -202,7 +201,7 @@ void Mir2Lir::ApplyLoadStoreElimination(LIR* head_lir, LIR* tail_lir) { stop_here = true; } else if (!is_this_lir_load && !is_check_lir_load) { /* WAW - nuke the earlier store */ - this_lir->flags.is_nop = true; + NopLIR(this_lir); stop_here = true; } /* Partial overlap */ @@ -257,7 +256,7 @@ void Mir2Lir::ApplyLoadStoreElimination(LIR* head_lir, LIR* tail_lir) { * top-down order. 
*/ InsertLIRBefore(check_lir, new_store_lir); - this_lir->flags.is_nop = true; + NopLIR(this_lir); } break; } else if (!check_lir->flags.is_nop) { @@ -286,7 +285,7 @@ void Mir2Lir::ApplyLoadHoisting(LIR* head_lir, LIR* tail_lir) { /* Start from the second instruction */ for (this_lir = NEXT_LIR(head_lir); this_lir != tail_lir; this_lir = NEXT_LIR(this_lir)) { - if (is_pseudo_opcode(this_lir->opcode)) { + if (IsPseudoLirOp(this_lir->opcode)) { continue; } @@ -298,7 +297,7 @@ void Mir2Lir::ApplyLoadHoisting(LIR* head_lir, LIR* tail_lir) { continue; } - uint64_t stop_use_all_mask = this_lir->use_mask; + uint64_t stop_use_all_mask = this_lir->u.m.use_mask; if (cu_->instruction_set != kX86) { /* @@ -314,7 +313,7 @@ void Mir2Lir::ApplyLoadHoisting(LIR* head_lir, LIR* tail_lir) { /* Similar as above, but just check for pure register dependency */ uint64_t stop_use_reg_mask = stop_use_all_mask & ~ENCODE_MEM; - uint64_t stop_def_reg_mask = this_lir->def_mask & ~ENCODE_MEM; + uint64_t stop_def_reg_mask = this_lir->u.m.def_mask & ~ENCODE_MEM; int next_slot = 0; bool stop_here = false; @@ -329,7 +328,7 @@ void Mir2Lir::ApplyLoadHoisting(LIR* head_lir, LIR* tail_lir) { continue; } - uint64_t check_mem_mask = check_lir->def_mask & ENCODE_MEM; + uint64_t check_mem_mask = check_lir->u.m.def_mask & ENCODE_MEM; uint64_t alias_condition = stop_use_all_mask & check_mem_mask; stop_here = false; @@ -338,7 +337,7 @@ void Mir2Lir::ApplyLoadHoisting(LIR* head_lir, LIR* tail_lir) { /* We can fully disambiguate Dalvik references */ if (alias_condition == ENCODE_DALVIK_REG) { /* Must alias or partually overlap */ - if ((check_lir->alias_info == this_lir->alias_info) || + if ((check_lir->flags.alias_info == this_lir->flags.alias_info) || IsDalvikRegisterClobbered(this_lir, check_lir)) { stop_here = true; } @@ -363,7 +362,7 @@ void Mir2Lir::ApplyLoadHoisting(LIR* head_lir, LIR* tail_lir) { * Store the dependent or non-pseudo/indepedent instruction to the * list. */ - if (stop_here || !is_pseudo_opcode(check_lir->opcode)) { + if (stop_here || !IsPseudoLirOp(check_lir->opcode)) { prev_inst_list[next_slot++] = check_lir; if (next_slot == MAX_HOIST_DISTANCE) { break; @@ -394,7 +393,7 @@ void Mir2Lir::ApplyLoadHoisting(LIR* head_lir, LIR* tail_lir) { int slot; LIR* dep_lir = prev_inst_list[next_slot-1]; /* If there is ld-ld dependency, wait LDLD_DISTANCE cycles */ - if (!is_pseudo_opcode(dep_lir->opcode) && + if (!IsPseudoLirOp(dep_lir->opcode) && (GetTargetInstFlags(dep_lir->opcode) & IS_LOAD)) { first_slot -= LDLD_DISTANCE; } @@ -407,7 +406,7 @@ void Mir2Lir::ApplyLoadHoisting(LIR* head_lir, LIR* tail_lir) { LIR* prev_lir = prev_inst_list[slot+1]; /* Check the highest instruction */ - if (prev_lir->def_mask == ENCODE_ALL) { + if (prev_lir->u.m.def_mask == ENCODE_ALL) { /* * If the first instruction is a load, don't hoist anything * above it since it is unlikely to be beneficial. @@ -435,9 +434,9 @@ void Mir2Lir::ApplyLoadHoisting(LIR* head_lir, LIR* tail_lir) { * Try to find two instructions with load/use dependency until * the remaining instructions are less than LD_LATENCY. */ - bool prev_is_load = is_pseudo_opcode(prev_lir->opcode) ? false : + bool prev_is_load = IsPseudoLirOp(prev_lir->opcode) ? 
false : (GetTargetInstFlags(prev_lir->opcode) & IS_LOAD); - if (((cur_lir->use_mask & prev_lir->def_mask) && prev_is_load) || (slot < LD_LATENCY)) { + if (((cur_lir->u.m.use_mask & prev_lir->u.m.def_mask) && prev_is_load) || (slot < LD_LATENCY)) { break; } } @@ -453,7 +452,7 @@ void Mir2Lir::ApplyLoadHoisting(LIR* head_lir, LIR* tail_lir) { * is never the first LIR on the list */ InsertLIRBefore(cur_lir, new_load_lir); - this_lir->flags.is_nop = true; + NopLIR(this_lir); } } } @@ -468,41 +467,4 @@ void Mir2Lir::ApplyLocalOptimizations(LIR* head_lir, LIR* tail_lir) { } } -/* - * Nop any unconditional branches that go to the next instruction. - * Note: new redundant branches may be inserted later, and we'll - * use a check in final instruction assembly to nop those out. - */ -void Mir2Lir::RemoveRedundantBranches() { - LIR* this_lir; - - for (this_lir = first_lir_insn_; this_lir != last_lir_insn_; this_lir = NEXT_LIR(this_lir)) { - /* Branch to the next instruction */ - if (IsUnconditionalBranch(this_lir)) { - LIR* next_lir = this_lir; - - while (true) { - next_lir = NEXT_LIR(next_lir); - - /* - * Is the branch target the next instruction? - */ - if (next_lir == this_lir->target) { - this_lir->flags.is_nop = true; - break; - } - - /* - * Found real useful stuff between the branch and the target. - * Need to explicitly check the last_lir_insn_ here because it - * might be the last real instruction. - */ - if (!is_pseudo_opcode(next_lir->opcode) || - (next_lir == last_lir_insn_)) - break; - } - } - } -} - } // namespace art diff --git a/compiler/dex/quick/mips/assemble_mips.cc b/compiler/dex/quick/mips/assemble_mips.cc index cd25232c21..5f5e5e44ac 100644 --- a/compiler/dex/quick/mips/assemble_mips.cc +++ b/compiler/dex/quick/mips/assemble_mips.cc @@ -489,12 +489,12 @@ void MipsMir2Lir::ConvertShortToLongBranch(LIR* lir) { LIR* curr_pc = RawLIR(dalvik_offset, kMipsCurrPC); InsertLIRBefore(lir, curr_pc); LIR* anchor = RawLIR(dalvik_offset, kPseudoTargetLabel); - LIR* delta_hi = RawLIR(dalvik_offset, kMipsDeltaHi, r_AT, 0, - reinterpret_cast<uintptr_t>(anchor), 0, 0, lir->target); + LIR* delta_hi = RawLIR(dalvik_offset, kMipsDeltaHi, r_AT, 0, WrapPointer(anchor), 0, 0, + lir->target); InsertLIRBefore(lir, delta_hi); InsertLIRBefore(lir, anchor); - LIR* delta_lo = RawLIR(dalvik_offset, kMipsDeltaLo, r_AT, 0, - reinterpret_cast<uintptr_t>(anchor), 0, 0, lir->target); + LIR* delta_lo = RawLIR(dalvik_offset, kMipsDeltaLo, r_AT, 0, WrapPointer(anchor), 0, 0, + lir->target); InsertLIRBefore(lir, delta_lo); LIR* addu = RawLIR(dalvik_offset, kMipsAddu, r_AT, r_AT, r_RA); InsertLIRBefore(lir, addu); @@ -503,7 +503,7 @@ void MipsMir2Lir::ConvertShortToLongBranch(LIR* lir) { if (!unconditional) { InsertLIRBefore(lir, hop_target); } - lir->flags.is_nop = true; + NopLIR(lir); } /* @@ -512,7 +512,7 @@ void MipsMir2Lir::ConvertShortToLongBranch(LIR* lir) { * instruction. In those cases we will try to substitute a new code * sequence or request that the trace be shortened and retried. 
*/ -AssemblerStatus MipsMir2Lir::AssembleInstructions(uintptr_t start_addr) { +AssemblerStatus MipsMir2Lir::AssembleInstructions(CodeOffset start_addr) { LIR *lir; AssemblerStatus res = kSuccess; // Assume success @@ -526,7 +526,7 @@ AssemblerStatus MipsMir2Lir::AssembleInstructions(uintptr_t start_addr) { continue; } - if (lir->flags.pcRelFixup) { + if (lir->flags.fixup != kFixupNone) { if (lir->opcode == kMipsDelta) { /* * The "Delta" pseudo-ops load the difference between @@ -538,8 +538,8 @@ AssemblerStatus MipsMir2Lir::AssembleInstructions(uintptr_t start_addr) { * and is found in lir->target. If operands[3] is non-NULL, * then it is a Switch/Data table. */ - int offset1 = (reinterpret_cast<LIR*>(lir->operands[2]))->offset; - SwitchTable *tab_rec = reinterpret_cast<SwitchTable*>(lir->operands[3]); + int offset1 = (reinterpret_cast<LIR*>(UnwrapPointer(lir->operands[2])))->offset; + EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[3])); int offset2 = tab_rec ? tab_rec->offset : lir->target->offset; int delta = offset2 - offset1; if ((delta & 0xffff) == delta && ((delta & 0x8000) == 0)) { @@ -561,25 +561,25 @@ AssemblerStatus MipsMir2Lir::AssembleInstructions(uintptr_t start_addr) { RawLIR(lir->dalvik_offset, kMipsAddu, lir->operands[0], lir->operands[0], r_RA); InsertLIRBefore(lir, new_addu); - lir->flags.is_nop = true; + NopLIR(lir); res = kRetryAll; } } else if (lir->opcode == kMipsDeltaLo) { - int offset1 = (reinterpret_cast<LIR*>(lir->operands[2]))->offset; - SwitchTable *tab_rec = reinterpret_cast<SwitchTable*>(lir->operands[3]); + int offset1 = (reinterpret_cast<LIR*>(UnwrapPointer(lir->operands[2])))->offset; + EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[3])); int offset2 = tab_rec ? tab_rec->offset : lir->target->offset; int delta = offset2 - offset1; lir->operands[1] = delta & 0xffff; } else if (lir->opcode == kMipsDeltaHi) { - int offset1 = (reinterpret_cast<LIR*>(lir->operands[2]))->offset; - SwitchTable *tab_rec = reinterpret_cast<SwitchTable*>(lir->operands[3]); + int offset1 = (reinterpret_cast<LIR*>(UnwrapPointer(lir->operands[2])))->offset; + EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(lir->operands[3])); int offset2 = tab_rec ? 
tab_rec->offset : lir->target->offset; int delta = offset2 - offset1; lir->operands[1] = (delta >> 16) & 0xffff; } else if (lir->opcode == kMipsB || lir->opcode == kMipsBal) { LIR *target_lir = lir->target; - uintptr_t pc = lir->offset + 4; - uintptr_t target = target_lir->offset; + CodeOffset pc = lir->offset + 4; + CodeOffset target = target_lir->offset; int delta = target - pc; if (delta & 0x3) { LOG(FATAL) << "PC-rel offset not multiple of 4: " << delta; @@ -592,8 +592,8 @@ AssemblerStatus MipsMir2Lir::AssembleInstructions(uintptr_t start_addr) { } } else if (lir->opcode >= kMipsBeqz && lir->opcode <= kMipsBnez) { LIR *target_lir = lir->target; - uintptr_t pc = lir->offset + 4; - uintptr_t target = target_lir->offset; + CodeOffset pc = lir->offset + 4; + CodeOffset target = target_lir->offset; int delta = target - pc; if (delta & 0x3) { LOG(FATAL) << "PC-rel offset not multiple of 4: " << delta; @@ -606,8 +606,8 @@ AssemblerStatus MipsMir2Lir::AssembleInstructions(uintptr_t start_addr) { } } else if (lir->opcode == kMipsBeq || lir->opcode == kMipsBne) { LIR *target_lir = lir->target; - uintptr_t pc = lir->offset + 4; - uintptr_t target = target_lir->offset; + CodeOffset pc = lir->offset + 4; + CodeOffset target = target_lir->offset; int delta = target - pc; if (delta & 0x3) { LOG(FATAL) << "PC-rel offset not multiple of 4: " << delta; @@ -619,8 +619,8 @@ AssemblerStatus MipsMir2Lir::AssembleInstructions(uintptr_t start_addr) { lir->operands[2] = delta >> 2; } } else if (lir->opcode == kMipsJal) { - uintptr_t cur_pc = (start_addr + lir->offset + 4) & ~3; - uintptr_t target = lir->operands[0]; + CodeOffset cur_pc = (start_addr + lir->offset + 4) & ~3; + CodeOffset target = lir->operands[0]; /* ensure PC-region branch can be used */ DCHECK_EQ((cur_pc & 0xF0000000), (target & 0xF0000000)); if (target & 0x3) { @@ -629,11 +629,11 @@ AssemblerStatus MipsMir2Lir::AssembleInstructions(uintptr_t start_addr) { lir->operands[0] = target >> 2; } else if (lir->opcode == kMipsLahi) { /* ld address hi (via lui) */ LIR *target_lir = lir->target; - uintptr_t target = start_addr + target_lir->offset; + CodeOffset target = start_addr + target_lir->offset; lir->operands[1] = target >> 16; } else if (lir->opcode == kMipsLalo) { /* ld address lo (via ori) */ LIR *target_lir = lir->target; - uintptr_t target = start_addr + target_lir->offset; + CodeOffset target = start_addr + target_lir->offset; lir->operands[2] = lir->operands[2] + target; } } @@ -646,6 +646,7 @@ AssemblerStatus MipsMir2Lir::AssembleInstructions(uintptr_t start_addr) { if (res != kSuccess) { continue; } + DCHECK(!IsPseudoLirOp(lir->opcode)); const MipsEncodingMap *encoder = &EncodingMap[lir->opcode]; uint32_t bits = encoder->skeleton; int i; @@ -695,6 +696,7 @@ AssemblerStatus MipsMir2Lir::AssembleInstructions(uintptr_t start_addr) { code_buffer_.push_back((bits >> 24) & 0xff); // TUNING: replace with proper delay slot handling if (encoder->size == 8) { + DCHECK(!IsPseudoLirOp(lir->opcode)); const MipsEncodingMap *encoder = &EncodingMap[kMipsNop]; uint32_t bits = encoder->skeleton; code_buffer_.push_back(bits & 0xff); @@ -707,7 +709,105 @@ AssemblerStatus MipsMir2Lir::AssembleInstructions(uintptr_t start_addr) { } int MipsMir2Lir::GetInsnSize(LIR* lir) { + DCHECK(!IsPseudoLirOp(lir->opcode)); return EncodingMap[lir->opcode].size; } +// LIR offset assignment. +// TODO: consolidate w/ Arm assembly mechanism. 
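(Editor's note — a minimal standalone sketch, not part of this change; the helper name is hypothetical.) The fixup handling above recomputes PC-relative deltas on every assembly round and requests a retry (falling back to ConvertShortToLongBranch) when a branch no longer reaches its target. The reachability test it applies is essentially:

    // MIPS branch displacements are signed 16-bit word counts, measured from the
    // instruction after the branch (the delay-slot PC).
    static bool BranchReaches(uint32_t branch_offset, uint32_t target_offset) {
      int32_t delta = static_cast<int32_t>(target_offset) -
                      static_cast<int32_t>(branch_offset + 4);
      if ((delta & 0x3) != 0) {
        return false;  // Targets must be word aligned.
      }
      delta >>= 2;  // Encoded in words, not bytes.
      return delta >= -32768 && delta <= 32767;  // Must fit the 16-bit immediate field.
    }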
+int MipsMir2Lir::AssignInsnOffsets() { + LIR* lir; + int offset = 0; + + for (lir = first_lir_insn_; lir != NULL; lir = NEXT_LIR(lir)) { + lir->offset = offset; + if (LIKELY(lir->opcode >= 0)) { + if (!lir->flags.is_nop) { + offset += lir->flags.size; + } + } else if (UNLIKELY(lir->opcode == kPseudoPseudoAlign4)) { + if (offset & 0x2) { + offset += 2; + lir->operands[0] = 1; + } else { + lir->operands[0] = 0; + } + } + /* Pseudo opcodes don't consume space */ + } + return offset; +} + +/* + * Walk the compilation unit and assign offsets to instructions + * and literals and compute the total size of the compiled unit. + * TODO: consolidate w/ Arm assembly mechanism. + */ +void MipsMir2Lir::AssignOffsets() { + int offset = AssignInsnOffsets(); + + /* Const values have to be word aligned */ + offset = (offset + 3) & ~3; + + /* Set up offsets for literals */ + data_offset_ = offset; + + offset = AssignLiteralOffset(offset); + + offset = AssignSwitchTablesOffset(offset); + + offset = AssignFillArrayDataOffset(offset); + + total_size_ = offset; +} + +/* + * Go over each instruction in the list and calculate the offset from the top + * before sending them off to the assembler. If out-of-range branch distance is + * seen rearrange the instructions a bit to correct it. + * TODO: consolidate w/ Arm assembly mechanism. + */ +void MipsMir2Lir::AssembleLIR() { + cu_->NewTimingSplit("Assemble"); + AssignOffsets(); + int assembler_retries = 0; + /* + * Assemble here. Note that we generate code with optimistic assumptions + * and if found now to work, we'll have to redo the sequence and retry. + */ + + while (true) { + AssemblerStatus res = AssembleInstructions(0); + if (res == kSuccess) { + break; + } else { + assembler_retries++; + if (assembler_retries > MAX_ASSEMBLER_RETRIES) { + CodegenDump(); + LOG(FATAL) << "Assembler error - too many retries"; + } + // Redo offsets and try again + AssignOffsets(); + code_buffer_.clear(); + } + } + + // Install literals + cu_->NewTimingSplit("LiteralData"); + InstallLiteralPools(); + + // Install switch tables + InstallSwitchTables(); + + // Install fill array data + InstallFillArrayData(); + + // Create the mapping table and native offset to reference map. 
+ cu_->NewTimingSplit("PcMappingTable"); + CreateMappingTables(); + + cu_->NewTimingSplit("GcMap"); + CreateNativeGcMap(); +} + } // namespace art diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc index d53c012466..18c8cf87f2 100644 --- a/compiler/dex/quick/mips/call_mips.cc +++ b/compiler/dex/quick/mips/call_mips.cc @@ -59,14 +59,14 @@ void MipsMir2Lir::GenSpecialCase(BasicBlock* bb, MIR* mir, * done: * */ -void MipsMir2Lir::GenSparseSwitch(MIR* mir, uint32_t table_offset, +void MipsMir2Lir::GenSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) { const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset; if (cu_->verbose) { DumpSparseSwitchTable(table); } // Add the table to the list - we'll process it later - SwitchTable *tab_rec = + SwitchTable* tab_rec = static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), ArenaAllocator::kAllocData)); tab_rec->table = table; tab_rec->vaddr = current_dalvik_offset_; @@ -101,8 +101,7 @@ void MipsMir2Lir::GenSparseSwitch(MIR* mir, uint32_t table_offset, // Remember base label so offsets can be computed later tab_rec->anchor = base_label; int rBase = AllocTemp(); - NewLIR4(kMipsDelta, rBase, 0, reinterpret_cast<uintptr_t>(base_label), - reinterpret_cast<uintptr_t>(tab_rec)); + NewLIR4(kMipsDelta, rBase, 0, WrapPointer(base_label), WrapPointer(tab_rec)); OpRegRegReg(kOpAdd, rEnd, rEnd, rBase); // Grab switch test value @@ -138,20 +137,20 @@ void MipsMir2Lir::GenSparseSwitch(MIR* mir, uint32_t table_offset, * jr r_RA * done: */ -void MipsMir2Lir::GenPackedSwitch(MIR* mir, uint32_t table_offset, +void MipsMir2Lir::GenPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) { const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset; if (cu_->verbose) { DumpPackedSwitchTable(table); } // Add the table to the list - we'll process it later - SwitchTable *tab_rec = + SwitchTable* tab_rec = static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), ArenaAllocator::kAllocData)); tab_rec->table = table; tab_rec->vaddr = current_dalvik_offset_; int size = table[1]; tab_rec->targets = static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), - ArenaAllocator::kAllocLIR)); + ArenaAllocator::kAllocLIR)); switch_tables_.Insert(tab_rec); // Get the switch value @@ -196,8 +195,7 @@ void MipsMir2Lir::GenPackedSwitch(MIR* mir, uint32_t table_offset, // Materialize the table base pointer int rBase = AllocTemp(); - NewLIR4(kMipsDelta, rBase, 0, reinterpret_cast<uintptr_t>(base_label), - reinterpret_cast<uintptr_t>(tab_rec)); + NewLIR4(kMipsDelta, rBase, 0, WrapPointer(base_label), WrapPointer(tab_rec)); // Load the displacement from the switch table int r_disp = AllocTemp(); @@ -222,10 +220,10 @@ void MipsMir2Lir::GenPackedSwitch(MIR* mir, uint32_t table_offset, * * Total size is 4+(width * size + 1)/2 16-bit code units. 
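(Editor's example, not part of the change.) Working the size formula through once: a payload of ten 4-byte elements occupies 4 + (4 * 10 + 1) / 2 = 24 code units, i.e. the 4-unit header plus 20 units of element data.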
*/ -void MipsMir2Lir::GenFillArrayData(uint32_t table_offset, RegLocation rl_src) { +void MipsMir2Lir::GenFillArrayData(DexOffset table_offset, RegLocation rl_src) { const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset; // Add the table to the list - we'll process it later - FillArrayData *tab_rec = + FillArrayData* tab_rec = reinterpret_cast<FillArrayData*>(arena_->Alloc(sizeof(FillArrayData), ArenaAllocator::kAllocData)); tab_rec->table = table; @@ -252,8 +250,7 @@ void MipsMir2Lir::GenFillArrayData(uint32_t table_offset, RegLocation rl_src) { LIR* base_label = NewLIR0(kPseudoTargetLabel); // Materialize a pointer to the fill data image - NewLIR4(kMipsDelta, rMIPS_ARG1, 0, reinterpret_cast<uintptr_t>(base_label), - reinterpret_cast<uintptr_t>(tab_rec)); + NewLIR4(kMipsDelta, rMIPS_ARG1, 0, WrapPointer(base_label), WrapPointer(tab_rec)); // And go... ClobberCalleeSave(); @@ -261,36 +258,6 @@ void MipsMir2Lir::GenFillArrayData(uint32_t table_offset, RegLocation rl_src) { MarkSafepointPC(call_inst); } -/* - * TODO: implement fast path to short-circuit thin-lock case - */ -void MipsMir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) { - FlushAllRegs(); - LoadValueDirectFixed(rl_src, rMIPS_ARG0); // Get obj - LockCallTemps(); // Prepare for explicit register usage - GenNullCheck(rl_src.s_reg_low, rMIPS_ARG0, opt_flags); - // Go expensive route - artLockObjectFromCode(self, obj); - int r_tgt = LoadHelper(QUICK_ENTRYPOINT_OFFSET(pLockObject)); - ClobberCalleeSave(); - LIR* call_inst = OpReg(kOpBlx, r_tgt); - MarkSafepointPC(call_inst); -} - -/* - * TODO: implement fast path to short-circuit thin-lock case - */ -void MipsMir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) { - FlushAllRegs(); - LoadValueDirectFixed(rl_src, rMIPS_ARG0); // Get obj - LockCallTemps(); // Prepare for explicit register usage - GenNullCheck(rl_src.s_reg_low, rMIPS_ARG0, opt_flags); - // Go expensive route - UnlockObjectFromCode(obj); - int r_tgt = LoadHelper(QUICK_ENTRYPOINT_OFFSET(pUnlockObject)); - ClobberCalleeSave(); - LIR* call_inst = OpReg(kOpBlx, r_tgt); - MarkSafepointPC(call_inst); -} - void MipsMir2Lir::GenMoveException(RegLocation rl_dest) { int ex_offset = Thread::ExceptionOffset().Int32Value(); RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); @@ -318,6 +285,7 @@ void MipsMir2Lir::MarkGCCard(int val_reg, int tgt_addr_reg) { FreeTemp(reg_card_base); FreeTemp(reg_card_no); } + void MipsMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { int spill_count = num_core_spills_ + num_fp_spills_; /* diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h index b9cb720962..88b244ba90 100644 --- a/compiler/dex/quick/mips/codegen_mips.h +++ b/compiler/dex/quick/mips/codegen_mips.h @@ -52,7 +52,6 @@ class MipsMir2Lir : public Mir2Lir { int AllocTypedTempPair(bool fp_hint, int reg_class); int S2d(int low_reg, int high_reg); int TargetReg(SpecialTargetRegister reg); - RegisterInfo* GetRegInfo(int reg); RegLocation GetReturnAlt(); RegLocation GetReturnWideAlt(); RegLocation LocCReturn(); @@ -72,9 +71,12 @@ class MipsMir2Lir : public Mir2Lir { void CompilerInitializeRegAlloc(); // Required for target - miscellaneous. 
- AssemblerStatus AssembleInstructions(uintptr_t start_addr); + void AssembleLIR(); + int AssignInsnOffsets(); + void AssignOffsets(); + AssemblerStatus AssembleInstructions(CodeOffset start_addr); void DumpResourceMask(LIR* lir, uint64_t mask, const char* prefix); - void SetupTargetResourceMasks(LIR* lir); + void SetupTargetResourceMasks(LIR* lir, uint64_t flags); const char* GetTargetInstFmt(int opcode); const char* GetTargetInstName(int opcode); std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr); @@ -86,12 +88,10 @@ class MipsMir2Lir : public Mir2Lir { // Required for target - Dalvik-level generators. void GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); - void GenArrayObjPut(int opt_flags, RegLocation rl_array, RegLocation rl_index, - RegLocation rl_src, int scale); void GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, - RegLocation rl_index, RegLocation rl_dest, int scale); + RegLocation rl_index, RegLocation rl_dest, int scale); void GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, - RegLocation rl_index, RegLocation rl_src, int scale); + RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark); void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_shift); void GenMulLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); @@ -107,6 +107,8 @@ class MipsMir2Lir : public Mir2Lir { bool GenInlinedCas32(CallInfo* info, bool need_write_barrier); bool GenInlinedMinMaxInt(CallInfo* info, bool is_min); bool GenInlinedSqrt(CallInfo* info); + bool GenInlinedPeek(CallInfo* info, OpSize size); + bool GenInlinedPoke(CallInfo* info, OpSize size); void GenNegLong(RegLocation rl_dest, RegLocation rl_src); void GenOrLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); void GenSubLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); @@ -124,8 +126,6 @@ class MipsMir2Lir : public Mir2Lir { void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir); void GenSelect(BasicBlock* bb, MIR* mir); void GenMemBarrier(MemBarrierKind barrier_kind); - void GenMonitorEnter(int opt_flags, RegLocation rl_src); - void GenMonitorExit(int opt_flags, RegLocation rl_src); void GenMoveException(RegLocation rl_dest); void GenMultiplyByTwoBitMultiplier(RegLocation rl_src, RegLocation rl_result, int lit, int first_bit, int second_bit); diff --git a/compiler/dex/quick/mips/int_mips.cc b/compiler/dex/quick/mips/int_mips.cc index 6ce5750a5f..52294290c9 100644 --- a/compiler/dex/quick/mips/int_mips.cc +++ b/compiler/dex/quick/mips/int_mips.cc @@ -268,6 +268,37 @@ bool MipsMir2Lir::GenInlinedSqrt(CallInfo* info) { return false; } +bool MipsMir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) { + if (size != kSignedByte) { + // MIPS supports only aligned access. Defer unaligned access to JNI implementation. + return false; + } + RegLocation rl_src_address = info->args[0]; // long address + rl_src_address.wide = 0; // ignore high half in info->args[1] + RegLocation rl_dest = InlineTarget(info); + RegLocation rl_address = LoadValue(rl_src_address, kCoreReg); + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + DCHECK(size == kSignedByte); + LoadBaseDisp(rl_address.low_reg, 0, rl_result.low_reg, size, INVALID_SREG); + StoreValue(rl_dest, rl_result); + return true; +} + +bool MipsMir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) { + if (size != kSignedByte) { + // MIPS supports only aligned access. 
Defer unaligned access to JNI implementation. + return false; + } + RegLocation rl_src_address = info->args[0]; // long address + rl_src_address.wide = 0; // ignore high half in info->args[1] + RegLocation rl_src_value = info->args[2]; // [size] value + RegLocation rl_address = LoadValue(rl_src_address, kCoreReg); + DCHECK(size == kSignedByte); + RegLocation rl_value = LoadValue(rl_src_value, kCoreReg); + StoreBaseDisp(rl_address.low_reg, 0, rl_value.low_reg, size); + return true; +} + LIR* MipsMir2Lir::OpPcRelLoad(int reg, LIR* target) { LOG(FATAL) << "Unexpected use of OpPcRelLoad for Mips"; return NULL; @@ -484,7 +515,7 @@ void MipsMir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, * */ void MipsMir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, - RegLocation rl_index, RegLocation rl_src, int scale) { + RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) { RegisterClass reg_class = oat_reg_class_by_size(size); int len_offset = mirror::Array::LengthOffset().Int32Value(); int data_offset; @@ -498,12 +529,14 @@ void MipsMir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, rl_array = LoadValue(rl_array, kCoreReg); rl_index = LoadValue(rl_index, kCoreReg); int reg_ptr = INVALID_REG; - if (IsTemp(rl_array.low_reg)) { + bool allocated_reg_ptr_temp = false; + if (IsTemp(rl_array.low_reg) && !card_mark) { Clobber(rl_array.low_reg); reg_ptr = rl_array.low_reg; } else { reg_ptr = AllocTemp(); OpRegCopy(reg_ptr, rl_array.low_reg); + allocated_reg_ptr_temp = true; } /* null object? */ @@ -538,8 +571,6 @@ void MipsMir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, } StoreBaseDispWide(reg_ptr, 0, rl_src.low_reg, rl_src.high_reg); - - FreeTemp(reg_ptr); } else { rl_src = LoadValue(rl_src, reg_class); if (needs_range_check) { @@ -549,65 +580,11 @@ void MipsMir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, StoreBaseIndexed(reg_ptr, rl_index.low_reg, rl_src.low_reg, scale, size); } -} - -/* - * Generate array store - * - */ -void MipsMir2Lir::GenArrayObjPut(int opt_flags, RegLocation rl_array, - RegLocation rl_index, RegLocation rl_src, int scale) { - int len_offset = mirror::Array::LengthOffset().Int32Value(); - int data_offset = mirror::Array::DataOffset(sizeof(mirror::Object*)).Int32Value(); - - FlushAllRegs(); // Use explicit registers - LockCallTemps(); - - int r_value = TargetReg(kArg0); // Register holding value - int r_array_class = TargetReg(kArg1); // Register holding array's Class - int r_array = TargetReg(kArg2); // Register holding array - int r_index = TargetReg(kArg3); // Register holding index into array - - LoadValueDirectFixed(rl_array, r_array); // Grab array - LoadValueDirectFixed(rl_src, r_value); // Grab value - LoadValueDirectFixed(rl_index, r_index); // Grab index - - GenNullCheck(rl_array.s_reg_low, r_array, opt_flags); // NPE? - - // Store of null? - LIR* null_value_check = OpCmpImmBranch(kCondEq, r_value, 0, NULL); - - // Get the array's class. - LoadWordDisp(r_array, mirror::Object::ClassOffset().Int32Value(), r_array_class); - CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(pCanPutArrayElement), r_value, - r_array_class, true); - // Redo LoadValues in case they didn't survive the call. 
- LoadValueDirectFixed(rl_array, r_array); // Reload array - LoadValueDirectFixed(rl_index, r_index); // Reload index - LoadValueDirectFixed(rl_src, r_value); // Reload value - r_array_class = INVALID_REG; - - // Branch here if value to be stored == null - LIR* target = NewLIR0(kPseudoTargetLabel); - null_value_check->target = target; - - bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK)); - int reg_len = INVALID_REG; - if (needs_range_check) { - reg_len = TargetReg(kArg1); - LoadWordDisp(r_array, len_offset, reg_len); // Get len - } - /* r_ptr -> array data */ - int r_ptr = AllocTemp(); - OpRegRegImm(kOpAdd, r_ptr, r_array, data_offset); - if (needs_range_check) { - GenRegRegCheck(kCondCs, r_index, reg_len, kThrowArrayBounds); + if (allocated_reg_ptr_temp) { + FreeTemp(reg_ptr); } - StoreBaseIndexed(r_ptr, r_index, r_value, scale, kWord); - FreeTemp(r_ptr); - FreeTemp(r_index); - if (!mir_graph_->IsConstantNullRef(rl_src)) { - MarkGCCard(r_value, r_array); + if (card_mark) { + MarkGCCard(rl_src.low_reg, rl_array.low_reg); } } diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc index 4ee5b23eb9..9c598e6bee 100644 --- a/compiler/dex/quick/mips/target_mips.cc +++ b/compiler/dex/quick/mips/target_mips.cc @@ -76,6 +76,8 @@ int MipsMir2Lir::TargetReg(SpecialTargetRegister reg) { case kRet0: res = rMIPS_RET0; break; case kRet1: res = rMIPS_RET1; break; case kInvokeTgt: res = rMIPS_INVOKE_TGT; break; + case kHiddenArg: res = r_T0; break; + case kHiddenFpArg: res = INVALID_REG; break; case kCount: res = rMIPS_COUNT; break; } return res; @@ -120,22 +122,21 @@ uint64_t MipsMir2Lir::GetPCUseDefEncoding() { } -void MipsMir2Lir::SetupTargetResourceMasks(LIR* lir) { +void MipsMir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags) { DCHECK_EQ(cu_->instruction_set, kMips); + DCHECK(!lir->flags.use_def_invalid); // Mips-specific resource map setup here. - uint64_t flags = MipsMir2Lir::EncodingMap[lir->opcode].flags; - if (flags & REG_DEF_SP) { - lir->def_mask |= ENCODE_MIPS_REG_SP; + lir->u.m.def_mask |= ENCODE_MIPS_REG_SP; } if (flags & REG_USE_SP) { - lir->use_mask |= ENCODE_MIPS_REG_SP; + lir->u.m.use_mask |= ENCODE_MIPS_REG_SP; } if (flags & REG_DEF_LR) { - lir->def_mask |= ENCODE_MIPS_REG_LR; + lir->u.m.def_mask |= ENCODE_MIPS_REG_LR; } } @@ -269,8 +270,8 @@ void MipsMir2Lir::DumpResourceMask(LIR *mips_lir, uint64_t mask, const char *pre } /* Memory bits */ if (mips_lir && (mask & ENCODE_DALVIK_REG)) { - sprintf(buf + strlen(buf), "dr%d%s", mips_lir->alias_info & 0xffff, - (mips_lir->alias_info & 0x80000000) ? "(+1)" : ""); + sprintf(buf + strlen(buf), "dr%d%s", DECODE_ALIAS_INFO_REG(mips_lir->flags.alias_info), + DECODE_ALIAS_INFO_WIDE(mips_lir->flags.alias_info) ? "(+1)" : ""); } if (mask & ENCODE_LITERAL) { strcat(buf, "lit "); @@ -399,11 +400,6 @@ RegLocation MipsMir2Lir::GetReturnAlt() { return res; } -MipsMir2Lir::RegisterInfo* MipsMir2Lir::GetRegInfo(int reg) { - return MIPS_FPREG(reg) ? 
®_pool_->FPRegs[reg & MIPS_FP_REG_MASK] - : ®_pool_->core_regs[reg]; -} - /* To be used when explicitly managing register use */ void MipsMir2Lir::LockCallTemps() { LockTemp(rMIPS_ARG0); @@ -559,14 +555,17 @@ Mir2Lir* MipsCodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph, } uint64_t MipsMir2Lir::GetTargetInstFlags(int opcode) { + DCHECK(!IsPseudoLirOp(opcode)); return MipsMir2Lir::EncodingMap[opcode].flags; } const char* MipsMir2Lir::GetTargetInstName(int opcode) { + DCHECK(!IsPseudoLirOp(opcode)); return MipsMir2Lir::EncodingMap[opcode].name; } const char* MipsMir2Lir::GetTargetInstFmt(int opcode) { + DCHECK(!IsPseudoLirOp(opcode)); return MipsMir2Lir::EncodingMap[opcode].fmt; } diff --git a/compiler/dex/quick/mips/utility_mips.cc b/compiler/dex/quick/mips/utility_mips.cc index 5d9ae33921..2ba2c8487d 100644 --- a/compiler/dex/quick/mips/utility_mips.cc +++ b/compiler/dex/quick/mips/utility_mips.cc @@ -93,7 +93,7 @@ LIR* MipsMir2Lir::LoadConstantNoClobber(int r_dest, int value) { } else if ((value < 0) && (value >= -32768)) { res = NewLIR3(kMipsAddiu, r_dest, r_ZERO, value); } else { - res = NewLIR2(kMipsLui, r_dest, value>>16); + res = NewLIR2(kMipsLui, r_dest, value >> 16); if (value & 0xffff) NewLIR3(kMipsOri, r_dest, r_dest, value); } diff --git a/compiler/dex/quick/mir_to_lir-inl.h b/compiler/dex/quick/mir_to_lir-inl.h index 440df2afa6..1a30b7aef0 100644 --- a/compiler/dex/quick/mir_to_lir-inl.h +++ b/compiler/dex/quick/mir_to_lir-inl.h @@ -33,12 +33,17 @@ inline void Mir2Lir::ClobberBody(RegisterInfo* p) { p->def_end = NULL; if (p->pair) { p->pair = false; - Clobber(p->partner); + p = GetRegInfo(p->partner); + p->pair = false; + p->live = false; + p->s_reg = INVALID_SREG; + p->def_start = NULL; + p->def_end = NULL; } } } -inline LIR* Mir2Lir::RawLIR(int dalvik_offset, int opcode, int op0, +inline LIR* Mir2Lir::RawLIR(DexOffset dalvik_offset, int opcode, int op0, int op1, int op2, int op3, int op4, LIR* target) { LIR* insn = static_cast<LIR*>(arena_->Alloc(sizeof(LIR), ArenaAllocator::kAllocLIR)); insn->dalvik_offset = dalvik_offset; @@ -53,7 +58,8 @@ inline LIR* Mir2Lir::RawLIR(int dalvik_offset, int opcode, int op0, if ((opcode == kPseudoTargetLabel) || (opcode == kPseudoSafepointPC) || (opcode == kPseudoExportedPC)) { // Always make labels scheduling barriers - insn->use_mask = insn->def_mask = ENCODE_ALL; + DCHECK(!insn->flags.use_def_invalid); + insn->u.m.use_mask = insn->u.m.def_mask = ENCODE_ALL; } return insn; } @@ -63,7 +69,7 @@ inline LIR* Mir2Lir::RawLIR(int dalvik_offset, int opcode, int op0, * operands. 
*/ inline LIR* Mir2Lir::NewLIR0(int opcode) { - DCHECK(is_pseudo_opcode(opcode) || (GetTargetInstFlags(opcode) & NO_OPERAND)) + DCHECK(IsPseudoLirOp(opcode) || (GetTargetInstFlags(opcode) & NO_OPERAND)) << GetTargetInstName(opcode) << " " << opcode << " " << PrettyMethod(cu_->method_idx, *cu_->dex_file) << " " << current_dalvik_offset_; @@ -73,7 +79,7 @@ inline LIR* Mir2Lir::NewLIR0(int opcode) { } inline LIR* Mir2Lir::NewLIR1(int opcode, int dest) { - DCHECK(is_pseudo_opcode(opcode) || (GetTargetInstFlags(opcode) & IS_UNARY_OP)) + DCHECK(IsPseudoLirOp(opcode) || (GetTargetInstFlags(opcode) & IS_UNARY_OP)) << GetTargetInstName(opcode) << " " << opcode << " " << PrettyMethod(cu_->method_idx, *cu_->dex_file) << " " << current_dalvik_offset_; @@ -83,7 +89,7 @@ inline LIR* Mir2Lir::NewLIR1(int opcode, int dest) { } inline LIR* Mir2Lir::NewLIR2(int opcode, int dest, int src1) { - DCHECK(is_pseudo_opcode(opcode) || (GetTargetInstFlags(opcode) & IS_BINARY_OP)) + DCHECK(IsPseudoLirOp(opcode) || (GetTargetInstFlags(opcode) & IS_BINARY_OP)) << GetTargetInstName(opcode) << " " << opcode << " " << PrettyMethod(cu_->method_idx, *cu_->dex_file) << " " << current_dalvik_offset_; @@ -93,7 +99,7 @@ inline LIR* Mir2Lir::NewLIR2(int opcode, int dest, int src1) { } inline LIR* Mir2Lir::NewLIR3(int opcode, int dest, int src1, int src2) { - DCHECK(is_pseudo_opcode(opcode) || (GetTargetInstFlags(opcode) & IS_TERTIARY_OP)) + DCHECK(IsPseudoLirOp(opcode) || (GetTargetInstFlags(opcode) & IS_TERTIARY_OP)) << GetTargetInstName(opcode) << " " << opcode << " " << PrettyMethod(cu_->method_idx, *cu_->dex_file) << " " << current_dalvik_offset_; @@ -103,7 +109,7 @@ inline LIR* Mir2Lir::NewLIR3(int opcode, int dest, int src1, int src2) { } inline LIR* Mir2Lir::NewLIR4(int opcode, int dest, int src1, int src2, int info) { - DCHECK(is_pseudo_opcode(opcode) || (GetTargetInstFlags(opcode) & IS_QUAD_OP)) + DCHECK(IsPseudoLirOp(opcode) || (GetTargetInstFlags(opcode) & IS_QUAD_OP)) << GetTargetInstName(opcode) << " " << opcode << " " << PrettyMethod(cu_->method_idx, *cu_->dex_file) << " " << current_dalvik_offset_; @@ -114,7 +120,7 @@ inline LIR* Mir2Lir::NewLIR4(int opcode, int dest, int src1, int src2, int info) inline LIR* Mir2Lir::NewLIR5(int opcode, int dest, int src1, int src2, int info1, int info2) { - DCHECK(is_pseudo_opcode(opcode) || (GetTargetInstFlags(opcode) & IS_QUIN_OP)) + DCHECK(IsPseudoLirOp(opcode) || (GetTargetInstFlags(opcode) & IS_QUIN_OP)) << GetTargetInstName(opcode) << " " << opcode << " " << PrettyMethod(cu_->method_idx, *cu_->dex_file) << " " << current_dalvik_offset_; @@ -136,20 +142,23 @@ inline void Mir2Lir::SetupRegMask(uint64_t* mask, int reg) { inline void Mir2Lir::SetupResourceMasks(LIR* lir) { int opcode = lir->opcode; - if (opcode <= 0) { - lir->use_mask = lir->def_mask = 0; + if (IsPseudoLirOp(opcode)) { + if (opcode != kPseudoBarrier) { + lir->flags.fixup = kFixupLabel; + } return; } uint64_t flags = GetTargetInstFlags(opcode); if (flags & NEEDS_FIXUP) { - lir->flags.pcRelFixup = true; + // Note: target-specific setup may specialize the fixup kind. + lir->flags.fixup = kFixupLabel; } /* Get the starting size of the instruction's template */ lir->flags.size = GetInsnSize(lir); - + estimated_native_code_size_ += lir->flags.size; /* Set up the mask for resources that are updated */ if (flags & (IS_LOAD | IS_STORE)) { /* Default to heap - will catch specialized classes later */ @@ -161,39 +170,49 @@ inline void Mir2Lir::SetupResourceMasks(LIR* lir) { * turn will trash everything. 
*/ if (flags & IS_BRANCH) { - lir->def_mask = lir->use_mask = ENCODE_ALL; + lir->u.m.def_mask = lir->u.m.use_mask = ENCODE_ALL; return; } if (flags & REG_DEF0) { - SetupRegMask(&lir->def_mask, lir->operands[0]); + SetupRegMask(&lir->u.m.def_mask, lir->operands[0]); } if (flags & REG_DEF1) { - SetupRegMask(&lir->def_mask, lir->operands[1]); + SetupRegMask(&lir->u.m.def_mask, lir->operands[1]); } + if (flags & REG_USE0) { + SetupRegMask(&lir->u.m.use_mask, lir->operands[0]); + } - if (flags & SETS_CCODES) { - lir->def_mask |= ENCODE_CCODE; + if (flags & REG_USE1) { + SetupRegMask(&lir->u.m.use_mask, lir->operands[1]); + } + + if (flags & REG_USE2) { + SetupRegMask(&lir->u.m.use_mask, lir->operands[2]); } - if (flags & (REG_USE0 | REG_USE1 | REG_USE2 | REG_USE3)) { - int i; + if (flags & REG_USE3) { + SetupRegMask(&lir->u.m.use_mask, lir->operands[3]); + } - for (i = 0; i < 4; i++) { - if (flags & (1 << (kRegUse0 + i))) { - SetupRegMask(&lir->use_mask, lir->operands[i]); - } - } + if (flags & SETS_CCODES) { + lir->u.m.def_mask |= ENCODE_CCODE; } if (flags & USES_CCODES) { - lir->use_mask |= ENCODE_CCODE; + lir->u.m.use_mask |= ENCODE_CCODE; } // Handle target-specific actions - SetupTargetResourceMasks(lir); + SetupTargetResourceMasks(lir, flags); +} + +inline art::Mir2Lir::RegisterInfo* Mir2Lir::GetRegInfo(int reg) { + DCHECK(reginfo_map_.Get(reg) != NULL); + return reginfo_map_.Get(reg); } } // namespace art diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc index c41feb1348..fa9a3ad566 100644 --- a/compiler/dex/quick/mir_to_lir.cc +++ b/compiler/dex/quick/mir_to_lir.cc @@ -18,6 +18,7 @@ #include "dex/dataflow_iterator-inl.h" #include "mir_to_lir-inl.h" #include "object_utils.h" +#include "thread-inl.h" namespace art { @@ -240,9 +241,9 @@ void Mir2Lir::CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list case Instruction::GOTO_16: case Instruction::GOTO_32: if (mir_graph_->IsBackedge(bb, bb->taken)) { - GenSuspendTestAndBranch(opt_flags, &label_list[bb->taken->id]); + GenSuspendTestAndBranch(opt_flags, &label_list[bb->taken]); } else { - OpUnconditionalBranch(&label_list[bb->taken->id]); + OpUnconditionalBranch(&label_list[bb->taken]); } break; @@ -271,23 +272,22 @@ void Mir2Lir::CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list case Instruction::IF_GE: case Instruction::IF_GT: case Instruction::IF_LE: { - LIR* taken = &label_list[bb->taken->id]; - LIR* fall_through = &label_list[bb->fall_through->id]; + LIR* taken = &label_list[bb->taken]; + LIR* fall_through = &label_list[bb->fall_through]; // Result known at compile time? if (rl_src[0].is_const && rl_src[1].is_const) { bool is_taken = EvaluateBranch(opcode, mir_graph_->ConstantValue(rl_src[0].orig_sreg), mir_graph_->ConstantValue(rl_src[1].orig_sreg)); - BasicBlock* target = is_taken ? bb->taken : bb->fall_through; - if (mir_graph_->IsBackedge(bb, target)) { + BasicBlockId target_id = is_taken ? 
bb->taken : bb->fall_through; + if (mir_graph_->IsBackedge(bb, target_id)) { GenSuspendTest(opt_flags); } - OpUnconditionalBranch(&label_list[target->id]); + OpUnconditionalBranch(&label_list[target_id]); } else { if (mir_graph_->IsBackwardsBranch(bb)) { GenSuspendTest(opt_flags); } - GenCompareAndBranch(opcode, rl_src[0], rl_src[1], taken, - fall_through); + GenCompareAndBranch(opcode, rl_src[0], rl_src[1], taken, fall_through); } break; } @@ -298,16 +298,16 @@ void Mir2Lir::CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list case Instruction::IF_GEZ: case Instruction::IF_GTZ: case Instruction::IF_LEZ: { - LIR* taken = &label_list[bb->taken->id]; - LIR* fall_through = &label_list[bb->fall_through->id]; + LIR* taken = &label_list[bb->taken]; + LIR* fall_through = &label_list[bb->fall_through]; // Result known at compile time? if (rl_src[0].is_const) { bool is_taken = EvaluateBranch(opcode, mir_graph_->ConstantValue(rl_src[0].orig_sreg), 0); - BasicBlock* target = is_taken ? bb->taken : bb->fall_through; - if (mir_graph_->IsBackedge(bb, target)) { + BasicBlockId target_id = is_taken ? bb->taken : bb->fall_through; + if (mir_graph_->IsBackedge(bb, target_id)) { GenSuspendTest(opt_flags); } - OpUnconditionalBranch(&label_list[target->id]); + OpUnconditionalBranch(&label_list[target_id]); } else { if (mir_graph_->IsBackwardsBranch(bb)) { GenSuspendTest(opt_flags); @@ -337,22 +337,35 @@ void Mir2Lir::CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list GenArrayGet(opt_flags, kSignedHalf, rl_src[0], rl_src[1], rl_dest, 1); break; case Instruction::APUT_WIDE: - GenArrayPut(opt_flags, kLong, rl_src[1], rl_src[2], rl_src[0], 3); + GenArrayPut(opt_flags, kLong, rl_src[1], rl_src[2], rl_src[0], 3, false); break; case Instruction::APUT: - GenArrayPut(opt_flags, kWord, rl_src[1], rl_src[2], rl_src[0], 2); - break; - case Instruction::APUT_OBJECT: - GenArrayObjPut(opt_flags, rl_src[1], rl_src[2], rl_src[0], 2); + GenArrayPut(opt_flags, kWord, rl_src[1], rl_src[2], rl_src[0], 2, false); + break; + case Instruction::APUT_OBJECT: { + bool is_null = mir_graph_->IsConstantNullRef(rl_src[0]); + bool is_safe = is_null; // Always safe to store null. + if (!is_safe) { + // Check safety from verifier type information. + const MethodReference mr(cu_->dex_file, cu_->method_idx); + is_safe = cu_->compiler_driver->IsSafeCast(mr, mir->offset); + } + if (is_null || is_safe) { + // Store of constant null doesn't require an assignability test and can be generated inline + // without fixed register usage or a card mark. + GenArrayPut(opt_flags, kWord, rl_src[1], rl_src[2], rl_src[0], 2, !is_null); + } else { + GenArrayObjPut(opt_flags, rl_src[1], rl_src[2], rl_src[0]); + } break; + } case Instruction::APUT_SHORT: case Instruction::APUT_CHAR: - GenArrayPut(opt_flags, kUnsignedHalf, rl_src[1], rl_src[2], rl_src[0], 1); + GenArrayPut(opt_flags, kUnsignedHalf, rl_src[1], rl_src[2], rl_src[0], 1, false); break; case Instruction::APUT_BYTE: case Instruction::APUT_BOOLEAN: - GenArrayPut(opt_flags, kUnsignedByte, rl_src[1], rl_src[2], - rl_src[0], 0); + GenArrayPut(opt_flags, kUnsignedByte, rl_src[1], rl_src[2], rl_src[0], 0, false); break; case Instruction::IGET_OBJECT: @@ -696,6 +709,7 @@ bool Mir2Lir::MethodBlockCodeGen(BasicBlock* bb) { // Insert the block label. 
block_label_list_[block_id].opcode = kPseudoNormalBlockLabel; + block_label_list_[block_id].flags.fixup = kFixupLabel; AppendLIR(&block_label_list_[block_id]); LIR* head_lir = NULL; @@ -706,16 +720,15 @@ bool Mir2Lir::MethodBlockCodeGen(BasicBlock* bb) { } // Free temp registers and reset redundant store tracking. - ResetRegPool(); - ResetDefTracking(); - ClobberAllRegs(); if (bb->block_type == kEntryBlock) { + ResetRegPool(); int start_vreg = cu_->num_dalvik_registers - cu_->num_ins; GenEntrySequence(&mir_graph_->reg_location_[start_vreg], mir_graph_->reg_location_[mir_graph_->GetMethodSReg()]); } else if (bb->block_type == kExitBlock) { + ResetRegPool(); GenExitSequence(); } @@ -736,17 +749,18 @@ bool Mir2Lir::MethodBlockCodeGen(BasicBlock* bb) { current_dalvik_offset_ = mir->offset; int opcode = mir->dalvikInsn.opcode; - LIR* boundary_lir; // Mark the beginning of a Dalvik instruction for line tracking. - char* inst_str = cu_->verbose ? - mir_graph_->GetDalvikDisassembly(mir) : NULL; - boundary_lir = MarkBoundary(mir->offset, inst_str); + if (cu_->verbose) { + char* inst_str = mir_graph_->GetDalvikDisassembly(mir); + MarkBoundary(mir->offset, inst_str); + } // Remember the first LIR for this block. if (head_lir == NULL) { - head_lir = boundary_lir; - // Set the first boundary_lir as a scheduling barrier. - head_lir->def_mask = ENCODE_ALL; + head_lir = &block_label_list_[bb->id]; + // Set the first label as a scheduling barrier. + DCHECK(!head_lir->flags.use_def_invalid); + head_lir->u.m.def_mask = ENCODE_ALL; } if (opcode == kMirOpCheck) { @@ -771,11 +785,6 @@ bool Mir2Lir::MethodBlockCodeGen(BasicBlock* bb) { if (head_lir) { // Eliminate redundant loads/stores and delay stores into later slots. ApplyLocalOptimizations(head_lir, last_lir_insn_); - - // Generate an unconditional branch to the fallthrough block. - if (bb->fall_through) { - OpUnconditionalBranch(&block_label_list_[bb->fall_through->id]); - } } return false; } @@ -810,25 +819,34 @@ void Mir2Lir::SpecialMIR2LIR(SpecialCaseHandler special_case) { } void Mir2Lir::MethodMIR2LIR() { + cu_->NewTimingSplit("MIR2LIR"); + // Hold the labels of each block. block_label_list_ = static_cast<LIR*>(arena_->Alloc(sizeof(LIR) * mir_graph_->GetNumBlocks(), ArenaAllocator::kAllocLIR)); - PreOrderDfsIterator iter(mir_graph_, false /* not iterative */); - for (BasicBlock* bb = iter.Next(); bb != NULL; bb = iter.Next()) { - MethodBlockCodeGen(bb); + PreOrderDfsIterator iter(mir_graph_); + BasicBlock* curr_bb = iter.Next(); + BasicBlock* next_bb = iter.Next(); + while (curr_bb != NULL) { + MethodBlockCodeGen(curr_bb); + // If the fall_through block is no longer laid out consecutively, drop in a branch. 
+ BasicBlock* curr_bb_fall_through = mir_graph_->GetBasicBlock(curr_bb->fall_through); + if ((curr_bb_fall_through != NULL) && (curr_bb_fall_through != next_bb)) { + OpUnconditionalBranch(&block_label_list_[curr_bb->fall_through]); + } + curr_bb = next_bb; + do { + next_bb = iter.Next(); + } while ((next_bb != NULL) && (next_bb->block_type == kDead)); } - + cu_->NewTimingSplit("Launchpads"); HandleSuspendLaunchPads(); HandleThrowLaunchPads(); HandleIntrinsicLaunchPads(); - - if (!(cu_->disable_opt & (1 << kSafeOptimizations))) { - RemoveRedundantBranches(); - } } } // namespace art diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h index a37ebd173f..4c56b74dc4 100644 --- a/compiler/dex/quick/mir_to_lir.h +++ b/compiler/dex/quick/mir_to_lir.h @@ -30,6 +30,14 @@ namespace art { +/* + * TODO: refactoring pass to move these (and other) typdefs towards usage style of runtime to + * add type safety (see runtime/offsets.h). + */ +typedef uint32_t DexOffset; // Dex offset in code units. +typedef uint16_t NarrowDexOffset; // For use in structs, Dex offsets range from 0 .. 0xffff. +typedef uint32_t CodeOffset; // Native code offset in bytes. + // Set to 1 to measure cost of suspend check. #define NO_SUSPEND 0 @@ -95,6 +103,7 @@ struct BasicBlock; struct CallInfo; struct CompilationUnit; struct MIR; +struct LIR; struct RegLocation; struct RegisterInfo; class MIRGraph; @@ -107,24 +116,36 @@ typedef int (*NextCallInsn)(CompilationUnit*, CallInfo*, int, typedef std::vector<uint8_t> CodeBuffer; +struct UseDefMasks { + uint64_t use_mask; // Resource mask for use. + uint64_t def_mask; // Resource mask for def. +}; + +struct AssemblyInfo { + LIR* pcrel_next; // Chain of LIR nodes needing pc relative fixups. + uint8_t bytes[16]; // Encoded instruction bytes. +}; struct LIR { - int offset; // Offset of this instruction. - int dalvik_offset; // Offset of Dalvik opcode. + CodeOffset offset; // Offset of this instruction. + NarrowDexOffset dalvik_offset; // Offset of Dalvik opcode in code units (16-bit words). + int16_t opcode; LIR* next; LIR* prev; LIR* target; - int opcode; - int operands[5]; // [0..4] = [dest, src1, src2, extra, extra2]. struct { - bool is_nop:1; // LIR is optimized away. - bool pcRelFixup:1; // May need pc-relative fixup. - unsigned int size:5; // Note: size is in bytes. - unsigned int unused:25; + unsigned int alias_info:17; // For Dalvik register disambiguation. + bool is_nop:1; // LIR is optimized away. + unsigned int size:4; // Note: size of encoded instruction is in bytes. + bool use_def_invalid:1; // If true, masks should not be used. + unsigned int generation:1; // Used to track visitation state during fixup pass. + unsigned int fixup:8; // Fixup kind. } flags; - int alias_info; // For Dalvik register & litpool disambiguation. - uint64_t use_mask; // Resource mask for use. - uint64_t def_mask; // Resource mask for def. + union { + UseDefMasks m; // Use & Def masks used during optimization. + AssemblyInfo a; // Instruction encoding used during assembly phase. + } u; + int32_t operands[5]; // [0..4] = [dest, src1, src2, extra, extra2]. }; // Target-specific initialization. @@ -141,7 +162,7 @@ Mir2Lir* X86CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph, // Defines for alias_info (tracks Dalvik register references). #define DECODE_ALIAS_INFO_REG(X) (X & 0xffff) -#define DECODE_ALIAS_INFO_WIDE_FLAG (0x80000000) +#define DECODE_ALIAS_INFO_WIDE_FLAG (0x10000) #define DECODE_ALIAS_INFO_WIDE(X) ((X & DECODE_ALIAS_INFO_WIDE_FLAG) ? 
1 : 0) #define ENCODE_ALIAS_INFO(REG, ISWIDE) (REG | (ISWIDE ? DECODE_ALIAS_INFO_WIDE_FLAG : 0)) @@ -158,36 +179,42 @@ Mir2Lir* X86CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph, #define ENCODE_ALL (~0ULL) #define ENCODE_MEM (ENCODE_DALVIK_REG | ENCODE_LITERAL | \ ENCODE_HEAP_REF | ENCODE_MUST_NOT_ALIAS) + +// Mask to denote sreg as the start of a double. Must not interfere with low 16 bits. +#define STARTING_DOUBLE_SREG 0x10000 + // TODO: replace these macros #define SLOW_FIELD_PATH (cu_->enable_debug & (1 << kDebugSlowFieldPath)) #define SLOW_INVOKE_PATH (cu_->enable_debug & (1 << kDebugSlowInvokePath)) #define SLOW_STRING_PATH (cu_->enable_debug & (1 << kDebugSlowStringPath)) #define SLOW_TYPE_PATH (cu_->enable_debug & (1 << kDebugSlowTypePath)) #define EXERCISE_SLOWEST_STRING_PATH (cu_->enable_debug & (1 << kDebugSlowestStringPath)) -#define is_pseudo_opcode(opcode) (static_cast<int>(opcode) < 0) class Mir2Lir : public Backend { public: - struct SwitchTable { - int offset; - const uint16_t* table; // Original dex table. - int vaddr; // Dalvik offset of switch opcode. - LIR* anchor; // Reference instruction for relative offsets. - LIR** targets; // Array of case targets. + /* + * Auxiliary information describing the location of data embedded in the Dalvik + * byte code stream. + */ + struct EmbeddedData { + CodeOffset offset; // Code offset of data block. + const uint16_t* table; // Original dex data. + DexOffset vaddr; // Dalvik offset of parent opcode. }; - struct FillArrayData { - int offset; - const uint16_t* table; // Original dex table. - int size; - int vaddr; // Dalvik offset of FILL_ARRAY_DATA opcode. + struct FillArrayData : EmbeddedData { + int32_t size; + }; + + struct SwitchTable : EmbeddedData { + LIR* anchor; // Reference instruction for relative offsets. + LIR** targets; // Array of case targets. }; /* Static register use counts */ struct RefCounts { int count; int s_reg; - bool double_start; // Starting v_reg for a double }; /* @@ -241,6 +268,38 @@ class Mir2Lir : public Backend { return code_buffer_.size() / sizeof(code_buffer_[0]); } + bool IsPseudoLirOp(int opcode) { + return (opcode < 0); + } + + /* + * LIR operands are 32-bit integers. Sometimes, (especially for managing + * instructions which require PC-relative fixups), we need the operands to carry + * pointers. To do this, we assign these pointers an index in pointer_storage_, and + * hold that index in the operand array. + * TUNING: If use of these utilities becomes more common on 32-bit builds, it + * may be worth conditionally-compiling a set of identity functions here. + */ + uint32_t WrapPointer(void* pointer) { + uint32_t res = pointer_storage_.Size(); + pointer_storage_.Insert(pointer); + return res; + } + + void* UnwrapPointer(size_t index) { + return pointer_storage_.Get(index); + } + + // strdup(), but allocates from the arena. 
+ char* ArenaStrdup(const char* str) { + size_t len = strlen(str) + 1; + char* res = reinterpret_cast<char*>(arena_->Alloc(len, ArenaAllocator::kAllocMisc)); + if (res != NULL) { + strncpy(res, str, len); + } + return res; + } + // Shared by all targets - implemented in codegen_util.cc void AppendLIR(LIR* lir); void InsertLIRBefore(LIR* current_lir, LIR* new_lir); @@ -250,16 +309,15 @@ class Mir2Lir : public Backend { virtual void Materialize(); virtual CompiledMethod* GetCompiledMethod(); void MarkSafepointPC(LIR* inst); - bool FastInstance(uint32_t field_idx, int& field_offset, bool& is_volatile, bool is_put); + bool FastInstance(uint32_t field_idx, bool is_put, int* field_offset, bool* is_volatile); void SetupResourceMasks(LIR* lir); - void AssembleLIR(); void SetMemRefType(LIR* lir, bool is_load, int mem_type); void AnnotateDalvikRegAccess(LIR* lir, int reg_id, bool is_load, bool is64bit); void SetupRegMask(uint64_t* mask, int reg); void DumpLIRInsn(LIR* arg, unsigned char* base_addr); void DumpPromotionMap(); void CodegenDump(); - LIR* RawLIR(int dalvik_offset, int opcode, int op0 = 0, int op1 = 0, + LIR* RawLIR(DexOffset dalvik_offset, int opcode, int op0 = 0, int op1 = 0, int op2 = 0, int op3 = 0, int op4 = 0, LIR* target = NULL); LIR* NewLIR0(int opcode); LIR* NewLIR1(int opcode, int dest); @@ -274,13 +332,14 @@ class Mir2Lir : public Backend { void ProcessSwitchTables(); void DumpSparseSwitchTable(const uint16_t* table); void DumpPackedSwitchTable(const uint16_t* table); - LIR* MarkBoundary(int offset, const char* inst_str); + void MarkBoundary(DexOffset offset, const char* inst_str); void NopLIR(LIR* lir); + void UnlinkLIR(LIR* lir); bool EvaluateBranch(Instruction::Code opcode, int src1, int src2); bool IsInexpensiveConstant(RegLocation rl_src); ConditionCode FlipComparisonOrder(ConditionCode before); - void DumpMappingTable(const char* table_name, const std::string& descriptor, - const std::string& name, const std::string& signature, + void DumpMappingTable(const char* table_name, const char* descriptor, + const char* name, const Signature& signature, const std::vector<uint32_t>& v); void InstallLiteralPools(); void InstallSwitchTables(); @@ -288,21 +347,18 @@ class Mir2Lir : public Backend { bool VerifyCatchEntries(); void CreateMappingTables(); void CreateNativeGcMap(); - int AssignLiteralOffset(int offset); - int AssignSwitchTablesOffset(int offset); - int AssignFillArrayDataOffset(int offset); - int AssignInsnOffsets(); - void AssignOffsets(); - LIR* InsertCaseLabel(int vaddr, int keyVal); - void MarkPackedCaseLabels(Mir2Lir::SwitchTable *tab_rec); - void MarkSparseCaseLabels(Mir2Lir::SwitchTable *tab_rec); + int AssignLiteralOffset(CodeOffset offset); + int AssignSwitchTablesOffset(CodeOffset offset); + int AssignFillArrayDataOffset(CodeOffset offset); + LIR* InsertCaseLabel(DexOffset vaddr, int keyVal); + void MarkPackedCaseLabels(Mir2Lir::SwitchTable* tab_rec); + void MarkSparseCaseLabels(Mir2Lir::SwitchTable* tab_rec); // Shared by all targets - implemented in local_optimizations.cc void ConvertMemOpIntoMove(LIR* orig_lir, int dest, int src); void ApplyLoadStoreElimination(LIR* head_lir, LIR* tail_lir); void ApplyLoadHoisting(LIR* head_lir, LIR* tail_lir); void ApplyLocalOptimizations(LIR* head_lir, LIR* tail_lir); - void RemoveRedundantBranches(); // Shared by all targets - implemented in ralloc_util.cc int GetSRegHi(int lowSreg); @@ -324,11 +380,9 @@ class Mir2Lir : public Backend { void RecordCorePromotion(int reg, int s_reg); int AllocPreservedCoreReg(int s_reg); 
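The DECODE_ALIAS_INFO_* macros above change because alias_info moves out of its own int field and into the new 17-bit flags.alias_info bitfield of LIR: the register number still occupies the low 16 bits, but the "wide" flag has to move from bit 31 down to bit 16 to fit. A small self-contained sketch of that encoding; kAliasWideFlag, EncodeAliasInfo, AliasInfoReg and AliasInfoWide are illustrative stand-ins for the ENCODE/DECODE_ALIAS_INFO macros, not the in-tree names.

#include <cassert>
#include <cstdint>

// Register number in bits 0-15, "wide (also touches vreg+1)" flag in bit 16,
// so the whole value fits a 17-bit bitfield.
constexpr uint32_t kAliasWideFlag = 0x10000;  // Replaces the old 0x80000000 flag bit.

constexpr uint32_t EncodeAliasInfo(uint32_t reg, bool is_wide) {
  return reg | (is_wide ? kAliasWideFlag : 0u);
}
constexpr uint32_t AliasInfoReg(uint32_t info) { return info & 0xffff; }
constexpr bool AliasInfoWide(uint32_t info) { return (info & kAliasWideFlag) != 0; }

int main() {
  uint32_t info = EncodeAliasInfo(5, true);  // A wide (64-bit) access starting at v5.
  assert(AliasInfoReg(info) == 5);
  assert(AliasInfoWide(info));
  assert(info < (1u << 17));                 // Fits the 17-bit flags.alias_info bitfield.
  return 0;
}

The x86 DumpResourceMask() change near the end of this patch is the matching consumer-side update: it now decodes flags.alias_info with DECODE_ALIAS_INFO_REG/DECODE_ALIAS_INFO_WIDE instead of testing bit 31.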
void RecordFpPromotion(int reg, int s_reg); - int AllocPreservedSingle(int s_reg, bool even); + int AllocPreservedSingle(int s_reg); int AllocPreservedDouble(int s_reg); - int AllocPreservedFPReg(int s_reg, bool double_start); - int AllocTempBody(RegisterInfo* p, int num_regs, int* next_temp, - bool required); + int AllocTempBody(RegisterInfo* p, int num_regs, int* next_temp, bool required); int AllocTempDouble(); int AllocFreeTemp(); int AllocTemp(); @@ -367,13 +421,14 @@ class Mir2Lir : public Backend { RegLocation UpdateRawLoc(RegLocation loc); RegLocation EvalLocWide(RegLocation loc, int reg_class, bool update); RegLocation EvalLoc(RegLocation loc, int reg_class, bool update); - void CountRefs(RefCounts* core_counts, RefCounts* fp_counts); + void CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs); void DumpCounts(const RefCounts* arr, int size, const char* msg); void DoPromotion(); int VRegOffset(int v_reg); int SRegOffset(int s_reg); RegLocation GetReturnWide(bool is_double); RegLocation GetReturn(bool is_float); + RegisterInfo* GetRegInfo(int reg); // Shared by all targets - implemented in gen_common.cc. bool HandleEasyDivRem(Instruction::Code dalvik_opcode, bool is_div, @@ -407,6 +462,9 @@ class Mir2Lir : public Backend { RegLocation rl_dest, RegLocation rl_obj, bool is_long_or_double, bool is_object); void GenIPut(uint32_t field_idx, int opt_flags, OpSize size, RegLocation rl_src, RegLocation rl_obj, bool is_long_or_double, bool is_object); + void GenArrayObjPut(int opt_flags, RegLocation rl_array, RegLocation rl_index, + RegLocation rl_src); + void GenConstClass(uint32_t type_idx, RegLocation rl_dest); void GenConstString(uint32_t string_idx, RegLocation rl_dest); void GenNewInstance(uint32_t type_idx, RegLocation rl_dest); @@ -463,6 +521,10 @@ class Mir2Lir : public Backend { void CallRuntimeHelperImmRegLocationRegLocation(ThreadOffset helper_offset, int arg0, RegLocation arg1, RegLocation arg2, bool safepoint_pc); + void CallRuntimeHelperRegLocationRegLocationRegLocation(ThreadOffset helper_offset, + RegLocation arg0, RegLocation arg1, + RegLocation arg2, + bool safepoint_pc); void GenInvoke(CallInfo* info); void FlushIns(RegLocation* ArgLocs, RegLocation rl_method); int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel, @@ -482,6 +544,7 @@ class Mir2Lir : public Backend { bool GenInlinedCharAt(CallInfo* info); bool GenInlinedStringIsEmptyOrLength(CallInfo* info, bool is_empty); + bool GenInlinedReverseBytes(CallInfo* info, OpSize size); bool GenInlinedAbsInt(CallInfo* info); bool GenInlinedAbsLong(CallInfo* info); bool GenInlinedFloatCvt(CallInfo* info); @@ -550,7 +613,6 @@ class Mir2Lir : public Backend { virtual int AllocTypedTempPair(bool fp_hint, int reg_class) = 0; virtual int S2d(int low_reg, int high_reg) = 0; virtual int TargetReg(SpecialTargetRegister reg) = 0; - virtual RegisterInfo* GetRegInfo(int reg) = 0; virtual RegLocation GetReturnAlt() = 0; virtual RegLocation GetReturnWideAlt() = 0; virtual RegLocation LocCReturn() = 0; @@ -570,9 +632,9 @@ class Mir2Lir : public Backend { virtual void CompilerInitializeRegAlloc() = 0; // Required for target - miscellaneous. 
- virtual AssemblerStatus AssembleInstructions(uintptr_t start_addr) = 0; + virtual void AssembleLIR() = 0; virtual void DumpResourceMask(LIR* lir, uint64_t mask, const char* prefix) = 0; - virtual void SetupTargetResourceMasks(LIR* lir) = 0; + virtual void SetupTargetResourceMasks(LIR* lir, uint64_t flags) = 0; virtual const char* GetTargetInstFmt(int opcode) = 0; virtual const char* GetTargetInstName(int opcode) = 0; virtual std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr) = 0; @@ -602,6 +664,8 @@ class Mir2Lir : public Backend { virtual bool GenInlinedCas32(CallInfo* info, bool need_write_barrier) = 0; virtual bool GenInlinedMinMaxInt(CallInfo* info, bool is_min) = 0; virtual bool GenInlinedSqrt(CallInfo* info) = 0; + virtual bool GenInlinedPeek(CallInfo* info, OpSize size) = 0; + virtual bool GenInlinedPoke(CallInfo* info, OpSize size) = 0; virtual void GenNegLong(RegLocation rl_dest, RegLocation rl_src) = 0; virtual void GenOrLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) = 0; @@ -621,46 +685,40 @@ class Mir2Lir : public Backend { virtual void GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) = 0; virtual void GenExitSequence() = 0; - virtual void GenFillArrayData(uint32_t table_offset, + virtual void GenFillArrayData(DexOffset table_offset, RegLocation rl_src) = 0; virtual void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double) = 0; virtual void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) = 0; virtual void GenSelect(BasicBlock* bb, MIR* mir) = 0; virtual void GenMemBarrier(MemBarrierKind barrier_kind) = 0; - virtual void GenMonitorEnter(int opt_flags, RegLocation rl_src) = 0; - virtual void GenMonitorExit(int opt_flags, RegLocation rl_src) = 0; virtual void GenMoveException(RegLocation rl_dest) = 0; virtual void GenMultiplyByTwoBitMultiplier(RegLocation rl_src, RegLocation rl_result, int lit, int first_bit, int second_bit) = 0; virtual void GenNegDouble(RegLocation rl_dest, RegLocation rl_src) = 0; virtual void GenNegFloat(RegLocation rl_dest, RegLocation rl_src) = 0; - virtual void GenPackedSwitch(MIR* mir, uint32_t table_offset, + virtual void GenPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) = 0; - virtual void GenSparseSwitch(MIR* mir, uint32_t table_offset, + virtual void GenSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) = 0; virtual void GenSpecialCase(BasicBlock* bb, MIR* mir, SpecialCaseHandler special_case) = 0; - virtual void GenArrayObjPut(int opt_flags, RegLocation rl_array, - RegLocation rl_index, RegLocation rl_src, int scale) = 0; virtual void GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, RegLocation rl_index, RegLocation rl_dest, int scale) = 0; virtual void GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, - RegLocation rl_index, RegLocation rl_src, int scale) = 0; + RegLocation rl_index, RegLocation rl_src, int scale, + bool card_mark) = 0; virtual void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_shift) = 0; // Required for target - single operation generators. 
virtual LIR* OpUnconditionalBranch(LIR* target) = 0; - virtual LIR* OpCmpBranch(ConditionCode cond, int src1, int src2, - LIR* target) = 0; - virtual LIR* OpCmpImmBranch(ConditionCode cond, int reg, int check_value, - LIR* target) = 0; + virtual LIR* OpCmpBranch(ConditionCode cond, int src1, int src2, LIR* target) = 0; + virtual LIR* OpCmpImmBranch(ConditionCode cond, int reg, int check_value, LIR* target) = 0; virtual LIR* OpCondBranch(ConditionCode cc, LIR* target) = 0; - virtual LIR* OpDecAndBranch(ConditionCode c_code, int reg, - LIR* target) = 0; + virtual LIR* OpDecAndBranch(ConditionCode c_code, int reg, LIR* target) = 0; virtual LIR* OpFpRegCopy(int r_dest, int r_src) = 0; virtual LIR* OpIT(ConditionCode cond, const char* guide) = 0; virtual LIR* OpMem(OpKind op, int rBase, int disp) = 0; @@ -672,22 +730,23 @@ class Mir2Lir : public Backend { virtual LIR* OpRegMem(OpKind op, int r_dest, int rBase, int offset) = 0; virtual LIR* OpRegReg(OpKind op, int r_dest_src1, int r_src2) = 0; virtual LIR* OpRegRegImm(OpKind op, int r_dest, int r_src1, int value) = 0; - virtual LIR* OpRegRegReg(OpKind op, int r_dest, int r_src1, - int r_src2) = 0; + virtual LIR* OpRegRegReg(OpKind op, int r_dest, int r_src1, int r_src2) = 0; virtual LIR* OpTestSuspend(LIR* target) = 0; virtual LIR* OpThreadMem(OpKind op, ThreadOffset thread_offset) = 0; virtual LIR* OpVldm(int rBase, int count) = 0; virtual LIR* OpVstm(int rBase, int count) = 0; - virtual void OpLea(int rBase, int reg1, int reg2, int scale, - int offset) = 0; - virtual void OpRegCopyWide(int dest_lo, int dest_hi, int src_lo, - int src_hi) = 0; + virtual void OpLea(int rBase, int reg1, int reg2, int scale, int offset) = 0; + virtual void OpRegCopyWide(int dest_lo, int dest_hi, int src_lo, int src_hi) = 0; virtual void OpTlsCmp(ThreadOffset offset, int val) = 0; virtual bool InexpensiveConstantInt(int32_t value) = 0; virtual bool InexpensiveConstantFloat(int32_t value) = 0; virtual bool InexpensiveConstantLong(int64_t value) = 0; virtual bool InexpensiveConstantDouble(int64_t value) = 0; + // May be optimized by targets. + virtual void GenMonitorEnter(int opt_flags, RegLocation rl_src); + virtual void GenMonitorExit(int opt_flags, RegLocation rl_src); + // Temp workaround void Workaround7250540(RegLocation rl_dest, int value); @@ -718,6 +777,7 @@ class Mir2Lir : public Backend { LIR* literal_list_; // Constants. LIR* method_literal_list_; // Method literals requiring patching. LIR* code_literal_list_; // Code literals requiring patching. + LIR* first_fixup_; // Doubly-linked list of LIR nodes requiring fixups. protected: CompilationUnit* const cu_; @@ -727,7 +787,9 @@ class Mir2Lir : public Backend { GrowableArray<LIR*> throw_launchpads_; GrowableArray<LIR*> suspend_launchpads_; GrowableArray<LIR*> intrinsic_launchpads_; - SafeMap<unsigned int, LIR*> boundary_map_; // boundary lookup cache. + GrowableArray<RegisterInfo*> tempreg_info_; + GrowableArray<RegisterInfo*> reginfo_map_; + GrowableArray<void*> pointer_storage_; /* * Holds mapping from native PC to dex PC for safepoints where we may deoptimize. * Native PC is on the return address of the safepointed operation. Dex PC is for @@ -739,8 +801,9 @@ class Mir2Lir : public Backend { * immediately preceed the instruction. */ std::vector<uint32_t> dex2pc_mapping_table_; - int data_offset_; // starting offset of literal pool. - int total_size_; // header + code size. + CodeOffset current_code_offset_; // Working byte offset of machine instructons. 
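The pointer_storage_ member declared above backs the WrapPointer()/UnwrapPointer() helpers introduced earlier in this header: LIR operands are 32-bit integers, so a pointer is appended to pointer_storage_ and only its index travels in operands[]. A minimal standalone sketch of the round trip; PointerTable/Wrap/Unwrap are illustrative names and std::vector stands in for the in-tree GrowableArray.

#include <cstdint>
#include <vector>

// Sketch of the pointer_storage_ indirection: pointers live in a side table,
// LIR operands carry only the (32-bit) index.
class PointerTable {
 public:
  uint32_t Wrap(void* pointer) {
    uint32_t index = static_cast<uint32_t>(storage_.size());
    storage_.push_back(pointer);
    return index;                    // Small enough to live in an int32_t operand.
  }
  void* Unwrap(uint32_t index) const {
    return storage_[index];          // The assembler turns the index back into a pointer.
  }
 private:
  std::vector<void*> storage_;
};

int main() {
  PointerTable pointers;
  int fake_switch_table = 0;                             // Stand-in for a SwitchTable record.
  uint32_t operand = pointers.Wrap(&fake_switch_table);  // What the generator stores in operands[].
  void* recovered = pointers.Unwrap(operand);            // What the encoder does with that operand.
  return (recovered == &fake_switch_table) ? 0 : 1;
}

The x86 portions of this patch use exactly this round trip: GenPackedSwitch() and GenFillArrayData() pass WrapPointer(tab_rec) as an operand, and EmitPcRel()/BuildInsnString() recover the record with UnwrapPointer().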
+ CodeOffset data_offset_; // starting offset of literal pool. + size_t total_size_; // header + code size. LIR* block_label_list_; PromotionMap* promotion_map_; /* @@ -752,7 +815,8 @@ class Mir2Lir : public Backend { * in the CompilationUnit struct before codegen for each instruction. * The low-level LIR creation utilites will pull it from here. Rework this. */ - int current_dalvik_offset_; + DexOffset current_dalvik_offset_; + size_t estimated_native_code_size_; // Just an estimate; used to reserve code_buffer_ size. RegisterPool* reg_pool_; /* * Sanity checking for the register temp tracking. The same ssa diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc index 71b74a4a68..41a57afca1 100644 --- a/compiler/dex/quick/ralloc_util.cc +++ b/compiler/dex/quick/ralloc_util.cc @@ -28,13 +28,9 @@ namespace art { * live until it is either explicitly killed or reallocated. */ void Mir2Lir::ResetRegPool() { - for (int i = 0; i < reg_pool_->num_core_regs; i++) { - if (reg_pool_->core_regs[i].is_temp) - reg_pool_->core_regs[i].in_use = false; - } - for (int i = 0; i < reg_pool_->num_fp_regs; i++) { - if (reg_pool_->FPRegs[i].is_temp) - reg_pool_->FPRegs[i].in_use = false; + GrowableArray<RegisterInfo*>::Iterator iter(&tempreg_info_); + for (RegisterInfo* info = iter.Next(); info != NULL; info = iter.Next()) { + info->in_use = false; } // Reset temp tracking sanity check. if (kIsDebugBuild) { @@ -48,13 +44,21 @@ void Mir2Lir::ResetRegPool() { */ void Mir2Lir::CompilerInitPool(RegisterInfo* regs, int* reg_nums, int num) { for (int i = 0; i < num; i++) { - regs[i].reg = reg_nums[i]; + uint32_t reg_number = reg_nums[i]; + regs[i].reg = reg_number; regs[i].in_use = false; regs[i].is_temp = false; regs[i].pair = false; regs[i].live = false; regs[i].dirty = false; regs[i].s_reg = INVALID_SREG; + size_t map_size = reginfo_map_.Size(); + if (reg_number >= map_size) { + for (uint32_t i = 0; i < ((reg_number - map_size) + 1); i++) { + reginfo_map_.Insert(NULL); + } + } + reginfo_map_.Put(reg_number, ®s[i]); } } @@ -62,10 +66,9 @@ void Mir2Lir::DumpRegPool(RegisterInfo* p, int num_regs) { LOG(INFO) << "================================================"; for (int i = 0; i < num_regs; i++) { LOG(INFO) << StringPrintf( - "R[%d]: T:%d, U:%d, P:%d, p:%d, LV:%d, D:%d, SR:%d, ST:%x, EN:%x", + "R[%d]: T:%d, U:%d, P:%d, p:%d, LV:%d, D:%d, SR:%d", p[i].reg, p[i].is_temp, p[i].in_use, p[i].pair, p[i].partner, - p[i].live, p[i].dirty, p[i].s_reg, reinterpret_cast<uintptr_t>(p[i].def_start), - reinterpret_cast<uintptr_t>(p[i].def_end)); + p[i].live, p[i].dirty, p[i].s_reg); } LOG(INFO) << "================================================"; } @@ -170,17 +173,12 @@ void Mir2Lir::RecordFpPromotion(int reg, int s_reg) { promotion_map_[p_map_idx].FpReg = reg; } -/* - * Reserve a callee-save fp single register. Try to fullfill request for - * even/odd allocation, but go ahead and allocate anything if not - * available. If nothing's available, return -1. - */ -int Mir2Lir::AllocPreservedSingle(int s_reg, bool even) { - int res = -1; +// Reserve a callee-save fp single register. +int Mir2Lir::AllocPreservedSingle(int s_reg) { + int res = -1; // Return code if none available. 
RegisterInfo* FPRegs = reg_pool_->FPRegs; for (int i = 0; i < reg_pool_->num_fp_regs; i++) { - if (!FPRegs[i].is_temp && !FPRegs[i].in_use && - ((FPRegs[i].reg & 0x1) == 0) == even) { + if (!FPRegs[i].is_temp && !FPRegs[i].in_use) { res = FPRegs[i].reg; RecordFpPromotion(res, s_reg); break; @@ -246,26 +244,6 @@ int Mir2Lir::AllocPreservedDouble(int s_reg) { return res; } - -/* - * Reserve a callee-save fp register. If this register can be used - * as the first of a double, attempt to allocate an even pair of fp - * single regs (but if can't still attempt to allocate a single, preferring - * first to allocate an odd register. - */ -int Mir2Lir::AllocPreservedFPReg(int s_reg, bool double_start) { - int res = -1; - if (double_start) { - res = AllocPreservedDouble(s_reg); - } - if (res == -1) { - res = AllocPreservedSingle(s_reg, false /* try odd # */); - } - if (res == -1) - res = AllocPreservedSingle(s_reg, true /* try even # */); - return res; -} - int Mir2Lir::AllocTempBody(RegisterInfo* p, int num_regs, int* next_temp, bool required) { int next = *next_temp; @@ -379,7 +357,7 @@ Mir2Lir::RegisterInfo* Mir2Lir::AllocLiveBody(RegisterInfo* p, int num_regs, int if (s_reg == -1) return NULL; for (int i = 0; i < num_regs; i++) { - if (p[i].live && (p[i].s_reg == s_reg)) { + if ((p[i].s_reg == s_reg) && p[i].live) { if (p[i].is_temp) p[i].in_use = true; return &p[i]; @@ -412,47 +390,16 @@ Mir2Lir::RegisterInfo* Mir2Lir::AllocLive(int s_reg, int reg_class) { } void Mir2Lir::FreeTemp(int reg) { - RegisterInfo* p = reg_pool_->core_regs; - int num_regs = reg_pool_->num_core_regs; - for (int i = 0; i< num_regs; i++) { - if (p[i].reg == reg) { - if (p[i].is_temp) { - p[i].in_use = false; - } - p[i].pair = false; - return; - } - } - p = reg_pool_->FPRegs; - num_regs = reg_pool_->num_fp_regs; - for (int i = 0; i< num_regs; i++) { - if (p[i].reg == reg) { - if (p[i].is_temp) { - p[i].in_use = false; - } - p[i].pair = false; - return; - } + RegisterInfo* p = GetRegInfo(reg); + if (p->is_temp) { + p->in_use = false; } - LOG(FATAL) << "Tried to free a non-existant temp: r" << reg; + p->pair = false; } Mir2Lir::RegisterInfo* Mir2Lir::IsLive(int reg) { - RegisterInfo* p = reg_pool_->core_regs; - int num_regs = reg_pool_->num_core_regs; - for (int i = 0; i< num_regs; i++) { - if (p[i].reg == reg) { - return p[i].live ? &p[i] : NULL; - } - } - p = reg_pool_->FPRegs; - num_regs = reg_pool_->num_fp_regs; - for (int i = 0; i< num_regs; i++) { - if (p[i].reg == reg) { - return p[i].live ? &p[i] : NULL; - } - } - return NULL; + RegisterInfo* p = GetRegInfo(reg); + return p->live ? p : NULL; } Mir2Lir::RegisterInfo* Mir2Lir::IsTemp(int reg) { @@ -476,27 +423,10 @@ bool Mir2Lir::IsDirty(int reg) { * allocated. Use with caution. 
*/ void Mir2Lir::LockTemp(int reg) { - RegisterInfo* p = reg_pool_->core_regs; - int num_regs = reg_pool_->num_core_regs; - for (int i = 0; i< num_regs; i++) { - if (p[i].reg == reg) { - DCHECK(p[i].is_temp); - p[i].in_use = true; - p[i].live = false; - return; - } - } - p = reg_pool_->FPRegs; - num_regs = reg_pool_->num_fp_regs; - for (int i = 0; i< num_regs; i++) { - if (p[i].reg == reg) { - DCHECK(p[i].is_temp); - p[i].in_use = true; - p[i].live = false; - return; - } - } - LOG(FATAL) << "Tried to lock a non-existant temp: r" << reg; + RegisterInfo* p = GetRegInfo(reg); + DCHECK(p->is_temp); + p->in_use = true; + p->live = false; } void Mir2Lir::ResetDef(int reg) { @@ -599,11 +529,13 @@ void Mir2Lir::ResetDefTracking() { } void Mir2Lir::ClobberAllRegs() { - for (int i = 0; i< reg_pool_->num_core_regs; i++) { - ClobberBody(®_pool_->core_regs[i]); - } - for (int i = 0; i< reg_pool_->num_fp_regs; i++) { - ClobberBody(®_pool_->FPRegs[i]); + GrowableArray<RegisterInfo*>::Iterator iter(&tempreg_info_); + for (RegisterInfo* info = iter.Next(); info != NULL; info = iter.Next()) { + info->live = false; + info->s_reg = INVALID_SREG; + info->def_start = NULL; + info->def_end = NULL; + info->pair = false; } } @@ -659,11 +591,13 @@ void Mir2Lir::MarkLive(int reg, int s_reg) { void Mir2Lir::MarkTemp(int reg) { RegisterInfo* info = GetRegInfo(reg); + tempreg_info_.Insert(info); info->is_temp = true; } void Mir2Lir::UnmarkTemp(int reg) { RegisterInfo* info = GetRegInfo(reg); + tempreg_info_.Delete(info); info->is_temp = false; } @@ -834,9 +768,9 @@ RegLocation Mir2Lir::UpdateRawLoc(RegLocation loc) { RegLocation Mir2Lir::EvalLocWide(RegLocation loc, int reg_class, bool update) { DCHECK(loc.wide); - int new_regs; - int low_reg; - int high_reg; + int32_t new_regs; + int32_t low_reg; + int32_t high_reg; loc = UpdateLocWide(loc); @@ -912,18 +846,22 @@ RegLocation Mir2Lir::EvalLoc(RegLocation loc, int reg_class, bool update) { } /* USE SSA names to count references of base Dalvik v_regs. */ -void Mir2Lir::CountRefs(RefCounts* core_counts, RefCounts* fp_counts) { +void Mir2Lir::CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs) { for (int i = 0; i < mir_graph_->GetNumSSARegs(); i++) { RegLocation loc = mir_graph_->reg_location_[i]; RefCounts* counts = loc.fp ? fp_counts : core_counts; int p_map_idx = SRegToPMap(loc.s_reg_low); - // Don't count easily regenerated immediates - if (loc.fp || !IsInexpensiveConstant(loc)) { + if (loc.fp) { + if (loc.wide) { + // Treat doubles as a unit, using upper half of fp_counts array. + counts[p_map_idx + num_regs].count += mir_graph_->GetUseCount(i); + i++; + } else { + counts[p_map_idx].count += mir_graph_->GetUseCount(i); + } + } else if (!IsInexpensiveConstant(loc)) { counts[p_map_idx].count += mir_graph_->GetUseCount(i); } - if (loc.wide && loc.fp && !loc.high_word) { - counts[p_map_idx].double_start = true; - } } } @@ -942,7 +880,11 @@ static int SortCounts(const void *val1, const void *val2) { void Mir2Lir::DumpCounts(const RefCounts* arr, int size, const char* msg) { LOG(INFO) << msg; for (int i = 0; i < size; i++) { - LOG(INFO) << "s_reg[" << arr[i].s_reg << "]: " << arr[i].count; + if ((arr[i].s_reg & STARTING_DOUBLE_SREG) != 0) { + LOG(INFO) << "s_reg[D" << (arr[i].s_reg & ~STARTING_DOUBLE_SREG) << "]: " << arr[i].count; + } else { + LOG(INFO) << "s_reg[" << arr[i].s_reg << "]: " << arr[i].count; + } } } @@ -965,7 +907,7 @@ void Mir2Lir::DoPromotion() { * count based on original Dalvik register name. 
Count refs * separately based on type in order to give allocation * preference to fp doubles - which must be allocated sequential - * physical single fp registers started with an even-numbered + * physical single fp registers starting with an even-numbered * reg. * TUNING: replace with linear scan once we have the ability * to describe register live ranges for GC. @@ -974,7 +916,7 @@ void Mir2Lir::DoPromotion() { static_cast<RefCounts*>(arena_->Alloc(sizeof(RefCounts) * num_regs, ArenaAllocator::kAllocRegAlloc)); RefCounts *FpRegs = - static_cast<RefCounts *>(arena_->Alloc(sizeof(RefCounts) * num_regs, + static_cast<RefCounts *>(arena_->Alloc(sizeof(RefCounts) * num_regs * 2, ArenaAllocator::kAllocRegAlloc)); // Set ssa names for original Dalvik registers for (int i = 0; i < dalvik_regs; i++) { @@ -982,46 +924,49 @@ void Mir2Lir::DoPromotion() { } // Set ssa name for Method* core_regs[dalvik_regs].s_reg = mir_graph_->GetMethodSReg(); - FpRegs[dalvik_regs].s_reg = mir_graph_->GetMethodSReg(); // For consistecy + FpRegs[dalvik_regs].s_reg = mir_graph_->GetMethodSReg(); // For consistecy. + FpRegs[dalvik_regs + num_regs].s_reg = mir_graph_->GetMethodSReg(); // for consistency. // Set ssa names for compiler_temps for (int i = 1; i <= cu_->num_compiler_temps; i++) { CompilerTemp* ct = mir_graph_->compiler_temps_.Get(i); core_regs[dalvik_regs + i].s_reg = ct->s_reg; FpRegs[dalvik_regs + i].s_reg = ct->s_reg; + FpRegs[num_regs + dalvik_regs + i].s_reg = ct->s_reg; } - // Sum use counts of SSA regs by original Dalvik vreg. - CountRefs(core_regs, FpRegs); - - /* - * Ideally, we'd allocate doubles starting with an even-numbered - * register. Bias the counts to try to allocate any vreg that's - * used as the start of a pair first. - */ + // Duplicate in upper half to represent possible fp double starting sregs. for (int i = 0; i < num_regs; i++) { - if (FpRegs[i].double_start) { - FpRegs[i].count *= 2; - } + FpRegs[num_regs + i].s_reg = FpRegs[i].s_reg | STARTING_DOUBLE_SREG; } + // Sum use counts of SSA regs by original Dalvik vreg. + CountRefs(core_regs, FpRegs, num_regs); + + // Sort the count arrays qsort(core_regs, num_regs, sizeof(RefCounts), SortCounts); - qsort(FpRegs, num_regs, sizeof(RefCounts), SortCounts); + qsort(FpRegs, num_regs * 2, sizeof(RefCounts), SortCounts); if (cu_->verbose) { DumpCounts(core_regs, num_regs, "Core regs after sort"); - DumpCounts(FpRegs, num_regs, "Fp regs after sort"); + DumpCounts(FpRegs, num_regs * 2, "Fp regs after sort"); } if (!(cu_->disable_opt & (1 << kPromoteRegs))) { // Promote FpRegs - for (int i = 0; (i < num_regs) && (FpRegs[i].count >= promotion_threshold); i++) { - int p_map_idx = SRegToPMap(FpRegs[i].s_reg); - if (promotion_map_[p_map_idx].fp_location != kLocPhysReg) { - int reg = AllocPreservedFPReg(FpRegs[i].s_reg, - FpRegs[i].double_start); + for (int i = 0; (i < (num_regs * 2)) && (FpRegs[i].count >= promotion_threshold); i++) { + int p_map_idx = SRegToPMap(FpRegs[i].s_reg & ~STARTING_DOUBLE_SREG); + if ((FpRegs[i].s_reg & STARTING_DOUBLE_SREG) != 0) { + if ((promotion_map_[p_map_idx].fp_location != kLocPhysReg) && + (promotion_map_[p_map_idx + 1].fp_location != kLocPhysReg)) { + int low_sreg = FpRegs[i].s_reg & ~STARTING_DOUBLE_SREG; + // Ignore result - if can't alloc double may still be able to alloc singles. + AllocPreservedDouble(low_sreg); + } + } else if (promotion_map_[p_map_idx].fp_location != kLocPhysReg) { + int reg = AllocPreservedSingle(FpRegs[i].s_reg); if (reg < 0) { - break; // No more left + break; // No more left. 
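The reworked DoPromotion() above replaces the old double_start flag with a doubled FpRegs count table: the upper half duplicates each s_reg with STARTING_DOUBLE_SREG (bit 16) set, CountRefs() credits wide fp uses to that upper-half entry as one unit, and the promotion loop then tries AllocPreservedDouble() for tagged entries and AllocPreservedSingle() for plain ones. A toy, standalone illustration of that bookkeeping; kStartingDoubleSreg, RefCount and the std::sort call are simplified stand-ins for the in-tree STARTING_DOUBLE_SREG, RefCounts and qsort code.

#include <algorithm>
#include <cstdio>
#include <vector>

constexpr int kStartingDoubleSreg = 0x10000;   // Mirrors STARTING_DOUBLE_SREG above.

struct RefCount { int count; int s_reg; };

int main() {
  const int num_regs = 4;                       // v0..v3 in a hypothetical method.
  std::vector<RefCount> fp(num_regs * 2);
  for (int i = 0; i < num_regs; ++i) {
    fp[i] = {0, i};                                        // Uses of v<i> as a single float.
    fp[num_regs + i] = {0, i | kStartingDoubleSreg};       // Uses of a double starting at v<i>.
  }
  fp[0].count = 3;                 // v0 used 3 times as a single.
  fp[num_regs + 2].count = 5;      // The double (v2, v3) used 5 times - counted as one unit.

  // Doubles and singles now compete in one sorted list.
  std::sort(fp.begin(), fp.end(),
            [](const RefCount& a, const RefCount& b) { return a.count > b.count; });
  for (const RefCount& rc : fp) {
    if (rc.count == 0) continue;
    if (rc.s_reg & kStartingDoubleSreg) {
      std::printf("D%d: %d uses -> candidate for AllocPreservedDouble\n",
                  rc.s_reg & ~kStartingDoubleSreg, rc.count);
    } else {
      std::printf("v%d: %d uses -> candidate for AllocPreservedSingle\n", rc.s_reg, rc.count);
    }
  }
  return 0;
}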
} } } diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc index e8834320a9..2047f30765 100644 --- a/compiler/dex/quick/x86/assemble_x86.cc +++ b/compiler/dex/quick/x86/assemble_x86.cc @@ -246,6 +246,8 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, UNARY_ENCODING_MAP(Idivmod, 0x7, 0, SETS_CCODES, DaR, kRegRegReg, IS_UNARY_OP | REG_USE0, DaM, kRegRegMem, IS_BINARY_OP | REG_USE0, DaA, kRegRegArray, IS_QUAD_OP | REG_USE01, 0, REG_DEFA_USEA, REG_DEFAD_USEAD, REG_DEFAD_USEAD, "ah:al,ax,", "dx:ax,dx:ax,", "edx:eax,edx:eax,"), #undef UNARY_ENCODING_MAP + { kX86Bswap32R, kRegOpcode, IS_UNARY_OP | REG_DEF0_USE0, { 0, 0, 0x0F, 0xC8, 0, 0, 0, 0 }, "Bswap32R", "!0r" }, + #define EXT_0F_ENCODING_MAP(opname, prefix, opcode, reg_def) \ { kX86 ## opname ## RR, kRegReg, IS_BINARY_OP | reg_def | REG_USE01, { prefix, 0, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RR", "!0r,!1r" }, \ { kX86 ## opname ## RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE01, { prefix, 0, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RM", "!0r,[!1r+!2d]" }, \ @@ -362,6 +364,7 @@ static size_t ComputeSize(const X86EncodingMap* entry, int base, int displacemen } int X86Mir2Lir::GetInsnSize(LIR* lir) { + DCHECK(!IsPseudoLirOp(lir->opcode)); const X86EncodingMap* entry = &X86Mir2Lir::EncodingMap[lir->opcode]; switch (entry->kind) { case kData: @@ -370,6 +373,8 @@ int X86Mir2Lir::GetInsnSize(LIR* lir) { return lir->operands[0]; // length of nop is sole operand case kNullary: return 1; // 1 byte of opcode + case kRegOpcode: // lir operands - 0: reg + return ComputeSize(entry, 0, 0, false) - 1; // substract 1 for modrm case kReg: // lir operands - 0: reg return ComputeSize(entry, 0, 0, false); case kMem: // lir operands - 0: base, 1: disp @@ -513,6 +518,33 @@ void X86Mir2Lir::EmitDisp(int base, int disp) { } } +void X86Mir2Lir::EmitOpRegOpcode(const X86EncodingMap* entry, uint8_t reg) { + if (entry->skeleton.prefix1 != 0) { + code_buffer_.push_back(entry->skeleton.prefix1); + if (entry->skeleton.prefix2 != 0) { + code_buffer_.push_back(entry->skeleton.prefix2); + } + } else { + DCHECK_EQ(0, entry->skeleton.prefix2); + } + code_buffer_.push_back(entry->skeleton.opcode); + if (entry->skeleton.opcode == 0x0F) { + code_buffer_.push_back(entry->skeleton.extra_opcode1); + // There's no 3-byte instruction with +rd + DCHECK_NE(0x38, entry->skeleton.extra_opcode1); + DCHECK_NE(0x3A, entry->skeleton.extra_opcode1); + DCHECK_EQ(0, entry->skeleton.extra_opcode2); + } else { + DCHECK_EQ(0, entry->skeleton.extra_opcode1); + DCHECK_EQ(0, entry->skeleton.extra_opcode2); + } + DCHECK(!X86_FPREG(reg)); + DCHECK_LT(reg, 8); + code_buffer_.back() += reg; + DCHECK_EQ(0, entry->skeleton.ax_opcode); + DCHECK_EQ(0, entry->skeleton.immediate_bytes); +} + void X86Mir2Lir::EmitOpReg(const X86EncodingMap* entry, uint8_t reg) { if (entry->skeleton.prefix1 != 0) { code_buffer_.push_back(entry->skeleton.prefix1); @@ -525,7 +557,7 @@ void X86Mir2Lir::EmitOpReg(const X86EncodingMap* entry, uint8_t reg) { code_buffer_.push_back(entry->skeleton.opcode); if (entry->skeleton.opcode == 0x0F) { code_buffer_.push_back(entry->skeleton.extra_opcode1); - if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode2 == 0x3A) { + if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode1 == 0x3A) { code_buffer_.push_back(entry->skeleton.extra_opcode2); } else { DCHECK_EQ(0, entry->skeleton.extra_opcode2); @@ -582,7 +614,7 @@ void X86Mir2Lir::EmitMemReg(const X86EncodingMap* entry, code_buffer_.push_back(entry->skeleton.opcode); if 
(entry->skeleton.opcode == 0x0F) { code_buffer_.push_back(entry->skeleton.extra_opcode1); - if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode2 == 0x3A) { + if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode1 == 0x3A) { code_buffer_.push_back(entry->skeleton.extra_opcode2); } else { DCHECK_EQ(0, entry->skeleton.extra_opcode2); @@ -595,7 +627,9 @@ void X86Mir2Lir::EmitMemReg(const X86EncodingMap* entry, reg = reg & X86_FP_REG_MASK; } if (reg >= 4) { - DCHECK(strchr(entry->name, '8') == NULL) << entry->name << " " << static_cast<int>(reg) + DCHECK(strchr(entry->name, '8') == NULL || + entry->opcode == kX86Movzx8RM || entry->opcode == kX86Movsx8RM) + << entry->name << " " << static_cast<int>(reg) << " in " << PrettyMethod(cu_->method_idx, *cu_->dex_file); } DCHECK_LT(reg, 8); @@ -631,7 +665,7 @@ void X86Mir2Lir::EmitRegArray(const X86EncodingMap* entry, uint8_t reg, uint8_t code_buffer_.push_back(entry->skeleton.opcode); if (entry->skeleton.opcode == 0x0F) { code_buffer_.push_back(entry->skeleton.extra_opcode1); - if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode2 == 0x3A) { + if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode1 == 0x3A) { code_buffer_.push_back(entry->skeleton.extra_opcode2); } else { DCHECK_EQ(0, entry->skeleton.extra_opcode2); @@ -672,7 +706,7 @@ void X86Mir2Lir::EmitRegThread(const X86EncodingMap* entry, uint8_t reg, int dis code_buffer_.push_back(entry->skeleton.opcode); if (entry->skeleton.opcode == 0x0F) { code_buffer_.push_back(entry->skeleton.extra_opcode1); - if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode2 == 0x3A) { + if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode1 == 0x3A) { code_buffer_.push_back(entry->skeleton.extra_opcode2); } else { DCHECK_EQ(0, entry->skeleton.extra_opcode2); @@ -712,7 +746,7 @@ void X86Mir2Lir::EmitRegReg(const X86EncodingMap* entry, uint8_t reg1, uint8_t r code_buffer_.push_back(entry->skeleton.opcode); if (entry->skeleton.opcode == 0x0F) { code_buffer_.push_back(entry->skeleton.extra_opcode1); - if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode2 == 0x3A) { + if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode1 == 0x3A) { code_buffer_.push_back(entry->skeleton.extra_opcode2); } else { DCHECK_EQ(0, entry->skeleton.extra_opcode2); @@ -749,7 +783,7 @@ void X86Mir2Lir::EmitRegRegImm(const X86EncodingMap* entry, code_buffer_.push_back(entry->skeleton.opcode); if (entry->skeleton.opcode == 0x0F) { code_buffer_.push_back(entry->skeleton.extra_opcode1); - if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode2 == 0x3A) { + if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode1 == 0x3A) { code_buffer_.push_back(entry->skeleton.extra_opcode2); } else { DCHECK_EQ(0, entry->skeleton.extra_opcode2); @@ -808,7 +842,7 @@ void X86Mir2Lir::EmitRegImm(const X86EncodingMap* entry, uint8_t reg, int imm) { code_buffer_.push_back(entry->skeleton.opcode); if (entry->skeleton.opcode == 0x0F) { code_buffer_.push_back(entry->skeleton.extra_opcode1); - if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode2 == 0x3A) { + if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode1 == 0x3A) { code_buffer_.push_back(entry->skeleton.extra_opcode2); } else { DCHECK_EQ(0, entry->skeleton.extra_opcode2); @@ -858,7 +892,7 @@ void X86Mir2Lir::EmitThreadImm(const X86EncodingMap* entry, int disp, int imm) { 
code_buffer_.push_back(entry->skeleton.opcode); if (entry->skeleton.opcode == 0x0F) { code_buffer_.push_back(entry->skeleton.extra_opcode1); - if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode2 == 0x3A) { + if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode1 == 0x3A) { code_buffer_.push_back(entry->skeleton.extra_opcode2); } else { DCHECK_EQ(0, entry->skeleton.extra_opcode2); @@ -923,7 +957,7 @@ void X86Mir2Lir::EmitShiftRegImm(const X86EncodingMap* entry, uint8_t reg, int i } if (entry->skeleton.opcode == 0x0F) { code_buffer_.push_back(entry->skeleton.extra_opcode1); - if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode2 == 0x3A) { + if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode1 == 0x3A) { code_buffer_.push_back(entry->skeleton.extra_opcode2); } else { DCHECK_EQ(0, entry->skeleton.extra_opcode2); @@ -1037,7 +1071,7 @@ void X86Mir2Lir::EmitCallMem(const X86EncodingMap* entry, uint8_t base, int disp code_buffer_.push_back(entry->skeleton.opcode); if (entry->skeleton.opcode == 0x0F) { code_buffer_.push_back(entry->skeleton.extra_opcode1); - if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode2 == 0x3A) { + if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode1 == 0x3A) { code_buffer_.push_back(entry->skeleton.extra_opcode2); } else { DCHECK_EQ(0, entry->skeleton.extra_opcode2); @@ -1066,7 +1100,7 @@ void X86Mir2Lir::EmitCallThread(const X86EncodingMap* entry, int disp) { code_buffer_.push_back(entry->skeleton.opcode); if (entry->skeleton.opcode == 0x0F) { code_buffer_.push_back(entry->skeleton.extra_opcode1); - if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode2 == 0x3A) { + if (entry->skeleton.extra_opcode1 == 0x38 || entry->skeleton.extra_opcode1 == 0x3A) { code_buffer_.push_back(entry->skeleton.extra_opcode2); } else { DCHECK_EQ(0, entry->skeleton.extra_opcode2); @@ -1089,11 +1123,13 @@ void X86Mir2Lir::EmitPcRel(const X86EncodingMap* entry, uint8_t reg, int base_or_table, uint8_t index, int scale, int table_or_disp) { int disp; if (entry->opcode == kX86PcRelLoadRA) { - Mir2Lir::SwitchTable *tab_rec = reinterpret_cast<Mir2Lir::SwitchTable*>(table_or_disp); + Mir2Lir::EmbeddedData *tab_rec = + reinterpret_cast<Mir2Lir::EmbeddedData*>(UnwrapPointer(table_or_disp)); disp = tab_rec->offset; } else { DCHECK(entry->opcode == kX86PcRelAdr); - Mir2Lir::FillArrayData *tab_rec = reinterpret_cast<Mir2Lir::FillArrayData*>(base_or_table); + Mir2Lir::EmbeddedData *tab_rec = + reinterpret_cast<Mir2Lir::EmbeddedData*>(UnwrapPointer(base_or_table)); disp = tab_rec->offset; } if (entry->skeleton.prefix1 != 0) { @@ -1160,13 +1196,13 @@ void X86Mir2Lir::EmitUnimplemented(const X86EncodingMap* entry, LIR* lir) { * instruction. In those cases we will try to substitute a new code * sequence or request that the trace be shortened and retried. 
*/ -AssemblerStatus X86Mir2Lir::AssembleInstructions(uintptr_t start_addr) { +AssemblerStatus X86Mir2Lir::AssembleInstructions(CodeOffset start_addr) { LIR *lir; AssemblerStatus res = kSuccess; // Assume success const bool kVerbosePcFixup = false; for (lir = first_lir_insn_; lir != NULL; lir = NEXT_LIR(lir)) { - if (lir->opcode < 0) { + if (IsPseudoLirOp(lir->opcode)) { continue; } @@ -1174,19 +1210,19 @@ AssemblerStatus X86Mir2Lir::AssembleInstructions(uintptr_t start_addr) { continue; } - if (lir->flags.pcRelFixup) { + if (lir->flags.fixup != kFixupNone) { switch (lir->opcode) { case kX86Jcc8: { LIR *target_lir = lir->target; DCHECK(target_lir != NULL); int delta = 0; - uintptr_t pc; + CodeOffset pc; if (IS_SIMM8(lir->operands[0])) { pc = lir->offset + 2 /* opcode + rel8 */; } else { pc = lir->offset + 6 /* 2 byte opcode + rel32 */; } - uintptr_t target = target_lir->offset; + CodeOffset target = target_lir->offset; delta = target - pc; if (IS_SIMM8(delta) != IS_SIMM8(lir->operands[0])) { if (kVerbosePcFixup) { @@ -1210,8 +1246,8 @@ AssemblerStatus X86Mir2Lir::AssembleInstructions(uintptr_t start_addr) { case kX86Jcc32: { LIR *target_lir = lir->target; DCHECK(target_lir != NULL); - uintptr_t pc = lir->offset + 6 /* 2 byte opcode + rel32 */; - uintptr_t target = target_lir->offset; + CodeOffset pc = lir->offset + 6 /* 2 byte opcode + rel32 */; + CodeOffset target = target_lir->offset; int delta = target - pc; if (kVerbosePcFixup) { LOG(INFO) << "Source:"; @@ -1227,17 +1263,17 @@ AssemblerStatus X86Mir2Lir::AssembleInstructions(uintptr_t start_addr) { LIR *target_lir = lir->target; DCHECK(target_lir != NULL); int delta = 0; - uintptr_t pc; + CodeOffset pc; if (IS_SIMM8(lir->operands[0])) { pc = lir->offset + 2 /* opcode + rel8 */; } else { pc = lir->offset + 5 /* opcode + rel32 */; } - uintptr_t target = target_lir->offset; + CodeOffset target = target_lir->offset; delta = target - pc; if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && delta == 0) { // Useless branch - lir->flags.is_nop = true; + NopLIR(lir); if (kVerbosePcFixup) { LOG(INFO) << "Retry for useless branch at " << lir->offset; } @@ -1256,8 +1292,8 @@ AssemblerStatus X86Mir2Lir::AssembleInstructions(uintptr_t start_addr) { case kX86Jmp32: { LIR *target_lir = lir->target; DCHECK(target_lir != NULL); - uintptr_t pc = lir->offset + 5 /* opcode + rel32 */; - uintptr_t target = target_lir->offset; + CodeOffset pc = lir->offset + 5 /* opcode + rel32 */; + CodeOffset target = target_lir->offset; int delta = target - pc; lir->operands[0] = delta; break; @@ -1298,6 +1334,9 @@ AssemblerStatus X86Mir2Lir::AssembleInstructions(uintptr_t start_addr) { DCHECK_EQ(0, entry->skeleton.ax_opcode); DCHECK_EQ(0, entry->skeleton.immediate_bytes); break; + case kRegOpcode: // lir operands - 0: reg + EmitOpRegOpcode(entry, lir->operands[0]); + break; case kReg: // lir operands - 0: reg EmitOpReg(entry, lir->operands[0]); break; @@ -1385,4 +1424,101 @@ AssemblerStatus X86Mir2Lir::AssembleInstructions(uintptr_t start_addr) { return res; } +// LIR offset assignment. +// TODO: consolidate w/ Arm assembly mechanism. 
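The kX86Jcc8/Jcc32 and Jmp8/Jmp32 fixup cases above deal with the usual chicken-and-egg of x86 branch encoding: the displacement is measured from the end of the branch, but the branch's own length depends on whether that displacement fits in a signed byte, and growing one branch shifts every later offset. A hedged standalone sketch of just the size decision; IsSimm8, BranchForm and PickJccForm are made-up names, and the real fixup code additionally rewrites lir->opcode and asks for a retry rather than returning a struct.

#include <cstdint>
#include <cstdio>

static bool IsSimm8(int32_t v) { return v >= -128 && v <= 127; }

struct BranchForm { int length; int32_t delta; };

// offset: code offset of the branch; target: code offset of its target label.
static BranchForm PickJccForm(int32_t offset, int32_t target) {
  int32_t delta8 = target - (offset + 2);    // rel8 form: opcode + disp8 (2 bytes).
  if (IsSimm8(delta8)) {
    return {2, delta8};
  }
  int32_t delta32 = target - (offset + 6);   // rel32 form: 2-byte opcode + disp32 (6 bytes).
  return {6, delta32};
}

int main() {
  BranchForm near_branch = PickJccForm(0x10, 0x40);    // Fits in rel8.
  BranchForm far_branch = PickJccForm(0x10, 0x2000);   // Needs rel32, so later offsets move.
  std::printf("near: %d bytes, far: %d bytes\n", near_branch.length, far_branch.length);
  return 0;
}

When such a growth (or a branch turned into a nop) is detected, the offset-assignment pass that follows here, AssignInsnOffsets()/AssignOffsets(), is re-run by the AssembleLIR() retry loop.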
+int X86Mir2Lir::AssignInsnOffsets() { + LIR* lir; + int offset = 0; + + for (lir = first_lir_insn_; lir != NULL; lir = NEXT_LIR(lir)) { + lir->offset = offset; + if (LIKELY(!IsPseudoLirOp(lir->opcode))) { + if (!lir->flags.is_nop) { + offset += lir->flags.size; + } + } else if (UNLIKELY(lir->opcode == kPseudoPseudoAlign4)) { + if (offset & 0x2) { + offset += 2; + lir->operands[0] = 1; + } else { + lir->operands[0] = 0; + } + } + /* Pseudo opcodes don't consume space */ + } + return offset; +} + +/* + * Walk the compilation unit and assign offsets to instructions + * and literals and compute the total size of the compiled unit. + * TODO: consolidate w/ Arm assembly mechanism. + */ +void X86Mir2Lir::AssignOffsets() { + int offset = AssignInsnOffsets(); + + /* Const values have to be word aligned */ + offset = (offset + 3) & ~3; + + /* Set up offsets for literals */ + data_offset_ = offset; + + offset = AssignLiteralOffset(offset); + + offset = AssignSwitchTablesOffset(offset); + + offset = AssignFillArrayDataOffset(offset); + + total_size_ = offset; +} + +/* + * Go over each instruction in the list and calculate the offset from the top + * before sending them off to the assembler. If out-of-range branch distance is + * seen rearrange the instructions a bit to correct it. + * TODO: consolidate w/ Arm assembly mechanism. + */ +void X86Mir2Lir::AssembleLIR() { + cu_->NewTimingSplit("Assemble"); + AssignOffsets(); + int assembler_retries = 0; + /* + * Assemble here. Note that we generate code with optimistic assumptions + * and if found now to work, we'll have to redo the sequence and retry. + */ + + while (true) { + AssemblerStatus res = AssembleInstructions(0); + if (res == kSuccess) { + break; + } else { + assembler_retries++; + if (assembler_retries > MAX_ASSEMBLER_RETRIES) { + CodegenDump(); + LOG(FATAL) << "Assembler error - too many retries"; + } + // Redo offsets and try again + AssignOffsets(); + code_buffer_.clear(); + } + } + + cu_->NewTimingSplit("LiteralData"); + // Install literals + InstallLiteralPools(); + + // Install switch tables + InstallSwitchTables(); + + // Install fill array data + InstallFillArrayData(); + + // Create the mapping table and native offset to reference map. + cu_->NewTimingSplit("PcMappingTable"); + CreateMappingTables(); + + cu_->NewTimingSplit("GcMap"); + CreateNativeGcMap(); +} + } // namespace art diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc index 2be2aa9a0e..17924b0f08 100644 --- a/compiler/dex/quick/x86/call_x86.cc +++ b/compiler/dex/quick/x86/call_x86.cc @@ -31,15 +31,15 @@ void X86Mir2Lir::GenSpecialCase(BasicBlock* bb, MIR* mir, * The sparse table in the literal pool is an array of <key,displacement> * pairs. 
*/ -void X86Mir2Lir::GenSparseSwitch(MIR* mir, uint32_t table_offset, +void X86Mir2Lir::GenSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) { const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset; if (cu_->verbose) { DumpSparseSwitchTable(table); } int entries = table[1]; - const int* keys = reinterpret_cast<const int*>(&table[2]); - const int* targets = &keys[entries]; + const int32_t* keys = reinterpret_cast<const int32_t*>(&table[2]); + const int32_t* targets = &keys[entries]; rl_src = LoadValue(rl_src, kCoreReg); for (int i = 0; i < entries; i++) { int key = keys[i]; @@ -66,15 +66,15 @@ void X86Mir2Lir::GenSparseSwitch(MIR* mir, uint32_t table_offset, * jmp r_start_of_method * done: */ -void X86Mir2Lir::GenPackedSwitch(MIR* mir, uint32_t table_offset, +void X86Mir2Lir::GenPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) { const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset; if (cu_->verbose) { DumpPackedSwitchTable(table); } // Add the table to the list - we'll process it later - SwitchTable *tab_rec = - static_cast<SwitchTable *>(arena_->Alloc(sizeof(SwitchTable), ArenaAllocator::kAllocData)); + SwitchTable* tab_rec = + static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), ArenaAllocator::kAllocData)); tab_rec->table = table; tab_rec->vaddr = current_dalvik_offset_; int size = table[1]; @@ -103,8 +103,7 @@ void X86Mir2Lir::GenPackedSwitch(MIR* mir, uint32_t table_offset, // Load the displacement from the switch table int disp_reg = AllocTemp(); - NewLIR5(kX86PcRelLoadRA, disp_reg, start_of_method_reg, keyReg, 2, - reinterpret_cast<uintptr_t>(tab_rec)); + NewLIR5(kX86PcRelLoadRA, disp_reg, start_of_method_reg, keyReg, 2, WrapPointer(tab_rec)); // Add displacement to start of method OpRegReg(kOpAdd, start_of_method_reg, disp_reg); // ..and go! @@ -126,10 +125,10 @@ void X86Mir2Lir::GenPackedSwitch(MIR* mir, uint32_t table_offset, * * Total size is 4+(width * size + 1)/2 16-bit code units. */ -void X86Mir2Lir::GenFillArrayData(uint32_t table_offset, RegLocation rl_src) { +void X86Mir2Lir::GenFillArrayData(DexOffset table_offset, RegLocation rl_src) { const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset; // Add the table to the list - we'll process it later - FillArrayData *tab_rec = + FillArrayData* tab_rec = static_cast<FillArrayData*>(arena_->Alloc(sizeof(FillArrayData), ArenaAllocator::kAllocData)); tab_rec->table = table; tab_rec->vaddr = current_dalvik_offset_; @@ -144,49 +143,12 @@ void X86Mir2Lir::GenFillArrayData(uint32_t table_offset, RegLocation rl_src) { LoadValueDirectFixed(rl_src, rX86_ARG0); // Materialize a pointer to the fill data image NewLIR1(kX86StartOfMethod, rX86_ARG2); - NewLIR2(kX86PcRelAdr, rX86_ARG1, reinterpret_cast<uintptr_t>(tab_rec)); + NewLIR2(kX86PcRelAdr, rX86_ARG1, WrapPointer(tab_rec)); NewLIR2(kX86Add32RR, rX86_ARG1, rX86_ARG2); CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(pHandleFillArrayData), rX86_ARG0, rX86_ARG1, true); } -void X86Mir2Lir::GenMonitorEnter(int opt_flags, RegLocation rl_src) { - FlushAllRegs(); - LoadValueDirectFixed(rl_src, rCX); // Get obj - LockCallTemps(); // Prepare for explicit register usage - GenNullCheck(rl_src.s_reg_low, rCX, opt_flags); - // If lock is unheld, try to grab it quickly with compare and exchange - // TODO: copy and clear hash state? 
- NewLIR2(kX86Mov32RT, rDX, Thread::ThinLockIdOffset().Int32Value()); - NewLIR2(kX86Sal32RI, rDX, LW_LOCK_OWNER_SHIFT); - NewLIR2(kX86Xor32RR, rAX, rAX); - NewLIR3(kX86LockCmpxchgMR, rCX, mirror::Object::MonitorOffset().Int32Value(), rDX); - LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondEq); - // If lock is held, go the expensive route - artLockObjectFromCode(self, obj); - CallRuntimeHelperReg(QUICK_ENTRYPOINT_OFFSET(pLockObject), rCX, true); - branch->target = NewLIR0(kPseudoTargetLabel); -} - -void X86Mir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) { - FlushAllRegs(); - LoadValueDirectFixed(rl_src, rAX); // Get obj - LockCallTemps(); // Prepare for explicit register usage - GenNullCheck(rl_src.s_reg_low, rAX, opt_flags); - // If lock is held by the current thread, clear it to quickly release it - // TODO: clear hash state? - NewLIR2(kX86Mov32RT, rDX, Thread::ThinLockIdOffset().Int32Value()); - NewLIR2(kX86Sal32RI, rDX, LW_LOCK_OWNER_SHIFT); - NewLIR3(kX86Mov32RM, rCX, rAX, mirror::Object::MonitorOffset().Int32Value()); - OpRegReg(kOpSub, rCX, rDX); - LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondNe); - NewLIR3(kX86Mov32MR, rAX, mirror::Object::MonitorOffset().Int32Value(), rCX); - LIR* branch2 = NewLIR1(kX86Jmp8, 0); - branch->target = NewLIR0(kPseudoTargetLabel); - // Otherwise, go the expensive route - UnlockObjectFromCode(obj); - CallRuntimeHelperReg(QUICK_ENTRYPOINT_OFFSET(pUnlockObject), rAX, true); - branch2->target = NewLIR0(kPseudoTargetLabel); -} - void X86Mir2Lir::GenMoveException(RegLocation rl_dest) { int ex_offset = Thread::ExceptionOffset().Int32Value(); RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h index 478654d0b4..1d6509eea5 100644 --- a/compiler/dex/quick/x86/codegen_x86.h +++ b/compiler/dex/quick/x86/codegen_x86.h @@ -52,7 +52,6 @@ class X86Mir2Lir : public Mir2Lir { int AllocTypedTempPair(bool fp_hint, int reg_class); int S2d(int low_reg, int high_reg); int TargetReg(SpecialTargetRegister reg); - RegisterInfo* GetRegInfo(int reg); RegLocation GetReturnAlt(); RegLocation GetReturnWideAlt(); RegLocation LocCReturn(); @@ -72,9 +71,12 @@ class X86Mir2Lir : public Mir2Lir { void CompilerInitializeRegAlloc(); // Required for target - miscellaneous. - AssemblerStatus AssembleInstructions(uintptr_t start_addr); + void AssembleLIR(); + int AssignInsnOffsets(); + void AssignOffsets(); + AssemblerStatus AssembleInstructions(CodeOffset start_addr); void DumpResourceMask(LIR* lir, uint64_t mask, const char* prefix); - void SetupTargetResourceMasks(LIR* lir); + void SetupTargetResourceMasks(LIR* lir, uint64_t flags); const char* GetTargetInstFmt(int opcode); const char* GetTargetInstName(int opcode); std::string BuildInsnString(const char* fmt, LIR* lir, unsigned char* base_addr); @@ -86,14 +88,12 @@ class X86Mir2Lir : public Mir2Lir { // Required for target - Dalvik-level generators. 
void GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); - void GenArrayObjPut(int opt_flags, RegLocation rl_array, - RegLocation rl_index, RegLocation rl_src, int scale); void GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, RegLocation rl_index, RegLocation rl_dest, int scale); void GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, - RegLocation rl_index, RegLocation rl_src, int scale); + RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark); void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest, - RegLocation rl_src1, RegLocation rl_shift); + RegLocation rl_src1, RegLocation rl_shift); void GenMulLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); void GenAddLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); void GenAndLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); @@ -107,6 +107,8 @@ class X86Mir2Lir : public Mir2Lir { bool GenInlinedCas32(CallInfo* info, bool need_write_barrier); bool GenInlinedMinMaxInt(CallInfo* info, bool is_min); bool GenInlinedSqrt(CallInfo* info); + bool GenInlinedPeek(CallInfo* info, OpSize size); + bool GenInlinedPoke(CallInfo* info, OpSize size); void GenNegLong(RegLocation rl_dest, RegLocation rl_src); void GenOrLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); void GenSubLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); @@ -119,20 +121,18 @@ class X86Mir2Lir : public Mir2Lir { void GenDivZeroCheck(int reg_lo, int reg_hi); void GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method); void GenExitSequence(); - void GenFillArrayData(uint32_t table_offset, RegLocation rl_src); + void GenFillArrayData(DexOffset table_offset, RegLocation rl_src); void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double); void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir); void GenSelect(BasicBlock* bb, MIR* mir); void GenMemBarrier(MemBarrierKind barrier_kind); - void GenMonitorEnter(int opt_flags, RegLocation rl_src); - void GenMonitorExit(int opt_flags, RegLocation rl_src); void GenMoveException(RegLocation rl_dest); void GenMultiplyByTwoBitMultiplier(RegLocation rl_src, RegLocation rl_result, int lit, int first_bit, int second_bit); void GenNegDouble(RegLocation rl_dest, RegLocation rl_src); void GenNegFloat(RegLocation rl_dest, RegLocation rl_src); - void GenPackedSwitch(MIR* mir, uint32_t table_offset, RegLocation rl_src); - void GenSparseSwitch(MIR* mir, uint32_t table_offset, RegLocation rl_src); + void GenPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src); + void GenSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src); void GenSpecialCase(BasicBlock* bb, MIR* mir, SpecialCaseHandler special_case); // Single operation generators. 
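GenArrayPut() above now carries an explicit card_mark flag (with the object-array special case, GenArrayObjPut(), moved into common code): when asked to, the target dirties the GC card for the array after the store, which the int_x86.cc hunk further down does via MarkGCCard(rl_src.low_reg, rl_array.low_reg). A generic sketch of what that barrier amounts to; kCardShift, MarkCard and ArrayPutObject are illustrative only and do not reflect ART's CardTable layout or constants.

#include <cstddef>
#include <cstdint>

constexpr size_t kCardShift = 7;                 // Illustrative: one card per 128 heap bytes.
constexpr size_t kHeapSize = 1 << 20;
static uint8_t heap[kHeapSize];
static uint8_t card_table[kHeapSize >> kCardShift];

// Dirty the card covering the object that was written into, so the GC rescans it.
static void MarkCard(const void* written_into) {
  size_t offset = static_cast<const uint8_t*>(written_into) - heap;
  card_table[offset >> kCardShift] = 1;
}

// Roughly what GenArrayPut(..., card_mark == true) boils down to for reference elements.
static void ArrayPutObject(void** array_base, size_t index, void* value) {
  array_base[index] = value;
  if (value != nullptr) {    // A null store needs no mark; the removed x86 GenArrayObjPut
    MarkCard(array_base);    // likewise skipped MarkGCCard for constant-null sources.
  }
}

int main() {
  void** slots = reinterpret_cast<void**>(heap);  // Pretend an object array lives at heap[0].
  int element;
  ArrayPutObject(slots, 3, &element);
  return (card_table[0] == 1) ? 0 : 1;            // Card 0 covers the start of the array.
}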
@@ -172,6 +172,7 @@ class X86Mir2Lir : public Mir2Lir { private: void EmitDisp(int base, int disp); + void EmitOpRegOpcode(const X86EncodingMap* entry, uint8_t reg); void EmitOpReg(const X86EncodingMap* entry, uint8_t reg); void EmitOpMem(const X86EncodingMap* entry, uint8_t base, int disp); void EmitMemReg(const X86EncodingMap* entry, uint8_t base, int disp, uint8_t reg); diff --git a/compiler/dex/quick/x86/fp_x86.cc b/compiler/dex/quick/x86/fp_x86.cc index f736b5e28f..c9d6bfc8cc 100644 --- a/compiler/dex/quick/x86/fp_x86.cc +++ b/compiler/dex/quick/x86/fp_x86.cc @@ -284,8 +284,8 @@ void X86Mir2Lir::GenCmpFP(Instruction::Code code, RegLocation rl_dest, void X86Mir2Lir::GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double) { - LIR* taken = &block_label_list_[bb->taken->id]; - LIR* not_taken = &block_label_list_[bb->fall_through->id]; + LIR* taken = &block_label_list_[bb->taken]; + LIR* not_taken = &block_label_list_[bb->fall_through]; LIR* branch = NULL; RegLocation rl_src1; RegLocation rl_src2; diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc index 14be7dde90..499547bb37 100644 --- a/compiler/dex/quick/x86/int_x86.cc +++ b/compiler/dex/quick/x86/int_x86.cc @@ -166,7 +166,7 @@ void X86Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) { } void X86Mir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) { - LIR* taken = &block_label_list_[bb->taken->id]; + LIR* taken = &block_label_list_[bb->taken]; RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0); RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2); FlushAllRegs(); @@ -236,6 +236,43 @@ bool X86Mir2Lir::GenInlinedMinMaxInt(CallInfo* info, bool is_min) { return true; } +bool X86Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) { + RegLocation rl_src_address = info->args[0]; // long address + rl_src_address.wide = 0; // ignore high half in info->args[1] + RegLocation rl_dest = InlineTarget(info); + RegLocation rl_address = LoadValue(rl_src_address, kCoreReg); + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + if (size == kLong) { + // Unaligned access is allowed on x86. + LoadBaseDispWide(rl_address.low_reg, 0, rl_result.low_reg, rl_result.high_reg, INVALID_SREG); + StoreValueWide(rl_dest, rl_result); + } else { + DCHECK(size == kSignedByte || size == kSignedHalf || size == kWord); + // Unaligned access is allowed on x86. + LoadBaseDisp(rl_address.low_reg, 0, rl_result.low_reg, size, INVALID_SREG); + StoreValue(rl_dest, rl_result); + } + return true; +} + +bool X86Mir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) { + RegLocation rl_src_address = info->args[0]; // long address + rl_src_address.wide = 0; // ignore high half in info->args[1] + RegLocation rl_src_value = info->args[2]; // [size] value + RegLocation rl_address = LoadValue(rl_src_address, kCoreReg); + if (size == kLong) { + // Unaligned access is allowed on x86. + RegLocation rl_value = LoadValueWide(rl_src_value, kCoreReg); + StoreBaseDispWide(rl_address.low_reg, 0, rl_value.low_reg, rl_value.high_reg); + } else { + DCHECK(size == kSignedByte || size == kSignedHalf || size == kWord); + // Unaligned access is allowed on x86. 
+ RegLocation rl_value = LoadValue(rl_src_value, kCoreReg); + StoreBaseDisp(rl_address.low_reg, 0, rl_value.low_reg, size); + } + return true; +} + void X86Mir2Lir::OpLea(int rBase, int reg1, int reg2, int scale, int offset) { NewLIR5(kX86Lea32RA, rBase, reg1, reg2, scale, offset); } @@ -419,7 +456,7 @@ void X86Mir2Lir::OpRegThreadMem(OpKind op, int r_dest, ThreadOffset thread_offse * Generate array load */ void X86Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, - RegLocation rl_index, RegLocation rl_dest, int scale) { + RegLocation rl_index, RegLocation rl_dest, int scale) { RegisterClass reg_class = oat_reg_class_by_size(size); int len_offset = mirror::Array::LengthOffset().Int32Value(); int data_offset; @@ -466,7 +503,7 @@ void X86Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, * */ void X86Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, - RegLocation rl_index, RegLocation rl_src, int scale) { + RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) { RegisterClass reg_class = oat_reg_class_by_size(size); int len_offset = mirror::Array::LengthOffset().Int32Value(); int data_offset; @@ -502,59 +539,10 @@ void X86Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, StoreBaseIndexedDisp(rl_array.low_reg, rl_index.low_reg, scale, data_offset, rl_src.low_reg, rl_src.high_reg, size, INVALID_SREG); } -} - -/* - * Generate array store - * - */ -void X86Mir2Lir::GenArrayObjPut(int opt_flags, RegLocation rl_array, - RegLocation rl_index, RegLocation rl_src, int scale) { - int len_offset = mirror::Array::LengthOffset().Int32Value(); - int data_offset = mirror::Array::DataOffset(sizeof(mirror::Object*)).Int32Value(); - - FlushAllRegs(); // Use explicit registers - LockCallTemps(); - - int r_value = TargetReg(kArg0); // Register holding value - int r_array_class = TargetReg(kArg1); // Register holding array's Class - int r_array = TargetReg(kArg2); // Register holding array - int r_index = TargetReg(kArg3); // Register holding index into array - - LoadValueDirectFixed(rl_array, r_array); // Grab array - LoadValueDirectFixed(rl_src, r_value); // Grab value - LoadValueDirectFixed(rl_index, r_index); // Grab index - - GenNullCheck(rl_array.s_reg_low, r_array, opt_flags); // NPE? - - // Store of null? - LIR* null_value_check = OpCmpImmBranch(kCondEq, r_value, 0, NULL); - - // Get the array's class. - LoadWordDisp(r_array, mirror::Object::ClassOffset().Int32Value(), r_array_class); - CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(pCanPutArrayElement), r_value, - r_array_class, true); - // Redo LoadValues in case they didn't survive the call. 
- LoadValueDirectFixed(rl_array, r_array); // Reload array - LoadValueDirectFixed(rl_index, r_index); // Reload index - LoadValueDirectFixed(rl_src, r_value); // Reload value - r_array_class = INVALID_REG; - - // Branch here if value to be stored == null - LIR* target = NewLIR0(kPseudoTargetLabel); - null_value_check->target = target; - - // make an extra temp available for card mark below - FreeTemp(TargetReg(kArg1)); - if (!(opt_flags & MIR_IGNORE_RANGE_CHECK)) { - /* if (rl_index >= [rl_array + len_offset]) goto kThrowArrayBounds */ - GenRegMemCheck(kCondUge, r_index, r_array, len_offset, kThrowArrayBounds); - } - StoreBaseIndexedDisp(r_array, r_index, scale, - data_offset, r_value, INVALID_REG, kWord, INVALID_SREG); - FreeTemp(r_index); - if (!mir_graph_->IsConstantNullRef(rl_src)) { - MarkGCCard(r_value, r_array); + if (card_mark) { + // Free rl_index if its a temp. Ensures there are 2 free regs for card mark. + FreeTemp(rl_index.low_reg); + MarkGCCard(rl_src.low_reg, rl_array.low_reg); } } diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc index 26accab360..878fa769b6 100644 --- a/compiler/dex/quick/x86/target_x86.cc +++ b/compiler/dex/quick/x86/target_x86.cc @@ -85,6 +85,8 @@ int X86Mir2Lir::TargetReg(SpecialTargetRegister reg) { case kRet0: res = rX86_RET0; break; case kRet1: res = rX86_RET1; break; case kInvokeTgt: res = rX86_INVOKE_TGT; break; + case kHiddenArg: res = rAX; break; + case kHiddenFpArg: res = fr0; break; case kCount: res = rX86_COUNT; break; } return res; @@ -132,37 +134,36 @@ uint64_t X86Mir2Lir::GetPCUseDefEncoding() { return 0ULL; } -void X86Mir2Lir::SetupTargetResourceMasks(LIR* lir) { +void X86Mir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags) { DCHECK_EQ(cu_->instruction_set, kX86); + DCHECK(!lir->flags.use_def_invalid); // X86-specific resource map setup here. 
- uint64_t flags = X86Mir2Lir::EncodingMap[lir->opcode].flags; - if (flags & REG_USE_SP) { - lir->use_mask |= ENCODE_X86_REG_SP; + lir->u.m.use_mask |= ENCODE_X86_REG_SP; } if (flags & REG_DEF_SP) { - lir->def_mask |= ENCODE_X86_REG_SP; + lir->u.m.def_mask |= ENCODE_X86_REG_SP; } if (flags & REG_DEFA) { - SetupRegMask(&lir->def_mask, rAX); + SetupRegMask(&lir->u.m.def_mask, rAX); } if (flags & REG_DEFD) { - SetupRegMask(&lir->def_mask, rDX); + SetupRegMask(&lir->u.m.def_mask, rDX); } if (flags & REG_USEA) { - SetupRegMask(&lir->use_mask, rAX); + SetupRegMask(&lir->u.m.use_mask, rAX); } if (flags & REG_USEC) { - SetupRegMask(&lir->use_mask, rCX); + SetupRegMask(&lir->u.m.use_mask, rCX); } if (flags & REG_USED) { - SetupRegMask(&lir->use_mask, rDX); + SetupRegMask(&lir->u.m.use_mask, rDX); } } @@ -224,7 +225,7 @@ std::string X86Mir2Lir::BuildInsnString(const char *fmt, LIR *lir, unsigned char buf += StringPrintf("%d", operand); break; case 'p': { - SwitchTable *tab_rec = reinterpret_cast<SwitchTable*>(operand); + EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(operand)); buf += StringPrintf("0x%08x", tab_rec->offset); break; } @@ -239,7 +240,7 @@ std::string X86Mir2Lir::BuildInsnString(const char *fmt, LIR *lir, unsigned char break; case 't': buf += StringPrintf("0x%08x (L%p)", - reinterpret_cast<uint32_t>(base_addr) + reinterpret_cast<uintptr_t>(base_addr) + lir->offset + operand, lir->target); break; default: @@ -275,8 +276,8 @@ void X86Mir2Lir::DumpResourceMask(LIR *x86LIR, uint64_t mask, const char *prefix } /* Memory bits */ if (x86LIR && (mask & ENCODE_DALVIK_REG)) { - sprintf(buf + strlen(buf), "dr%d%s", x86LIR->alias_info & 0xffff, - (x86LIR->alias_info & 0x80000000) ? "(+1)" : ""); + sprintf(buf + strlen(buf), "dr%d%s", DECODE_ALIAS_INFO_REG(x86LIR->flags.alias_info), + (DECODE_ALIAS_INFO_WIDE(x86LIR->flags.alias_info)) ? "(+1)" : ""); } if (mask & ENCODE_LITERAL) { strcat(buf, "lit "); @@ -375,11 +376,6 @@ RegLocation X86Mir2Lir::GetReturnAlt() { return res; } -X86Mir2Lir::RegisterInfo* X86Mir2Lir::GetRegInfo(int reg) { - return X86_FPREG(reg) ? 
&reg_pool_->FPRegs[reg & X86_FP_REG_MASK] - : &reg_pool_->core_regs[reg]; -} - /* To be used when explicitly managing register use */ void X86Mir2Lir::LockCallTemps() { LockTemp(rX86_ARG0); @@ -530,14 +526,17 @@ int X86Mir2Lir::LoadHelper(ThreadOffset offset) { } uint64_t X86Mir2Lir::GetTargetInstFlags(int opcode) { + DCHECK(!IsPseudoLirOp(opcode)); return X86Mir2Lir::EncodingMap[opcode].flags; } const char* X86Mir2Lir::GetTargetInstName(int opcode) { + DCHECK(!IsPseudoLirOp(opcode)); return X86Mir2Lir::EncodingMap[opcode].name; } const char* X86Mir2Lir::GetTargetInstFmt(int opcode) { + DCHECK(!IsPseudoLirOp(opcode)); return X86Mir2Lir::EncodingMap[opcode].fmt; } diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc index c519bfec44..6ec7ebb91a 100644 --- a/compiler/dex/quick/x86/utility_x86.cc +++ b/compiler/dex/quick/x86/utility_x86.cc @@ -117,6 +117,7 @@ LIR* X86Mir2Lir::OpReg(OpKind op, int r_dest_src) { switch (op) { case kOpNeg: opcode = kX86Neg32R; break; case kOpNot: opcode = kX86Not32R; break; + case kOpRev: opcode = kX86Bswap32R; break; case kOpBlx: opcode = kX86CallR; break; default: LOG(FATAL) << "Bad case in OpReg " << op; @@ -161,6 +162,13 @@ LIR* X86Mir2Lir::OpRegReg(OpKind op, int r_dest_src1, int r_src2) { case kOpNeg: OpRegCopy(r_dest_src1, r_src2); return OpReg(kOpNeg, r_dest_src1); + case kOpRev: + OpRegCopy(r_dest_src1, r_src2); + return OpReg(kOpRev, r_dest_src1); + case kOpRevsh: + OpRegCopy(r_dest_src1, r_src2); + OpReg(kOpRev, r_dest_src1); + return OpRegImm(kOpAsr, r_dest_src1, 16); // X86 binary opcodes case kOpSub: opcode = kX86Sub32RR; break; case kOpSbc: opcode = kX86Sbb32RR; break; diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h index 643a3d5b8f..3518131cfe 100644 --- a/compiler/dex/quick/x86/x86_lir.h +++ b/compiler/dex/quick/x86/x86_lir.h @@ -243,7 +243,7 @@ enum X86OpCode { // - lir operands - 0: base, 1: disp, 2: immediate // AI - Array Immediate - opcode [base + index * scale + disp], #immediate // - lir operands - 0: base, 1: index, 2: scale, 3: disp 4: immediate - // TI - Thread Register - opcode fs:[disp], imm - where fs: is equal to Thread::Current() + // TI - Thread Immediate - opcode fs:[disp], imm - where fs: is equal to Thread::Current() // - lir operands - 0: disp, 1: imm #define BinaryOpCode(opcode) \ opcode ## 8MR, opcode ## 8AR, opcode ## 8TR, \ @@ -313,6 +313,7 @@ enum X86OpCode { UnaryOpcode(kX86Imul, DaR, DaM, DaA), UnaryOpcode(kX86Divmod, DaR, DaM, DaA), UnaryOpcode(kX86Idivmod, DaR, DaM, DaA), + kX86Bswap32R, #undef UnaryOpcode #define Binary0fOpCode(opcode) \ opcode ## RR, opcode ## RM, opcode ## RA @@ -381,6 +382,7 @@ enum X86EncodingKind { kData, // Special case for raw data. kNop, // Special case for variable length nop. kNullary, // Opcode that takes no arguments. + kRegOpcode, // Shorter form of R instruction kind (opcode+rd) kReg, kMem, kArray, // R, M and A instruction kinds. kMemReg, kArrayReg, kThreadReg, // MR, AR and TR instruction kinds. kRegReg, kRegMem, kRegArray, kRegThread, // RR, RM, RA and RT instruction kinds. 
diff --git a/compiler/dex/ssa_transformation.cc b/compiler/dex/ssa_transformation.cc index cd1602f674..0d8bd07f40 100644 --- a/compiler/dex/ssa_transformation.cc +++ b/compiler/dex/ssa_transformation.cc @@ -22,7 +22,7 @@ namespace art { void MIRGraph::ClearAllVisitedFlags() { - AllNodesIterator iter(this, false /* not iterative */); + AllNodesIterator iter(this); for (BasicBlock* bb = iter.Next(); bb != NULL; bb = iter.Next()) { bb->visited = false; } @@ -38,18 +38,18 @@ BasicBlock* MIRGraph::NeedsVisit(BasicBlock* bb) { } BasicBlock* MIRGraph::NextUnvisitedSuccessor(BasicBlock* bb) { - BasicBlock* res = NeedsVisit(bb->fall_through); + BasicBlock* res = NeedsVisit(GetBasicBlock(bb->fall_through)); if (res == NULL) { - res = NeedsVisit(bb->taken); + res = NeedsVisit(GetBasicBlock(bb->taken)); if (res == NULL) { - if (bb->successor_block_list.block_list_type != kNotUsed) { - GrowableArray<SuccessorBlockInfo*>::Iterator iterator(bb->successor_block_list.blocks); + if (bb->successor_block_list_type != kNotUsed) { + GrowableArray<SuccessorBlockInfo*>::Iterator iterator(bb->successor_blocks); while (true) { SuccessorBlockInfo *sbi = iterator.Next(); if (sbi == NULL) { break; } - res = NeedsVisit(sbi->block); + res = NeedsVisit(GetBasicBlock(sbi->block)); if (res != NULL) { break; } @@ -63,7 +63,9 @@ BasicBlock* MIRGraph::NextUnvisitedSuccessor(BasicBlock* bb) { void MIRGraph::MarkPreOrder(BasicBlock* block) { block->visited = true; /* Enqueue the pre_order block id */ - dfs_order_->Insert(block->id); + if (block->id != NullBasicBlockId) { + dfs_order_->Insert(block->id); + } } void MIRGraph::RecordDFSOrders(BasicBlock* block) { @@ -79,7 +81,9 @@ void MIRGraph::RecordDFSOrders(BasicBlock* block) { continue; } curr->dfs_id = dfs_post_order_->Size(); - dfs_post_order_->Insert(curr->id); + if (curr->id != NullBasicBlockId) { + dfs_post_order_->Insert(curr->id); + } succ.pop_back(); } } @@ -88,7 +92,8 @@ void MIRGraph::RecordDFSOrders(BasicBlock* block) { void MIRGraph::ComputeDFSOrders() { /* Initialize or reset the DFS pre_order list */ if (dfs_order_ == NULL) { - dfs_order_ = new (arena_) GrowableArray<int>(arena_, GetNumBlocks(), kGrowableArrayDfsOrder); + dfs_order_ = new (arena_) GrowableArray<BasicBlockId>(arena_, GetNumBlocks(), + kGrowableArrayDfsOrder); } else { /* Just reset the used length on the counter */ dfs_order_->Reset(); @@ -96,7 +101,8 @@ void MIRGraph::ComputeDFSOrders() { /* Initialize or reset the DFS post_order list */ if (dfs_post_order_ == NULL) { - dfs_post_order_ = new (arena_) GrowableArray<int>(arena_, GetNumBlocks(), kGrowableArrayDfsPostOrder); + dfs_post_order_ = new (arena_) GrowableArray<BasicBlockId>(arena_, GetNumBlocks(), + kGrowableArrayDfsPostOrder); } else { /* Just reset the used length on the counter */ dfs_post_order_->Reset(); @@ -145,11 +151,11 @@ void MIRGraph::ComputeDefBlockMatrix() { def_block_matrix_[i] = new (arena_) ArenaBitVector(arena_, GetNumBlocks(), false, kBitMapBMatrix); } - AllNodesIterator iter(this, false /* not iterative */); + AllNodesIterator iter(this); for (BasicBlock* bb = iter.Next(); bb != NULL; bb = iter.Next()) { FindLocalLiveIn(bb); } - AllNodesIterator iter2(this, false /* not iterative */); + AllNodesIterator iter2(this); for (BasicBlock* bb = iter2.Next(); bb != NULL; bb = iter2.Next()) { FillDefBlockMatrix(bb); } @@ -169,7 +175,7 @@ void MIRGraph::ComputeDomPostOrderTraversal(BasicBlock* bb) { if (dom_post_order_traversal_ == NULL) { // First time - create the array. 
dom_post_order_traversal_ = - new (arena_) GrowableArray<int>(arena_, num_reachable_blocks_, + new (arena_) GrowableArray<BasicBlockId>(arena_, num_reachable_blocks_, kGrowableArrayDomPostOrderTraversal); } else { dom_post_order_traversal_->Reset(); @@ -177,9 +183,9 @@ void MIRGraph::ComputeDomPostOrderTraversal(BasicBlock* bb) { ClearAllVisitedFlags(); std::vector<std::pair<BasicBlock*, ArenaBitVector::Iterator*> > work_stack; bb->visited = true; - work_stack.push_back(std::make_pair(bb, new (arena_) ArenaBitVector::Iterator(bb->i_dominated))); + work_stack.push_back(std::make_pair(bb, bb->i_dominated->GetIterator())); while (!work_stack.empty()) { - std::pair<BasicBlock*, ArenaBitVector::Iterator*> curr = work_stack.back(); + const std::pair<BasicBlock*, ArenaBitVector::Iterator*>& curr = work_stack.back(); BasicBlock* curr_bb = curr.first; ArenaBitVector::Iterator* curr_idom_iter = curr.second; int bb_idx = curr_idom_iter->Next(); @@ -190,14 +196,17 @@ void MIRGraph::ComputeDomPostOrderTraversal(BasicBlock* bb) { BasicBlock* new_bb = GetBasicBlock(bb_idx); new_bb->visited = true; work_stack.push_back( - std::make_pair(new_bb, new (arena_) ArenaBitVector::Iterator(new_bb->i_dominated))); + std::make_pair(new_bb, new_bb->i_dominated->GetIterator())); } else { // no successor/next - dom_post_order_traversal_->Insert(curr_bb->id); + if (curr_bb->id != NullBasicBlockId) { + dom_post_order_traversal_->Insert(curr_bb->id); + } work_stack.pop_back(); /* hacky loop detection */ - if (curr_bb->taken && curr_bb->dominators->IsBitSet(curr_bb->taken->id)) { + if ((curr_bb->taken != NullBasicBlockId) && curr_bb->dominators->IsBitSet(curr_bb->taken)) { + curr_bb->nesting_depth++; attributes_ |= METHOD_HAS_LOOP; } } @@ -210,7 +219,7 @@ void MIRGraph::CheckForDominanceFrontier(BasicBlock* dom_bb, * TODO - evaluate whether phi will ever need to be inserted into exit * blocks. 
*/ - if (succ_bb->i_dom != dom_bb && + if (succ_bb->i_dom != dom_bb->id && succ_bb->block_type == kDalvikByteCode && succ_bb->hidden == false) { dom_bb->dom_frontier->SetBit(succ_bb->id); @@ -220,20 +229,20 @@ void MIRGraph::CheckForDominanceFrontier(BasicBlock* dom_bb, /* Worker function to compute the dominance frontier */ bool MIRGraph::ComputeDominanceFrontier(BasicBlock* bb) { /* Calculate DF_local */ - if (bb->taken) { - CheckForDominanceFrontier(bb, bb->taken); + if (bb->taken != NullBasicBlockId) { + CheckForDominanceFrontier(bb, GetBasicBlock(bb->taken)); } - if (bb->fall_through) { - CheckForDominanceFrontier(bb, bb->fall_through); + if (bb->fall_through != NullBasicBlockId) { + CheckForDominanceFrontier(bb, GetBasicBlock(bb->fall_through)); } - if (bb->successor_block_list.block_list_type != kNotUsed) { - GrowableArray<SuccessorBlockInfo*>::Iterator iterator(bb->successor_block_list.blocks); + if (bb->successor_block_list_type != kNotUsed) { + GrowableArray<SuccessorBlockInfo*>::Iterator iterator(bb->successor_blocks); while (true) { SuccessorBlockInfo *successor_block_info = iterator.Next(); if (successor_block_info == NULL) { break; } - BasicBlock* succ_bb = successor_block_info->block; + BasicBlock* succ_bb = GetBasicBlock(successor_block_info->block); CheckForDominanceFrontier(bb, succ_bb); } } @@ -306,17 +315,17 @@ int MIRGraph::FindCommonParent(int block1, int block2) { /* Worker function to compute each block's immediate dominator */ bool MIRGraph::ComputeblockIDom(BasicBlock* bb) { /* Special-case entry block */ - if (bb == GetEntryBlock()) { + if ((bb->id == NullBasicBlockId) || (bb == GetEntryBlock())) { return false; } /* Iterate through the predecessors */ - GrowableArray<BasicBlock*>::Iterator iter(bb->predecessors); + GrowableArray<BasicBlockId>::Iterator iter(bb->predecessors); /* Find the first processed predecessor */ int idom = -1; while (true) { - BasicBlock* pred_bb = iter.Next(); + BasicBlock* pred_bb = GetBasicBlock(iter.Next()); CHECK(pred_bb != NULL); if (i_dom_list_[pred_bb->dfs_id] != NOTVISITED) { idom = pred_bb->dfs_id; @@ -326,7 +335,7 @@ bool MIRGraph::ComputeblockIDom(BasicBlock* bb) { /* Scan the rest of the predecessors */ while (true) { - BasicBlock* pred_bb = iter.Next(); + BasicBlock* pred_bb = GetBasicBlock(iter.Next()); if (!pred_bb) { break; } @@ -352,7 +361,7 @@ bool MIRGraph::ComputeBlockDominators(BasicBlock* bb) { if (bb == GetEntryBlock()) { bb->dominators->ClearAllBits(); } else { - bb->dominators->Copy(bb->i_dom->dominators); + bb->dominators->Copy(GetBasicBlock(bb->i_dom)->dominators); } bb->dominators->SetBit(bb->id); return false; @@ -364,7 +373,7 @@ bool MIRGraph::SetDominators(BasicBlock* bb) { DCHECK_NE(idom_dfs_idx, NOTVISITED); int i_dom_idx = dfs_post_order_->Get(idom_dfs_idx); BasicBlock* i_dom = GetBasicBlock(i_dom_idx); - bb->i_dom = i_dom; + bb->i_dom = i_dom->id; /* Add bb to the i_dominated set of the immediate dominator block */ i_dom->i_dominated->SetBit(bb->id); } @@ -377,7 +386,7 @@ void MIRGraph::ComputeDominators() { int num_total_blocks = GetBasicBlockListCount(); /* Initialize domination-related data structures */ - ReachableNodesIterator iter(this, false /* not iterative */); + PreOrderDfsIterator iter(this); for (BasicBlock* bb = iter.Next(); bb != NULL; bb = iter.Next()) { InitializeDominationInfo(bb); } @@ -396,7 +405,7 @@ void MIRGraph::ComputeDominators() { i_dom_list_[GetEntryBlock()->dfs_id] = GetEntryBlock()->dfs_id; /* Compute the immediate dominators */ - ReversePostOrderDfsIterator iter2(this, true 
/* iterative */); + RepeatingReversePostOrderDfsIterator iter2(this); bool change = false; for (BasicBlock* bb = iter2.Next(false); bb != NULL; bb = iter2.Next(change)) { change = ComputeblockIDom(bb); @@ -412,21 +421,21 @@ void MIRGraph::ComputeDominators() { } else { temp_block_v_->ClearAllBits(); } - GetEntryBlock()->i_dom = NULL; + GetEntryBlock()->i_dom = 0; - ReachableNodesIterator iter3(this, false /* not iterative */); + PreOrderDfsIterator iter3(this); for (BasicBlock* bb = iter3.Next(); bb != NULL; bb = iter3.Next()) { SetDominators(bb); } - ReversePostOrderDfsIterator iter4(this, false /* not iterative */); + ReversePostOrderDfsIterator iter4(this); for (BasicBlock* bb = iter4.Next(); bb != NULL; bb = iter4.Next()) { ComputeBlockDominators(bb); } // Compute the dominance frontier for each block. ComputeDomPostOrderTraversal(GetEntryBlock()); - PostOrderDOMIterator iter5(this, false /* not iterative */); + PostOrderDOMIterator iter5(this); for (BasicBlock* bb = iter5.Next(); bb != NULL; bb = iter5.Next()) { ComputeDominanceFrontier(bb); } @@ -463,20 +472,22 @@ bool MIRGraph::ComputeBlockLiveIns(BasicBlock* bb) { return false; } temp_dalvik_register_v->Copy(bb->data_flow_info->live_in_v); - if (bb->taken && bb->taken->data_flow_info) - ComputeSuccLineIn(temp_dalvik_register_v, bb->taken->data_flow_info->live_in_v, + BasicBlock* bb_taken = GetBasicBlock(bb->taken); + BasicBlock* bb_fall_through = GetBasicBlock(bb->fall_through); + if (bb_taken && bb_taken->data_flow_info) + ComputeSuccLineIn(temp_dalvik_register_v, bb_taken->data_flow_info->live_in_v, bb->data_flow_info->def_v); - if (bb->fall_through && bb->fall_through->data_flow_info) - ComputeSuccLineIn(temp_dalvik_register_v, bb->fall_through->data_flow_info->live_in_v, + if (bb_fall_through && bb_fall_through->data_flow_info) + ComputeSuccLineIn(temp_dalvik_register_v, bb_fall_through->data_flow_info->live_in_v, bb->data_flow_info->def_v); - if (bb->successor_block_list.block_list_type != kNotUsed) { - GrowableArray<SuccessorBlockInfo*>::Iterator iterator(bb->successor_block_list.blocks); + if (bb->successor_block_list_type != kNotUsed) { + GrowableArray<SuccessorBlockInfo*>::Iterator iterator(bb->successor_blocks); while (true) { SuccessorBlockInfo *successor_block_info = iterator.Next(); if (successor_block_info == NULL) { break; } - BasicBlock* succ_bb = successor_block_info->block; + BasicBlock* succ_bb = GetBasicBlock(successor_block_info->block); if (succ_bb->data_flow_info) { ComputeSuccLineIn(temp_dalvik_register_v, succ_bb->data_flow_info->live_in_v, bb->data_flow_info->def_v); @@ -503,7 +514,7 @@ void MIRGraph::InsertPhiNodes() { temp_dalvik_register_v_ = new (arena_) ArenaBitVector(arena_, cu_->num_dalvik_registers, false, kBitMapRegisterV); - PostOrderDfsIterator iter(this, true /* iterative */); + RepeatingPostOrderDfsIterator iter(this); bool change = false; for (BasicBlock* bb = iter.Next(false); bb != NULL; bb = iter.Next(change)) { change = ComputeBlockLiveIns(bb); @@ -579,50 +590,37 @@ void MIRGraph::InsertPhiNodes() { * predecessor blocks */ bool MIRGraph::InsertPhiNodeOperands(BasicBlock* bb) { - MIR *mir; - std::vector<int> uses; - std::vector<int> incoming_arc; - /* Phi nodes are at the beginning of each block */ - for (mir = bb->first_mir_insn; mir != NULL; mir = mir->next) { + for (MIR* mir = bb->first_mir_insn; mir != NULL; mir = mir->next) { if (mir->dalvikInsn.opcode != static_cast<Instruction::Code>(kMirOpPhi)) return true; int ssa_reg = mir->ssa_rep->defs[0]; DCHECK_GE(ssa_reg, 0); // Shouldn't 
see compiler temps here int v_reg = SRegToVReg(ssa_reg); - uses.clear(); - incoming_arc.clear(); - /* Iterate through the predecessors */ - GrowableArray<BasicBlock*>::Iterator iter(bb->predecessors); + GrowableArray<BasicBlockId>::Iterator iter(bb->predecessors); + size_t num_uses = bb->predecessors->Size(); + mir->ssa_rep->num_uses = num_uses; + int* uses = static_cast<int*>(arena_->Alloc(sizeof(int) * num_uses, + ArenaAllocator::kAllocDFInfo)); + mir->ssa_rep->uses = uses; + mir->ssa_rep->fp_use = + static_cast<bool*>(arena_->Alloc(sizeof(bool) * num_uses, ArenaAllocator::kAllocDFInfo)); + BasicBlockId* incoming = + static_cast<BasicBlockId*>(arena_->Alloc(sizeof(BasicBlockId) * num_uses, + ArenaAllocator::kAllocDFInfo)); + mir->meta.phi_incoming = incoming; + int idx = 0; while (true) { - BasicBlock* pred_bb = iter.Next(); + BasicBlock* pred_bb = GetBasicBlock(iter.Next()); if (!pred_bb) { break; } int ssa_reg = pred_bb->data_flow_info->vreg_to_ssa_map[v_reg]; - uses.push_back(ssa_reg); - incoming_arc.push_back(pred_bb->id); - } - - /* Count the number of SSA registers for a Dalvik register */ - int num_uses = uses.size(); - mir->ssa_rep->num_uses = num_uses; - mir->ssa_rep->uses = - static_cast<int*>(arena_->Alloc(sizeof(int) * num_uses, ArenaAllocator::kAllocDFInfo)); - mir->ssa_rep->fp_use = - static_cast<bool*>(arena_->Alloc(sizeof(bool) * num_uses, ArenaAllocator::kAllocDFInfo)); - int* incoming = - static_cast<int*>(arena_->Alloc(sizeof(int) * num_uses, ArenaAllocator::kAllocDFInfo)); - // TODO: Ugly, rework (but don't burden each MIR/LIR for Phi-only needs) - mir->dalvikInsn.vB = reinterpret_cast<uintptr_t>(incoming); - - /* Set the uses array for the phi node */ - int *use_ptr = mir->ssa_rep->uses; - for (int i = 0; i < num_uses; i++) { - *use_ptr++ = uses[i]; - *incoming++ = incoming_arc[i]; + uses[idx] = ssa_reg; + incoming[idx] = pred_bb->id; + idx++; } } @@ -644,24 +642,24 @@ void MIRGraph::DoDFSPreOrderSSARename(BasicBlock* block) { static_cast<int*>(arena_->Alloc(map_size, ArenaAllocator::kAllocDalvikToSSAMap)); memcpy(saved_ssa_map, vreg_to_ssa_map_, map_size); - if (block->fall_through) { - DoDFSPreOrderSSARename(block->fall_through); + if (block->fall_through != NullBasicBlockId) { + DoDFSPreOrderSSARename(GetBasicBlock(block->fall_through)); /* Restore SSA map snapshot */ memcpy(vreg_to_ssa_map_, saved_ssa_map, map_size); } - if (block->taken) { - DoDFSPreOrderSSARename(block->taken); + if (block->taken != NullBasicBlockId) { + DoDFSPreOrderSSARename(GetBasicBlock(block->taken)); /* Restore SSA map snapshot */ memcpy(vreg_to_ssa_map_, saved_ssa_map, map_size); } - if (block->successor_block_list.block_list_type != kNotUsed) { - GrowableArray<SuccessorBlockInfo*>::Iterator iterator(block->successor_block_list.blocks); + if (block->successor_block_list_type != kNotUsed) { + GrowableArray<SuccessorBlockInfo*>::Iterator iterator(block->successor_blocks); while (true) { SuccessorBlockInfo *successor_block_info = iterator.Next(); if (successor_block_info == NULL) { break; } - BasicBlock* succ_bb = successor_block_info->block; + BasicBlock* succ_bb = GetBasicBlock(successor_block_info->block); DoDFSPreOrderSSARename(succ_bb); /* Restore SSA map snapshot */ memcpy(vreg_to_ssa_map_, saved_ssa_map, map_size); @@ -700,7 +698,7 @@ void MIRGraph::SSATransformation() { new (arena_) ArenaBitVector(arena_, GetNumSSARegs(), false, kBitMapTempSSARegisterV); /* Insert phi-operands with latest SSA names from predecessor blocks */ - ReachableNodesIterator iter2(this, false /* not 
iterative */); + PreOrderDfsIterator iter2(this); for (BasicBlock* bb = iter2.Next(); bb != NULL; bb = iter2.Next()) { InsertPhiNodeOperands(bb); } diff --git a/compiler/dex/vreg_analysis.cc b/compiler/dex/vreg_analysis.cc index 07f37bbbbb..32fac0b393 100644 --- a/compiler/dex/vreg_analysis.cc +++ b/compiler/dex/vreg_analysis.cc @@ -29,6 +29,16 @@ bool MIRGraph::SetFp(int index, bool is_fp) { return change; } +bool MIRGraph::SetFp(int index) { + bool change = false; + if (!reg_location_[index].fp) { + reg_location_[index].fp = true; + reg_location_[index].defined = true; + change = true; + } + return change; +} + bool MIRGraph::SetCore(int index, bool is_core) { bool change = false; if (is_core && !reg_location_[index].defined) { @@ -39,6 +49,16 @@ bool MIRGraph::SetCore(int index, bool is_core) { return change; } +bool MIRGraph::SetCore(int index) { + bool change = false; + if (!reg_location_[index].defined) { + reg_location_[index].core = true; + reg_location_[index].defined = true; + change = true; + } + return change; +} + bool MIRGraph::SetRef(int index, bool is_ref) { bool change = false; if (is_ref && !reg_location_[index].defined) { @@ -49,6 +69,16 @@ bool MIRGraph::SetRef(int index, bool is_ref) { return change; } +bool MIRGraph::SetRef(int index) { + bool change = false; + if (!reg_location_[index].defined) { + reg_location_[index].ref = true; + reg_location_[index].defined = true; + change = true; + } + return change; +} + bool MIRGraph::SetWide(int index, bool is_wide) { bool change = false; if (is_wide && !reg_location_[index].wide) { @@ -58,6 +88,15 @@ bool MIRGraph::SetWide(int index, bool is_wide) { return change; } +bool MIRGraph::SetWide(int index) { + bool change = false; + if (!reg_location_[index].wide) { + reg_location_[index].wide = true; + change = true; + } + return change; +} + bool MIRGraph::SetHigh(int index, bool is_high) { bool change = false; if (is_high && !reg_location_[index].high_word) { @@ -67,6 +106,16 @@ bool MIRGraph::SetHigh(int index, bool is_high) { return change; } +bool MIRGraph::SetHigh(int index) { + bool change = false; + if (!reg_location_[index].high_word) { + reg_location_[index].high_word = true; + change = true; + } + return change; +} + + /* * Infer types and sizes. We don't need to track change on sizes, * as it doesn't propagate. 
We're guaranteed at least one pass through @@ -84,21 +133,23 @@ bool MIRGraph::InferTypeAndSize(BasicBlock* bb) { SSARepresentation *ssa_rep = mir->ssa_rep; if (ssa_rep) { int attrs = oat_data_flow_attributes_[mir->dalvikInsn.opcode]; + const int* uses = ssa_rep->uses; + const int* defs = ssa_rep->defs; // Handle defs if (attrs & DF_DA) { if (attrs & DF_CORE_A) { - changed |= SetCore(ssa_rep->defs[0], true); + changed |= SetCore(defs[0]); } if (attrs & DF_REF_A) { - changed |= SetRef(ssa_rep->defs[0], true); + changed |= SetRef(defs[0]); } if (attrs & DF_A_WIDE) { - reg_location_[ssa_rep->defs[0]].wide = true; - reg_location_[ssa_rep->defs[1]].wide = true; - reg_location_[ssa_rep->defs[1]].high_word = true; - DCHECK_EQ(SRegToVReg(ssa_rep->defs[0])+1, - SRegToVReg(ssa_rep->defs[1])); + reg_location_[defs[0]].wide = true; + reg_location_[defs[1]].wide = true; + reg_location_[defs[1]].high_word = true; + DCHECK_EQ(SRegToVReg(defs[0])+1, + SRegToVReg(defs[1])); } } @@ -106,17 +157,17 @@ bool MIRGraph::InferTypeAndSize(BasicBlock* bb) { int next = 0; if (attrs & DF_UA) { if (attrs & DF_CORE_A) { - changed |= SetCore(ssa_rep->uses[next], true); + changed |= SetCore(uses[next]); } if (attrs & DF_REF_A) { - changed |= SetRef(ssa_rep->uses[next], true); + changed |= SetRef(uses[next]); } if (attrs & DF_A_WIDE) { - reg_location_[ssa_rep->uses[next]].wide = true; - reg_location_[ssa_rep->uses[next + 1]].wide = true; - reg_location_[ssa_rep->uses[next + 1]].high_word = true; - DCHECK_EQ(SRegToVReg(ssa_rep->uses[next])+1, - SRegToVReg(ssa_rep->uses[next + 1])); + reg_location_[uses[next]].wide = true; + reg_location_[uses[next + 1]].wide = true; + reg_location_[uses[next + 1]].high_word = true; + DCHECK_EQ(SRegToVReg(uses[next])+1, + SRegToVReg(uses[next + 1])); next += 2; } else { next++; @@ -124,17 +175,17 @@ bool MIRGraph::InferTypeAndSize(BasicBlock* bb) { } if (attrs & DF_UB) { if (attrs & DF_CORE_B) { - changed |= SetCore(ssa_rep->uses[next], true); + changed |= SetCore(uses[next]); } if (attrs & DF_REF_B) { - changed |= SetRef(ssa_rep->uses[next], true); + changed |= SetRef(uses[next]); } if (attrs & DF_B_WIDE) { - reg_location_[ssa_rep->uses[next]].wide = true; - reg_location_[ssa_rep->uses[next + 1]].wide = true; - reg_location_[ssa_rep->uses[next + 1]].high_word = true; - DCHECK_EQ(SRegToVReg(ssa_rep->uses[next])+1, - SRegToVReg(ssa_rep->uses[next + 1])); + reg_location_[uses[next]].wide = true; + reg_location_[uses[next + 1]].wide = true; + reg_location_[uses[next + 1]].high_word = true; + DCHECK_EQ(SRegToVReg(uses[next])+1, + SRegToVReg(uses[next + 1])); next += 2; } else { next++; @@ -142,17 +193,17 @@ bool MIRGraph::InferTypeAndSize(BasicBlock* bb) { } if (attrs & DF_UC) { if (attrs & DF_CORE_C) { - changed |= SetCore(ssa_rep->uses[next], true); + changed |= SetCore(uses[next]); } if (attrs & DF_REF_C) { - changed |= SetRef(ssa_rep->uses[next], true); + changed |= SetRef(uses[next]); } if (attrs & DF_C_WIDE) { - reg_location_[ssa_rep->uses[next]].wide = true; - reg_location_[ssa_rep->uses[next + 1]].wide = true; - reg_location_[ssa_rep->uses[next + 1]].high_word = true; - DCHECK_EQ(SRegToVReg(ssa_rep->uses[next])+1, - SRegToVReg(ssa_rep->uses[next + 1])); + reg_location_[uses[next]].wide = true; + reg_location_[uses[next + 1]].wide = true; + reg_location_[uses[next + 1]].high_word = true; + DCHECK_EQ(SRegToVReg(uses[next])+1, + SRegToVReg(uses[next + 1])); } } @@ -162,27 +213,27 @@ bool MIRGraph::InferTypeAndSize(BasicBlock* bb) { (mir->dalvikInsn.opcode == Instruction::RETURN_OBJECT)) { 
switch (cu_->shorty[0]) { case 'I': - changed |= SetCore(ssa_rep->uses[0], true); + changed |= SetCore(uses[0]); break; case 'J': - changed |= SetCore(ssa_rep->uses[0], true); - changed |= SetCore(ssa_rep->uses[1], true); - reg_location_[ssa_rep->uses[0]].wide = true; - reg_location_[ssa_rep->uses[1]].wide = true; - reg_location_[ssa_rep->uses[1]].high_word = true; + changed |= SetCore(uses[0]); + changed |= SetCore(uses[1]); + reg_location_[uses[0]].wide = true; + reg_location_[uses[1]].wide = true; + reg_location_[uses[1]].high_word = true; break; case 'F': - changed |= SetFp(ssa_rep->uses[0], true); + changed |= SetFp(uses[0]); break; case 'D': - changed |= SetFp(ssa_rep->uses[0], true); - changed |= SetFp(ssa_rep->uses[1], true); - reg_location_[ssa_rep->uses[0]].wide = true; - reg_location_[ssa_rep->uses[1]].wide = true; - reg_location_[ssa_rep->uses[1]].high_word = true; + changed |= SetFp(uses[0]); + changed |= SetFp(uses[1]); + reg_location_[uses[0]].wide = true; + reg_location_[uses[1]].wide = true; + reg_location_[uses[1]].high_word = true; break; case 'L': - changed |= SetRef(ssa_rep->uses[0], true); + changed |= SetRef(uses[0]); break; default: break; } @@ -206,10 +257,10 @@ bool MIRGraph::InferTypeAndSize(BasicBlock* bb) { SSARepresentation* tgt_rep = move_result_mir->ssa_rep; DCHECK(tgt_rep != NULL); tgt_rep->fp_def[0] = true; - changed |= SetFp(tgt_rep->defs[0], true); + changed |= SetFp(tgt_rep->defs[0]); if (shorty[0] == 'D') { tgt_rep->fp_def[1] = true; - changed |= SetFp(tgt_rep->defs[1], true); + changed |= SetFp(tgt_rep->defs[1]); } } } @@ -217,8 +268,8 @@ bool MIRGraph::InferTypeAndSize(BasicBlock* bb) { // If this is a non-static invoke, mark implicit "this" if (((mir->dalvikInsn.opcode != Instruction::INVOKE_STATIC) && (mir->dalvikInsn.opcode != Instruction::INVOKE_STATIC_RANGE))) { - reg_location_[ssa_rep->uses[next]].defined = true; - reg_location_[ssa_rep->uses[next]].ref = true; + reg_location_[uses[next]].defined = true; + reg_location_[uses[next]].ref = true; next++; } uint32_t cpos = 1; @@ -229,28 +280,28 @@ bool MIRGraph::InferTypeAndSize(BasicBlock* bb) { case 'D': ssa_rep->fp_use[i] = true; ssa_rep->fp_use[i+1] = true; - reg_location_[ssa_rep->uses[i]].wide = true; - reg_location_[ssa_rep->uses[i+1]].wide = true; - reg_location_[ssa_rep->uses[i+1]].high_word = true; - DCHECK_EQ(SRegToVReg(ssa_rep->uses[i])+1, SRegToVReg(ssa_rep->uses[i+1])); + reg_location_[uses[i]].wide = true; + reg_location_[uses[i+1]].wide = true; + reg_location_[uses[i+1]].high_word = true; + DCHECK_EQ(SRegToVReg(uses[i])+1, SRegToVReg(uses[i+1])); i++; break; case 'J': - reg_location_[ssa_rep->uses[i]].wide = true; - reg_location_[ssa_rep->uses[i+1]].wide = true; - reg_location_[ssa_rep->uses[i+1]].high_word = true; - DCHECK_EQ(SRegToVReg(ssa_rep->uses[i])+1, SRegToVReg(ssa_rep->uses[i+1])); - changed |= SetCore(ssa_rep->uses[i], true); + reg_location_[uses[i]].wide = true; + reg_location_[uses[i+1]].wide = true; + reg_location_[uses[i+1]].high_word = true; + DCHECK_EQ(SRegToVReg(uses[i])+1, SRegToVReg(uses[i+1])); + changed |= SetCore(uses[i]); i++; break; case 'F': ssa_rep->fp_use[i] = true; break; case 'L': - changed |= SetRef(ssa_rep->uses[i], true); + changed |= SetRef(uses[i]); break; default: - changed |= SetCore(ssa_rep->uses[i], true); + changed |= SetCore(uses[i]); break; } i++; @@ -260,11 +311,11 @@ bool MIRGraph::InferTypeAndSize(BasicBlock* bb) { for (int i = 0; ssa_rep->fp_use && i< ssa_rep->num_uses; i++) { if (ssa_rep->fp_use[i]) - changed |= SetFp(ssa_rep->uses[i], 
true); + changed |= SetFp(uses[i]); } for (int i = 0; ssa_rep->fp_def && i< ssa_rep->num_defs; i++) { if (ssa_rep->fp_def[i]) - changed |= SetFp(ssa_rep->defs[i], true); + changed |= SetFp(defs[i]); } // Special-case handling for moves & Phi if (attrs & (DF_IS_MOVE | DF_NULL_TRANSFER_N)) { @@ -276,14 +327,14 @@ bool MIRGraph::InferTypeAndSize(BasicBlock* bb) { */ bool is_phi = (static_cast<int>(mir->dalvikInsn.opcode) == kMirOpPhi); - RegLocation rl_temp = reg_location_[ssa_rep->defs[0]]; + RegLocation rl_temp = reg_location_[defs[0]]; bool defined_fp = rl_temp.defined && rl_temp.fp; bool defined_core = rl_temp.defined && rl_temp.core; bool defined_ref = rl_temp.defined && rl_temp.ref; bool is_wide = rl_temp.wide || ((attrs & DF_A_WIDE) != 0); bool is_high = is_phi && rl_temp.wide && rl_temp.high_word; for (int i = 0; i < ssa_rep->num_uses; i++) { - rl_temp = reg_location_[ssa_rep->uses[i]]; + rl_temp = reg_location_[uses[i]]; defined_fp |= rl_temp.defined && rl_temp.fp; defined_core |= rl_temp.defined && rl_temp.core; defined_ref |= rl_temp.defined && rl_temp.ref; @@ -303,26 +354,26 @@ bool MIRGraph::InferTypeAndSize(BasicBlock* bb) { << " has both fp and core/ref uses for same def."; cu_->disable_opt |= (1 << kPromoteRegs); } - changed |= SetFp(ssa_rep->defs[0], defined_fp); - changed |= SetCore(ssa_rep->defs[0], defined_core); - changed |= SetRef(ssa_rep->defs[0], defined_ref); - changed |= SetWide(ssa_rep->defs[0], is_wide); - changed |= SetHigh(ssa_rep->defs[0], is_high); + changed |= SetFp(defs[0], defined_fp); + changed |= SetCore(defs[0], defined_core); + changed |= SetRef(defs[0], defined_ref); + changed |= SetWide(defs[0], is_wide); + changed |= SetHigh(defs[0], is_high); if (attrs & DF_A_WIDE) { - changed |= SetWide(ssa_rep->defs[1], true); - changed |= SetHigh(ssa_rep->defs[1], true); + changed |= SetWide(defs[1]); + changed |= SetHigh(defs[1]); } for (int i = 0; i < ssa_rep->num_uses; i++) { - changed |= SetFp(ssa_rep->uses[i], defined_fp); - changed |= SetCore(ssa_rep->uses[i], defined_core); - changed |= SetRef(ssa_rep->uses[i], defined_ref); - changed |= SetWide(ssa_rep->uses[i], is_wide); - changed |= SetHigh(ssa_rep->uses[i], is_high); + changed |= SetFp(uses[i], defined_fp); + changed |= SetCore(uses[i], defined_core); + changed |= SetRef(uses[i], defined_ref); + changed |= SetWide(uses[i], is_wide); + changed |= SetHigh(uses[i], is_high); } if (attrs & DF_A_WIDE) { DCHECK_EQ(ssa_rep->num_uses, 2); - changed |= SetWide(ssa_rep->uses[1], true); - changed |= SetHigh(ssa_rep->uses[1], true); + changed |= SetWide(uses[1]); + changed |= SetHigh(uses[1]); } } } @@ -444,7 +495,7 @@ void MIRGraph::BuildRegLocations() { } /* Do type & size inference pass */ - PreOrderDfsIterator iter(this, true /* iterative */); + RepeatingPreOrderDfsIterator iter(this); bool change = false; for (BasicBlock* bb = iter.Next(false); bb != NULL; bb = iter.Next(change)) { change = InferTypeAndSize(bb); diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index b876724f21..4871e162a3 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -64,7 +64,7 @@ static void DumpStat(size_t x, size_t y, const char* str) { if (x == 0 && y == 0) { return; } - VLOG(compiler) << Percentage(x, y) << "% of " << str << " for " << (x + y) << " cases"; + LOG(INFO) << Percentage(x, y) << "% of " << str << " for " << (x + y) << " cases"; } class AOTCompilationStats { @@ -336,10 +336,12 @@ extern "C" void compilerLLVMSetBitcodeFileName(art::CompilerDriver& 
driver, std::string const& filename); CompilerDriver::CompilerDriver(CompilerBackend compiler_backend, InstructionSet instruction_set, + InstructionSetFeatures instruction_set_features, bool image, DescriptorSet* image_classes, size_t thread_count, bool dump_stats) : compiler_backend_(compiler_backend), instruction_set_(instruction_set), + instruction_set_features_(instruction_set_features), freezing_constructor_lock_("freezing constructor lock"), compiled_classes_lock_("compiled classes lock"), compiled_methods_lock_("compiled method lock"), @@ -355,7 +357,11 @@ CompilerDriver::CompilerDriver(CompilerBackend compiler_backend, InstructionSet jni_compiler_(NULL), compiler_enable_auto_elf_loading_(NULL), compiler_get_method_code_addr_(NULL), - support_boot_image_fixup_(true) { + support_boot_image_fixup_(instruction_set == kThumb2), + dedupe_code_("dedupe code"), + dedupe_mapping_table_("dedupe mapping table"), + dedupe_vmap_table_("dedupe vmap table"), + dedupe_gc_map_("dedupe gc map") { CHECK_PTHREAD_CALL(pthread_key_create, (&tls_key_, NULL), "compiler tls key"); @@ -465,6 +471,11 @@ const std::vector<uint8_t>* CompilerDriver::CreateJniDlsymLookup() const { return CreateTrampoline(instruction_set_, kJniAbi, JNI_ENTRYPOINT_OFFSET(pDlsymLookup)); } +const std::vector<uint8_t>* CompilerDriver::CreatePortableImtConflictTrampoline() const { + return CreateTrampoline(instruction_set_, kPortableAbi, + PORTABLE_ENTRYPOINT_OFFSET(pPortableImtConflictTrampoline)); +} + const std::vector<uint8_t>* CompilerDriver::CreatePortableResolutionTrampoline() const { return CreateTrampoline(instruction_set_, kPortableAbi, PORTABLE_ENTRYPOINT_OFFSET(pPortableResolutionTrampoline)); @@ -475,6 +486,11 @@ const std::vector<uint8_t>* CompilerDriver::CreatePortableToInterpreterBridge() PORTABLE_ENTRYPOINT_OFFSET(pPortableToInterpreterBridge)); } +const std::vector<uint8_t>* CompilerDriver::CreateQuickImtConflictTrampoline() const { + return CreateTrampoline(instruction_set_, kQuickAbi, + QUICK_ENTRYPOINT_OFFSET(pQuickImtConflictTrampoline)); +} + const std::vector<uint8_t>* CompilerDriver::CreateQuickResolutionTrampoline() const { return CreateTrampoline(instruction_set_, kQuickAbi, QUICK_ENTRYPOINT_OFFSET(pQuickResolutionTrampoline)); @@ -597,7 +613,6 @@ void CompilerDriver::PreCompile(jobject class_loader, const std::vector<const De } bool CompilerDriver::IsImageClass(const char* descriptor) const { - DCHECK(descriptor != NULL); if (!IsImage()) { return true; } else { @@ -776,7 +791,8 @@ void CompilerDriver::UpdateImageClasses(base::TimingLogger& timings) { bool CompilerDriver::CanAssumeTypeIsPresentInDexCache(const DexFile& dex_file, uint32_t type_idx) { - if (IsImage() && IsImageClass(dex_file.GetTypeDescriptor(dex_file.GetTypeId(type_idx)))) { + if (IsImage() && + IsImageClass(dex_file.StringDataByIdx(dex_file.GetTypeId(type_idx).descriptor_idx_))) { if (kIsDebugBuild) { ScopedObjectAccess soa(Thread::Current()); mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(dex_file); @@ -912,9 +928,9 @@ static mirror::ArtField* ComputeFieldReferencedFromCompilingMethod(ScopedObjectA } static mirror::ArtMethod* ComputeMethodReferencedFromCompilingMethod(ScopedObjectAccess& soa, - const DexCompilationUnit* mUnit, - uint32_t method_idx, - InvokeType type) + const DexCompilationUnit* mUnit, + uint32_t method_idx, + InvokeType type) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { mirror::DexCache* dex_cache = mUnit->GetClassLinker()->FindDexCache(*mUnit->GetDexFile()); mirror::ClassLoader* 
class_loader = soa.Decode<mirror::ClassLoader*>(mUnit->GetClassLoader()); @@ -923,11 +939,11 @@ static mirror::ArtMethod* ComputeMethodReferencedFromCompilingMethod(ScopedObjec } bool CompilerDriver::ComputeInstanceFieldInfo(uint32_t field_idx, const DexCompilationUnit* mUnit, - int& field_offset, bool& is_volatile, bool is_put) { + bool is_put, int* field_offset, bool* is_volatile) { ScopedObjectAccess soa(Thread::Current()); // Conservative defaults. - field_offset = -1; - is_volatile = true; + *field_offset = -1; + *is_volatile = true; // Try to resolve field and ignore if an Incompatible Class Change Error (ie is static). mirror::ArtField* resolved_field = ComputeFieldReferencedFromCompilingMethod(soa, mUnit, field_idx); if (resolved_field != NULL && !resolved_field->IsStatic()) { @@ -954,8 +970,8 @@ bool CompilerDriver::ComputeInstanceFieldInfo(uint32_t field_idx, const DexCompi bool is_write_to_final_from_wrong_class = is_put && resolved_field->IsFinal() && fields_class != referrer_class; if (access_ok && !is_write_to_final_from_wrong_class) { - field_offset = resolved_field->GetOffset().Int32Value(); - is_volatile = resolved_field->IsVolatile(); + *field_offset = resolved_field->GetOffset().Int32Value(); + *is_volatile = resolved_field->IsVolatile(); stats_->ResolvedInstanceField(); return true; // Fast path. } @@ -970,15 +986,14 @@ bool CompilerDriver::ComputeInstanceFieldInfo(uint32_t field_idx, const DexCompi } bool CompilerDriver::ComputeStaticFieldInfo(uint32_t field_idx, const DexCompilationUnit* mUnit, - int& field_offset, int& ssb_index, - bool& is_referrers_class, bool& is_volatile, - bool is_put) { + bool is_put, int* field_offset, int* ssb_index, + bool* is_referrers_class, bool* is_volatile) { ScopedObjectAccess soa(Thread::Current()); // Conservative defaults. - field_offset = -1; - ssb_index = -1; - is_referrers_class = false; - is_volatile = true; + *field_offset = -1; + *ssb_index = -1; + *is_referrers_class = false; + *is_volatile = true; // Try to resolve field and ignore if an Incompatible Class Change Error (ie isn't static). 
mirror::ArtField* resolved_field = ComputeFieldReferencedFromCompilingMethod(soa, mUnit, field_idx); if (resolved_field != NULL && resolved_field->IsStatic()) { @@ -988,9 +1003,9 @@ bool CompilerDriver::ComputeStaticFieldInfo(uint32_t field_idx, const DexCompila if (referrer_class != NULL) { mirror::Class* fields_class = resolved_field->GetDeclaringClass(); if (fields_class == referrer_class) { - is_referrers_class = true; // implies no worrying about class initialization - field_offset = resolved_field->GetOffset().Int32Value(); - is_volatile = resolved_field->IsVolatile(); + *is_referrers_class = true; // implies no worrying about class initialization + *field_offset = resolved_field->GetOffset().Int32Value(); + *is_volatile = resolved_field->IsVolatile(); stats_->ResolvedLocalStaticField(); return true; // fast path } else { @@ -1021,9 +1036,9 @@ bool CompilerDriver::ComputeStaticFieldInfo(uint32_t field_idx, const DexCompila if (fields_class->GetDexCache() == dex_cache) { // common case where the dex cache of both the referrer and the field are the same, // no need to search the dex file - ssb_index = fields_class->GetDexTypeIndex(); - field_offset = resolved_field->GetOffset().Int32Value(); - is_volatile = resolved_field->IsVolatile(); + *ssb_index = fields_class->GetDexTypeIndex(); + *field_offset = resolved_field->GetOffset().Int32Value(); + *is_volatile = resolved_field->IsVolatile(); stats_->ResolvedStaticField(); return true; } @@ -1036,9 +1051,9 @@ bool CompilerDriver::ComputeStaticFieldInfo(uint32_t field_idx, const DexCompila mUnit->GetDexFile()->FindTypeId(mUnit->GetDexFile()->GetIndexForStringId(*string_id)); if (type_id != NULL) { // medium path, needs check of static storage base being initialized - ssb_index = mUnit->GetDexFile()->GetIndexForTypeId(*type_id); - field_offset = resolved_field->GetOffset().Int32Value(); - is_volatile = resolved_field->IsVolatile(); + *ssb_index = mUnit->GetDexFile()->GetIndexForTypeId(*type_id); + *field_offset = resolved_field->GetOffset().Int32Value(); + *is_volatile = resolved_field->IsVolatile(); stats_->ResolvedStaticField(); return true; } @@ -1055,81 +1070,138 @@ bool CompilerDriver::ComputeStaticFieldInfo(uint32_t field_idx, const DexCompila return false; // Incomplete knowledge needs slow path. } -void CompilerDriver::GetCodeAndMethodForDirectCall(InvokeType type, InvokeType sharp_type, +void CompilerDriver::GetCodeAndMethodForDirectCall(InvokeType* type, InvokeType sharp_type, + bool no_guarantee_of_dex_cache_entry, mirror::Class* referrer_class, mirror::ArtMethod* method, - uintptr_t& direct_code, - uintptr_t& direct_method, - bool update_stats) { + bool update_stats, + MethodReference* target_method, + uintptr_t* direct_code, + uintptr_t* direct_method) { // For direct and static methods compute possible direct_code and direct_method values, ie // an address for the Method* being invoked and an address of the code for that Method*. // For interface calls compute a value for direct_method that is the interface method being // invoked, so this can be passed to the out-of-line runtime support code. 
- direct_code = 0; - direct_method = 0; + *direct_code = 0; + *direct_method = 0; + bool use_dex_cache = false; + bool compiling_boot = Runtime::Current()->GetHeap()->GetContinuousSpaces().size() == 1; if (compiler_backend_ == kPortable) { if (sharp_type != kStatic && sharp_type != kDirect) { return; } + use_dex_cache = true; } else { - if (sharp_type != kStatic && sharp_type != kDirect && sharp_type != kInterface) { + if (sharp_type != kStatic && sharp_type != kDirect) { return; } + // TODO: support patching on all architectures. + use_dex_cache = compiling_boot && !support_boot_image_fixup_; } - bool method_code_in_boot = method->GetDeclaringClass()->GetClassLoader() == NULL; - if (!method_code_in_boot) { - return; + bool method_code_in_boot = (method->GetDeclaringClass()->GetClassLoader() == nullptr); + if (!use_dex_cache) { + if (!method_code_in_boot) { + use_dex_cache = true; + } else { + bool has_clinit_trampoline = + method->IsStatic() && !method->GetDeclaringClass()->IsInitialized(); + if (has_clinit_trampoline && (method->GetDeclaringClass() != referrer_class)) { + // Ensure we run the clinit trampoline unless we are invoking a static method in the same + // class. + use_dex_cache = true; + } + } } - bool has_clinit_trampoline = method->IsStatic() && !method->GetDeclaringClass()->IsInitialized(); - if (has_clinit_trampoline && (method->GetDeclaringClass() != referrer_class)) { - // Ensure we run the clinit trampoline unless we are invoking a static method in the same class. - return; + if (update_stats && method_code_in_boot) { + stats_->DirectCallsToBoot(*type); + stats_->DirectMethodsToBoot(*type); } - if (update_stats) { - if (sharp_type != kInterface) { // Interfaces always go via a trampoline. - stats_->DirectCallsToBoot(type); + if (!use_dex_cache && compiling_boot) { + MethodHelper mh(method); + if (!IsImageClass(mh.GetDeclaringClassDescriptor())) { + // We can only branch directly to Methods that are resolved in the DexCache. + // Otherwise we won't invoke the resolution trampoline. + use_dex_cache = true; } - stats_->DirectMethodsToBoot(type); } - bool compiling_boot = Runtime::Current()->GetHeap()->GetContinuousSpaces().size() == 1; - if (compiling_boot) { - if (support_boot_image_fixup_) { - MethodHelper mh(method); - if (IsImageClass(mh.GetDeclaringClassDescriptor())) { - // We can only branch directly to Methods that are resolved in the DexCache. - // Otherwise we won't invoke the resolution trampoline. - direct_method = -1; - direct_code = -1; + // The method is defined not within this dex file. We need a dex cache slot within the current + // dex file or direct pointers. + bool must_use_direct_pointers = false; + if (target_method->dex_file == method->GetDeclaringClass()->GetDexCache()->GetDexFile()) { + target_method->dex_method_index = method->GetDexMethodIndex(); + } else { + // TODO: support patching from one dex file to another in the boot image. + use_dex_cache = use_dex_cache || compiling_boot; + if (no_guarantee_of_dex_cache_entry) { + // See if the method is also declared in this dex cache. + uint32_t dex_method_idx = MethodHelper(method).FindDexMethodIndexInOtherDexFile( + *referrer_class->GetDexCache()->GetDexFile()); + if (dex_method_idx != DexFile::kDexNoIndex) { + target_method->dex_method_index = dex_method_idx; + } else { + must_use_direct_pointers = true; } } + } + if (use_dex_cache) { + if (must_use_direct_pointers) { + // Fail. 
Test above showed the only safe dispatch was via the dex cache, however, the direct + // pointers are required as the dex cache lacks an appropriate entry. + VLOG(compiler) << "Dex cache devirtualization failed for: " << PrettyMethod(method); + } else { + *type = sharp_type; + } } else { - if (Runtime::Current()->GetHeap()->FindSpaceFromObject(method, false)->IsImageSpace()) { - direct_method = reinterpret_cast<uintptr_t>(method); + if (compiling_boot) { + *type = sharp_type; + *direct_method = -1; + *direct_code = -1; + } else { + bool method_in_image = + Runtime::Current()->GetHeap()->FindSpaceFromObject(method, false)->IsImageSpace(); + if (method_in_image) { + CHECK(!method->IsAbstract()); + *type = sharp_type; + *direct_method = reinterpret_cast<uintptr_t>(method); + *direct_code = reinterpret_cast<uintptr_t>(method->GetEntryPointFromCompiledCode()); + target_method->dex_file = method->GetDeclaringClass()->GetDexCache()->GetDexFile(); + target_method->dex_method_index = method->GetDexMethodIndex(); + } else if (!must_use_direct_pointers) { + // Set the code and rely on the dex cache for the method. + *type = sharp_type; + *direct_code = reinterpret_cast<uintptr_t>(method->GetEntryPointFromCompiledCode()); + } else { + // Direct pointers were required but none were available. + VLOG(compiler) << "Dex cache devirtualization failed for: " << PrettyMethod(method); + } } - direct_code = reinterpret_cast<uintptr_t>(method->GetEntryPointFromCompiledCode()); } } bool CompilerDriver::ComputeInvokeInfo(const DexCompilationUnit* mUnit, const uint32_t dex_pc, - InvokeType& invoke_type, - MethodReference& target_method, - int& vtable_idx, - uintptr_t& direct_code, uintptr_t& direct_method, - bool update_stats) { + bool update_stats, bool enable_devirtualization, + InvokeType* invoke_type, MethodReference* target_method, + int* vtable_idx, uintptr_t* direct_code, + uintptr_t* direct_method) { ScopedObjectAccess soa(Thread::Current()); - vtable_idx = -1; - direct_code = 0; - direct_method = 0; + *vtable_idx = -1; + *direct_code = 0; + *direct_method = 0; mirror::ArtMethod* resolved_method = - ComputeMethodReferencedFromCompilingMethod(soa, mUnit, target_method.dex_method_index, - invoke_type); + ComputeMethodReferencedFromCompilingMethod(soa, mUnit, target_method->dex_method_index, + *invoke_type); if (resolved_method != NULL) { + if (*invoke_type == kVirtual || *invoke_type == kSuper) { + *vtable_idx = resolved_method->GetMethodIndex(); + } else if (*invoke_type == kInterface) { + *vtable_idx = resolved_method->GetDexMethodIndex(); + } // Don't try to fast-path if we don't understand the caller's class or this appears to be an // Incompatible Class Change Error. mirror::Class* referrer_class = ComputeCompilingMethodsClass(soa, resolved_method->GetDeclaringClass()->GetDexCache(), mUnit); - bool icce = resolved_method->CheckIncompatibleClassChange(invoke_type); + bool icce = resolved_method->CheckIncompatibleClassChange(*invoke_type); if (referrer_class != NULL && !icce) { mirror::Class* methods_class = resolved_method->GetDeclaringClass(); if (!referrer_class->CanAccess(methods_class) || @@ -1140,42 +1212,43 @@ bool CompilerDriver::ComputeInvokeInfo(const DexCompilationUnit* mUnit, const ui // method public. Resort to the dex file to determine the correct class for the access // check. 
uint16_t class_idx = - target_method.dex_file->GetMethodId(target_method.dex_method_index).class_idx_; - methods_class = mUnit->GetClassLinker()->ResolveType(*target_method.dex_file, + target_method->dex_file->GetMethodId(target_method->dex_method_index).class_idx_; + methods_class = mUnit->GetClassLinker()->ResolveType(*target_method->dex_file, class_idx, referrer_class); } if (referrer_class->CanAccess(methods_class) && referrer_class->CanAccessMember(methods_class, resolved_method->GetAccessFlags())) { - const bool kEnableFinalBasedSharpening = true; + const bool enableFinalBasedSharpening = enable_devirtualization; // Sharpen a virtual call into a direct call when the target is known not to have been // overridden (ie is final). bool can_sharpen_virtual_based_on_type = - (invoke_type == kVirtual) && (resolved_method->IsFinal() || methods_class->IsFinal()); + (*invoke_type == kVirtual) && (resolved_method->IsFinal() || methods_class->IsFinal()); // For invoke-super, ensure the vtable index will be correct to dispatch in the vtable of // the super class. - bool can_sharpen_super_based_on_type = (invoke_type == kSuper) && + bool can_sharpen_super_based_on_type = (*invoke_type == kSuper) && (referrer_class != methods_class) && referrer_class->IsSubClass(methods_class) && resolved_method->GetMethodIndex() < methods_class->GetVTable()->GetLength() && (methods_class->GetVTable()->Get(resolved_method->GetMethodIndex()) == resolved_method); - if (kEnableFinalBasedSharpening && (can_sharpen_virtual_based_on_type || + if (enableFinalBasedSharpening && (can_sharpen_virtual_based_on_type || can_sharpen_super_based_on_type)) { // Sharpen a virtual call into a direct call. The method_idx is into referrer's // dex cache, check that this resolved method is where we expect it. - CHECK(referrer_class->GetDexCache()->GetResolvedMethod(target_method.dex_method_index) == + CHECK(referrer_class->GetDexCache()->GetResolvedMethod(target_method->dex_method_index) == resolved_method) << PrettyMethod(resolved_method); - if (update_stats) { - stats_->ResolvedMethod(invoke_type); - stats_->VirtualMadeDirect(invoke_type); + InvokeType orig_invoke_type = *invoke_type; + GetCodeAndMethodForDirectCall(invoke_type, kDirect, false, referrer_class, resolved_method, + update_stats, target_method, direct_code, direct_method); + if (update_stats && (*invoke_type == kDirect)) { + stats_->ResolvedMethod(orig_invoke_type); + stats_->VirtualMadeDirect(orig_invoke_type); } - GetCodeAndMethodForDirectCall(invoke_type, kDirect, referrer_class, resolved_method, - direct_code, direct_method, update_stats); - invoke_type = kDirect; + DCHECK_NE(*invoke_type, kSuper) << PrettyMethod(resolved_method); return true; } - const bool kEnableVerifierBasedSharpening = true; - if (kEnableVerifierBasedSharpening && (invoke_type == kVirtual || - invoke_type == kInterface)) { + const bool enableVerifierBasedSharpening = enable_devirtualization; + if (enableVerifierBasedSharpening && (*invoke_type == kVirtual || + *invoke_type == kInterface)) { // Did the verifier record a more precise invoke target based on its type information? 
const MethodReference caller_method(mUnit->GetDexFile(), mUnit->GetDexMethodIndex()); const MethodReference* devirt_map_target = @@ -1192,88 +1265,27 @@ bool CompilerDriver::ComputeInvokeInfo(const DexCompilationUnit* mUnit, const ui kVirtual); CHECK(called_method != NULL); CHECK(!called_method->IsAbstract()); - GetCodeAndMethodForDirectCall(invoke_type, kDirect, referrer_class, called_method, - direct_code, direct_method, update_stats); - bool compiler_needs_dex_cache = - (GetCompilerBackend() == kPortable) || - (GetCompilerBackend() == kQuick && instruction_set_ != kThumb2) || - (direct_code == 0) || (direct_code == static_cast<unsigned int>(-1)) || - (direct_method == 0) || (direct_method == static_cast<unsigned int>(-1)); - if ((devirt_map_target->dex_file != target_method.dex_file) && - compiler_needs_dex_cache) { - // We need to use the dex cache to find either the method or code, and the dex file - // containing the method isn't the one expected for the target method. Try to find - // the method within the expected target dex file. - // TODO: the -1 could be handled as direct code if the patching new the target dex - // file. - // TODO: quick only supports direct pointers with Thumb2. - // TODO: the following should be factored into a common helper routine to find - // one dex file's method within another. - const DexFile* dexfile = target_method.dex_file; - const DexFile* cm_dexfile = - called_method->GetDeclaringClass()->GetDexCache()->GetDexFile(); - const DexFile::MethodId& cm_method_id = - cm_dexfile->GetMethodId(called_method->GetDexMethodIndex()); - const char* cm_descriptor = cm_dexfile->StringByTypeIdx(cm_method_id.class_idx_); - const DexFile::StringId* descriptor = dexfile->FindStringId(cm_descriptor); - if (descriptor != NULL) { - const DexFile::TypeId* type_id = - dexfile->FindTypeId(dexfile->GetIndexForStringId(*descriptor)); - if (type_id != NULL) { - const char* cm_name = cm_dexfile->GetMethodName(cm_method_id); - const DexFile::StringId* name = dexfile->FindStringId(cm_name); - if (name != NULL) { - uint16_t return_type_idx; - std::vector<uint16_t> param_type_idxs; - bool success = dexfile->CreateTypeList(&return_type_idx, &param_type_idxs, - cm_dexfile->GetMethodSignature(cm_method_id)); - if (success) { - const DexFile::ProtoId* sig = - dexfile->FindProtoId(return_type_idx, param_type_idxs); - if (sig != NULL) { - const DexFile::MethodId* method_id = dexfile->FindMethodId(*type_id, - *name, *sig); - if (method_id != NULL) { - if (update_stats) { - stats_->ResolvedMethod(invoke_type); - stats_->VirtualMadeDirect(invoke_type); - stats_->PreciseTypeDevirtualization(); - } - target_method.dex_method_index = dexfile->GetIndexForMethodId(*method_id); - invoke_type = kDirect; - return true; - } - } - } - } - } - } - // TODO: the stats for direct code and method are off as we failed to find the direct - // method in the referring method's dex cache/file. 
- } else { - if (update_stats) { - stats_->ResolvedMethod(invoke_type); - stats_->VirtualMadeDirect(invoke_type); - stats_->PreciseTypeDevirtualization(); - } - target_method = *devirt_map_target; - invoke_type = kDirect; - return true; + InvokeType orig_invoke_type = *invoke_type; + GetCodeAndMethodForDirectCall(invoke_type, kDirect, true, referrer_class, called_method, + update_stats, target_method, direct_code, direct_method); + if (update_stats && (*invoke_type == kDirect)) { + stats_->ResolvedMethod(orig_invoke_type); + stats_->VirtualMadeDirect(orig_invoke_type); + stats_->PreciseTypeDevirtualization(); } + DCHECK_NE(*invoke_type, kSuper); + return true; } } - if (invoke_type == kSuper) { + if (*invoke_type == kSuper) { // Unsharpened super calls are suspicious so go slow-path. } else { // Sharpening failed so generate a regular resolved method dispatch. if (update_stats) { - stats_->ResolvedMethod(invoke_type); + stats_->ResolvedMethod(*invoke_type); } - if (invoke_type == kVirtual || invoke_type == kSuper) { - vtable_idx = resolved_method->GetMethodIndex(); - } - GetCodeAndMethodForDirectCall(invoke_type, invoke_type, referrer_class, resolved_method, - direct_code, direct_method, update_stats); + GetCodeAndMethodForDirectCall(invoke_type, *invoke_type, false, referrer_class, resolved_method, + update_stats, target_method, direct_code, direct_method); return true; } } @@ -1284,7 +1296,7 @@ bool CompilerDriver::ComputeInvokeInfo(const DexCompilationUnit* mUnit, const ui soa.Self()->ClearException(); } if (update_stats) { - stats_->UnresolvedMethod(invoke_type); + stats_->UnresolvedMethod(*invoke_type); } return false; // Incomplete knowledge needs slow path. } @@ -1569,8 +1581,8 @@ static void ResolveType(const ParallelCompilationManager* manager, size_t type_i CHECK(soa.Self()->IsExceptionPending()); mirror::Throwable* exception = soa.Self()->GetException(NULL); VLOG(compiler) << "Exception during type resolution: " << exception->Dump(); - if (strcmp(ClassHelper(exception->GetClass()).GetDescriptor(), - "Ljava/lang/OutOfMemoryError;") == 0) { + if (strcmp("Ljava/lang/OutOfMemoryError;", + ClassHelper(exception->GetClass()).GetDescriptor()) == 0) { // There's little point continuing compilation if the heap is exhausted. LOG(FATAL) << "Out of memory during type resolution for compilation"; } @@ -1589,13 +1601,11 @@ void CompilerDriver::ResolveDexFile(jobject class_loader, const DexFile& dex_fil if (IsImage()) { // For images we resolve all types, such as array, whereas for applications just those with // classdefs are resolved by ResolveClassFieldsAndMethods. - // TODO: strdup memory leak. - timings.NewSplit(strdup(("Resolve " + dex_file.GetLocation() + " Types").c_str())); + timings.NewSplit("Resolve Types"); context.ForAll(0, dex_file.NumTypeIds(), ResolveType, thread_count_); } - // TODO: strdup memory leak. - timings.NewSplit(strdup(("Resolve " + dex_file.GetLocation() + " MethodsAndFields").c_str())); + timings.NewSplit("Resolve MethodsAndFields"); context.ForAll(0, dex_file.NumClassDefs(), ResolveClassFieldsAndMethods, thread_count_); } @@ -1658,8 +1668,7 @@ static void VerifyClass(const ParallelCompilationManager* manager, size_t class_ void CompilerDriver::VerifyDexFile(jobject class_loader, const DexFile& dex_file, ThreadPool& thread_pool, base::TimingLogger& timings) { - // TODO: strdup memory leak. 
- timings.NewSplit(strdup(("Verify " + dex_file.GetLocation()).c_str())); + timings.NewSplit("Verify Dex File"); ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); ParallelCompilationManager context(class_linker, class_loader, this, &dex_file, thread_pool); context.ForAll(0, dex_file.NumClassDefs(), VerifyClass, thread_count_); @@ -2086,10 +2095,13 @@ static const char* class_initializer_black_list[] = { static void InitializeClass(const ParallelCompilationManager* manager, size_t class_def_index) LOCKS_EXCLUDED(Locks::mutator_lock_) { ATRACE_CALL(); - const DexFile::ClassDef& class_def = manager->GetDexFile()->GetClassDef(class_def_index); + const DexFile* dex_file = manager->GetDexFile(); + const DexFile::ClassDef& class_def = dex_file->GetClassDef(class_def_index); + const DexFile::TypeId& class_type_id = dex_file->GetTypeId(class_def.class_idx_); + const char* descriptor = dex_file->StringDataByIdx(class_type_id.descriptor_idx_); + ScopedObjectAccess soa(Thread::Current()); mirror::ClassLoader* class_loader = soa.Decode<mirror::ClassLoader*>(manager->GetClassLoader()); - const char* descriptor = manager->GetDexFile()->GetClassDescriptor(class_def); mirror::Class* klass = manager->GetClassLinker()->FindClass(descriptor, class_loader); if (klass != NULL) { // Only try to initialize classes that were successfully verified. @@ -2120,7 +2132,7 @@ static void InitializeClass(const ParallelCompilationManager* manager, size_t cl bool is_black_listed = StringPiece(descriptor).ends_with("$NoPreloadHolder;"); if (!is_black_listed) { for (size_t i = 0; i < arraysize(class_initializer_black_list); ++i) { - if (StringPiece(descriptor) == class_initializer_black_list[i]) { + if (strcmp(descriptor, class_initializer_black_list[i]) == 0) { is_black_listed = true; break; } @@ -2128,7 +2140,7 @@ static void InitializeClass(const ParallelCompilationManager* manager, size_t cl } if (!is_black_listed) { VLOG(compiler) << "Initializing: " << descriptor; - if (StringPiece(descriptor) == "Ljava/lang/Void;") { + if (strcmp("Ljava/lang/Void;", descriptor) == 0) { // Hand initialize j.l.Void to avoid Dex file operations in un-started runtime. ObjectLock lock(soa.Self(), klass); mirror::ObjectArray<mirror::ArtField>* fields = klass->GetSFields(); @@ -2159,8 +2171,7 @@ static void InitializeClass(const ParallelCompilationManager* manager, size_t cl void CompilerDriver::InitializeClasses(jobject jni_class_loader, const DexFile& dex_file, ThreadPool& thread_pool, base::TimingLogger& timings) { - // TODO: strdup memory leak. - timings.NewSplit(strdup(("InitializeNoClinit " + dex_file.GetLocation()).c_str())); + timings.NewSplit("InitializeNoClinit"); #ifndef NDEBUG // Sanity check blacklist descriptors. if (IsImage()) { @@ -2267,8 +2278,7 @@ void CompilerDriver::CompileClass(const ParallelCompilationManager* manager, siz void CompilerDriver::CompileDexFile(jobject class_loader, const DexFile& dex_file, ThreadPool& thread_pool, base::TimingLogger& timings) { - // TODO: strdup memory leak. 
- timings.NewSplit(strdup(("Compile " + dex_file.GetLocation()).c_str())); + timings.NewSplit("Compile Dex File"); ParallelCompilationManager context(Runtime::Current()->GetClassLinker(), class_loader, this, &dex_file, thread_pool); context.ForAll(0, dex_file.NumClassDefs(), CompilerDriver::CompileClass, thread_count_); diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h index 3852acfd3b..9321f06526 100644 --- a/compiler/driver/compiler_driver.h +++ b/compiler/driver/compiler_driver.h @@ -91,6 +91,7 @@ class CompilerDriver { // can assume will be in the image, with NULL implying all available // classes. explicit CompilerDriver(CompilerBackend compiler_backend, InstructionSet instruction_set, + InstructionSetFeatures instruction_set_features, bool image, DescriptorSet* image_classes, size_t thread_count, bool dump_stats); @@ -104,10 +105,14 @@ class CompilerDriver { void CompileOne(const mirror::ArtMethod* method, base::TimingLogger& timings) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - InstructionSet GetInstructionSet() const { + const InstructionSet& GetInstructionSet() const { return instruction_set_; } + const InstructionSetFeatures& GetInstructionSetFeatures() const { + return instruction_set_features_; + } + CompilerBackend GetCompilerBackend() const { return compiler_backend_; } @@ -130,10 +135,14 @@ class CompilerDriver { SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); const std::vector<uint8_t>* CreateJniDlsymLookup() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + const std::vector<uint8_t>* CreatePortableImtConflictTrampoline() const + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); const std::vector<uint8_t>* CreatePortableResolutionTrampoline() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); const std::vector<uint8_t>* CreatePortableToInterpreterBridge() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + const std::vector<uint8_t>* CreateQuickImtConflictTrampoline() const + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); const std::vector<uint8_t>* CreateQuickResolutionTrampoline() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); const std::vector<uint8_t>* CreateQuickToInterpreterBridge() const @@ -170,22 +179,23 @@ class CompilerDriver { LOCKS_EXCLUDED(Locks::mutator_lock_); // Can we fast path instance field access? Computes field's offset and volatility. - bool ComputeInstanceFieldInfo(uint32_t field_idx, const DexCompilationUnit* mUnit, - int& field_offset, bool& is_volatile, bool is_put) + bool ComputeInstanceFieldInfo(uint32_t field_idx, const DexCompilationUnit* mUnit, bool is_put, + int* field_offset, bool* is_volatile) LOCKS_EXCLUDED(Locks::mutator_lock_); // Can we fastpath static field access? Computes field's offset, volatility and whether the // field is within the referrer (which can avoid checking class initialization). - bool ComputeStaticFieldInfo(uint32_t field_idx, const DexCompilationUnit* mUnit, - int& field_offset, int& ssb_index, - bool& is_referrers_class, bool& is_volatile, bool is_put) + bool ComputeStaticFieldInfo(uint32_t field_idx, const DexCompilationUnit* mUnit, bool is_put, + int* field_offset, int* ssb_index, + bool* is_referrers_class, bool* is_volatile) LOCKS_EXCLUDED(Locks::mutator_lock_); // Can we fastpath a interface, super class or virtual method call? Computes method's vtable // index. 
bool ComputeInvokeInfo(const DexCompilationUnit* mUnit, const uint32_t dex_pc, - InvokeType& type, MethodReference& target_method, int& vtable_idx, - uintptr_t& direct_code, uintptr_t& direct_method, bool update_stats) + bool update_stats, bool enable_devirtualization, + InvokeType* type, MethodReference* target_method, int* vtable_idx, + uintptr_t* direct_code, uintptr_t* direct_method) LOCKS_EXCLUDED(Locks::mutator_lock_); bool IsSafeCast(const MethodReference& mr, uint32_t dex_pc); @@ -320,11 +330,13 @@ class CompilerDriver { private: // Compute constant code and method pointers when possible - void GetCodeAndMethodForDirectCall(InvokeType type, InvokeType sharp_type, + void GetCodeAndMethodForDirectCall(InvokeType* type, InvokeType sharp_type, + bool no_guarantee_of_dex_cache_entry, mirror::Class* referrer_class, mirror::ArtMethod* method, - uintptr_t& direct_code, uintptr_t& direct_method, - bool update_stats) + bool update_stats, + MethodReference* target_method, + uintptr_t* direct_code, uintptr_t* direct_method) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); void PreCompile(jobject class_loader, const std::vector<const DexFile*>& dex_files, @@ -379,7 +391,8 @@ class CompilerDriver { CompilerBackend compiler_backend_; - InstructionSet instruction_set_; + const InstructionSet instruction_set_; + const InstructionSetFeatures instruction_set_features_; // All class references that require mutable ReaderWriterMutex freezing_constructor_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; @@ -458,27 +471,40 @@ class CompilerDriver { class DedupeHashFunc { public: size_t operator()(const std::vector<uint8_t>& array) const { - // Take a random sample of bytes. + // For small arrays compute a hash using every byte. static const size_t kSmallArrayThreshold = 16; - static const size_t kRandomHashCount = 16; - size_t hash = 0; - if (array.size() < kSmallArrayThreshold) { - for (auto c : array) { - hash = hash * 54 + c; + size_t hash = 0x811c9dc5; + if (array.size() <= kSmallArrayThreshold) { + for (uint8_t b : array) { + hash = (hash * 16777619) ^ b; } } else { - for (size_t i = 0; i < kRandomHashCount; ++i) { + // For larger arrays use the 2 bytes at 6 bytes (the location of a push registers + // instruction field for quick generated code on ARM) and then select a number of other + // values at random. 
+ static const size_t kRandomHashCount = 16; + for (size_t i = 0; i < 2; ++i) { + uint8_t b = array[i + 6]; + hash = (hash * 16777619) ^ b; + } + for (size_t i = 2; i < kRandomHashCount; ++i) { size_t r = i * 1103515245 + 12345; - hash = hash * 54 + array[r % array.size()]; + uint8_t b = array[r % array.size()]; + hash = (hash * 16777619) ^ b; } } + hash += hash << 13; + hash ^= hash >> 7; + hash += hash << 3; + hash ^= hash >> 17; + hash += hash << 5; return hash; } }; - DedupeSet<std::vector<uint8_t>, size_t, DedupeHashFunc> dedupe_code_; - DedupeSet<std::vector<uint8_t>, size_t, DedupeHashFunc> dedupe_mapping_table_; - DedupeSet<std::vector<uint8_t>, size_t, DedupeHashFunc> dedupe_vmap_table_; - DedupeSet<std::vector<uint8_t>, size_t, DedupeHashFunc> dedupe_gc_map_; + DedupeSet<std::vector<uint8_t>, size_t, DedupeHashFunc, 4> dedupe_code_; + DedupeSet<std::vector<uint8_t>, size_t, DedupeHashFunc, 4> dedupe_mapping_table_; + DedupeSet<std::vector<uint8_t>, size_t, DedupeHashFunc, 4> dedupe_vmap_table_; + DedupeSet<std::vector<uint8_t>, size_t, DedupeHashFunc, 4> dedupe_gc_map_; DISALLOW_COPY_AND_ASSIGN(CompilerDriver); }; diff --git a/compiler/elf_fixup.cc b/compiler/elf_fixup.cc index 359c4936a6..c5712880c1 100644 --- a/compiler/elf_fixup.cc +++ b/compiler/elf_fixup.cc @@ -27,8 +27,9 @@ namespace art { static const bool DEBUG_FIXUP = false; bool ElfFixup::Fixup(File* file, uintptr_t oat_data_begin) { - UniquePtr<ElfFile> elf_file(ElfFile::Open(file, true, false)); - CHECK(elf_file.get() != NULL); + std::string error_msg; + UniquePtr<ElfFile> elf_file(ElfFile::Open(file, true, false, &error_msg)); + CHECK(elf_file.get() != nullptr) << error_msg; // Lookup "oatdata" symbol address. ::llvm::ELF::Elf32_Addr oatdata_address = ElfWriter::GetOatDataAddress(elf_file.get()); diff --git a/compiler/elf_stripper.cc b/compiler/elf_stripper.cc index 7fc662ca1d..7ee8d3cae1 100644 --- a/compiler/elf_stripper.cc +++ b/compiler/elf_stripper.cc @@ -27,9 +27,11 @@ namespace art { -bool ElfStripper::Strip(File* file) { - UniquePtr<ElfFile> elf_file(ElfFile::Open(file, true, false)); - CHECK(elf_file.get() != NULL); +bool ElfStripper::Strip(File* file, std::string* error_msg) { + UniquePtr<ElfFile> elf_file(ElfFile::Open(file, true, false, error_msg)); + if (elf_file.get() == nullptr) { + return false; + } // ELF files produced by MCLinker look roughly like this // @@ -120,7 +122,8 @@ bool ElfStripper::Strip(File* file) { elf_file->GetHeader().e_shoff = shoff; int result = ftruncate(file->Fd(), offset); if (result != 0) { - PLOG(ERROR) << "Failed to truncate while stripping ELF file: " << file->GetPath(); + *error_msg = StringPrintf("Failed to truncate while stripping ELF file: '%s': %s", + file->GetPath().c_str(), strerror(errno)); return false; } return true; diff --git a/compiler/elf_stripper.h b/compiler/elf_stripper.h index 6015b30cb2..f1a1d4605d 100644 --- a/compiler/elf_stripper.h +++ b/compiler/elf_stripper.h @@ -17,6 +17,8 @@ #ifndef ART_COMPILER_ELF_STRIPPER_H_ #define ART_COMPILER_ELF_STRIPPER_H_ +#include <string> + #include "base/macros.h" #include "os.h" @@ -26,7 +28,7 @@ class ElfStripper { public: // Strip an ELF file of unneeded debugging information. // Returns true on success, false on failure. 
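Stepping back to the DedupeHashFunc rewrite in compiler_driver.h above (the ElfStripper::Strip declaration continues below): the small-array path is essentially an FNV-style hash (offset basis 0x811c9dc5, multiply by the prime 16777619, then xor in each byte), and both paths finish with a Jenkins-style avalanche mix so that similar byte arrays spread across buckets. A self-contained sketch of that small-array path, illustrative only and not the class from the patch:

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

size_t FnvStyleHash(const std::vector<uint8_t>& array) {
  size_t hash = 0x811c9dc5;
  for (uint8_t b : array) {
    hash = (hash * 16777619) ^ b;  // FNV-1-style step: multiply by the prime, then xor in the byte.
  }
  // Final avalanche mix, as in the patch.
  hash += hash << 13;
  hash ^= hash >> 7;
  hash += hash << 3;
  hash ^= hash >> 17;
  hash += hash << 5;
  return hash;
}

int main() {
  std::vector<uint8_t> code = {0x2d, 0xe9, 0xf0, 0x4f};  // arbitrary example bytes
  std::cout << FnvStyleHash(code) << "\n";
  return 0;
}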
- static bool Strip(File* file); + static bool Strip(File* file, std::string* error_msg); private: DISALLOW_IMPLICIT_CONSTRUCTORS(ElfStripper); diff --git a/compiler/elf_writer.cc b/compiler/elf_writer.cc index d3c13dd791..0bfe4a424c 100644 --- a/compiler/elf_writer.cc +++ b/compiler/elf_writer.cc @@ -47,8 +47,9 @@ llvm::ELF::Elf32_Addr ElfWriter::GetOatDataAddress(ElfFile* elf_file) { void ElfWriter::GetOatElfInformation(File* file, size_t& oat_loaded_size, size_t& oat_data_offset) { - UniquePtr<ElfFile> elf_file(ElfFile::Open(file, false, false)); - CHECK(elf_file.get() != NULL); + std::string error_msg; + UniquePtr<ElfFile> elf_file(ElfFile::Open(file, false, false, &error_msg)); + CHECK(elf_file.get() != NULL) << error_msg; oat_loaded_size = elf_file->GetLoadedSize(); CHECK_NE(0U, oat_loaded_size); diff --git a/compiler/elf_writer_mclinker.cc b/compiler/elf_writer_mclinker.cc index e496ace27a..8e19ef6195 100644 --- a/compiler/elf_writer_mclinker.cc +++ b/compiler/elf_writer_mclinker.cc @@ -153,8 +153,9 @@ void ElfWriterMclinker::Init() { void ElfWriterMclinker::AddOatInput(std::vector<uint8_t>& oat_contents) { // Add an artificial memory input. Based on LinkerTest. - UniquePtr<OatFile> oat_file(OatFile::OpenMemory(oat_contents, elf_file_->GetPath())); - CHECK(oat_file.get() != NULL) << elf_file_->GetPath(); + std::string error_msg; + UniquePtr<OatFile> oat_file(OatFile::OpenMemory(oat_contents, elf_file_->GetPath(), &error_msg)); + CHECK(oat_file.get() != NULL) << elf_file_->GetPath() << ": " << error_msg; const char* oat_data_start = reinterpret_cast<const char*>(&oat_file->GetOatHeader()); const size_t oat_data_length = oat_file->GetOatHeader().GetExecutableOffset(); @@ -344,8 +345,9 @@ bool ElfWriterMclinker::Link() { #if defined(ART_USE_PORTABLE_COMPILER) void ElfWriterMclinker::FixupOatMethodOffsets(const std::vector<const DexFile*>& dex_files) { - UniquePtr<ElfFile> elf_file(ElfFile::Open(elf_file_, true, false)); - CHECK(elf_file.get() != NULL) << elf_file_->GetPath(); + std::string error_msg; + UniquePtr<ElfFile> elf_file(ElfFile::Open(elf_file_, true, false, &error_msg)); + CHECK(elf_file.get() != NULL) << elf_file_->GetPath() << ": " << error_msg; llvm::ELF::Elf32_Addr oatdata_address = GetOatDataAddress(elf_file.get()); DexMethodIterator it(dex_files); diff --git a/compiler/elf_writer_test.cc b/compiler/elf_writer_test.cc index ffe1f72926..eca67a8a6e 100644 --- a/compiler/elf_writer_test.cc +++ b/compiler/elf_writer_test.cc @@ -65,23 +65,26 @@ TEST_F(ElfWriterTest, dlsym) { UniquePtr<File> file(OS::OpenFileForReading(elf_filename.c_str())); ASSERT_TRUE(file.get() != NULL); { - UniquePtr<ElfFile> ef(ElfFile::Open(file.get(), false, false)); - CHECK(ef.get() != NULL); + std::string error_msg; + UniquePtr<ElfFile> ef(ElfFile::Open(file.get(), false, false, &error_msg)); + CHECK(ef.get() != nullptr) << error_msg; EXPECT_ELF_FILE_ADDRESS(ef, dl_oatdata, "oatdata", false); EXPECT_ELF_FILE_ADDRESS(ef, dl_oatexec, "oatexec", false); EXPECT_ELF_FILE_ADDRESS(ef, dl_oatlastword, "oatlastword", false); } { - UniquePtr<ElfFile> ef(ElfFile::Open(file.get(), false, false)); - CHECK(ef.get() != NULL); + std::string error_msg; + UniquePtr<ElfFile> ef(ElfFile::Open(file.get(), false, false, &error_msg)); + CHECK(ef.get() != nullptr) << error_msg; EXPECT_ELF_FILE_ADDRESS(ef, dl_oatdata, "oatdata", true); EXPECT_ELF_FILE_ADDRESS(ef, dl_oatexec, "oatexec", true); EXPECT_ELF_FILE_ADDRESS(ef, dl_oatlastword, "oatlastword", true); } { - UniquePtr<ElfFile> ef(ElfFile::Open(file.get(), false, true)); 
- CHECK(ef.get() != NULL); - ef->Load(false); + std::string error_msg; + UniquePtr<ElfFile> ef(ElfFile::Open(file.get(), false, true, &error_msg)); + CHECK(ef.get() != nullptr) << error_msg; + CHECK(ef->Load(false, &error_msg)) << error_msg; EXPECT_EQ(dl_oatdata, ef->FindDynamicSymbolAddress("oatdata")); EXPECT_EQ(dl_oatexec, ef->FindDynamicSymbolAddress("oatexec")); EXPECT_EQ(dl_oatlastword, ef->FindDynamicSymbolAddress("oatlastword")); diff --git a/compiler/image_test.cc b/compiler/image_test.cc index 6464a4c78e..a8b7c881f4 100644 --- a/compiler/image_test.cc +++ b/compiler/image_test.cc @@ -23,6 +23,8 @@ #include "compiler/oat_writer.h" #include "gc/space/image_space.h" #include "image.h" +#include "lock_word.h" +#include "mirror/object-inl.h" #include "signal_catcher.h" #include "UniquePtr.h" #include "utils.h" @@ -110,8 +112,11 @@ TEST_F(ImageTest, WriteRead) { runtime_.reset(); java_lang_dex_file_ = NULL; - UniquePtr<const DexFile> dex(DexFile::Open(GetLibCoreDexFileName(), GetLibCoreDexFileName())); - ASSERT_TRUE(dex.get() != NULL); + std::string error_msg; + UniquePtr<const DexFile> dex(DexFile::Open(GetLibCoreDexFileName().c_str(), + GetLibCoreDexFileName().c_str(), + &error_msg)); + ASSERT_TRUE(dex.get() != nullptr) << error_msg; // Remove the reservation of the memory for use to load the image. UnreserveImageSpace(); @@ -158,7 +163,7 @@ TEST_F(ImageTest, WriteRead) { // non image classes should be in a space after the image. EXPECT_GT(reinterpret_cast<byte*>(klass), image_end) << descriptor; } - EXPECT_TRUE(Monitor::IsValidLockWord(*klass->GetRawLockWordAddress())); + EXPECT_TRUE(Monitor::IsValidLockWord(klass->GetLockWord())); } } diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc index f82c6fb40f..75be2c9c43 100644 --- a/compiler/image_writer.cc +++ b/compiler/image_writer.cc @@ -36,6 +36,7 @@ #include "globals.h" #include "image.h" #include "intern_table.h" +#include "lock_word.h" #include "mirror/art_field-inl.h" #include "mirror/art_method-inl.h" #include "mirror/array-inl.h" @@ -82,12 +83,14 @@ bool ImageWriter::Write(const std::string& image_filename, LOG(ERROR) << "Failed to open oat file " << oat_filename << " for " << oat_location; return false; } - oat_file_ = OatFile::OpenWritable(oat_file.get(), oat_location); - if (oat_file_ == NULL) { - LOG(ERROR) << "Failed to open writable oat file " << oat_filename << " for " << oat_location; + std::string error_msg; + oat_file_ = OatFile::OpenWritable(oat_file.get(), oat_location, &error_msg); + if (oat_file_ == nullptr) { + LOG(ERROR) << "Failed to open writable oat file " << oat_filename << " for " << oat_location + << ": " << error_msg; return false; } - class_linker->RegisterOatFile(*oat_file_); + CHECK_EQ(class_linker->RegisterOatFile(oat_file_), oat_file_); interpreter_to_interpreter_bridge_offset_ = oat_file_->GetOatHeader().GetInterpreterToInterpreterBridgeOffset(); @@ -96,11 +99,15 @@ bool ImageWriter::Write(const std::string& image_filename, jni_dlsym_lookup_offset_ = oat_file_->GetOatHeader().GetJniDlsymLookupOffset(); + portable_imt_conflict_trampoline_offset_ = + oat_file_->GetOatHeader().GetPortableImtConflictTrampolineOffset(); portable_resolution_trampoline_offset_ = oat_file_->GetOatHeader().GetPortableResolutionTrampolineOffset(); portable_to_interpreter_bridge_offset_ = oat_file_->GetOatHeader().GetPortableToInterpreterBridgeOffset(); + quick_imt_conflict_trampoline_offset_ = + oat_file_->GetOatHeader().GetQuickImtConflictTrampolineOffset(); quick_resolution_trampoline_offset_ = 
oat_file_->GetOatHeader().GetQuickResolutionTrampolineOffset(); quick_to_interpreter_bridge_offset_ = @@ -192,9 +199,10 @@ bool ImageWriter::AllocMemory() { int prot = PROT_READ | PROT_WRITE; size_t length = RoundUp(size, kPageSize); - image_.reset(MemMap::MapAnonymous("image writer image", NULL, length, prot)); - if (image_.get() == NULL) { - LOG(ERROR) << "Failed to allocate memory for image file generation"; + std::string error_msg; + image_.reset(MemMap::MapAnonymous("image writer image", NULL, length, prot, &error_msg)); + if (UNLIKELY(image_.get() == nullptr)) { + LOG(ERROR) << "Failed to allocate memory for image file generation: " << error_msg; return false; } return true; @@ -387,6 +395,8 @@ ObjectArray<Object>* ImageWriter::CreateImageRoots() const { ObjectArray<Object>::Alloc(self, object_array_class, ImageHeader::kImageRootsMax)); image_roots->Set(ImageHeader::kResolutionMethod, runtime->GetResolutionMethod()); + image_roots->Set(ImageHeader::kImtConflictMethod, runtime->GetImtConflictMethod()); + image_roots->Set(ImageHeader::kDefaultImt, runtime->GetDefaultImt()); image_roots->Set(ImageHeader::kCalleeSaveMethod, runtime->GetCalleeSaveMethod(Runtime::kSaveAll)); image_roots->Set(ImageHeader::kRefsOnlySaveMethod, @@ -486,7 +496,35 @@ void ImageWriter::CopyAndFixupObjectsCallback(Object* object, void* arg) { DCHECK_LT(offset + n, image_writer->image_->Size()); memcpy(dst, src, n); Object* copy = reinterpret_cast<Object*>(dst); - copy->SetField32(Object::MonitorOffset(), 0, false); // We may have inflated the lock during compilation. + // Write in a hash code of objects which have inflated monitors or a hash code in their monitor + // word. + LockWord lw(copy->GetLockWord()); + switch (lw.GetState()) { + case LockWord::kFatLocked: { + Monitor* monitor = lw.FatLockMonitor(); + CHECK(monitor != nullptr); + CHECK(!monitor->IsLocked()); + if (monitor->HasHashCode()) { + copy->SetLockWord(LockWord::FromHashCode(monitor->GetHashCode())); + } else { + copy->SetLockWord(LockWord()); + } + break; + } + case LockWord::kThinLocked: { + LOG(FATAL) << "Thin locked object " << obj << " found during object copy"; + break; + } + case LockWord::kUnlocked: + break; + case LockWord::kHashCode: + // Do nothing since we can just keep the same hash code. + CHECK_NE(lw.GetHashCode(), 0); + break; + default: + LOG(FATAL) << "Unreachable."; + break; + } image_writer->FixupObject(obj, copy); } @@ -524,6 +562,12 @@ void ImageWriter::FixupMethod(const ArtMethod* orig, ArtMethod* copy) { #else copy->SetEntryPointFromCompiledCode(GetOatAddress(quick_resolution_trampoline_offset_)); #endif + } else if (UNLIKELY(orig == Runtime::Current()->GetImtConflictMethod())) { +#if defined(ART_USE_PORTABLE_COMPILER) + copy->SetEntryPointFromCompiledCode(GetOatAddress(portable_imt_conflict_trampoline_offset_)); +#else + copy->SetEntryPointFromCompiledCode(GetOatAddress(quick_imt_conflict_trampoline_offset_)); +#endif } else { // We assume all methods have code. If they don't currently then we set them to the use the // resolution trampoline. 
Abstract methods never have code and so we need to make sure their diff --git a/compiler/image_writer.h b/compiler/image_writer.h index 0d85f36a5b..0b408e85cc 100644 --- a/compiler/image_writer.h +++ b/compiler/image_writer.h @@ -40,7 +40,8 @@ class ImageWriter { explicit ImageWriter(const CompilerDriver& compiler_driver) : compiler_driver_(compiler_driver), oat_file_(NULL), image_end_(0), image_begin_(NULL), oat_data_begin_(NULL), interpreter_to_interpreter_bridge_offset_(0), - interpreter_to_compiled_code_bridge_offset_(0), portable_resolution_trampoline_offset_(0), + interpreter_to_compiled_code_bridge_offset_(0), portable_imt_conflict_trampoline_offset_(0), + portable_resolution_trampoline_offset_(0), quick_imt_conflict_trampoline_offset_(0), quick_resolution_trampoline_offset_(0) {} ~ImageWriter() {} @@ -204,8 +205,10 @@ class ImageWriter { uint32_t interpreter_to_interpreter_bridge_offset_; uint32_t interpreter_to_compiled_code_bridge_offset_; uint32_t jni_dlsym_lookup_offset_; + uint32_t portable_imt_conflict_trampoline_offset_; uint32_t portable_resolution_trampoline_offset_; uint32_t portable_to_interpreter_bridge_offset_; + uint32_t quick_imt_conflict_trampoline_offset_; uint32_t quick_resolution_trampoline_offset_; uint32_t quick_to_interpreter_bridge_offset_; diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc index a653ab42a9..667b913039 100644 --- a/compiler/jni/jni_compiler_test.cc +++ b/compiler/jni/jni_compiler_test.cc @@ -152,7 +152,7 @@ TEST_F(JniCompilerTest, CompileAndRunIntMethodThroughStub) { std::string reason; ASSERT_TRUE( Runtime::Current()->GetJavaVM()->LoadNativeLibrary("", soa.Decode<mirror::ClassLoader*>(class_loader_), - reason)) << reason; + &reason)) << reason; jint result = env_->CallNonvirtualIntMethod(jobj_, jklass_, jmethod_, 24); EXPECT_EQ(25, result); @@ -167,7 +167,7 @@ TEST_F(JniCompilerTest, CompileAndRunStaticIntMethodThroughStub) { std::string reason; ASSERT_TRUE( Runtime::Current()->GetJavaVM()->LoadNativeLibrary("", soa.Decode<mirror::ClassLoader*>(class_loader_), - reason)) << reason; + &reason)) << reason; jint result = env_->CallStaticIntMethod(jklass_, jmethod_, 42); EXPECT_EQ(43, result); diff --git a/compiler/jni/portable/jni_compiler.cc b/compiler/jni/portable/jni_compiler.cc index 43408a7d64..0c14346ad8 100644 --- a/compiler/jni/portable/jni_compiler.cc +++ b/compiler/jni/portable/jni_compiler.cc @@ -50,9 +50,9 @@ using ::art::llvm::runtime_support::JniMethodStartSynchronized; using ::art::llvm::runtime_support::RuntimeId; JniCompiler::JniCompiler(LlvmCompilationUnit* cunit, - CompilerDriver& driver, + CompilerDriver* driver, const DexCompilationUnit* dex_compilation_unit) - : cunit_(cunit), driver_(&driver), module_(cunit_->GetModule()), + : cunit_(cunit), driver_(driver), module_(cunit_->GetModule()), context_(cunit_->GetLLVMContext()), irb_(*cunit_->GetIRBuilder()), dex_compilation_unit_(dex_compilation_unit), func_(NULL), elf_func_idx_(0) { diff --git a/compiler/jni/portable/jni_compiler.h b/compiler/jni/portable/jni_compiler.h index d20c63bc1e..ffabfe61c2 100644 --- a/compiler/jni/portable/jni_compiler.h +++ b/compiler/jni/portable/jni_compiler.h @@ -54,7 +54,7 @@ class IRBuilder; class JniCompiler { public: JniCompiler(LlvmCompilationUnit* cunit, - CompilerDriver& driver, + CompilerDriver* driver, const DexCompilationUnit* dex_compilation_unit); CompiledMethod* Compile(); @@ -67,7 +67,7 @@ class JniCompiler { private: LlvmCompilationUnit* cunit_; - CompilerDriver* driver_; + CompilerDriver* const 
driver_; ::llvm::Module* module_; ::llvm::LLVMContext* context_; diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc index 1417fb9e40..1c9aed83c3 100644 --- a/compiler/jni/quick/jni_compiler.cc +++ b/compiler/jni/quick/jni_compiler.cc @@ -24,7 +24,6 @@ #include "compiled_method.h" #include "dex_file-inl.h" #include "driver/compiler_driver.h" -#include "disassembler.h" #include "entrypoints/quick/quick_entrypoints.h" #include "jni_internal.h" #include "utils/assembler.h" @@ -82,10 +81,8 @@ CompiledMethod* ArtJniCompileMethodInternal(CompilerDriver& compiler, UniquePtr<JniCallingConvention> end_jni_conv( JniCallingConvention::Create(is_static, is_synchronized, jni_end_shorty, instruction_set)); - // Assembler that holds generated instructions UniquePtr<Assembler> jni_asm(Assembler::Create(instruction_set)); - bool should_disassemble = false; // Offsets into data structures // TODO: if cross compiling these offsets are for the host not the target @@ -356,9 +353,9 @@ CompiledMethod* ArtJniCompileMethodInternal(CompilerDriver& compiler, // 15. Process pending exceptions from JNI call or monitor exit. __ ExceptionPoll(main_jni_conv->InterproceduralScratchRegister(), 0); - // 16. Remove activation - no need to restore callee save registers because we didn't clobber + // 16. Remove activation - need to restore callee save registers since the GC may have changed // them. - __ RemoveFrame(frame_size, std::vector<ManagedRegister>()); + __ RemoveFrame(frame_size, callee_save_regs); // 17. Finalize code generation __ EmitSlowPaths(); @@ -366,10 +363,6 @@ CompiledMethod* ArtJniCompileMethodInternal(CompilerDriver& compiler, std::vector<uint8_t> managed_code(cs); MemoryRegion code(&managed_code[0], managed_code.size()); __ FinalizeInstructions(code); - if (should_disassemble) { - UniquePtr<Disassembler> disassembler(Disassembler::Create(instruction_set)); - disassembler->Dump(LOG(INFO), &managed_code[0], &managed_code[managed_code.size()]); - } return new CompiledMethod(compiler, instruction_set, managed_code, diff --git a/compiler/llvm/compiler_llvm.cc b/compiler/llvm/compiler_llvm.cc index a917cdc6de..d59afd48b7 100644 --- a/compiler/llvm/compiler_llvm.cc +++ b/compiler/llvm/compiler_llvm.cc @@ -26,6 +26,7 @@ #include "ir_builder.h" #include "jni/portable/jni_compiler.h" #include "llvm_compilation_unit.h" +#include "thread-inl.h" #include "utils_llvm.h" #include "verifier/method_verifier.h" @@ -164,7 +165,7 @@ CompileNativeMethod(DexCompilationUnit* dex_compilation_unit) { UniquePtr<LlvmCompilationUnit> cunit(AllocateCompilationUnit()); UniquePtr<JniCompiler> jni_compiler( - new JniCompiler(cunit.get(), *compiler_driver_, dex_compilation_unit)); + new JniCompiler(cunit.get(), compiler_driver_, dex_compilation_unit)); return jni_compiler->Compile(); } diff --git a/compiler/llvm/gbc_expander.cc b/compiler/llvm/gbc_expander.cc index 4f6fa0a2df..b206a25f25 100644 --- a/compiler/llvm/gbc_expander.cc +++ b/compiler/llvm/gbc_expander.cc @@ -846,10 +846,10 @@ llvm::Value* GBCExpanderPass::EmitInvoke(llvm::CallInst& call_inst) { uintptr_t direct_code = 0; uintptr_t direct_method = 0; bool is_fast_path = driver_->ComputeInvokeInfo(dex_compilation_unit_, dex_pc, - invoke_type, target_method, - vtable_idx, - direct_code, direct_method, - true); + true, true, + &invoke_type, &target_method, + &vtable_idx, + &direct_code, &direct_method); // Load the method object llvm::Value* callee_method_object_addr = NULL; @@ -1630,7 +1630,7 @@ llvm::Value* 
GBCExpanderPass::Expand_HLIGet(llvm::CallInst& call_inst, int field_offset; bool is_volatile; bool is_fast_path = driver_->ComputeInstanceFieldInfo( - field_idx, dex_compilation_unit_, field_offset, is_volatile, false); + field_idx, dex_compilation_unit_, false, &field_offset, &is_volatile); if (!is_fast_path) { llvm::Function* runtime_func; @@ -1692,7 +1692,7 @@ void GBCExpanderPass::Expand_HLIPut(llvm::CallInst& call_inst, int field_offset; bool is_volatile; bool is_fast_path = driver_->ComputeInstanceFieldInfo( - field_idx, dex_compilation_unit_, field_offset, is_volatile, true); + field_idx, dex_compilation_unit_, true, &field_offset, &is_volatile); if (!is_fast_path) { llvm::Function* runtime_func; @@ -1897,8 +1897,8 @@ llvm::Value* GBCExpanderPass::Expand_HLSget(llvm::CallInst& call_inst, bool is_volatile; bool is_fast_path = driver_->ComputeStaticFieldInfo( - field_idx, dex_compilation_unit_, field_offset, ssb_index, - is_referrers_class, is_volatile, false); + field_idx, dex_compilation_unit_, false, + &field_offset, &ssb_index, &is_referrers_class, &is_volatile); llvm::Value* static_field_value; @@ -1981,8 +1981,8 @@ void GBCExpanderPass::Expand_HLSput(llvm::CallInst& call_inst, bool is_volatile; bool is_fast_path = driver_->ComputeStaticFieldInfo( - field_idx, dex_compilation_unit_, field_offset, ssb_index, - is_referrers_class, is_volatile, true); + field_idx, dex_compilation_unit_, true, + &field_offset, &ssb_index, &is_referrers_class, &is_volatile); if (!is_fast_path) { llvm::Function* runtime_func; diff --git a/compiler/llvm/llvm_compilation_unit.cc b/compiler/llvm/llvm_compilation_unit.cc index 139100bee9..feb495e35f 100644 --- a/compiler/llvm/llvm_compilation_unit.cc +++ b/compiler/llvm/llvm_compilation_unit.cc @@ -82,7 +82,6 @@ #include "ir_builder.h" #include "os.h" #include "runtime_support_builder_arm.h" -#include "runtime_support_builder_thumb2.h" #include "runtime_support_builder_x86.h" #include "utils_llvm.h" @@ -118,12 +117,10 @@ LlvmCompilationUnit::LlvmCompilationUnit(const CompilerLLVM* compiler_llvm, size default: runtime_support_.reset(new RuntimeSupportBuilder(*context_, *module_, *irb_)); break; + case kThumb2: case kArm: runtime_support_.reset(new RuntimeSupportBuilderARM(*context_, *module_, *irb_)); break; - case kThumb2: - runtime_support_.reset(new RuntimeSupportBuilderThumb2(*context_, *module_, *irb_)); - break; case kX86: runtime_support_.reset(new RuntimeSupportBuilderX86(*context_, *module_, *irb_)); break; @@ -214,6 +211,7 @@ bool LlvmCompilationUnit::MaterializeToRawOStream(::llvm::raw_ostream& out_strea ::llvm::TargetOptions target_options; target_options.FloatABIType = ::llvm::FloatABI::Soft; target_options.NoFramePointerElim = true; + target_options.NoFramePointerElimNonLeaf = true; target_options.UseSoftFloat = false; target_options.EnableFastISel = false; @@ -257,7 +255,7 @@ bool LlvmCompilationUnit::MaterializeToRawOStream(::llvm::raw_ostream& out_strea ::llvm::OwningPtr< ::llvm::tool_output_file> out_file( new ::llvm::tool_output_file(bitcode_filename_.c_str(), errmsg, - ::llvm::sys::fs::F_Binary)); + ::llvm::raw_fd_ostream::F_Binary)); if (!errmsg.empty()) { @@ -277,6 +275,7 @@ bool LlvmCompilationUnit::MaterializeToRawOStream(::llvm::raw_ostream& out_strea // pm_builder.Inliner = ::llvm::createAlwaysInlinerPass(); // pm_builder.Inliner = ::llvm::createPartialInliningPass(); pm_builder.OptLevel = 3; + pm_builder.DisableSimplifyLibCalls = 1; pm_builder.DisableUnitAtATime = 1; pm_builder.populateFunctionPassManager(fpm); 
pm_builder.populateModulePassManager(pm); diff --git a/compiler/llvm/runtime_support_builder.cc b/compiler/llvm/runtime_support_builder.cc index 24e283d309..c825fbf190 100644 --- a/compiler/llvm/runtime_support_builder.cc +++ b/compiler/llvm/runtime_support_builder.cc @@ -164,89 +164,13 @@ void RuntimeSupportBuilder::EmitTestSuspend() { /* Monitor */ void RuntimeSupportBuilder::EmitLockObject(::llvm::Value* object) { - Value* monitor = - irb_.LoadFromObjectOffset(object, - mirror::Object::MonitorOffset().Int32Value(), - irb_.getJIntTy(), - kTBAARuntimeInfo); - - Value* real_monitor = - irb_.CreateAnd(monitor, ~(LW_HASH_STATE_MASK << LW_HASH_STATE_SHIFT)); - - // Is thin lock, unheld and not recursively acquired. - Value* unheld = irb_.CreateICmpEQ(real_monitor, irb_.getInt32(0)); - - Function* parent_func = irb_.GetInsertBlock()->getParent(); - BasicBlock* bb_fast = BasicBlock::Create(context_, "lock_fast", parent_func); - BasicBlock* bb_slow = BasicBlock::Create(context_, "lock_slow", parent_func); - BasicBlock* bb_cont = BasicBlock::Create(context_, "lock_cont", parent_func); - irb_.CreateCondBr(unheld, bb_fast, bb_slow, kLikely); - - irb_.SetInsertPoint(bb_fast); - - // Calculate new monitor: new = old | (lock_id << LW_LOCK_OWNER_SHIFT) - Value* lock_id = - EmitLoadFromThreadOffset(Thread::ThinLockIdOffset().Int32Value(), - irb_.getInt32Ty(), kTBAARuntimeInfo); - - Value* owner = irb_.CreateShl(lock_id, LW_LOCK_OWNER_SHIFT); - Value* new_monitor = irb_.CreateOr(monitor, owner); - - // Atomically update monitor. - Value* old_monitor = - irb_.CompareExchangeObjectOffset(object, - mirror::Object::MonitorOffset().Int32Value(), - monitor, new_monitor, kTBAARuntimeInfo); - - Value* retry_slow_path = irb_.CreateICmpEQ(old_monitor, monitor); - irb_.CreateCondBr(retry_slow_path, bb_cont, bb_slow, kLikely); - - irb_.SetInsertPoint(bb_slow); Function* slow_func = GetRuntimeSupportFunction(runtime_support::LockObject); irb_.CreateCall2(slow_func, object, EmitGetCurrentThread()); - irb_.CreateBr(bb_cont); - - irb_.SetInsertPoint(bb_cont); } void RuntimeSupportBuilder::EmitUnlockObject(::llvm::Value* object) { - Value* lock_id = - EmitLoadFromThreadOffset(Thread::ThinLockIdOffset().Int32Value(), - irb_.getJIntTy(), - kTBAARuntimeInfo); - Value* monitor = - irb_.LoadFromObjectOffset(object, - mirror::Object::MonitorOffset().Int32Value(), - irb_.getJIntTy(), - kTBAARuntimeInfo); - - Value* my_monitor = irb_.CreateShl(lock_id, LW_LOCK_OWNER_SHIFT); - Value* hash_state = irb_.CreateAnd(monitor, (LW_HASH_STATE_MASK << LW_HASH_STATE_SHIFT)); - Value* real_monitor = irb_.CreateAnd(monitor, ~(LW_HASH_STATE_MASK << LW_HASH_STATE_SHIFT)); - - // Is thin lock, held by us and not recursively acquired - Value* is_fast_path = irb_.CreateICmpEQ(real_monitor, my_monitor); - - Function* parent_func = irb_.GetInsertBlock()->getParent(); - BasicBlock* bb_fast = BasicBlock::Create(context_, "unlock_fast", parent_func); - BasicBlock* bb_slow = BasicBlock::Create(context_, "unlock_slow", parent_func); - BasicBlock* bb_cont = BasicBlock::Create(context_, "unlock_cont", parent_func); - irb_.CreateCondBr(is_fast_path, bb_fast, bb_slow, kLikely); - - irb_.SetInsertPoint(bb_fast); - // Set all bits to zero (except hash state) - irb_.StoreToObjectOffset(object, - mirror::Object::MonitorOffset().Int32Value(), - hash_state, - kTBAARuntimeInfo); - irb_.CreateBr(bb_cont); - - irb_.SetInsertPoint(bb_slow); Function* slow_func = GetRuntimeSupportFunction(runtime_support::UnlockObject); irb_.CreateCall2(slow_func, object, 
EmitGetCurrentThread()); - irb_.CreateBr(bb_cont); - - irb_.SetInsertPoint(bb_cont); } diff --git a/compiler/llvm/runtime_support_builder.h b/compiler/llvm/runtime_support_builder.h index e92ac0a908..898611af75 100644 --- a/compiler/llvm/runtime_support_builder.h +++ b/compiler/llvm/runtime_support_builder.h @@ -64,8 +64,8 @@ class RuntimeSupportBuilder { virtual void EmitTestSuspend(); /* Monitor */ - virtual void EmitLockObject(::llvm::Value* object); - virtual void EmitUnlockObject(::llvm::Value* object); + void EmitLockObject(::llvm::Value* object); + void EmitUnlockObject(::llvm::Value* object); /* MarkGCCard */ virtual void EmitMarkGCCard(::llvm::Value* value, ::llvm::Value* target_addr); diff --git a/compiler/llvm/runtime_support_builder_arm.cc b/compiler/llvm/runtime_support_builder_arm.cc index 569d825272..cad46247fd 100644 --- a/compiler/llvm/runtime_support_builder_arm.cc +++ b/compiler/llvm/runtime_support_builder_arm.cc @@ -116,24 +116,5 @@ Value* RuntimeSupportBuilderARM::EmitSetCurrentThread(Value* thread) { return old_thread_register; } - -/* Monitor */ - -void RuntimeSupportBuilderARM::EmitLockObject(Value* object) { - RuntimeSupportBuilder::EmitLockObject(object); - FunctionType* func_ty = FunctionType::get(/*Result=*/Type::getVoidTy(context_), - /*isVarArg=*/false); - InlineAsm* func = InlineAsm::get(func_ty, "dmb sy", "", true); - irb_.CreateCall(func); -} - -void RuntimeSupportBuilderARM::EmitUnlockObject(Value* object) { - RuntimeSupportBuilder::EmitUnlockObject(object); - FunctionType* func_ty = FunctionType::get(/*Result=*/Type::getVoidTy(context_), - /*isVarArg=*/false); - InlineAsm* func = InlineAsm::get(func_ty, "dmb sy", "", true); - irb_.CreateCall(func); -} - } // namespace llvm } // namespace art diff --git a/compiler/llvm/runtime_support_builder_arm.h b/compiler/llvm/runtime_support_builder_arm.h index 5a353d7f30..0d01509be0 100644 --- a/compiler/llvm/runtime_support_builder_arm.h +++ b/compiler/llvm/runtime_support_builder_arm.h @@ -34,10 +34,6 @@ class RuntimeSupportBuilderARM : public RuntimeSupportBuilder { virtual void EmitStoreToThreadOffset(int64_t offset, ::llvm::Value* value, TBAASpecialType s_ty); virtual ::llvm::Value* EmitSetCurrentThread(::llvm::Value* thread); - - /* Monitor */ - virtual void EmitLockObject(::llvm::Value* object); - virtual void EmitUnlockObject(::llvm::Value* object); }; } // namespace llvm diff --git a/compiler/llvm/runtime_support_builder_thumb2.cc b/compiler/llvm/runtime_support_builder_thumb2.cc deleted file mode 100644 index eff29c8b04..0000000000 --- a/compiler/llvm/runtime_support_builder_thumb2.cc +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (C) 2012 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "runtime_support_builder_thumb2.h" - -#include "ir_builder.h" -#include "mirror/object.h" -#include "monitor.h" -#include "thread.h" -#include "utils_llvm.h" - -#include <llvm/IR/DerivedTypes.h> -#include <llvm/IR/Function.h> -#include <llvm/IR/InlineAsm.h> -#include <llvm/IR/Module.h> -#include <llvm/IR/Type.h> - -#include <inttypes.h> -#include <vector> - -using ::llvm::BasicBlock; -using ::llvm::Function; -using ::llvm::FunctionType; -using ::llvm::InlineAsm; -using ::llvm::Type; -using ::llvm::Value; - -namespace art { -namespace llvm { - - -void RuntimeSupportBuilderThumb2::EmitLockObject(Value* object) { - FunctionType* func_ty = FunctionType::get(/*Result=*/irb_.getInt32Ty(), - /*Params=*/irb_.getJObjectTy(), - /*isVarArg=*/false); - // $0: result - // $1: object - // $2: temp - // $3: temp - std::string asms; - StringAppendF(&asms, "add $3, $1, #%" PRId32 "\n", mirror::Object::MonitorOffset().Int32Value()); - StringAppendF(&asms, "ldr $2, [r9, #%" PRId32 "]\n", Thread::ThinLockIdOffset().Int32Value()); - StringAppendF(&asms, "ldrex $0, [$3]\n"); - StringAppendF(&asms, "lsl $2, $2, %d\n", LW_LOCK_OWNER_SHIFT); - StringAppendF(&asms, "bfi $2, $0, #0, #%d\n", LW_LOCK_OWNER_SHIFT - 1); - StringAppendF(&asms, "bfc $0, #%d, #%d\n", LW_HASH_STATE_SHIFT, LW_LOCK_OWNER_SHIFT - 1); - StringAppendF(&asms, "cmp $0, #0\n"); - StringAppendF(&asms, "it eq\n"); - StringAppendF(&asms, "strexeq $0, $2, [$3]\n"); - - InlineAsm* func = InlineAsm::get(func_ty, asms, "=&l,l,~l,~l", true); - - Value* retry_slow_path = irb_.CreateCall(func, object); - retry_slow_path = irb_.CreateICmpNE(retry_slow_path, irb_.getJInt(0)); - - Function* parent_func = irb_.GetInsertBlock()->getParent(); - BasicBlock* basic_block_lock = BasicBlock::Create(context_, "lock", parent_func); - BasicBlock* basic_block_cont = BasicBlock::Create(context_, "lock_cont", parent_func); - irb_.CreateCondBr(retry_slow_path, basic_block_lock, basic_block_cont, kUnlikely); - - irb_.SetInsertPoint(basic_block_lock); - Function* slow_func = GetRuntimeSupportFunction(runtime_support::LockObject); - irb_.CreateCall2(slow_func, object, EmitGetCurrentThread()); - irb_.CreateBr(basic_block_cont); - - irb_.SetInsertPoint(basic_block_cont); - { // Memory barrier - FunctionType* asm_ty = FunctionType::get(/*Result=*/Type::getVoidTy(context_), - /*isVarArg=*/false); - InlineAsm* func = InlineAsm::get(asm_ty, "dmb sy", "", true); - irb_.CreateCall(func); - } -} - - -} // namespace llvm -} // namespace art diff --git a/compiler/llvm/runtime_support_builder_thumb2.h b/compiler/llvm/runtime_support_builder_thumb2.h deleted file mode 100644 index c47a2744ef..0000000000 --- a/compiler/llvm/runtime_support_builder_thumb2.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (C) 2012 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef ART_COMPILER_LLVM_RUNTIME_SUPPORT_BUILDER_THUMB2_H_ -#define ART_COMPILER_LLVM_RUNTIME_SUPPORT_BUILDER_THUMB2_H_ - -#include "runtime_support_builder_arm.h" - -namespace art { -namespace llvm { - -class RuntimeSupportBuilderThumb2 : public RuntimeSupportBuilderARM { - public: - RuntimeSupportBuilderThumb2(::llvm::LLVMContext& context, ::llvm::Module& module, IRBuilder& irb) - : RuntimeSupportBuilderARM(context, module, irb) {} - - /* Monitor */ - virtual void EmitLockObject(::llvm::Value* object); -}; - -} // namespace llvm -} // namespace art - -#endif // ART_COMPILER_LLVM_RUNTIME_SUPPORT_BUILDER_THUMB2_H_ diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc index bfba9c0c0c..6213b45c41 100644 --- a/compiler/oat_test.cc +++ b/compiler/oat_test.cc @@ -28,6 +28,8 @@ namespace art { class OatTest : public CommonTest { protected: + static const bool kCompile = false; // DISABLED_ due to the time to compile libcore + void CheckMethod(mirror::ArtMethod* method, const OatFile::OatMethod& oat_method, const DexFile* dex_file) @@ -40,7 +42,7 @@ class OatTest : public CommonTest { EXPECT_TRUE(oat_method.GetCode() == NULL) << PrettyMethod(method) << " " << oat_method.GetCode(); #if !defined(ART_USE_PORTABLE_COMPILER) - EXPECT_EQ(oat_method.GetFrameSizeInBytes(), static_cast<uint32_t>(kStackAlignment)); + EXPECT_EQ(oat_method.GetFrameSizeInBytes(), kCompile ? kStackAlignment : 0); EXPECT_EQ(oat_method.GetCoreSpillMask(), 0U); EXPECT_EQ(oat_method.GetFpSpillMask(), 0U); #endif @@ -65,7 +67,6 @@ class OatTest : public CommonTest { }; TEST_F(OatTest, WriteRead) { - const bool compile = false; // DISABLED_ due to the time to compile libcore ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); // TODO: make selectable @@ -75,9 +76,12 @@ TEST_F(OatTest, WriteRead) { CompilerBackend compiler_backend = kQuick; #endif InstructionSet insn_set = kIsTargetBuild ? 
kThumb2 : kX86; - compiler_driver_.reset(new CompilerDriver(compiler_backend, insn_set, false, NULL, 2, true)); + + InstructionSetFeatures insn_features; + compiler_driver_.reset(new CompilerDriver(compiler_backend, insn_set, + insn_features, false, NULL, 2, true)); jobject class_loader = NULL; - if (compile) { + if (kCompile) { base::TimingLogger timings("OatTest::WriteRead", false, false); compiler_driver_->CompileAll(class_loader, class_linker->GetBootClassPath(), timings); } @@ -96,37 +100,42 @@ TEST_F(OatTest, WriteRead) { tmp.GetFile()); ASSERT_TRUE(success); - if (compile) { // OatWriter strips the code, regenerate to compare + if (kCompile) { // OatWriter strips the code, regenerate to compare base::TimingLogger timings("CommonTest::WriteRead", false, false); compiler_driver_->CompileAll(class_loader, class_linker->GetBootClassPath(), timings); } - UniquePtr<OatFile> oat_file(OatFile::Open(tmp.GetFilename(), tmp.GetFilename(), NULL, false)); - ASSERT_TRUE(oat_file.get() != NULL); + std::string error_msg; + UniquePtr<OatFile> oat_file(OatFile::Open(tmp.GetFilename(), tmp.GetFilename(), NULL, false, + &error_msg)); + ASSERT_TRUE(oat_file.get() != nullptr) << error_msg; const OatHeader& oat_header = oat_file->GetOatHeader(); ASSERT_TRUE(oat_header.IsValid()); - ASSERT_EQ(2U, oat_header.GetDexFileCount()); // core and conscrypt + ASSERT_EQ(1U, oat_header.GetDexFileCount()); // core ASSERT_EQ(42U, oat_header.GetImageFileLocationOatChecksum()); ASSERT_EQ(4096U, oat_header.GetImageFileLocationOatDataBegin()); ASSERT_EQ("lue.art", oat_header.GetImageFileLocation()); const DexFile* dex_file = java_lang_dex_file_; uint32_t dex_file_checksum = dex_file->GetLocationChecksum(); - const OatFile::OatDexFile* oat_dex_file = oat_file->GetOatDexFile(dex_file->GetLocation(), + const OatFile::OatDexFile* oat_dex_file = oat_file->GetOatDexFile(dex_file->GetLocation().c_str(), &dex_file_checksum); + ASSERT_TRUE(oat_dex_file != nullptr); CHECK_EQ(dex_file->GetLocationChecksum(), oat_dex_file->GetDexFileLocationChecksum()); for (size_t i = 0; i < dex_file->NumClassDefs(); i++) { const DexFile::ClassDef& class_def = dex_file->GetClassDef(i); const byte* class_data = dex_file->GetClassData(class_def); - size_t num_virtual_methods =0; + size_t num_virtual_methods = 0; if (class_data != NULL) { ClassDataItemIterator it(*dex_file, class_data); num_virtual_methods = it.NumVirtualMethods(); } const char* descriptor = dex_file->GetClassDescriptor(class_def); + mirror::Class* klass = class_linker->FindClass(descriptor, NULL); UniquePtr<const OatFile::OatClass> oat_class(oat_dex_file->GetOatClass(i)); - - mirror::Class* klass = class_linker->FindClass(descriptor, NULL); + CHECK_EQ(mirror::Class::Status::kStatusNotReady, oat_class->GetStatus()) << descriptor; + CHECK_EQ(kCompile ? 
OatClassType::kOatClassAllCompiled : OatClassType::kOatClassNoneCompiled, + oat_class->GetType()) << descriptor; size_t method_index = 0; for (size_t i = 0; i < klass->NumDirectMethods(); i++, method_index++) { @@ -143,17 +152,19 @@ TEST_F(OatTest, WriteRead) { TEST_F(OatTest, OatHeaderSizeCheck) { // If this test is failing and you have to update these constants, // it is time to update OatHeader::kOatVersion - EXPECT_EQ(64U, sizeof(OatHeader)); + EXPECT_EQ(76U, sizeof(OatHeader)); EXPECT_EQ(28U, sizeof(OatMethodOffsets)); } TEST_F(OatTest, OatHeaderIsValid) { InstructionSet instruction_set = kX86; + InstructionSetFeatures instruction_set_features; std::vector<const DexFile*> dex_files; uint32_t image_file_location_oat_checksum = 0; uint32_t image_file_location_oat_begin = 0; const std::string image_file_location; OatHeader oat_header(instruction_set, + instruction_set_features, &dex_files, image_file_location_oat_checksum, image_file_location_oat_begin, diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc index f9d6e4192d..f3bb11272e 100644 --- a/compiler/oat_writer.cc +++ b/compiler/oat_writer.cc @@ -18,6 +18,7 @@ #include <zlib.h> +#include "base/bit_vector.h" #include "base/stl_util.h" #include "base/unix_file/fd_file.h" #include "class_linker.h" @@ -54,8 +55,10 @@ OatWriter::OatWriter(const std::vector<const DexFile*>& dex_files, size_interpreter_to_interpreter_bridge_(0), size_interpreter_to_compiled_code_bridge_(0), size_jni_dlsym_lookup_(0), + size_portable_imt_conflict_trampoline_(0), size_portable_resolution_trampoline_(0), size_portable_to_interpreter_bridge_(0), + size_quick_imt_conflict_trampoline_(0), size_quick_resolution_trampoline_(0), size_quick_to_interpreter_bridge_(0), size_trampoline_alignment_(0), @@ -70,7 +73,9 @@ OatWriter::OatWriter(const std::vector<const DexFile*>& dex_files, size_oat_dex_file_location_checksum_(0), size_oat_dex_file_offset_(0), size_oat_dex_file_methods_offsets_(0), + size_oat_class_type_(0), size_oat_class_status_(0), + size_oat_class_method_bitmaps_(0), size_oat_class_method_offsets_(0) { size_t offset = InitOatHeader(); offset = InitOatDexFiles(offset); @@ -93,6 +98,7 @@ OatWriter::~OatWriter() { size_t OatWriter::InitOatHeader() { // create the OatHeader oat_header_ = new OatHeader(compiler_driver_->GetInstructionSet(), + compiler_driver_->GetInstructionSetFeatures(), dex_files_, image_file_location_oat_checksum_, image_file_location_oat_begin_, @@ -142,12 +148,48 @@ size_t OatWriter::InitOatClasses(size_t offset) { oat_dex_files_[i]->methods_offsets_[class_def_index] = offset; const DexFile::ClassDef& class_def = dex_file->GetClassDef(class_def_index); const byte* class_data = dex_file->GetClassData(class_def); - uint32_t num_methods = 0; + uint32_t num_non_null_compiled_methods = 0; + UniquePtr<std::vector<CompiledMethod*> > compiled_methods(new std::vector<CompiledMethod*>()); if (class_data != NULL) { // ie not an empty class, such as a marker interface ClassDataItemIterator it(*dex_file, class_data); size_t num_direct_methods = it.NumDirectMethods(); size_t num_virtual_methods = it.NumVirtualMethods(); - num_methods = num_direct_methods + num_virtual_methods; + size_t num_methods = num_direct_methods + num_virtual_methods; + + // Fill in the compiled_methods_ array for methods that have a + // CompiledMethod. We track the number of non-null entries in + // num_non_null_compiled_methods since we only want to allocate + // OatMethodOffsets for the compiled methods. 
+ compiled_methods->reserve(num_methods); + while (it.HasNextStaticField()) { + it.Next(); + } + while (it.HasNextInstanceField()) { + it.Next(); + } + size_t class_def_method_index = 0; + while (it.HasNextDirectMethod()) { + uint32_t method_idx = it.GetMemberIndex(); + CompiledMethod* compiled_method = + compiler_driver_->GetCompiledMethod(MethodReference(dex_file, method_idx)); + compiled_methods->push_back(compiled_method); + if (compiled_method != NULL) { + num_non_null_compiled_methods++; + } + class_def_method_index++; + it.Next(); + } + while (it.HasNextVirtualMethod()) { + uint32_t method_idx = it.GetMemberIndex(); + CompiledMethod* compiled_method = + compiler_driver_->GetCompiledMethod(MethodReference(dex_file, method_idx)); + compiled_methods->push_back(compiled_method); + if (compiled_method != NULL) { + num_non_null_compiled_methods++; + } + class_def_method_index++; + it.Next(); + } } ClassReference class_ref(dex_file, class_def_index); @@ -161,7 +203,8 @@ size_t OatWriter::InitOatClasses(size_t offset) { status = mirror::Class::kStatusNotReady; } - OatClass* oat_class = new OatClass(offset, status, num_methods); + OatClass* oat_class = new OatClass(offset, compiled_methods.release(), + num_non_null_compiled_methods, status); oat_classes_.push_back(oat_class); offset += oat_class->SizeOf(); } @@ -189,8 +232,10 @@ size_t OatWriter::InitOatCode(size_t offset) { DO_TRAMPOLINE(interpreter_to_interpreter_bridge_, InterpreterToInterpreterBridge); DO_TRAMPOLINE(interpreter_to_compiled_code_bridge_, InterpreterToCompiledCodeBridge); DO_TRAMPOLINE(jni_dlsym_lookup_, JniDlsymLookup); + DO_TRAMPOLINE(portable_imt_conflict_trampoline_, PortableImtConflictTrampoline); DO_TRAMPOLINE(portable_resolution_trampoline_, PortableResolutionTrampoline); DO_TRAMPOLINE(portable_to_interpreter_bridge_, PortableToInterpreterBridge); + DO_TRAMPOLINE(quick_imt_conflict_trampoline_, QuickImtConflictTrampoline); DO_TRAMPOLINE(quick_resolution_trampoline_, QuickResolutionTrampoline); DO_TRAMPOLINE(quick_to_interpreter_bridge_, QuickToInterpreterBridge); @@ -199,8 +244,10 @@ size_t OatWriter::InitOatCode(size_t offset) { oat_header_->SetInterpreterToInterpreterBridgeOffset(0); oat_header_->SetInterpreterToCompiledCodeBridgeOffset(0); oat_header_->SetJniDlsymLookupOffset(0); + oat_header_->SetPortableImtConflictTrampolineOffset(0); oat_header_->SetPortableResolutionTrampolineOffset(0); oat_header_->SetPortableToInterpreterBridgeOffset(0); + oat_header_->SetQuickImtConflictTrampolineOffset(0); oat_header_->SetQuickResolutionTrampolineOffset(0); oat_header_->SetQuickToInterpreterBridgeOffset(0); } @@ -212,20 +259,20 @@ size_t OatWriter::InitOatCodeDexFiles(size_t offset) { for (size_t i = 0; i != dex_files_->size(); ++i) { const DexFile* dex_file = (*dex_files_)[i]; CHECK(dex_file != NULL); - offset = InitOatCodeDexFile(offset, oat_class_index, *dex_file); + offset = InitOatCodeDexFile(offset, &oat_class_index, *dex_file); } return offset; } size_t OatWriter::InitOatCodeDexFile(size_t offset, - size_t& oat_class_index, + size_t* oat_class_index, const DexFile& dex_file) { for (size_t class_def_index = 0; class_def_index < dex_file.NumClassDefs(); - class_def_index++, oat_class_index++) { + class_def_index++, (*oat_class_index)++) { const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_index); - offset = InitOatCodeClassDef(offset, oat_class_index, class_def_index, dex_file, class_def); - oat_classes_[oat_class_index]->UpdateChecksum(*oat_header_); + offset = InitOatCodeClassDef(offset, 
*oat_class_index, class_def_index, dex_file, class_def); + oat_classes_[*oat_class_index]->UpdateChecksum(*oat_header_); } return offset; } @@ -240,7 +287,7 @@ size_t OatWriter::InitOatCodeClassDef(size_t offset, return offset; } ClassDataItemIterator it(dex_file, class_data); - CHECK_EQ(oat_classes_[oat_class_index]->method_offsets_.size(), + CHECK_LE(oat_classes_[oat_class_index]->method_offsets_.size(), it.NumDirectMethods() + it.NumVirtualMethods()); // Skip fields while (it.HasNextStaticField()) { @@ -251,32 +298,35 @@ size_t OatWriter::InitOatCodeClassDef(size_t offset, } // Process methods size_t class_def_method_index = 0; + size_t method_offsets_index = 0; while (it.HasNextDirectMethod()) { bool is_native = (it.GetMemberAccessFlags() & kAccNative) != 0; offset = InitOatCodeMethod(offset, oat_class_index, class_def_index, class_def_method_index, - is_native, it.GetMethodInvokeType(class_def), it.GetMemberIndex(), - &dex_file); + &method_offsets_index, is_native, + it.GetMethodInvokeType(class_def), it.GetMemberIndex(), dex_file); class_def_method_index++; it.Next(); } while (it.HasNextVirtualMethod()) { bool is_native = (it.GetMemberAccessFlags() & kAccNative) != 0; offset = InitOatCodeMethod(offset, oat_class_index, class_def_index, class_def_method_index, - is_native, it.GetMethodInvokeType(class_def), it.GetMemberIndex(), - &dex_file); + &method_offsets_index, is_native, + it.GetMethodInvokeType(class_def), it.GetMemberIndex(), dex_file); class_def_method_index++; it.Next(); } DCHECK(!it.HasNext()); + CHECK_LE(method_offsets_index, class_def_method_index); return offset; } size_t OatWriter::InitOatCodeMethod(size_t offset, size_t oat_class_index, size_t __attribute__((unused)) class_def_index, size_t class_def_method_index, + size_t* method_offsets_index, bool __attribute__((unused)) is_native, InvokeType invoke_type, - uint32_t method_idx, const DexFile* dex_file) { + uint32_t method_idx, const DexFile& dex_file) { // derived from CompiledMethod if available uint32_t code_offset = 0; uint32_t frame_size_in_bytes = kStackAlignment; @@ -292,8 +342,7 @@ size_t OatWriter::InitOatCodeMethod(size_t offset, size_t oat_class_index, oat_class->GetOatMethodOffsetsOffsetFromOatHeader(class_def_method_index); #endif - CompiledMethod* compiled_method = - compiler_driver_->GetCompiledMethod(MethodReference(dex_file, method_idx)); + CompiledMethod* compiled_method = oat_class->GetCompiledMethod(class_def_method_index); if (compiled_method != NULL) { #if defined(ART_USE_PORTABLE_COMPILER) compiled_method->AddOatdataOffsetToCompliledCodeOffset( @@ -358,7 +407,7 @@ size_t OatWriter::InitOatCodeMethod(size_t offset, size_t oat_class_index, #if !defined(NDEBUG) // We expect GC maps except when the class hasn't been verified or the method is native - ClassReference class_ref(dex_file, class_def_index); + ClassReference class_ref(&dex_file, class_def_index); CompiledClass* compiled_class = compiler_driver_->GetCompiledClass(class_ref); mirror::Class::Status status; if (compiled_class != NULL) { @@ -371,7 +420,7 @@ size_t OatWriter::InitOatCodeMethod(size_t offset, size_t oat_class_index, CHECK(gc_map_size != 0 || is_native || status < mirror::Class::kStatusVerified) << &gc_map << " " << gc_map_size << " " << (is_native ? 
"true" : "false") << " " << (status < mirror::Class::kStatusVerified) << " " << status << " " - << PrettyMethod(method_idx, *dex_file); + << PrettyMethod(method_idx, dex_file); #endif // Deduplicate GC maps @@ -384,24 +433,26 @@ size_t OatWriter::InitOatCodeMethod(size_t offset, size_t oat_class_index, offset += gc_map_size; oat_header_->UpdateChecksum(&gc_map[0], gc_map_size); } + + oat_class->method_offsets_[*method_offsets_index] = + OatMethodOffsets(code_offset, + frame_size_in_bytes, + core_spill_mask, + fp_spill_mask, + mapping_table_offset, + vmap_table_offset, + gc_map_offset); + (*method_offsets_index)++; } - oat_class->method_offsets_[class_def_method_index] = - OatMethodOffsets(code_offset, - frame_size_in_bytes, - core_spill_mask, - fp_spill_mask, - mapping_table_offset, - vmap_table_offset, - gc_map_offset); if (compiler_driver_->IsImage()) { ClassLinker* linker = Runtime::Current()->GetClassLinker(); - mirror::DexCache* dex_cache = linker->FindDexCache(*dex_file); + mirror::DexCache* dex_cache = linker->FindDexCache(dex_file); // Unchecked as we hold mutator_lock_ on entry. ScopedObjectAccessUnchecked soa(Thread::Current()); - mirror::ArtMethod* method = linker->ResolveMethod(*dex_file, method_idx, dex_cache, - NULL, NULL, invoke_type); + mirror::ArtMethod* method = linker->ResolveMethod(dex_file, method_idx, dex_cache, + NULL, NULL, invoke_type); CHECK(method != NULL); method->SetFrameSizeInBytes(frame_size_in_bytes); method->SetCoreSpillMask(core_spill_mask); @@ -475,8 +526,10 @@ bool OatWriter::Write(OutputStream& out) { DO_STAT(size_interpreter_to_interpreter_bridge_); DO_STAT(size_interpreter_to_compiled_code_bridge_); DO_STAT(size_jni_dlsym_lookup_); + DO_STAT(size_portable_imt_conflict_trampoline_); DO_STAT(size_portable_resolution_trampoline_); DO_STAT(size_portable_to_interpreter_bridge_); + DO_STAT(size_quick_imt_conflict_trampoline_); DO_STAT(size_quick_resolution_trampoline_); DO_STAT(size_quick_to_interpreter_bridge_); DO_STAT(size_trampoline_alignment_); @@ -491,7 +544,9 @@ bool OatWriter::Write(OutputStream& out) { DO_STAT(size_oat_dex_file_location_checksum_); DO_STAT(size_oat_dex_file_offset_); DO_STAT(size_oat_dex_file_methods_offsets_); + DO_STAT(size_oat_class_type_); DO_STAT(size_oat_class_status_); + DO_STAT(size_oat_class_method_bitmaps_); DO_STAT(size_oat_class_method_offsets_); #undef DO_STAT @@ -570,8 +625,10 @@ size_t OatWriter::WriteCode(OutputStream& out, const size_t file_offset) { DO_TRAMPOLINE(interpreter_to_interpreter_bridge_); DO_TRAMPOLINE(interpreter_to_compiled_code_bridge_); DO_TRAMPOLINE(jni_dlsym_lookup_); + DO_TRAMPOLINE(portable_imt_conflict_trampoline_); DO_TRAMPOLINE(portable_resolution_trampoline_); DO_TRAMPOLINE(portable_to_interpreter_bridge_); + DO_TRAMPOLINE(quick_imt_conflict_trampoline_); DO_TRAMPOLINE(quick_resolution_trampoline_); DO_TRAMPOLINE(quick_to_interpreter_bridge_); #undef DO_TRAMPOLINE @@ -586,7 +643,7 @@ size_t OatWriter::WriteCodeDexFiles(OutputStream& out, for (size_t i = 0; i != oat_dex_files_.size(); ++i) { const DexFile* dex_file = (*dex_files_)[i]; CHECK(dex_file != NULL); - relative_offset = WriteCodeDexFile(out, file_offset, relative_offset, oat_class_index, + relative_offset = WriteCodeDexFile(out, file_offset, relative_offset, &oat_class_index, *dex_file); if (relative_offset == 0) { return 0; @@ -596,12 +653,12 @@ size_t OatWriter::WriteCodeDexFiles(OutputStream& out, } size_t OatWriter::WriteCodeDexFile(OutputStream& out, const size_t file_offset, - size_t relative_offset, size_t& oat_class_index, + 
size_t relative_offset, size_t* oat_class_index, const DexFile& dex_file) { for (size_t class_def_index = 0; class_def_index < dex_file.NumClassDefs(); - class_def_index++, oat_class_index++) { + class_def_index++, (*oat_class_index)++) { const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_index); - relative_offset = WriteCodeClassDef(out, file_offset, relative_offset, oat_class_index, + relative_offset = WriteCodeClassDef(out, file_offset, relative_offset, *oat_class_index, dex_file, class_def); if (relative_offset == 0) { return 0; @@ -637,11 +694,12 @@ size_t OatWriter::WriteCodeClassDef(OutputStream& out, } // Process methods size_t class_def_method_index = 0; + size_t method_offsets_index = 0; while (it.HasNextDirectMethod()) { bool is_static = (it.GetMemberAccessFlags() & kAccStatic) != 0; relative_offset = WriteCodeMethod(out, file_offset, relative_offset, oat_class_index, - class_def_method_index, is_static, it.GetMemberIndex(), - dex_file); + class_def_method_index, &method_offsets_index, is_static, + it.GetMemberIndex(), dex_file); if (relative_offset == 0) { return 0; } @@ -650,28 +708,30 @@ size_t OatWriter::WriteCodeClassDef(OutputStream& out, } while (it.HasNextVirtualMethod()) { relative_offset = WriteCodeMethod(out, file_offset, relative_offset, oat_class_index, - class_def_method_index, false, it.GetMemberIndex(), dex_file); + class_def_method_index, &method_offsets_index, false, + it.GetMemberIndex(), dex_file); if (relative_offset == 0) { return 0; } class_def_method_index++; it.Next(); } + DCHECK(!it.HasNext()); + CHECK_LE(method_offsets_index, class_def_method_index); return relative_offset; } size_t OatWriter::WriteCodeMethod(OutputStream& out, const size_t file_offset, size_t relative_offset, size_t oat_class_index, - size_t class_def_method_index, bool is_static, - uint32_t method_idx, const DexFile& dex_file) { - const CompiledMethod* compiled_method = - compiler_driver_->GetCompiledMethod(MethodReference(&dex_file, method_idx)); - - OatMethodOffsets method_offsets = - oat_classes_[oat_class_index]->method_offsets_[class_def_method_index]; - + size_t class_def_method_index, size_t* method_offsets_index, + bool is_static, uint32_t method_idx, const DexFile& dex_file) { + OatClass* oat_class = oat_classes_[oat_class_index]; + const CompiledMethod* compiled_method = oat_class->GetCompiledMethod(class_def_method_index); if (compiled_method != NULL) { // ie. 
not an abstract method + const OatMethodOffsets method_offsets = oat_class->method_offsets_[*method_offsets_index]; + (*method_offsets_index)++; + #if !defined(ART_USE_PORTABLE_COMPILER) uint32_t aligned_offset = compiled_method->AlignCode(relative_offset); uint32_t aligned_code_delta = aligned_offset - relative_offset; @@ -854,29 +914,96 @@ bool OatWriter::OatDexFile::Write(OatWriter* oat_writer, return true; } -OatWriter::OatClass::OatClass(size_t offset, mirror::Class::Status status, uint32_t methods_count) { +OatWriter::OatClass::OatClass(size_t offset, + std::vector<CompiledMethod*>* compiled_methods, + uint32_t num_non_null_compiled_methods, + mirror::Class::Status status) { + CHECK(compiled_methods != NULL); + uint32_t num_methods = compiled_methods->size(); + CHECK_LE(num_non_null_compiled_methods, num_methods); + offset_ = offset; + compiled_methods_ = compiled_methods; + oat_method_offsets_offsets_from_oat_class_.resize(num_methods); + + // Since both kOatClassNoneCompiled and kOatClassAllCompiled could + // apply when there are 0 methods, we just arbitrarily say that 0 + // methods means kOatClassNoneCompiled and that we won't use + // kOatClassAllCompiled unless there is at least one compiled + // method. This means in an interpreter-only system, we can assert + // that all classes are kOatClassNoneCompiled. + if (num_non_null_compiled_methods == 0) { + type_ = kOatClassNoneCompiled; + } else if (num_non_null_compiled_methods == num_methods) { + type_ = kOatClassAllCompiled; + } else { + type_ = kOatClassSomeCompiled; + } + status_ = status; - method_offsets_.resize(methods_count); + method_offsets_.resize(num_non_null_compiled_methods); + + uint32_t oat_method_offsets_offset_from_oat_class = sizeof(type_) + sizeof(status_); + if (type_ == kOatClassSomeCompiled) { + method_bitmap_ = new BitVector(num_methods, false, Allocator::GetMallocAllocator()); + method_bitmap_size_ = method_bitmap_->GetSizeOf(); + oat_method_offsets_offset_from_oat_class += sizeof(method_bitmap_size_); + oat_method_offsets_offset_from_oat_class += method_bitmap_size_; + } else { + method_bitmap_ = NULL; + method_bitmap_size_ = 0; + } + + for (size_t i = 0; i < num_methods; i++) { + CompiledMethod* compiled_method = (*compiled_methods_)[i]; + if (compiled_method == NULL) { + oat_method_offsets_offsets_from_oat_class_[i] = 0; + } else { + oat_method_offsets_offsets_from_oat_class_[i] = oat_method_offsets_offset_from_oat_class; + oat_method_offsets_offset_from_oat_class += sizeof(OatMethodOffsets); + if (type_ == kOatClassSomeCompiled) { + method_bitmap_->SetBit(i); + } + } + } } +OatWriter::OatClass::~OatClass() { + delete compiled_methods_; +} + +#if defined(ART_USE_PORTABLE_COMPILER) size_t OatWriter::OatClass::GetOatMethodOffsetsOffsetFromOatHeader( size_t class_def_method_index_) const { - return offset_ + GetOatMethodOffsetsOffsetFromOatClass(class_def_method_index_); + uint32_t method_offset = GetOatMethodOffsetsOffsetFromOatClass(class_def_method_index_); + if (method_offset == 0) { + return 0; + } + return offset_ + method_offset; } size_t OatWriter::OatClass::GetOatMethodOffsetsOffsetFromOatClass( size_t class_def_method_index_) const { - return sizeof(status_) - + (sizeof(method_offsets_[0]) * class_def_method_index_); + return oat_method_offsets_offsets_from_oat_class_[class_def_method_index_]; } +#endif size_t OatWriter::OatClass::SizeOf() const { - return GetOatMethodOffsetsOffsetFromOatClass(method_offsets_.size()); + return sizeof(status_) + + sizeof(type_) + + ((method_bitmap_size_ == 0) ?
0 : sizeof(method_bitmap_size_)) + + method_bitmap_size_ + + (sizeof(method_offsets_[0]) * method_offsets_.size()); } void OatWriter::OatClass::UpdateChecksum(OatHeader& oat_header) const { oat_header.UpdateChecksum(&status_, sizeof(status_)); + oat_header.UpdateChecksum(&type_, sizeof(type_)); + if (method_bitmap_size_ != 0) { + CHECK_EQ(kOatClassSomeCompiled, type_); + oat_header.UpdateChecksum(&method_bitmap_size_, sizeof(method_bitmap_size_)); + oat_header.UpdateChecksum(method_bitmap_->GetRawStorage(), method_bitmap_size_); + } oat_header.UpdateChecksum(&method_offsets_[0], sizeof(method_offsets_[0]) * method_offsets_.size()); } @@ -890,17 +1017,30 @@ bool OatWriter::OatClass::Write(OatWriter* oat_writer, return false; } oat_writer->size_oat_class_status_ += sizeof(status_); - DCHECK_EQ(static_cast<off_t>(file_offset + GetOatMethodOffsetsOffsetFromOatHeader(0)), - out.Seek(0, kSeekCurrent)); + if (!out.WriteFully(&type_, sizeof(type_))) { + PLOG(ERROR) << "Failed to write oat class type to " << out.GetLocation(); + return false; + } + oat_writer->size_oat_class_type_ += sizeof(type_); + if (method_bitmap_size_ != 0) { + CHECK_EQ(kOatClassSomeCompiled, type_); + if (!out.WriteFully(&method_bitmap_size_, sizeof(method_bitmap_size_))) { + PLOG(ERROR) << "Failed to write method bitmap size to " << out.GetLocation(); + return false; + } + oat_writer->size_oat_class_method_bitmaps_ += sizeof(method_bitmap_size_); + if (!out.WriteFully(method_bitmap_->GetRawStorage(), method_bitmap_size_)) { + PLOG(ERROR) << "Failed to write method bitmap to " << out.GetLocation(); + return false; + } + oat_writer->size_oat_class_method_bitmaps_ += method_bitmap_size_; + } if (!out.WriteFully(&method_offsets_[0], sizeof(method_offsets_[0]) * method_offsets_.size())) { PLOG(ERROR) << "Failed to write method offsets to " << out.GetLocation(); return false; } oat_writer->size_oat_class_method_offsets_ += sizeof(method_offsets_[0]) * method_offsets_.size(); - DCHECK_EQ(static_cast<off_t>(file_offset + - GetOatMethodOffsetsOffsetFromOatHeader(method_offsets_.size())), - out.Seek(0, kSeekCurrent)); return true; } diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h index d5f7e21a1a..5d947cfaea 100644 --- a/compiler/oat_writer.h +++ b/compiler/oat_writer.h @@ -30,6 +30,7 @@ namespace art { +class BitVector; class OutputStream; // OatHeader variable length with count of D OatDexFiles @@ -90,7 +91,7 @@ class OatWriter { size_t InitOatCodeDexFiles(size_t offset) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); size_t InitOatCodeDexFile(size_t offset, - size_t& oat_class_index, + size_t* oat_class_index, const DexFile& dex_file) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); size_t InitOatCodeClassDef(size_t offset, @@ -99,21 +100,22 @@ class OatWriter { const DexFile::ClassDef& class_def) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); size_t InitOatCodeMethod(size_t offset, size_t oat_class_index, size_t class_def_index, - size_t class_def_method_index, bool is_native, InvokeType type, - uint32_t method_idx, const DexFile*) + size_t class_def_method_index, size_t* method_offsets_index, + bool is_native, InvokeType type, uint32_t method_idx, const DexFile&) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); bool WriteTables(OutputStream& out, const size_t file_offset); size_t WriteCode(OutputStream& out, const size_t file_offset); size_t WriteCodeDexFiles(OutputStream& out, const size_t file_offset, size_t relative_offset); size_t WriteCodeDexFile(OutputStream& out, const size_t file_offset, size_t relative_offset, - size_t& 
oat_class_index, const DexFile& dex_file); + size_t* oat_class_index, const DexFile& dex_file); size_t WriteCodeClassDef(OutputStream& out, const size_t file_offset, size_t relative_offset, size_t oat_class_index, const DexFile& dex_file, const DexFile::ClassDef& class_def); size_t WriteCodeMethod(OutputStream& out, const size_t file_offset, size_t relative_offset, - size_t oat_class_index, size_t class_def_method_index, bool is_static, - uint32_t method_idx, const DexFile& dex_file); + size_t oat_class_index, size_t class_def_method_index, + size_t* method_offsets_index, bool is_static, uint32_t method_idx, + const DexFile& dex_file); void ReportWriteFailure(const char* what, uint32_t method_idx, const DexFile& dex_file, OutputStream& out) const; @@ -142,13 +144,24 @@ class OatWriter { class OatClass { public: - explicit OatClass(size_t offset, mirror::Class::Status status, uint32_t methods_count); + explicit OatClass(size_t offset, + std::vector<CompiledMethod*>* compiled_methods, + uint32_t num_non_null_compiled_methods, + mirror::Class::Status status); + ~OatClass(); +#if defined(ART_USE_PORTABLE_COMPILER) size_t GetOatMethodOffsetsOffsetFromOatHeader(size_t class_def_method_index_) const; size_t GetOatMethodOffsetsOffsetFromOatClass(size_t class_def_method_index_) const; +#endif size_t SizeOf() const; void UpdateChecksum(OatHeader& oat_header) const; bool Write(OatWriter* oat_writer, OutputStream& out, const size_t file_offset) const; + CompiledMethod* GetCompiledMethod(size_t class_def_method_index) const { + DCHECK(compiled_methods_ != NULL); + return (*compiled_methods_)[class_def_method_index]; + } + // Offset of start of OatClass from beginning of OatHeader. It is // used to validate file position when writing. For Portable, it // is also used to calculate the position of the OatMethodOffsets @@ -156,8 +169,37 @@ class OatWriter { // patched to point to code in the Portable .o ELF objects. size_t offset_; + // CompiledMethods for each class_def_method_index, or NULL if no method is available. + std::vector<CompiledMethod*>* compiled_methods_; + + // Offset from OatClass::offset_ to the OatMethodOffsets for the + // class_def_method_index. If 0, it means the corresponding + // CompiledMethod entry in OatClass::compiled_methods_ should be + // NULL and that the OatClass::type_ cannot be kOatClassAllCompiled. + std::vector<uint32_t> oat_method_offsets_offsets_from_oat_class_; + // data to write - mirror::Class::Status status_; + + COMPILE_ASSERT(mirror::Class::Status::kStatusMax < (1 << 16), class_status_wont_fit_in_16bits); + int16_t status_; + + COMPILE_ASSERT(OatClassType::kOatClassMax < (1 << 16), oat_class_type_wont_fit_in_16bits); + uint16_t type_; + + uint32_t method_bitmap_size_; + + // Bit vector indexed by ClassDef method index. When + // OatClass::type_ is kOatClassSomeCompiled, a set bit indicates the + // method has an OatMethodOffsets in method_offsets_, otherwise + // the entry was omitted to save space. If OatClass::type_ is + // not kOatClassSomeCompiled, the bitmap will be NULL. + BitVector* method_bitmap_; + + // OatMethodOffsets for each CompiledMethod present in the + // OatClass. Note that some may be missing if + // OatClass::compiled_methods_ contains NULL values (and + // oat_method_offsets_offsets_from_oat_class_ should contain 0 + // values in this case).
std::vector<OatMethodOffsets> method_offsets_; private: @@ -184,8 +226,10 @@ class OatWriter { UniquePtr<const std::vector<uint8_t> > interpreter_to_interpreter_bridge_; UniquePtr<const std::vector<uint8_t> > interpreter_to_compiled_code_bridge_; UniquePtr<const std::vector<uint8_t> > jni_dlsym_lookup_; + UniquePtr<const std::vector<uint8_t> > portable_imt_conflict_trampoline_; UniquePtr<const std::vector<uint8_t> > portable_resolution_trampoline_; UniquePtr<const std::vector<uint8_t> > portable_to_interpreter_bridge_; + UniquePtr<const std::vector<uint8_t> > quick_imt_conflict_trampoline_; UniquePtr<const std::vector<uint8_t> > quick_resolution_trampoline_; UniquePtr<const std::vector<uint8_t> > quick_to_interpreter_bridge_; @@ -198,8 +242,10 @@ class OatWriter { uint32_t size_interpreter_to_interpreter_bridge_; uint32_t size_interpreter_to_compiled_code_bridge_; uint32_t size_jni_dlsym_lookup_; + uint32_t size_portable_imt_conflict_trampoline_; uint32_t size_portable_resolution_trampoline_; uint32_t size_portable_to_interpreter_bridge_; + uint32_t size_quick_imt_conflict_trampoline_; uint32_t size_quick_resolution_trampoline_; uint32_t size_quick_to_interpreter_bridge_; uint32_t size_trampoline_alignment_; @@ -214,7 +260,9 @@ class OatWriter { uint32_t size_oat_dex_file_location_checksum_; uint32_t size_oat_dex_file_offset_; uint32_t size_oat_dex_file_methods_offsets_; + uint32_t size_oat_class_type_; uint32_t size_oat_class_status_; + uint32_t size_oat_class_method_bitmaps_; uint32_t size_oat_class_method_offsets_; // Code mappings for deduplication. Deduplication is already done on a pointer basis by the diff --git a/compiler/utils/dedupe_set.h b/compiler/utils/dedupe_set.h index f3d35d728c..638e0ec457 100644 --- a/compiler/utils/dedupe_set.h +++ b/compiler/utils/dedupe_set.h @@ -18,62 +18,66 @@ #define ART_COMPILER_UTILS_DEDUPE_SET_H_ #include <set> +#include <string> #include "base/mutex.h" #include "base/stl_util.h" +#include "base/stringprintf.h" namespace art { -// A simple data structure to handle hashed deduplication. Add is thread safe. -template <typename Key, typename HashType, typename HashFunc> +// A set of Keys that support a HashFunc returning HashType. Used to find duplicates of Key in the +// Add method. The data structure is thread-safe through the use of internal locks; it also +// supports sharding the lock.
+template <typename Key, typename HashType, typename HashFunc, HashType kShard = 1> class DedupeSet { typedef std::pair<HashType, Key*> HashedKey; class Comparator { public: bool operator()(const HashedKey& a, const HashedKey& b) const { - if (a.first < b.first) return true; - if (a.first > b.first) return true; - return *a.second < *b.second; + if (a.first != b.first) { + return a.first < b.first; + } else { + return *a.second < *b.second; + } } }; - typedef std::set<HashedKey, Comparator> Keys; - public: - typedef typename Keys::iterator iterator; - typedef typename Keys::const_iterator const_iterator; - typedef typename Keys::size_type size_type; - typedef typename Keys::value_type value_type; - - iterator begin() { return keys_.begin(); } - const_iterator begin() const { return keys_.begin(); } - iterator end() { return keys_.end(); } - const_iterator end() const { return keys_.end(); } - Key* Add(Thread* self, const Key& key) { - HashType hash = HashFunc()(key); - HashedKey hashed_key(hash, const_cast<Key*>(&key)); - MutexLock lock(self, lock_); - auto it = keys_.find(hashed_key); - if (it != keys_.end()) { + HashType raw_hash = HashFunc()(key); + HashType shard_hash = raw_hash / kShard; + HashType shard_bin = raw_hash % kShard; + HashedKey hashed_key(shard_hash, const_cast<Key*>(&key)); + MutexLock lock(self, *lock_[shard_bin]); + auto it = keys_[shard_bin].find(hashed_key); + if (it != keys_[shard_bin].end()) { return it->second; } hashed_key.second = new Key(key); - keys_.insert(hashed_key); + keys_[shard_bin].insert(hashed_key); return hashed_key.second; } - DedupeSet() : lock_("dedupe lock") { + explicit DedupeSet(const char* set_name) { + for (HashType i = 0; i < kShard; ++i) { + lock_name_[i] = StringPrintf("%s lock %d", set_name, i); + lock_[i].reset(new Mutex(lock_name_[i].c_str())); + } } ~DedupeSet() { - STLDeleteValues(&keys_); + for (HashType i = 0; i < kShard; ++i) { + STLDeleteValues(&keys_[i]); + } } private: - Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; - Keys keys_; + std::string lock_name_[kShard]; + UniquePtr<Mutex> lock_[kShard]; + std::set<HashedKey, Comparator> keys_[kShard]; + DISALLOW_COPY_AND_ASSIGN(DedupeSet); }; diff --git a/compiler/utils/dedupe_set_test.cc b/compiler/utils/dedupe_set_test.cc index 9f5e292f53..8abe6debc1 100644 --- a/compiler/utils/dedupe_set_test.cc +++ b/compiler/utils/dedupe_set_test.cc @@ -14,15 +14,12 @@ * limitations under the License. */ -#include "common_test.h" #include "dedupe_set.h" +#include "gtest/gtest.h" +#include "thread-inl.h" namespace art { -class DedupeSetTest : public testing::Test { - public: -}; - class DedupeHashFunc { public: size_t operator()(const std::vector<uint8_t>& array) const { @@ -35,10 +32,10 @@ class DedupeHashFunc { return hash; } }; -TEST_F(DedupeSetTest, Test) { +TEST(DedupeSetTest, Test) { Thread* self = Thread::Current(); typedef std::vector<uint8_t> ByteArray; - DedupeSet<ByteArray, size_t, DedupeHashFunc> deduplicator; + DedupeSet<ByteArray, size_t, DedupeHashFunc> deduplicator("test"); ByteArray* array1; { ByteArray test1; |
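To make the sharding idea in dedupe_set.h above concrete: the hash of a key is split into a shard selector (raw_hash % kShard) and a residual value (raw_hash / kShard), and each shard owns its own mutex and set, so Add calls for keys that hash to different shards never contend on the same lock. The following standalone C++ sketch models that scheme under stated assumptions; ShardedDedupe, VectorHash, and the std::multimap storage are illustrative simplifications, not the ART DedupeSet API.

// Simplified, standalone model of sharded deduplication (not the ART DedupeSet itself):
// the hash picks one of kShards independent buckets, each guarded by its own mutex,
// so concurrent Add calls on different shards do not contend.
#include <cstddef>
#include <cstdint>
#include <map>
#include <memory>
#include <mutex>
#include <vector>

template <typename Key, typename HashFunc, size_t kShards = 4>
class ShardedDedupe {
 public:
  // Returns a pointer to the canonical copy of key, inserting a copy if it is new.
  const Key* Add(const Key& key) {
    size_t hash = HashFunc()(key);
    size_t shard = hash % kShards;  // Selects which bucket/lock to use.
    size_t inner = hash / kShards;  // Remaining hash orders keys within the bucket.
    std::lock_guard<std::mutex> lock(shards_[shard].mutex);
    auto& bucket = shards_[shard].keys;
    auto range = bucket.equal_range(inner);
    for (auto it = range.first; it != range.second; ++it) {
      if (*it->second == key) {
        return it->second.get();  // Duplicate found: reuse the existing storage.
      }
    }
    auto inserted = bucket.emplace(inner, std::make_unique<Key>(key));
    return inserted->second.get();
  }

 private:
  struct Shard {
    std::mutex mutex;
    std::multimap<size_t, std::unique_ptr<Key>> keys;
  };
  Shard shards_[kShards];
};

// Example: deduplicating byte arrays such as GC maps or mapping tables.
struct VectorHash {
  size_t operator()(const std::vector<uint8_t>& v) const {
    size_t h = 0x811c9dc5u;  // Simple FNV-style mix for illustration.
    for (uint8_t b : v) h = (h ^ b) * 16777619u;
    return h;
  }
};

int main() {
  ShardedDedupe<std::vector<uint8_t>, VectorHash> dedupe;
  const auto* a = dedupe.Add({1, 2, 3});
  const auto* b = dedupe.Add({1, 2, 3});
  return (a == b) ? 0 : 1;  // Identical contents map to the same canonical copy.
}

The shard count is purely a contention knob: correctness does not depend on how many shards exist, only on each bucket being protected by its own lock.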
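Returning to the OatClass changes earlier in this diff: a class is written as kOatClassNoneCompiled, kOatClassAllCompiled, or kOatClassSomeCompiled, and only in the mixed case is a method bitmap emitted, so OatMethodOffsets entries are stored solely for methods that actually have compiled code. The sketch below is a minimal standalone model of that encoding, not OatWriter itself; OatClassSketch and GetSlot are hypothetical names, and the reader-side lookup (counting set bits below the method index) is an assumption about how a consumer would map a class_def_method_index to its compact slot.

// Minimal standalone model (not ART code) of the OatClass encoding above: choose a
// type from the number of compiled methods, build a bitmap only for the mixed case,
// and map a class_def_method_index to a compact OatMethodOffsets slot.
#include <cstdint>
#include <cstdio>
#include <vector>

enum OatClassType { kOatClassNoneCompiled, kOatClassAllCompiled, kOatClassSomeCompiled };  // Illustrative; the real constants live in the runtime headers.

struct OatClassSketch {
  OatClassType type;
  std::vector<uint32_t> bitmap_words;  // Present only for kOatClassSomeCompiled.
  uint32_t num_method_offsets;         // Number of OatMethodOffsets actually written.

  explicit OatClassSketch(const std::vector<bool>& compiled) {
    uint32_t num_compiled = 0;
    for (bool c : compiled) num_compiled += c ? 1u : 0u;
    if (num_compiled == 0) {
      type = kOatClassNoneCompiled;  // Also chosen for zero methods, as in the diff.
    } else if (num_compiled == compiled.size()) {
      type = kOatClassAllCompiled;
    } else {
      type = kOatClassSomeCompiled;
      bitmap_words.resize((compiled.size() + 31) / 32, 0u);
      for (size_t i = 0; i < compiled.size(); ++i) {
        if (compiled[i]) bitmap_words[i / 32] |= 1u << (i % 32);
      }
    }
    num_method_offsets = num_compiled;
  }

  // Reader-side lookup: which compact slot, if any, holds this method's offsets?
  bool GetSlot(size_t method_index, uint32_t* slot) const {
    if (type == kOatClassNoneCompiled) return false;
    if (type == kOatClassAllCompiled) { *slot = static_cast<uint32_t>(method_index); return true; }
    if ((bitmap_words[method_index / 32] & (1u << (method_index % 32))) == 0) return false;
    uint32_t before = 0;  // Set bits below method_index give the compact index.
    for (size_t i = 0; i < method_index; ++i) {
      if (bitmap_words[i / 32] & (1u << (i % 32))) ++before;
    }
    *slot = before;
    return true;
  }
};

int main() {
  OatClassSketch oat_class({true, false, true, true});  // 3 of 4 methods compiled.
  uint32_t slot = 0;
  bool present = oat_class.GetSlot(2, &slot);
  std::printf("type=%d method 2 present=%d slot=%u\n", static_cast<int>(oat_class.type), present ? 1 : 0, slot);
  return 0;
}

For kOatClassAllCompiled the slot is simply the method index, and for kOatClassNoneCompiled there is nothing to look up, which is what lets the common fully-compiled and fully-interpreted cases skip the bitmap entirely.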