diff options
| author | 2014-04-07 17:34:12 +0000 | |
|---|---|---|
| committer | 2014-04-07 17:34:13 +0000 | |
| commit | 12c0273fd394d72ae93424784b1450daea417472 (patch) | |
| tree | 0410f0225424213516270a3796731d174c00d90c /compiler/driver/compiler_driver.h | |
| parent | 1415359a4ca80a78507173103274a06cd85d223d (diff) | |
| parent | 754ddad084ccb610d0cf486f6131bdc69bae5bc6 (diff) | |
Merge "Use trampolines for calls to helpers"
Diffstat (limited to 'compiler/driver/compiler_driver.h')
| -rw-r--r-- | compiler/driver/compiler_driver.h | 111 |
1 files changed, 111 insertions, 0 deletions
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h index 802f859da4..6df5d0c09f 100644 --- a/compiler/driver/compiler_driver.h +++ b/compiler/driver/compiler_driver.h @@ -634,6 +634,112 @@ class CompilerDriver { // Should the compiler run on this method given profile information? bool SkipCompilation(const std::string& method_name); + // Entrypoint trampolines. + // + // The idea here is that we can save code size by collecting the branches + // to the entrypoints (helper functions called by the generated code) into a + // table and then branching relative to that table from the code. On ARM 32 this + // will save 2 bytes per call. Only the entrypoints used by the program (the whole + // program - these are global) are in this table and are in no particular order. + // + // The trampolines will be placed right at the start of the .text section in the file + // and will consist of a table of instructions, each of which will branch relative to + // the thread register (r9 on ARM) to an entrypoint. On ARM this would look like: + // + // trampolines: + // 1: ldr pc, [r9, #40] + // 2: ldr pc, [r9, #8] + // ... + // + // Then a call to an entrypoint would be an immediate BL instruction to the appropriate + // label (1 or 2 in the above example). Because the entrypoint table has the lower bit + // of the address already set, the ldr pc will switch from ARM to Thumb for the entrypoint as + // necessary. + // + // On ARM, the range of a BL instruction is +-32MB, so this is more than enough for an + // immediate BL instruction in the generated code. + // + // The actual address of the trampoline for a particular entrypoint is not known until + // the OAT file is written and we know the addresses of all the branch instructions in + // the program. At this point we can rewrite the BL instruction to have the correct relative + // offset. 
+ class EntrypointTrampolines { + public: + EntrypointTrampolines() : current_offset_(0), lock_("Entrypoint Trampolines") {} + ~EntrypointTrampolines() {} + + // Register a trampoline for entrypoint |ep| and return its index in the trampoline table. + // If |ep| is already registered, return the index it was assigned previously. + uint32_t AddEntrypoint(Thread* self, uint32_t ep) LOCKS_EXCLUDED(lock_) { + MutexLock mu(self, lock_); + Trampolines::iterator tramp = trampolines_.find(ep); + if (tramp == trampolines_.end()) { + trampolines_[ep] = current_offset_; + trampoline_table_.push_back(ep); + LOG(DEBUG) << "adding new trampoline for " << ep << " at offset " << current_offset_; + return current_offset_++; + } else { + return tramp->second; + } + } + + const std::vector<uint32_t>& GetTrampolineTable() const { + return trampoline_table_; + } + + uint32_t GetTrampolineTableSize() const { + return current_offset_; + } + + private: + uint32_t current_offset_; + // Maps each registered entrypoint to its index in trampoline_table_. + typedef std::map<uint32_t, uint32_t> Trampolines; + Trampolines trampolines_ GUARDED_BY(lock_); + + // Registered entrypoints in order of registration. NOTE(review): written under lock_ (as is current_offset_) but not GUARDED_BY, and read unlocked by the getters - confirm callers. + std::vector<uint32_t> trampoline_table_; + Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; + }; + + uint32_t AddEntrypointTrampoline(uint32_t entrypoint); + + const std::vector<uint32_t>& GetEntrypointTrampolineTable() const { + return entrypoint_trampolines_.GetTrampolineTable(); + } + + uint32_t GetEntrypointTrampolineTableSize() const { + uint32_t size = entrypoint_trampolines_.GetTrampolineTableSize(); + if (instruction_set_ == kThumb2) { + return size * 4; + } + return size; + } + + // Get the maximum offset between entrypoint trampoline islands. Different architectures + // have limitations on the max offset for a call instruction. This function is used + // to determine when we need to generate a new trampoline island in the output to keep + // subsequent calls in range. 
+ size_t GetMaxEntrypointTrampolineOffset() const { + if (instruction_set_ == kThumb2) { + // On Thumb2, the max range of a BL instruction is 16MB. Use 15MB to leave some headroom. + return 15*MB; + } + // Returning 0 means we won't generate a trampoline island. + return 0; + } + + void BuildEntrypointTrampolineCode(); + + // Architecture-specific entrypoint trampoline builder. + void BuildArmEntrypointTrampolineCall(ThreadOffset<4> offset); + + const std::vector<uint8_t>& GetEntrypointTrampolineTableCode() const { + return entrypoint_trampoline_code_; + } + + FinalEntrypointRelocationSet* AllocateFinalEntrypointRelocationSet(CompilationUnit* cu) const; + private: // These flags are internal to CompilerDriver for collecting INVOKE resolution statistics. // The only external contract is that unresolved method has flags 0 and resolved non-0. @@ -671,6 +777,7 @@ class CompilerDriver { LOCKS_EXCLUDED(Locks::mutator_lock_); void LoadImageClasses(TimingLogger* timings); + void PostCompile() LOCKS_EXCLUDED(Locks::mutator_lock_); // Attempt to resolve all type, methods, fields, and strings // referenced from code in the dex file following PathClassLoader @@ -831,6 +938,10 @@ class CompilerDriver { DedupeSet<std::vector<uint8_t>, size_t, DedupeHashFunc, 4> dedupe_gc_map_; DedupeSet<std::vector<uint8_t>, size_t, DedupeHashFunc, 4> dedupe_cfi_info_; + EntrypointTrampolines entrypoint_trampolines_; + + std::vector<uint8_t> entrypoint_trampoline_code_; + DISALLOW_COPY_AND_ASSIGN(CompilerDriver); }; |