/* * Copyright (C) 2014 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_ #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_ #include "arch/instruction_set.h" #include "arch/instruction_set_features.h" #include "base/bit_field.h" #include "driver/compiler_options.h" #include "globals.h" #include "locations.h" #include "memory_region.h" #include "nodes.h" #include "stack_map_stream.h" namespace art { static size_t constexpr kVRegSize = 4; static size_t constexpr kUninitializedFrameSize = 0; // Binary encoding of 2^32 for type double. static int64_t constexpr k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000); // Binary encoding of 2^31 for type double. static int64_t constexpr k2Pow31EncodingForDouble = INT64_C(0x41E0000000000000); // Maximum value for a primitive integer. static int32_t constexpr kPrimIntMax = 0x7fffffff; // Maximum value for a primitive long. static int64_t constexpr kPrimLongMax = 0x7fffffffffffffff; class Assembler; class CodeGenerator; class DexCompilationUnit; class ParallelMoveResolver; class SrcMapElem; template class SrcMap; using DefaultSrcMap = SrcMap>; class CodeAllocator { public: CodeAllocator() {} virtual ~CodeAllocator() {} virtual uint8_t* Allocate(size_t size) = 0; private: DISALLOW_COPY_AND_ASSIGN(CodeAllocator); }; struct PcInfo { uint32_t dex_pc; uintptr_t native_pc; }; class SlowPathCode : public ArenaObject { public: SlowPathCode() {} virtual ~SlowPathCode() {} virtual void EmitNativeCode(CodeGenerator* codegen) = 0; private: DISALLOW_COPY_AND_ASSIGN(SlowPathCode); }; class CodeGenerator { public: // Compiles the graph to executable instructions. Returns whether the compilation // succeeded. void CompileBaseline(CodeAllocator* allocator, bool is_leaf = false); void CompileOptimized(CodeAllocator* allocator); static CodeGenerator* Create(HGraph* graph, InstructionSet instruction_set, const InstructionSetFeatures& isa_features, const CompilerOptions& compiler_options); virtual ~CodeGenerator() {} HGraph* GetGraph() const { return graph_; } bool GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const; size_t GetStackSlotOfParameter(HParameterValue* parameter) const { // Note that this follows the current calling convention. return GetFrameSize() + kVRegSize // Art method + parameter->GetIndex() * kVRegSize; } virtual void Initialize() = 0; virtual void Finalize(CodeAllocator* allocator); virtual void GenerateFrameEntry() = 0; virtual void GenerateFrameExit() = 0; virtual void Bind(HBasicBlock* block) = 0; virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) = 0; virtual HGraphVisitor* GetLocationBuilder() = 0; virtual HGraphVisitor* GetInstructionVisitor() = 0; virtual Assembler* GetAssembler() = 0; virtual size_t GetWordSize() const = 0; virtual size_t GetFloatingPointSpillSlotSize() const = 0; virtual uintptr_t GetAddressOf(HBasicBlock* block) const = 0; void ComputeFrameSize(size_t number_of_spill_slots, size_t maximum_number_of_live_core_registers, size_t maximum_number_of_live_fp_registers, size_t number_of_out_slots); int32_t GetStackSlot(HLocal* local) const; Location GetTemporaryLocation(HTemporary* temp) const; uint32_t GetFrameSize() const { return frame_size_; } void SetFrameSize(uint32_t size) { frame_size_ = size; } uint32_t GetCoreSpillMask() const { return core_spill_mask_; } uint32_t GetFpuSpillMask() const { return fpu_spill_mask_; } size_t GetNumberOfCoreRegisters() const { return number_of_core_registers_; } size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; } virtual void SetupBlockedRegisters(bool is_baseline) const = 0; virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0; virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0; virtual InstructionSet GetInstructionSet() const = 0; const CompilerOptions& GetCompilerOptions() const { return compiler_options_; } // Saves the register in the stack. Returns the size taken on stack. virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0; // Restores the register from the stack. Returns the size taken on stack. virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) = 0; virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) { UNUSED(stack_index, reg_id); UNIMPLEMENTED(FATAL); UNREACHABLE(); } virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) { UNUSED(stack_index, reg_id); UNIMPLEMENTED(FATAL); UNREACHABLE(); } virtual bool NeedsTwoRegisters(Primitive::Type type) const = 0; bool IsCoreCalleeSaveRegister(int reg) const { return (core_callee_save_mask_ & (1 << reg)) != 0; } bool IsFloatingPointCalleeSaveRegister(int reg) const { return (fpu_callee_save_mask_ & (1 << reg)) != 0; } void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc); bool CanMoveNullCheckToUser(HNullCheck* null_check); void MaybeRecordImplicitNullCheck(HInstruction* instruction); void AddSlowPath(SlowPathCode* slow_path) { slow_paths_.Add(slow_path); } void GenerateSlowPaths(); void BuildMappingTable(std::vector* vector, DefaultSrcMap* src_map) const; void BuildVMapTable(std::vector* vector) const; void BuildNativeGCMap( std::vector* vector, const DexCompilationUnit& dex_compilation_unit) const; void BuildStackMaps(std::vector* vector); void SaveLiveRegisters(LocationSummary* locations); void RestoreLiveRegisters(LocationSummary* locations); bool IsLeafMethod() const { return is_leaf_; } void MarkNotLeaf() { is_leaf_ = false; } // Clears the spill slots taken by loop phis in the `LocationSummary` of the // suspend check. This is called when the code generator generates code // for the suspend check at the back edge (instead of where the suspend check // is, which is the loop entry). At this point, the spill slots for the phis // have not been written to. void ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check) const; bool* GetBlockedCoreRegisters() const { return blocked_core_registers_; } bool* GetBlockedFloatingPointRegisters() const { return blocked_fpu_registers_; } // Helper that returns the pointer offset of an index in an object array. // Note: this method assumes we always have the same pointer size, regardless // of the architecture. static size_t GetCacheOffset(uint32_t index); void EmitParallelMoves(Location from1, Location to1, Location from2, Location to2); static bool StoreNeedsWriteBarrier(Primitive::Type type, HInstruction* value) { if (kIsDebugBuild) { if (type == Primitive::kPrimNot && value->IsIntConstant()) { CHECK_EQ(value->AsIntConstant()->GetValue(), 0); } } return type == Primitive::kPrimNot && !value->IsIntConstant(); } void AddAllocatedRegister(Location location) { allocated_registers_.Add(location); } protected: CodeGenerator(HGraph* graph, size_t number_of_core_registers, size_t number_of_fpu_registers, size_t number_of_register_pairs, uint32_t core_callee_save_mask, uint32_t fpu_callee_save_mask, const CompilerOptions& compiler_options) : frame_size_(kUninitializedFrameSize), core_spill_mask_(0), fpu_spill_mask_(0), first_register_slot_in_slow_path_(0), blocked_core_registers_(graph->GetArena()->AllocArray(number_of_core_registers)), blocked_fpu_registers_(graph->GetArena()->AllocArray(number_of_fpu_registers)), blocked_register_pairs_(graph->GetArena()->AllocArray(number_of_register_pairs)), number_of_core_registers_(number_of_core_registers), number_of_fpu_registers_(number_of_fpu_registers), number_of_register_pairs_(number_of_register_pairs), core_callee_save_mask_(core_callee_save_mask), fpu_callee_save_mask_(fpu_callee_save_mask), graph_(graph), compiler_options_(compiler_options), pc_infos_(graph->GetArena(), 32), slow_paths_(graph->GetArena(), 8), is_leaf_(true), stack_map_stream_(graph->GetArena()) {} // Register allocation logic. void AllocateRegistersLocally(HInstruction* instruction) const; // Backend specific implementation for allocating a register. virtual Location AllocateFreeRegister(Primitive::Type type) const = 0; static size_t FindFreeEntry(bool* array, size_t length); static size_t FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t length); virtual Location GetStackLocation(HLoadLocal* load) const = 0; virtual ParallelMoveResolver* GetMoveResolver() = 0; // Returns the location of the first spilled entry for floating point registers, // relative to the stack pointer. uint32_t GetFpuSpillStart() const { DCHECK_NE(frame_size_, kUninitializedFrameSize); return GetFrameSize() - FrameEntrySpillSize(); } uint32_t GetFpuSpillSize() const { return POPCOUNT(fpu_spill_mask_) * GetFloatingPointSpillSlotSize(); } uint32_t GetCoreSpillSize() const { return POPCOUNT(core_spill_mask_) * GetWordSize(); } uint32_t FrameEntrySpillSize() const { return GetFpuSpillSize() + GetCoreSpillSize(); } // Frame size required for this method. uint32_t frame_size_; uint32_t core_spill_mask_; uint32_t fpu_spill_mask_; uint32_t first_register_slot_in_slow_path_; // Registers that were allocated during linear scan. RegisterSet allocated_registers_; // Arrays used when doing register allocation to know which // registers we can allocate. `SetupBlockedRegisters` updates the // arrays. bool* const blocked_core_registers_; bool* const blocked_fpu_registers_; bool* const blocked_register_pairs_; size_t number_of_core_registers_; size_t number_of_fpu_registers_; size_t number_of_register_pairs_; const uint32_t core_callee_save_mask_; const uint32_t fpu_callee_save_mask_; private: void InitLocations(HInstruction* instruction); size_t GetStackOffsetOfSavedRegister(size_t index); HGraph* const graph_; const CompilerOptions& compiler_options_; GrowableArray pc_infos_; GrowableArray slow_paths_; bool is_leaf_; StackMapStream stack_map_stream_; DISALLOW_COPY_AND_ASSIGN(CodeGenerator); }; template class CallingConvention { public: CallingConvention(const C* registers, size_t number_of_registers, const F* fpu_registers, size_t number_of_fpu_registers) : registers_(registers), number_of_registers_(number_of_registers), fpu_registers_(fpu_registers), number_of_fpu_registers_(number_of_fpu_registers) {} size_t GetNumberOfRegisters() const { return number_of_registers_; } size_t GetNumberOfFpuRegisters() const { return number_of_fpu_registers_; } C GetRegisterAt(size_t index) const { DCHECK_LT(index, number_of_registers_); return registers_[index]; } F GetFpuRegisterAt(size_t index) const { DCHECK_LT(index, number_of_fpu_registers_); return fpu_registers_[index]; } size_t GetStackOffsetOf(size_t index) const { // We still reserve the space for parameters passed by registers. // Add one for the method pointer. return (index + 1) * kVRegSize; } private: const C* registers_; const size_t number_of_registers_; const F* fpu_registers_; const size_t number_of_fpu_registers_; DISALLOW_COPY_AND_ASSIGN(CallingConvention); }; } // namespace art #endif // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_