| /* |
| * Copyright (C) 2014 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_ |
| #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_ |
| |
| #include "arch/instruction_set.h" |
| #include "arch/instruction_set_features.h" |
| #include "base/arena_containers.h" |
| #include "base/arena_object.h" |
| #include "base/array_ref.h" |
| #include "base/bit_field.h" |
| #include "base/bit_utils.h" |
| #include "base/enums.h" |
| #include "base/globals.h" |
| #include "base/memory_region.h" |
| #include "class_root.h" |
| #include "dex/string_reference.h" |
| #include "dex/type_reference.h" |
| #include "graph_visualizer.h" |
| #include "locations.h" |
| #include "nodes.h" |
| #include "optimizing_compiler_stats.h" |
| #include "read_barrier_option.h" |
| #include "stack.h" |
| #include "utils/assembler.h" |
| #include "utils/label.h" |
| |
| namespace art { |
| |
// Raw 64-bit pattern that, reinterpreted as a double, is the value 2^32.
static constexpr int64_t k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000);
// Raw 64-bit pattern that, reinterpreted as a double, is the value 2^31.
static constexpr int64_t k2Pow31EncodingForDouble = INT64_C(0x41E0000000000000);
| |
// Minimum value for a primitive integer.
// Spelled as INT32_MIN rather than 0x80000000: that hex literal has an unsigned
// type, so initializing an int32_t from it relies on an out-of-range conversion.
static constexpr int32_t kPrimIntMin = INT32_MIN;
// Minimum value for a primitive long.
// Same reasoning: 0x8000000000000000 does not fit a signed 64-bit literal.
static constexpr int64_t kPrimLongMin = INT64_MIN;

// Maximum value for a primitive integer.
static constexpr int32_t kPrimIntMax = INT32_MAX;
// Maximum value for a primitive long.
static constexpr int64_t kPrimLongMax = INT64_MAX;
| |
| static constexpr ReadBarrierOption kCompilerReadBarrierOption = |
| kEmitCompilerReadBarrier ? kWithReadBarrier : kWithoutReadBarrier; |
| |
| class Assembler; |
| class CodeGenerator; |
| class CompilerOptions; |
| class StackMapStream; |
| class ParallelMoveResolver; |
| |
| namespace linker { |
| class LinkerPatch; |
| } // namespace linker |
| |
| class CodeAllocator { |
| public: |
| CodeAllocator() {} |
| virtual ~CodeAllocator() {} |
| |
| virtual uint8_t* Allocate(size_t size) = 0; |
| virtual ArrayRef<const uint8_t> GetMemory() const = 0; |
| |
| private: |
| DISALLOW_COPY_AND_ASSIGN(CodeAllocator); |
| }; |
| |
| class SlowPathCode : public DeletableArenaObject<kArenaAllocSlowPaths> { |
| public: |
| explicit SlowPathCode(HInstruction* instruction) : instruction_(instruction) { |
| for (size_t i = 0; i < kMaximumNumberOfExpectedRegisters; ++i) { |
| saved_core_stack_offsets_[i] = kRegisterNotSaved; |
| saved_fpu_stack_offsets_[i] = kRegisterNotSaved; |
| } |
| } |
| |
| virtual ~SlowPathCode() {} |
| |
| virtual void EmitNativeCode(CodeGenerator* codegen) = 0; |
| |
| // Save live core and floating-point caller-save registers and |
| // update the stack mask in `locations` for registers holding object |
| // references. |
| virtual void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations); |
| // Restore live core and floating-point caller-save registers. |
| virtual void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations); |
| |
| bool IsCoreRegisterSaved(int reg) const { |
| return saved_core_stack_offsets_[reg] != kRegisterNotSaved; |
| } |
| |
| bool IsFpuRegisterSaved(int reg) const { |
| return saved_fpu_stack_offsets_[reg] != kRegisterNotSaved; |
| } |
| |
| uint32_t GetStackOffsetOfCoreRegister(int reg) const { |
| return saved_core_stack_offsets_[reg]; |
| } |
| |
| uint32_t GetStackOffsetOfFpuRegister(int reg) const { |
| return saved_fpu_stack_offsets_[reg]; |
| } |
| |
| virtual bool IsFatal() const { return false; } |
| |
| virtual const char* GetDescription() const = 0; |
| |
| Label* GetEntryLabel() { return &entry_label_; } |
| Label* GetExitLabel() { return &exit_label_; } |
| |
| HInstruction* GetInstruction() const { |
| return instruction_; |
| } |
| |
| uint32_t GetDexPc() const { |
| return instruction_ != nullptr ? instruction_->GetDexPc() : kNoDexPc; |
| } |
| |
| protected: |
| static constexpr size_t kMaximumNumberOfExpectedRegisters = 32; |
| static constexpr uint32_t kRegisterNotSaved = -1; |
| // The instruction where this slow path is happening. |
| HInstruction* instruction_; |
| uint32_t saved_core_stack_offsets_[kMaximumNumberOfExpectedRegisters]; |
| uint32_t saved_fpu_stack_offsets_[kMaximumNumberOfExpectedRegisters]; |
| |
| private: |
| Label entry_label_; |
| Label exit_label_; |
| |
| DISALLOW_COPY_AND_ASSIGN(SlowPathCode); |
| }; |
| |
| class InvokeDexCallingConventionVisitor { |
| public: |
| virtual Location GetNextLocation(DataType::Type type) = 0; |
| virtual Location GetReturnLocation(DataType::Type type) const = 0; |
| virtual Location GetMethodLocation() const = 0; |
| |
| protected: |
| InvokeDexCallingConventionVisitor() {} |
| virtual ~InvokeDexCallingConventionVisitor() {} |
| |
| // The current index for core registers. |
| uint32_t gp_index_ = 0u; |
| // The current index for floating-point registers. |
| uint32_t float_index_ = 0u; |
| // The current stack index. |
| uint32_t stack_index_ = 0u; |
| |
| private: |
| DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor); |
| }; |
| |
| class FieldAccessCallingConvention { |
| public: |
| virtual Location GetObjectLocation() const = 0; |
| virtual Location GetFieldIndexLocation() const = 0; |
| virtual Location GetReturnLocation(DataType::Type type) const = 0; |
| virtual Location GetSetValueLocation(DataType::Type type, bool is_instance) const = 0; |
| virtual Location GetFpuLocation(DataType::Type type) const = 0; |
| virtual ~FieldAccessCallingConvention() {} |
| |
| protected: |
| FieldAccessCallingConvention() {} |
| |
| private: |
| DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConvention); |
| }; |
| |
| class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { |
| public: |
| // Compiles the graph to executable instructions. |
| void Compile(CodeAllocator* allocator); |
| static std::unique_ptr<CodeGenerator> Create(HGraph* graph, |
| const CompilerOptions& compiler_options, |
| OptimizingCompilerStats* stats = nullptr); |
| virtual ~CodeGenerator(); |
| |
| // Get the graph. This is the outermost graph, never the graph of a method being inlined. |
| HGraph* GetGraph() const { return graph_; } |
| |
| HBasicBlock* GetNextBlockToEmit() const; |
| HBasicBlock* FirstNonEmptyBlock(HBasicBlock* block) const; |
| bool GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const; |
| |
| size_t GetStackSlotOfParameter(HParameterValue* parameter) const { |
| // Note that this follows the current calling convention. |
| return GetFrameSize() |
| + static_cast<size_t>(InstructionSetPointerSize(GetInstructionSet())) // Art method |
| + parameter->GetIndex() * kVRegSize; |
| } |
| |
| virtual void Initialize() = 0; |
| virtual void Finalize(CodeAllocator* allocator); |
| virtual void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches); |
| virtual bool NeedsThunkCode(const linker::LinkerPatch& patch) const; |
| virtual void EmitThunkCode(const linker::LinkerPatch& patch, |
| /*out*/ ArenaVector<uint8_t>* code, |
| /*out*/ std::string* debug_name); |
| virtual void GenerateFrameEntry() = 0; |
| virtual void GenerateFrameExit() = 0; |
| virtual void Bind(HBasicBlock* block) = 0; |
| virtual void MoveConstant(Location destination, int32_t value) = 0; |
| virtual void MoveLocation(Location dst, Location src, DataType::Type dst_type) = 0; |
| virtual void AddLocationAsTemp(Location location, LocationSummary* locations) = 0; |
| |
| virtual Assembler* GetAssembler() = 0; |
| virtual const Assembler& GetAssembler() const = 0; |
| virtual size_t GetWordSize() const = 0; |
| |
| // Returns whether the target supports predicated SIMD instructions. |
| virtual bool SupportsPredicatedSIMD() const { return false; } |
| |
| // Get FP register width in bytes for spilling/restoring in the slow paths. |
| // |
| // Note: In SIMD graphs this should return SIMD register width as all FP and SIMD registers |
| // alias and live SIMD registers are forced to be spilled in full size in the slow paths. |
| virtual size_t GetSlowPathFPWidth() const { |
| // Default implementation. |
| return GetCalleePreservedFPWidth(); |
| } |
| |
| // Get FP register width required to be preserved by the target ABI. |
| virtual size_t GetCalleePreservedFPWidth() const = 0; |
| |
| // Get the size of the target SIMD register in bytes. |
| virtual size_t GetSIMDRegisterWidth() const = 0; |
| virtual uintptr_t GetAddressOf(HBasicBlock* block) = 0; |
| void InitializeCodeGeneration(size_t number_of_spill_slots, |
| size_t maximum_safepoint_spill_size, |
| size_t number_of_out_slots, |
| const ArenaVector<HBasicBlock*>& block_order); |
| // Backends can override this as necessary. For most, no special alignment is required. |
| virtual uint32_t GetPreferredSlotsAlignment() const { return 1; } |
| |
| uint32_t GetFrameSize() const { return frame_size_; } |
| void SetFrameSize(uint32_t size) { frame_size_ = size; } |
| uint32_t GetCoreSpillMask() const { return core_spill_mask_; } |
| uint32_t GetFpuSpillMask() const { return fpu_spill_mask_; } |
| |
| size_t GetNumberOfCoreRegisters() const { return number_of_core_registers_; } |
| size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; } |
| virtual void SetupBlockedRegisters() const = 0; |
| |
| virtual void ComputeSpillMask() { |
| core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_; |
| DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved"; |
| fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_; |
| } |
| |
// Builds a 32-bit mask with bit `registers[i]` set for each of the first
// `length` entries of `registers`. Returns 0 when `length` is 0.
// Each register number must be in the range [0, 31].
static uint32_t ComputeRegisterMask(const int* registers, size_t length) {
  uint32_t mask = 0u;
  for (size_t i = 0; i != length; ++i) {
    // Shift an unsigned one: `1 << 31` on a signed int overflows.
    mask |= (1u << registers[i]);
  }
  return mask;
}
| |
| virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0; |
| virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0; |
| virtual InstructionSet GetInstructionSet() const = 0; |
| |
| const CompilerOptions& GetCompilerOptions() const { return compiler_options_; } |
| |
| // Saves the register in the stack. Returns the size taken on stack. |
| virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0; |
| // Restores the register from the stack. Returns the size taken on stack. |
| virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) = 0; |
| |
| virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0; |
| virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0; |
| |
| virtual bool NeedsTwoRegisters(DataType::Type type) const = 0; |
| // Returns whether we should split long moves in parallel moves. |
| virtual bool ShouldSplitLongMoves() const { return false; } |
| |
| size_t GetNumberOfCoreCalleeSaveRegisters() const { |
| return POPCOUNT(core_callee_save_mask_); |
| } |
| |
| size_t GetNumberOfCoreCallerSaveRegisters() const { |
| DCHECK_GE(GetNumberOfCoreRegisters(), GetNumberOfCoreCalleeSaveRegisters()); |
| return GetNumberOfCoreRegisters() - GetNumberOfCoreCalleeSaveRegisters(); |
| } |
| |
| bool IsCoreCalleeSaveRegister(int reg) const { |
| return (core_callee_save_mask_ & (1 << reg)) != 0; |
| } |
| |
| bool IsFloatingPointCalleeSaveRegister(int reg) const { |
| return (fpu_callee_save_mask_ & (1 << reg)) != 0; |
| } |
| |
| uint32_t GetSlowPathSpills(LocationSummary* locations, bool core_registers) const { |
| DCHECK(locations->OnlyCallsOnSlowPath() || |
| (locations->Intrinsified() && locations->CallsOnMainAndSlowPath() && |
| !locations->HasCustomSlowPathCallingConvention())); |
| uint32_t live_registers = core_registers |
| ? locations->GetLiveRegisters()->GetCoreRegisters() |
| : locations->GetLiveRegisters()->GetFloatingPointRegisters(); |
| if (locations->HasCustomSlowPathCallingConvention()) { |
| // Save only the live registers that the custom calling convention wants us to save. |
| uint32_t caller_saves = core_registers |
| ? locations->GetCustomSlowPathCallerSaves().GetCoreRegisters() |
| : locations->GetCustomSlowPathCallerSaves().GetFloatingPointRegisters(); |
| return live_registers & caller_saves; |
| } else { |
| // Default ABI, we need to spill non-callee-save live registers. |
| uint32_t callee_saves = core_registers ? core_callee_save_mask_ : fpu_callee_save_mask_; |
| return live_registers & ~callee_saves; |
| } |
| } |
| |
| size_t GetNumberOfSlowPathSpills(LocationSummary* locations, bool core_registers) const { |
| return POPCOUNT(GetSlowPathSpills(locations, core_registers)); |
| } |
| |
| size_t GetStackOffsetOfShouldDeoptimizeFlag() const { |
| DCHECK(GetGraph()->HasShouldDeoptimizeFlag()); |
| DCHECK_GE(GetFrameSize(), FrameEntrySpillSize() + kShouldDeoptimizeFlagSize); |
| return GetFrameSize() - FrameEntrySpillSize() - kShouldDeoptimizeFlagSize; |
| } |
| |
| // Record native to dex mapping for a suspend point. Required by runtime. |
| void RecordPcInfo(HInstruction* instruction, |
| uint32_t dex_pc, |
| uint32_t native_pc, |
| SlowPathCode* slow_path = nullptr, |
| bool native_debug_info = false); |
| |
| // Record native to dex mapping for a suspend point. |
| // The native_pc is used from Assembler::CodePosition. |
| // |
| // Note: As Assembler::CodePosition is target dependent, it does not guarantee the exact native_pc |
| // for the instruction. If the exact native_pc is required it must be provided explicitly. |
| void RecordPcInfo(HInstruction* instruction, |
| uint32_t dex_pc, |
| SlowPathCode* slow_path = nullptr, |
| bool native_debug_info = false); |
| |
| // Check whether we have already recorded mapping at this PC. |
| bool HasStackMapAtCurrentPc(); |
| |
| // Record extra stack maps if we support native debugging. |
| // |
// ARM specific behaviour: The recorded native PC might be a branch over pools to instructions
// corresponding to the dex PC.
| void MaybeRecordNativeDebugInfo(HInstruction* instruction, |
| uint32_t dex_pc, |
| SlowPathCode* slow_path = nullptr); |
| |
| bool CanMoveNullCheckToUser(HNullCheck* null_check); |
| virtual void MaybeRecordImplicitNullCheck(HInstruction* instruction); |
| LocationSummary* CreateThrowingSlowPathLocations( |
| HInstruction* instruction, RegisterSet caller_saves = RegisterSet::Empty()); |
| void GenerateNullCheck(HNullCheck* null_check); |
| virtual void GenerateImplicitNullCheck(HNullCheck* null_check) = 0; |
| virtual void GenerateExplicitNullCheck(HNullCheck* null_check) = 0; |
| |
| // Records a stack map which the runtime might use to set catch phi values |
| // during exception delivery. |
| // TODO: Replace with a catch-entering instruction that records the environment. |
| void RecordCatchBlockInfo(); |
| |
| // Get the ScopedArenaAllocator used for codegen memory allocation. |
| ScopedArenaAllocator* GetScopedAllocator(); |
| |
| void AddSlowPath(SlowPathCode* slow_path); |
| |
| ScopedArenaVector<uint8_t> BuildStackMaps(const dex::CodeItem* code_item_for_osr_check); |
| size_t GetNumberOfJitRoots() const; |
| |
| // Fills the `literals` array with literals collected during code generation. |
| // Also emits literal patches. |
| void EmitJitRoots(uint8_t* code, |
| const uint8_t* roots_data, |
| /*out*/std::vector<Handle<mirror::Object>>* roots) |
| REQUIRES_SHARED(Locks::mutator_lock_); |
| |
| bool IsLeafMethod() const { |
| return is_leaf_; |
| } |
| |
| void MarkNotLeaf() { |
| is_leaf_ = false; |
| requires_current_method_ = true; |
| } |
| |
| void SetRequiresCurrentMethod() { |
| requires_current_method_ = true; |
| } |
| |
| bool RequiresCurrentMethod() const { |
| return requires_current_method_; |
| } |
| |
| // Clears the spill slots taken by loop phis in the `LocationSummary` of the |
| // suspend check. This is called when the code generator generates code |
| // for the suspend check at the back edge (instead of where the suspend check |
| // is, which is the loop entry). At this point, the spill slots for the phis |
| // have not been written to. |
| void ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check, |
| HParallelMove* spills) const; |
| |
| bool* GetBlockedCoreRegisters() const { return blocked_core_registers_; } |
| bool* GetBlockedFloatingPointRegisters() const { return blocked_fpu_registers_; } |
| |
| bool IsBlockedCoreRegister(size_t i) { return blocked_core_registers_[i]; } |
| bool IsBlockedFloatingPointRegister(size_t i) { return blocked_fpu_registers_[i]; } |
| |
| // Helper that returns the offset of the array's length field. |
| // Note: Besides the normal arrays, we also use the HArrayLength for |
| // accessing the String's `count` field in String intrinsics. |
| static uint32_t GetArrayLengthOffset(HArrayLength* array_length); |
| |
| // Helper that returns the offset of the array's data. |
| // Note: Besides the normal arrays, we also use the HArrayGet for |
| // accessing the String's `value` field in String intrinsics. |
| static uint32_t GetArrayDataOffset(HArrayGet* array_get); |
| |
| void EmitParallelMoves(Location from1, |
| Location to1, |
| DataType::Type type1, |
| Location from2, |
| Location to2, |
| DataType::Type type2); |
| |
| static bool InstanceOfNeedsReadBarrier(HInstanceOf* instance_of) { |
| // Used only for kExactCheck, kAbstractClassCheck, kClassHierarchyCheck and kArrayObjectCheck. |
| DCHECK(instance_of->GetTypeCheckKind() == TypeCheckKind::kExactCheck || |
| instance_of->GetTypeCheckKind() == TypeCheckKind::kAbstractClassCheck || |
| instance_of->GetTypeCheckKind() == TypeCheckKind::kClassHierarchyCheck || |
| instance_of->GetTypeCheckKind() == TypeCheckKind::kArrayObjectCheck) |
| << instance_of->GetTypeCheckKind(); |
| // If the target class is in the boot image, it's non-moveable and it doesn't matter |
| // if we compare it with a from-space or to-space reference, the result is the same. |
| // It's OK to traverse a class hierarchy jumping between from-space and to-space. |
| return kEmitCompilerReadBarrier && !instance_of->GetTargetClass()->IsInBootImage(); |
| } |
| |
| static ReadBarrierOption ReadBarrierOptionForInstanceOf(HInstanceOf* instance_of) { |
| return InstanceOfNeedsReadBarrier(instance_of) ? kWithReadBarrier : kWithoutReadBarrier; |
| } |
| |
| static bool IsTypeCheckSlowPathFatal(HCheckCast* check_cast) { |
| switch (check_cast->GetTypeCheckKind()) { |
| case TypeCheckKind::kExactCheck: |
| case TypeCheckKind::kAbstractClassCheck: |
| case TypeCheckKind::kClassHierarchyCheck: |
| case TypeCheckKind::kArrayObjectCheck: |
| case TypeCheckKind::kInterfaceCheck: { |
| bool needs_read_barrier = |
| kEmitCompilerReadBarrier && !check_cast->GetTargetClass()->IsInBootImage(); |
| // We do not emit read barriers for HCheckCast, so we can get false negatives |
| // and the slow path shall re-check and simply return if the cast is actually OK. |
| return !needs_read_barrier; |
| } |
| case TypeCheckKind::kArrayCheck: |
| case TypeCheckKind::kUnresolvedCheck: |
| return false; |
| case TypeCheckKind::kBitstringCheck: |
| return true; |
| } |
| LOG(FATAL) << "Unreachable"; |
| UNREACHABLE(); |
| } |
| |
| static LocationSummary::CallKind GetCheckCastCallKind(HCheckCast* check_cast) { |
| return (IsTypeCheckSlowPathFatal(check_cast) && !check_cast->CanThrowIntoCatchBlock()) |
| ? LocationSummary::kNoCall // In fact, call on a fatal (non-returning) slow path. |
| : LocationSummary::kCallOnSlowPath; |
| } |
| |
| static bool StoreNeedsWriteBarrier(DataType::Type type, HInstruction* value) { |
| // Check that null value is not represented as an integer constant. |
| DCHECK(type != DataType::Type::kReference || !value->IsIntConstant()); |
| return type == DataType::Type::kReference && !value->IsNullConstant(); |
| } |
| |
| |
| // Performs checks pertaining to an InvokeRuntime call. |
| void ValidateInvokeRuntime(QuickEntrypointEnum entrypoint, |
| HInstruction* instruction, |
| SlowPathCode* slow_path); |
| |
| // Performs checks pertaining to an InvokeRuntimeWithoutRecordingPcInfo call. |
| static void ValidateInvokeRuntimeWithoutRecordingPcInfo(HInstruction* instruction, |
| SlowPathCode* slow_path); |
| |
| void AddAllocatedRegister(Location location) { |
| allocated_registers_.Add(location); |
| } |
| |
| bool HasAllocatedRegister(bool is_core, int reg) const { |
| return is_core |
| ? allocated_registers_.ContainsCoreRegister(reg) |
| : allocated_registers_.ContainsFloatingPointRegister(reg); |
| } |
| |
| void AllocateLocations(HInstruction* instruction); |
| |
| // Tells whether the stack frame of the compiled method is |
| // considered "empty", that is either actually having a size of zero, |
| // or just containing the saved return address register. |
| bool HasEmptyFrame() const { |
| return GetFrameSize() == (CallPushesPC() ? GetWordSize() : 0); |
| } |
| |
| static int8_t GetInt8ValueOf(HConstant* constant) { |
| DCHECK(constant->IsIntConstant()); |
| return constant->AsIntConstant()->GetValue(); |
| } |
| |
| static int16_t GetInt16ValueOf(HConstant* constant) { |
| DCHECK(constant->IsIntConstant()); |
| return constant->AsIntConstant()->GetValue(); |
| } |
| |
| static int32_t GetInt32ValueOf(HConstant* constant) { |
| if (constant->IsIntConstant()) { |
| return constant->AsIntConstant()->GetValue(); |
| } else if (constant->IsNullConstant()) { |
| return 0; |
| } else { |
| DCHECK(constant->IsFloatConstant()); |
| return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue()); |
| } |
| } |
| |
| static int64_t GetInt64ValueOf(HConstant* constant) { |
| if (constant->IsIntConstant()) { |
| return constant->AsIntConstant()->GetValue(); |
| } else if (constant->IsNullConstant()) { |
| return 0; |
| } else if (constant->IsFloatConstant()) { |
| return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue()); |
| } else if (constant->IsLongConstant()) { |
| return constant->AsLongConstant()->GetValue(); |
| } else { |
| DCHECK(constant->IsDoubleConstant()); |
| return bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue()); |
| } |
| } |
| |
| size_t GetFirstRegisterSlotInSlowPath() const { |
| return first_register_slot_in_slow_path_; |
| } |
| |
| uint32_t FrameEntrySpillSize() const { |
| return GetFpuSpillSize() + GetCoreSpillSize(); |
| } |
| |
| virtual ParallelMoveResolver* GetMoveResolver() = 0; |
| |
| static void CreateCommonInvokeLocationSummary( |
| HInvoke* invoke, InvokeDexCallingConventionVisitor* visitor); |
| |
| template <typename CriticalNativeCallingConventionVisitor, |
| size_t kNativeStackAlignment, |
| size_t GetCriticalNativeDirectCallFrameSize(const char* shorty, uint32_t shorty_len)> |
| size_t PrepareCriticalNativeCall(HInvokeStaticOrDirect* invoke) { |
| DCHECK(!invoke->GetLocations()->Intrinsified()); |
| CriticalNativeCallingConventionVisitor calling_convention_visitor( |
| /*for_register_allocation=*/ false); |
| HParallelMove parallel_move(GetGraph()->GetAllocator()); |
| PrepareCriticalNativeArgumentMoves(invoke, &calling_convention_visitor, ¶llel_move); |
| size_t out_frame_size = |
| RoundUp(calling_convention_visitor.GetStackOffset(), kNativeStackAlignment); |
| if (kIsDebugBuild) { |
| uint32_t shorty_len; |
| const char* shorty = GetCriticalNativeShorty(invoke, &shorty_len); |
| DCHECK_EQ(GetCriticalNativeDirectCallFrameSize(shorty, shorty_len), out_frame_size); |
| } |
| if (out_frame_size != 0u) { |
| FinishCriticalNativeFrameSetup(out_frame_size, ¶llel_move); |
| } |
| return out_frame_size; |
| } |
| |
| void GenerateInvokeStaticOrDirectRuntimeCall( |
| HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path); |
| |
| void GenerateInvokeUnresolvedRuntimeCall(HInvokeUnresolved* invoke); |
| |
| void GenerateInvokePolymorphicCall(HInvokePolymorphic* invoke, SlowPathCode* slow_path = nullptr); |
| |
| void GenerateInvokeCustomCall(HInvokeCustom* invoke); |
| |
| void CreateStringBuilderAppendLocations(HStringBuilderAppend* instruction, Location out); |
| |
| void CreateUnresolvedFieldLocationSummary( |
| HInstruction* field_access, |
| DataType::Type field_type, |
| const FieldAccessCallingConvention& calling_convention); |
| |
| void GenerateUnresolvedFieldAccess( |
| HInstruction* field_access, |
| DataType::Type field_type, |
| uint32_t field_index, |
| uint32_t dex_pc, |
| const FieldAccessCallingConvention& calling_convention); |
| |
| static void CreateLoadClassRuntimeCallLocationSummary(HLoadClass* cls, |
| Location runtime_type_index_location, |
| Location runtime_return_location); |
| void GenerateLoadClassRuntimeCall(HLoadClass* cls); |
| |
| static void CreateLoadMethodHandleRuntimeCallLocationSummary(HLoadMethodHandle* method_handle, |
| Location runtime_handle_index_location, |
| Location runtime_return_location); |
| void GenerateLoadMethodHandleRuntimeCall(HLoadMethodHandle* method_handle); |
| |
| static void CreateLoadMethodTypeRuntimeCallLocationSummary(HLoadMethodType* method_type, |
| Location runtime_type_index_location, |
| Location runtime_return_location); |
| void GenerateLoadMethodTypeRuntimeCall(HLoadMethodType* method_type); |
| |
| static uint32_t GetBootImageOffset(ObjPtr<mirror::Object> object) |
| REQUIRES_SHARED(Locks::mutator_lock_); |
| static uint32_t GetBootImageOffset(HLoadClass* load_class); |
| static uint32_t GetBootImageOffset(HLoadString* load_string); |
| static uint32_t GetBootImageOffset(HInvoke* invoke); |
| static uint32_t GetBootImageOffset(ClassRoot class_root); |
| static uint32_t GetBootImageOffsetOfIntrinsicDeclaringClass(HInvoke* invoke); |
| |
| static void CreateSystemArrayCopyLocationSummary(HInvoke* invoke); |
| |
| void SetDisassemblyInformation(DisassemblyInformation* info) { disasm_info_ = info; } |
| DisassemblyInformation* GetDisassemblyInformation() const { return disasm_info_; } |
| |
| virtual void InvokeRuntime(QuickEntrypointEnum entrypoint, |
| HInstruction* instruction, |
| uint32_t dex_pc, |
| SlowPathCode* slow_path = nullptr) = 0; |
| |
| // Check if the desired_string_load_kind is supported. If it is, return it, |
| // otherwise return a fall-back kind that should be used instead. |
| virtual HLoadString::LoadKind GetSupportedLoadStringKind( |
| HLoadString::LoadKind desired_string_load_kind) = 0; |
| |
| // Check if the desired_class_load_kind is supported. If it is, return it, |
| // otherwise return a fall-back kind that should be used instead. |
| virtual HLoadClass::LoadKind GetSupportedLoadClassKind( |
| HLoadClass::LoadKind desired_class_load_kind) = 0; |
| |
| static LocationSummary::CallKind GetLoadStringCallKind(HLoadString* load) { |
| switch (load->GetLoadKind()) { |
| case HLoadString::LoadKind::kBssEntry: |
| DCHECK(load->NeedsEnvironment()); |
| return LocationSummary::kCallOnSlowPath; |
| case HLoadString::LoadKind::kRuntimeCall: |
| DCHECK(load->NeedsEnvironment()); |
| return LocationSummary::kCallOnMainOnly; |
| case HLoadString::LoadKind::kJitTableAddress: |
| DCHECK(!load->NeedsEnvironment()); |
| return kEmitCompilerReadBarrier |
| ? LocationSummary::kCallOnSlowPath |
| : LocationSummary::kNoCall; |
| break; |
| default: |
| DCHECK(!load->NeedsEnvironment()); |
| return LocationSummary::kNoCall; |
| } |
| } |
| |
| // Check if the desired_dispatch_info is supported. If it is, return it, |
| // otherwise return a fall-back info that should be used instead. |
| virtual HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch( |
| const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, |
| ArtMethod* method) = 0; |
| |
| // Generate a call to a static or direct method. |
| virtual void GenerateStaticOrDirectCall( |
| HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) = 0; |
| // Generate a call to a virtual method. |
| virtual void GenerateVirtualCall( |
| HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) = 0; |
| |
| // Copy the result of a call into the given target. |
| virtual void MoveFromReturnRegister(Location trg, DataType::Type type) = 0; |
| |
| virtual void IncreaseFrame(size_t adjustment) = 0; |
| virtual void DecreaseFrame(size_t adjustment) = 0; |
| |
| virtual void GenerateNop() = 0; |
| |
| static QuickEntrypointEnum GetArrayAllocationEntrypoint(HNewArray* new_array); |
| static ScaleFactor ScaleFactorForType(DataType::Type type); |
| |
| protected: |
| // Patch info used for recording locations of required linker patches and their targets, |
| // i.e. target method, string, type or code identified by their dex file and index, |
| // or .data.bimg.rel.ro entries identified by the boot image offset. |
| template <typename LabelType> |
| struct PatchInfo { |
| PatchInfo(const DexFile* dex_file, uint32_t off_or_idx) |
| : target_dex_file(dex_file), offset_or_index(off_or_idx), label() { } |
| |
| // Target dex file or null for .data.bmig.rel.ro patches. |
| const DexFile* target_dex_file; |
| // Either the boot image offset (to write to .data.bmig.rel.ro) or string/type/method index. |
| uint32_t offset_or_index; |
| // Label for the instruction to patch. |
| LabelType label; |
| }; |
| |
| CodeGenerator(HGraph* graph, |
| size_t number_of_core_registers, |
| size_t number_of_fpu_registers, |
| size_t number_of_register_pairs, |
| uint32_t core_callee_save_mask, |
| uint32_t fpu_callee_save_mask, |
| const CompilerOptions& compiler_options, |
| OptimizingCompilerStats* stats); |
| |
| virtual HGraphVisitor* GetLocationBuilder() = 0; |
| virtual HGraphVisitor* GetInstructionVisitor() = 0; |
| |
| // Returns the location of the first spilled entry for floating point registers, |
| // relative to the stack pointer. |
| uint32_t GetFpuSpillStart() const { |
| return GetFrameSize() - FrameEntrySpillSize(); |
| } |
| |
| uint32_t GetFpuSpillSize() const { |
| return POPCOUNT(fpu_spill_mask_) * GetCalleePreservedFPWidth(); |
| } |
| |
| uint32_t GetCoreSpillSize() const { |
| return POPCOUNT(core_spill_mask_) * GetWordSize(); |
| } |
| |
| virtual bool HasAllocatedCalleeSaveRegisters() const { |
| // We check the core registers against 1 because it always comprises the return PC. |
| return (POPCOUNT(allocated_registers_.GetCoreRegisters() & core_callee_save_mask_) != 1) |
| || (POPCOUNT(allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_) != 0); |
| } |
| |
| bool CallPushesPC() const { |
| InstructionSet instruction_set = GetInstructionSet(); |
| return instruction_set == InstructionSet::kX86 || instruction_set == InstructionSet::kX86_64; |
| } |
| |
| // Arm64 has its own type for a label, so we need to templatize these methods |
| // to share the logic. |
| |
| template <typename LabelType> |
| LabelType* CommonInitializeLabels() { |
| // We use raw array allocations instead of ArenaVector<> because Labels are |
| // non-constructible and non-movable and as such cannot be held in a vector. |
| size_t size = GetGraph()->GetBlocks().size(); |
| LabelType* labels = |
| GetGraph()->GetAllocator()->AllocArray<LabelType>(size, kArenaAllocCodeGenerator); |
| for (size_t i = 0; i != size; ++i) { |
| new(labels + i) LabelType(); |
| } |
| return labels; |
| } |
| |
| template <typename LabelType> |
| LabelType* CommonGetLabelOf(LabelType* raw_pointer_to_labels_array, HBasicBlock* block) const { |
| block = FirstNonEmptyBlock(block); |
| return raw_pointer_to_labels_array + block->GetBlockId(); |
| } |
| |
| SlowPathCode* GetCurrentSlowPath() { |
| return current_slow_path_; |
| } |
| |
// Returns the stack map stream of this code generator (defined out of line).
StackMapStream* GetStackMapStream();

// JIT root bookkeeping for strings and classes (all defined out of line).
// NOTE(review): presumably a root must be reserved with Reserve*() before its
// index is queried with GetJit*RootIndex(); confirm against the definitions.
void ReserveJitStringRoot(StringReference string_reference, Handle<mirror::String> string);
uint64_t GetJitStringRootIndex(StringReference string_reference);
void ReserveJitClassRoot(TypeReference type_reference, Handle<mirror::Class> klass);
uint64_t GetJitClassRootIndex(TypeReference type_reference);

// Emit the patches associated with JIT roots. Only applies to JIT compiled code.
// Virtual, so that a code generator can provide its own patching.
virtual void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data);
| |
// Frame size required for this method.
uint32_t frame_size_;
// Bit masks of the spilled core and floating point registers. The number of
// bits set in each mask determines GetCoreSpillSize() and GetFpuSpillSize().
uint32_t core_spill_mask_;
uint32_t fpu_spill_mask_;
// NOTE(review): presumably the first stack slot that slow paths may use for
// saving registers; confirm against the code that sets it.
uint32_t first_register_slot_in_slow_path_;

// Registers that were allocated during linear scan.
RegisterSet allocated_registers_;

// Arrays used when doing register allocation to know which
// registers we can allocate. `SetupBlockedRegisters` updates the
// arrays.
bool* const blocked_core_registers_;
bool* const blocked_fpu_registers_;
size_t number_of_core_registers_;
size_t number_of_fpu_registers_;
size_t number_of_register_pairs_;
// Bit masks of the callee-save registers; HasAllocatedCalleeSaveRegisters()
// intersects them with `allocated_registers_`. The core mask always comprises
// the return PC.
const uint32_t core_callee_save_mask_;
const uint32_t fpu_callee_save_mask_;

// The order to use for code generation.
const ArenaVector<HBasicBlock*>* block_order_;

// NOTE(review): presumably only set when disassembly information is being
// collected; confirm where it is assigned.
DisassemblyInformation* disasm_info_;
| |
private:
// Defined out of line; instances are held through `code_generation_data_`.
class CodeGenerationData;

// Private helpers, all defined out of line.
void InitializeCodeGenerationData();
size_t GetStackOffsetOfSavedRegister(size_t index);
void GenerateSlowPaths();
void BlockIfInRegister(Location location, bool is_out = false) const;
// `needs_vreg_info` defaults to true.
// NOTE(review): presumably it controls whether EmitVRegInfo() is invoked for
// the environment; confirm against the definition.
void EmitEnvironment(HEnvironment* environment,
SlowPathCode* slow_path,
bool needs_vreg_info = true);
void EmitVRegInfo(HEnvironment* environment, SlowPathCode* slow_path);

// Helpers for critical native invokes. The /*inout*/ and /*out*/ markers show
// which arguments are modified by the callee.
static void PrepareCriticalNativeArgumentMoves(
HInvokeStaticOrDirect* invoke,
/*inout*/InvokeDexCallingConventionVisitor* visitor,
/*out*/HParallelMove* parallel_move);

void FinishCriticalNativeFrameSetup(size_t out_frame_size, /*inout*/HParallelMove* parallel_move);

// Returns the shorty of `invoke`; `shorty_len` is an out-parameter.
// NOTE(review): presumably it receives the length of the shorty; confirm.
static const char* GetCriticalNativeShorty(HInvokeStaticOrDirect* invoke, uint32_t* shorty_len);
| |
// Compilation statistics sink.
// NOTE(review): presumably may be null when statistics are not collected; confirm.
OptimizingCompilerStats* stats_;

// The graph being compiled and the options of the compilation; both are fixed
// for the lifetime of the code generator (const pointer / const reference).
HGraph* const graph_;
const CompilerOptions& compiler_options_;

// The current slow-path that we're generating code for.
SlowPathCode* current_slow_path_;

// The current block index in `block_order_` of the block
// we are generating code for.
size_t current_block_index_;

// Whether the method is a leaf method.
bool is_leaf_;

// Whether an instruction in the graph accesses the current method.
// TODO: Rename: this actually indicates that some instruction in the method
// needs the environment including a valid stack frame.
bool requires_current_method_;

// The CodeGenerationData contains a ScopedArenaAllocator intended for reusing the
// ArenaStack memory allocated in previous passes instead of adding to the memory
// held by the ArenaAllocator. This ScopedArenaAllocator is created in
// CodeGenerator::Compile() and remains alive until the CodeGenerator is destroyed.
std::unique_ptr<CodeGenerationData> code_generation_data_;

// Tests that are granted access to the private members above.
friend class OptimizingCFITest;
ART_FRIEND_TEST(CodegenTest, ARM64FrameSizeSIMD);
ART_FRIEND_TEST(CodegenTest, ARM64FrameSizeNoSIMD);

DISALLOW_COPY_AND_ASSIGN(CodeGenerator);
| }; |
| |
| template <typename C, typename F> |
| class CallingConvention { |
| public: |
| CallingConvention(const C* registers, |
| size_t number_of_registers, |
| const F* fpu_registers, |
| size_t number_of_fpu_registers, |
| PointerSize pointer_size) |
| : registers_(registers), |
| number_of_registers_(number_of_registers), |
| fpu_registers_(fpu_registers), |
| number_of_fpu_registers_(number_of_fpu_registers), |
| pointer_size_(pointer_size) {} |
| |
| size_t GetNumberOfRegisters() const { return number_of_registers_; } |
| size_t GetNumberOfFpuRegisters() const { return number_of_fpu_registers_; } |
| |
| C GetRegisterAt(size_t index) const { |
| DCHECK_LT(index, number_of_registers_); |
| return registers_[index]; |
| } |
| |
| F GetFpuRegisterAt(size_t index) const { |
| DCHECK_LT(index, number_of_fpu_registers_); |
| return fpu_registers_[index]; |
| } |
| |
| size_t GetStackOffsetOf(size_t index) const { |
| // We still reserve the space for parameters passed by registers. |
| // Add space for the method pointer. |
| return static_cast<size_t>(pointer_size_) + index * kVRegSize; |
| } |
| |
| private: |
| const C* registers_; |
| const size_t number_of_registers_; |
| const F* fpu_registers_; |
| const size_t number_of_fpu_registers_; |
| const PointerSize pointer_size_; |
| |
| DISALLOW_COPY_AND_ASSIGN(CallingConvention); |
| }; |
| |
| /** |
| * A templated class SlowPathGenerator with a templated method NewSlowPath() |
| * that can be used by any code generator to share equivalent slow-paths with |
| * the objective of reducing generated code size. |
| * |
| * InstructionType: instruction that requires SlowPathCodeType |
| * SlowPathCodeType: subclass of SlowPathCode, with constructor SlowPathCodeType(InstructionType *) |
| */ |
| template <typename InstructionType> |
| class SlowPathGenerator { |
| static_assert(std::is_base_of<HInstruction, InstructionType>::value, |
| "InstructionType is not a subclass of art::HInstruction"); |
| |
| public: |
| SlowPathGenerator(HGraph* graph, CodeGenerator* codegen) |
| : graph_(graph), |
| codegen_(codegen), |
| slow_path_map_(std::less<uint32_t>(), |
| graph->GetAllocator()->Adapter(kArenaAllocSlowPaths)) {} |
| |
| // Creates and adds a new slow-path, if needed, or returns existing one otherwise. |
| // Templating the method (rather than the whole class) on the slow-path type enables |
| // keeping this code at a generic, non architecture-specific place. |
| // |
| // NOTE: This approach assumes each InstructionType only generates one SlowPathCodeType. |
| // To relax this requirement, we would need some RTTI on the stored slow-paths, |
| // or template the class as a whole on SlowPathType. |
| template <typename SlowPathCodeType> |
| SlowPathCodeType* NewSlowPath(InstructionType* instruction) { |
| static_assert(std::is_base_of<SlowPathCode, SlowPathCodeType>::value, |
| "SlowPathCodeType is not a subclass of art::SlowPathCode"); |
| static_assert(std::is_constructible<SlowPathCodeType, InstructionType*>::value, |
| "SlowPathCodeType is not constructible from InstructionType*"); |
| // Iterate over potential candidates for sharing. Currently, only same-typed |
| // slow-paths with exactly the same dex-pc are viable candidates. |
| // TODO: pass dex-pc/slow-path-type to run-time to allow even more sharing? |
| const uint32_t dex_pc = instruction->GetDexPc(); |
| auto iter = slow_path_map_.find(dex_pc); |
| if (iter != slow_path_map_.end()) { |
| const ArenaVector<std::pair<InstructionType*, SlowPathCode*>>& candidates = iter->second; |
| for (const auto& it : candidates) { |
| InstructionType* other_instruction = it.first; |
| SlowPathCodeType* other_slow_path = down_cast<SlowPathCodeType*>(it.second); |
| // Determine if the instructions allow for slow-path sharing. |
| if (HaveSameLiveRegisters(instruction, other_instruction) && |
| HaveSameStackMap(instruction, other_instruction)) { |
| // Can share: reuse existing one. |
| return other_slow_path; |
| } |
| } |
| } else { |
| // First time this dex-pc is seen. |
| iter = slow_path_map_.Put(dex_pc, |
| {{}, {graph_->GetAllocator()->Adapter(kArenaAllocSlowPaths)}}); |
| } |
| // Cannot share: create and add new slow-path for this particular dex-pc. |
| SlowPathCodeType* slow_path = |
| new (codegen_->GetScopedAllocator()) SlowPathCodeType(instruction); |
| iter->second.emplace_back(std::make_pair(instruction, slow_path)); |
| codegen_->AddSlowPath(slow_path); |
| return slow_path; |
| } |
| |
| private: |
| // Tests if both instructions have same set of live physical registers. This ensures |
| // the slow-path has exactly the same preamble on saving these registers to stack. |
| bool HaveSameLiveRegisters(const InstructionType* i1, const InstructionType* i2) const { |
| const uint32_t core_spill = ~codegen_->GetCoreSpillMask(); |
| const uint32_t fpu_spill = ~codegen_->GetFpuSpillMask(); |
| RegisterSet* live1 = i1->GetLocations()->GetLiveRegisters(); |
| RegisterSet* live2 = i2->GetLocations()->GetLiveRegisters(); |
| return (((live1->GetCoreRegisters() & core_spill) == |
| (live2->GetCoreRegisters() & core_spill)) && |
| ((live1->GetFloatingPointRegisters() & fpu_spill) == |
| (live2->GetFloatingPointRegisters() & fpu_spill))); |
| } |
| |
| // Tests if both instructions have the same stack map. This ensures the interpreter |
| // will find exactly the same dex-registers at the same entries. |
| bool HaveSameStackMap(const InstructionType* i1, const InstructionType* i2) const { |
| DCHECK(i1->HasEnvironment()); |
| DCHECK(i2->HasEnvironment()); |
| // We conservatively test if the two instructions find exactly the same instructions |
| // and location in each dex-register. This guarantees they will have the same stack map. |
| HEnvironment* e1 = i1->GetEnvironment(); |
| HEnvironment* e2 = i2->GetEnvironment(); |
| if (e1->GetParent() != e2->GetParent() || e1->Size() != e2->Size()) { |
| return false; |
| } |
| for (size_t i = 0, sz = e1->Size(); i < sz; ++i) { |
| if (e1->GetInstructionAt(i) != e2->GetInstructionAt(i) || |
| !e1->GetLocationAt(i).Equals(e2->GetLocationAt(i))) { |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| HGraph* const graph_; |
| CodeGenerator* const codegen_; |
| |
| // Map from dex-pc to vector of already existing instruction/slow-path pairs. |
| ArenaSafeMap<uint32_t, ArenaVector<std::pair<InstructionType*, SlowPathCode*>>> slow_path_map_; |
| |
| DISALLOW_COPY_AND_ASSIGN(SlowPathGenerator); |
| }; |
| |
| class InstructionCodeGenerator : public HGraphVisitor { |
| public: |
| InstructionCodeGenerator(HGraph* graph, CodeGenerator* codegen) |
| : HGraphVisitor(graph), |
| deopt_slow_paths_(graph, codegen) {} |
| |
| protected: |
| // Add slow-path generator for each instruction/slow-path combination that desires sharing. |
| // TODO: under current regime, only deopt sharing make sense; extend later. |
| SlowPathGenerator<HDeoptimize> deopt_slow_paths_; |
| }; |
| |
| } // namespace art |
| |
| #endif // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_ |