blob: c54c96c40f22581cfcf39bf643a112a4d7051bc0 [file] [log] [blame]
/*
* Copyright (C) 2014 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
#include "arch/instruction_set.h"
#include "arch/instruction_set_features.h"
#include "base/arena_containers.h"
#include "base/arena_object.h"
#include "base/array_ref.h"
#include "base/bit_field.h"
#include "base/bit_utils.h"
#include "base/enums.h"
#include "base/globals.h"
#include "base/macros.h"
#include "base/memory_region.h"
#include "class_root.h"
#include "dex/string_reference.h"
#include "dex/type_reference.h"
#include "graph_visualizer.h"
#include "locations.h"
#include "nodes.h"
#include "oat/oat_quick_method_header.h"
#include "optimizing_compiler_stats.h"
#include "read_barrier_option.h"
#include "stack.h"
#include "subtype_check.h"
#include "utils/assembler.h"
#include "utils/label.h"
namespace art HIDDEN {
// IEEE-754 binary encoding of 2^32 for type double.
static constexpr int64_t k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000);
// IEEE-754 binary encoding of 2^31 for type double.
static constexpr int64_t k2Pow31EncodingForDouble = INT64_C(0x41E0000000000000);
// Minimum value for a primitive integer.
// Spelled with the <cstdint> limit macro: the hex literal 0x80000000 has an unsigned type, so
// initializing a signed constant from it relied on an implementation-defined conversion
// (before C++20) to obtain the intended negative value.
static constexpr int32_t kPrimIntMin = INT32_MIN;
// Minimum value for a primitive long (limit macro used for the same reason as kPrimIntMin).
static constexpr int64_t kPrimLongMin = INT64_MIN;
// Maximum value for a primitive integer.
static constexpr int32_t kPrimIntMax = INT32_MAX;
// Maximum value for a primitive long.
static constexpr int64_t kPrimLongMax = INT64_MAX;
// Bit position of the least significant bit of the class status, computed as the size of the
// SubtypeCheckBits bit struct.
// NOTE(review): presumably the status bits sit directly above the subtype-check bits in the
// same field - confirm against mirror::Class.
constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
// Offset of the byte that contains the status LSB: the class status offset plus the number of
// whole bytes that precede `status_lsb_position`.
constexpr size_t status_byte_offset =
mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
// ClassStatus values pre-shifted by the bit position of the status LSB within that byte
// (`status_lsb_position % kBitsPerByte`).
// NOTE(review): presumably compared against the byte loaded from `status_byte_offset` - confirm
// at the use sites.
constexpr uint32_t shifted_visibly_initialized_value =
enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte);
constexpr uint32_t shifted_initializing_value =
enum_cast<uint32_t>(ClassStatus::kInitializing) << (status_lsb_position % kBitsPerByte);
constexpr uint32_t shifted_initialized_value =
enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte);
class Assembler;
class CodeGenerationData;
class CodeGenerator;
class CompilerOptions;
class StackMapStream;
class ParallelMoveResolver;
namespace linker {
class LinkerPatch;
} // namespace linker
// Code sequence attached to an HInstruction (which may be null, see GetDexPc()). Subclasses
// supply the native code through EmitNativeCode() and a description through GetDescription().
// For every register number below kMaximumNumberOfExpectedRegisters the object records either
// a stack offset or kRegisterNotSaved.
class SlowPathCode : public DeletableArenaObject<kArenaAllocSlowPaths> {
 public:
  explicit SlowPathCode(HInstruction* instruction) : instruction_(instruction) {
    // Every register starts out marked as "not saved".
    for (uint32_t& core_offset : saved_core_stack_offsets_) {
      core_offset = kRegisterNotSaved;
    }
    for (uint32_t& fpu_offset : saved_fpu_stack_offsets_) {
      fpu_offset = kRegisterNotSaved;
    }
  }

  virtual ~SlowPathCode() {}

  // Emits the native code of this slow path using `codegen`.
  virtual void EmitNativeCode(CodeGenerator* codegen) = 0;

  // Save live core and floating-point caller-save registers and
  // update the stack mask in `locations` for registers holding object
  // references.
  virtual void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);

  // Restore live core and floating-point caller-save registers.
  virtual void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);

  // Whether a stack offset has been recorded for core register `reg`.
  bool IsCoreRegisterSaved(int reg) const {
    return GetStackOffsetOfCoreRegister(reg) != kRegisterNotSaved;
  }

  // Whether a stack offset has been recorded for floating-point register `reg`.
  bool IsFpuRegisterSaved(int reg) const {
    return GetStackOffsetOfFpuRegister(reg) != kRegisterNotSaved;
  }

  // Recorded stack offset of core register `reg`, or kRegisterNotSaved.
  uint32_t GetStackOffsetOfCoreRegister(int reg) const {
    return saved_core_stack_offsets_[reg];
  }

  // Recorded stack offset of floating-point register `reg`, or kRegisterNotSaved.
  uint32_t GetStackOffsetOfFpuRegister(int reg) const {
    return saved_fpu_stack_offsets_[reg];
  }

  // Not fatal by default; subclasses may override.
  virtual bool IsFatal() const { return false; }

  virtual const char* GetDescription() const = 0;

  Label* GetEntryLabel() { return &entry_label_; }
  Label* GetExitLabel() { return &exit_label_; }

  HInstruction* GetInstruction() const {
    return instruction_;
  }

  // Dex PC of the attached instruction, or kNoDexPc when there is no instruction.
  uint32_t GetDexPc() const {
    if (instruction_ == nullptr) {
      return kNoDexPc;
    }
    return instruction_->GetDexPc();
  }

 protected:
  static constexpr size_t kMaximumNumberOfExpectedRegisters = 32;
  // Marker stored in the offset arrays for registers that have no recorded offset.
  static constexpr uint32_t kRegisterNotSaved = -1;

  // The instruction where this slow path is happening.
  HInstruction* instruction_;
  // Per-register stack offsets, indexed by register number.
  uint32_t saved_core_stack_offsets_[kMaximumNumberOfExpectedRegisters];
  uint32_t saved_fpu_stack_offsets_[kMaximumNumberOfExpectedRegisters];

 private:
  Label entry_label_;
  Label exit_label_;

  DISALLOW_COPY_AND_ASSIGN(SlowPathCode);
};
// Abstract interface that yields locations for a call: the next argument location for a given
// type, the return location and the method location. It keeps running indices for core
// registers, floating-point registers and stack slots for use by implementations.
class InvokeDexCallingConventionVisitor {
 public:
  // Location for the next argument of type `type`.
  virtual Location GetNextLocation(DataType::Type type) = 0;
  // Location of a return value of type `type`.
  virtual Location GetReturnLocation(DataType::Type type) const = 0;
  // Location of the method.
  virtual Location GetMethodLocation() const = 0;

 protected:
  // Construction and destruction are reserved for subclasses.
  InvokeDexCallingConventionVisitor() = default;
  virtual ~InvokeDexCallingConventionVisitor() = default;

  // The current index for core registers.
  uint32_t gp_index_ = 0u;
  // The current index for floating-point registers.
  uint32_t float_index_ = 0u;
  // The current stack index.
  uint32_t stack_index_ = 0u;

 private:
  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
};
// Abstract description of the locations involved in a field access call: the object, the
// field index, the returned value, the value being set and the floating-point location.
class FieldAccessCallingConvention {
 public:
  virtual ~FieldAccessCallingConvention() = default;

  virtual Location GetObjectLocation() const = 0;
  virtual Location GetFieldIndexLocation() const = 0;
  virtual Location GetReturnLocation(DataType::Type type) const = 0;
  // `is_instance` distinguishes instance from non-instance field sets.
  virtual Location GetSetValueLocation(DataType::Type type, bool is_instance) const = 0;
  virtual Location GetFpuLocation(DataType::Type type) const = 0;

 protected:
  // Only subclasses may construct the convention.
  FieldAccessCallingConvention() = default;

 private:
  DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConvention);
};
class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
public:
// Compiles the graph to executable instructions.
void Compile();
// Creates a code generator for `graph` with the given compiler options. `stats` is optional
// and defaults to null. The caller owns the returned object.
// NOTE(review): presumably the concrete backend is chosen from the target instruction set -
// confirm in the out-of-line definition.
static std::unique_ptr<CodeGenerator> Create(HGraph* graph,
const CompilerOptions& compiler_options,
OptimizingCompilerStats* stats = nullptr);
virtual ~CodeGenerator();
// Get the graph. This is the outermost graph, never the graph of a method being inlined.
HGraph* GetGraph() const { return graph_; }
// Block-order helpers, defined out of line.
HBasicBlock* GetNextBlockToEmit() const;
HBasicBlock* FirstNonEmptyBlock(HBasicBlock* block) const;
bool GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const;
// Returns the stack offset of `parameter`: the frame size, plus one target pointer for the
// ArtMethod, plus the parameter index scaled by the vreg size.
size_t GetStackSlotOfParameter(HParameterValue* parameter) const {
  // Note that this follows the current calling convention.
  const size_t art_method_size =
      static_cast<size_t>(InstructionSetPointerSize(GetInstructionSet()));
  return GetFrameSize() + art_method_size + parameter->GetIndex() * kVRegSize;
}
// Backend hooks. Members marked `= 0` must be provided by each concrete code generator; the
// others have a definition in the base class.
virtual void Initialize() = 0;
virtual void Finalize();
virtual void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches);
virtual bool NeedsThunkCode(const linker::LinkerPatch& patch) const;
// `code` and `debug_name` are output parameters.
virtual void EmitThunkCode(const linker::LinkerPatch& patch,
/*out*/ ArenaVector<uint8_t>* code,
/*out*/ std::string* debug_name);
virtual void GenerateFrameEntry() = 0;
virtual void GenerateFrameExit() = 0;
virtual void Bind(HBasicBlock* block) = 0;
virtual void MoveConstant(Location destination, int32_t value) = 0;
virtual void MoveLocation(Location dst, Location src, DataType::Type dst_type) = 0;
virtual void AddLocationAsTemp(Location location, LocationSummary* locations) = 0;
// Mutable and read-only access to the backend's assembler.
virtual Assembler* GetAssembler() = 0;
virtual const Assembler& GetAssembler() const = 0;
virtual size_t GetWordSize() const = 0;
// Returns whether the target supports predicated SIMD instructions.
// The base implementation reports no support.
virtual bool SupportsPredicatedSIMD() const { return false; }
// Get FP register width in bytes for spilling/restoring in the slow paths.
//
// Note: In SIMD graphs this should return SIMD register width as all FP and SIMD registers
// alias and live SIMD registers are forced to be spilled in full size in the slow paths.
virtual size_t GetSlowPathFPWidth() const {
// Default implementation.
return GetCalleePreservedFPWidth();
}
// Get FP register width required to be preserved by the target ABI.
virtual size_t GetCalleePreservedFPWidth() const = 0;
// Get the size of the target SIMD register in bytes.
virtual size_t GetSIMDRegisterWidth() const = 0;
virtual uintptr_t GetAddressOf(HBasicBlock* block) = 0;
void InitializeCodeGeneration(size_t number_of_spill_slots,
size_t maximum_safepoint_spill_size,
size_t number_of_out_slots,
const ArenaVector<HBasicBlock*>& block_order);
// Backends can override this as necessary. For most, no special alignment is required.
virtual uint32_t GetPreferredSlotsAlignment() const { return 1; }
// Accessors for the frame size stored in `frame_size_`.
uint32_t GetFrameSize() const { return frame_size_; }
void SetFrameSize(uint32_t size) { frame_size_ = size; }
// Returns GetStackOverflowReservedBytes() for the target instruction set.
uint32_t GetMaximumFrameSize() const {
return GetStackOverflowReservedBytes(GetInstructionSet());
}
// Spill masks as last set (see ComputeSpillMask()).
uint32_t GetCoreSpillMask() const { return core_spill_mask_; }
uint32_t GetFpuSpillMask() const { return fpu_spill_mask_; }
size_t GetNumberOfCoreRegisters() const { return number_of_core_registers_; }
size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; }
virtual void SetupBlockedRegisters() const = 0;
// Computes the core and floating-point spill masks as the allocated registers that are also
// in the corresponding callee-save mask.
virtual void ComputeSpillMask() {
  const auto allocated_core = allocated_registers_.GetCoreRegisters();
  core_spill_mask_ = allocated_core & core_callee_save_mask_;
  DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";
  const auto allocated_fpu = allocated_registers_.GetFloatingPointRegisters();
  fpu_spill_mask_ = allocated_fpu & fpu_callee_save_mask_;
}
// Write a textual form of register `reg` to `stream`.
virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0;
virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0;
virtual InstructionSet GetInstructionSet() const = 0;
// Saves the register in the stack. Returns the size taken on stack.
virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
// Restores the register from the stack. Returns the size taken on stack.
virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
// Floating-point counterparts of SaveCoreRegister() / RestoreCoreRegister().
virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;
virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;
// NOTE(review): presumably tells whether a value of `type` occupies a register pair on this
// target - confirm in the backends.
virtual bool NeedsTwoRegisters(DataType::Type type) const = 0;
// Returns whether we should split long moves in parallel moves.
// The base implementation answers no.
virtual bool ShouldSplitLongMoves() const { return false; }
// Returns true if `invoke` is an intrinsic that this codegen's arch implements, i.e. it is an
// intrinsic and is not flagged in the table of unimplemented intrinsics.
bool IsImplementedIntrinsic(HInvoke* invoke) const {
  if (!invoke->IsIntrinsic()) {
    return false;
  }
  const size_t intrinsic_index = static_cast<size_t>(invoke->GetIntrinsic());
  return !unimplemented_intrinsics_[intrinsic_index];
}
// Number of core callee-save registers, i.e. the number of bits set in the callee-save mask.
size_t GetNumberOfCoreCalleeSaveRegisters() const {
  const size_t callee_save_count = POPCOUNT(core_callee_save_mask_);
  return callee_save_count;
}
// Number of core caller-save registers: all core registers minus the callee-save ones.
size_t GetNumberOfCoreCallerSaveRegisters() const {
  DCHECK_GE(GetNumberOfCoreRegisters(), GetNumberOfCoreCalleeSaveRegisters());
  const size_t total_core = GetNumberOfCoreRegisters();
  const size_t callee_save_core = GetNumberOfCoreCalleeSaveRegisters();
  return total_core - callee_save_core;
}
// Returns whether core register `reg` is set in the core callee-save mask.
// `reg` must be a valid bit index of the 32-bit mask.
bool IsCoreCalleeSaveRegister(int reg) const {
  // Shift an unsigned one so that testing bit 31 never shifts into the sign bit of an int.
  return (core_callee_save_mask_ & (1u << reg)) != 0u;
}
// Returns whether floating-point register `reg` is set in the FPU callee-save mask.
// `reg` must be a valid bit index of the 32-bit mask.
bool IsFloatingPointCalleeSaveRegister(int reg) const {
  // Shift an unsigned one so that testing bit 31 never shifts into the sign bit of an int.
  return (fpu_callee_save_mask_ & (1u << reg)) != 0u;
}
// Returns the mask of live registers (core or floating-point, per `core_registers`) that a
// slow path for `locations` has to spill.
uint32_t GetSlowPathSpills(LocationSummary* locations, bool core_registers) const {
  DCHECK(locations->OnlyCallsOnSlowPath() ||
         (locations->Intrinsified() && locations->CallsOnMainAndSlowPath() &&
          !locations->HasCustomSlowPathCallingConvention()));
  uint32_t live_registers;
  if (core_registers) {
    live_registers = locations->GetLiveRegisters()->GetCoreRegisters();
  } else {
    live_registers = locations->GetLiveRegisters()->GetFloatingPointRegisters();
  }
  if (!locations->HasCustomSlowPathCallingConvention()) {
    // Default ABI, we need to spill non-callee-save live registers.
    const uint32_t callee_saves = core_registers ? core_callee_save_mask_ : fpu_callee_save_mask_;
    return live_registers & ~callee_saves;
  }
  // Save only the live registers that the custom calling convention wants us to save.
  uint32_t caller_saves;
  if (core_registers) {
    caller_saves = locations->GetCustomSlowPathCallerSaves().GetCoreRegisters();
  } else {
    caller_saves = locations->GetCustomSlowPathCallerSaves().GetFloatingPointRegisters();
  }
  return live_registers & caller_saves;
}
// Number of registers in the mask returned by GetSlowPathSpills() for the same arguments.
size_t GetNumberOfSlowPathSpills(LocationSummary* locations, bool core_registers) const {
  const uint32_t spill_mask = GetSlowPathSpills(locations, core_registers);
  return POPCOUNT(spill_mask);
}
// Stack offset of the should-deoptimize flag: the frame size minus the frame-entry spill area
// and minus the size of the flag itself. Only valid when the graph has such a flag.
size_t GetStackOffsetOfShouldDeoptimizeFlag() const {
  DCHECK(GetGraph()->HasShouldDeoptimizeFlag());
  const uint32_t frame_size = GetFrameSize();
  const uint32_t entry_spill_size = FrameEntrySpillSize();
  DCHECK_GE(frame_size, entry_spill_size + kShouldDeoptimizeFlagSize);
  return frame_size - entry_spill_size - kShouldDeoptimizeFlagSize;
}
// Record native to dex mapping for a suspend point. Required by runtime.
// This overload takes the native PC explicitly.
void RecordPcInfo(HInstruction* instruction,
uint32_t dex_pc,
uint32_t native_pc,
SlowPathCode* slow_path = nullptr,
bool native_debug_info = false);
// Record native to dex mapping for a suspend point.
// The native_pc is used from Assembler::CodePosition.
//
// Note: As Assembler::CodePosition is target dependent, it does not guarantee the exact native_pc
// for the instruction. If the exact native_pc is required it must be provided explicitly.
void RecordPcInfo(HInstruction* instruction,
uint32_t dex_pc,
SlowPathCode* slow_path = nullptr,
bool native_debug_info = false);
// Check whether we have already recorded mapping at this PC.
bool HasStackMapAtCurrentPc();
// Record extra stack maps if we support native debugging.
//
// ARM specific behaviour: The recorded native PC might be a branch over pools to instructions
// corresponding to the dex PC.
void MaybeRecordNativeDebugInfo(HInstruction* instruction,
uint32_t dex_pc,
SlowPathCode* slow_path = nullptr);
// Null-check support. The implicit and explicit generators are backend specific.
bool CanMoveNullCheckToUser(HNullCheck* null_check);
virtual void MaybeRecordImplicitNullCheck(HInstruction* instruction);
// `caller_saves` defaults to the empty register set.
LocationSummary* CreateThrowingSlowPathLocations(
HInstruction* instruction, RegisterSet caller_saves = RegisterSet::Empty());
void GenerateNullCheck(HNullCheck* null_check);
virtual void GenerateImplicitNullCheck(HNullCheck* null_check) = 0;
virtual void GenerateExplicitNullCheck(HNullCheck* null_check) = 0;
// Records a stack map which the runtime might use to set catch phi values
// during exception delivery.
// TODO: Replace with a catch-entering instruction that records the environment.
void RecordCatchBlockInfo();
const CompilerOptions& GetCompilerOptions() const { return compiler_options_; }
// Read barrier configuration queries, defined out of line.
bool EmitReadBarrier() const;
bool EmitBakerReadBarrier() const;
bool EmitNonBakerReadBarrier() const;
ReadBarrierOption GetCompilerReadBarrierOption() const;
// Returns true if we should check the GC card for consistency purposes.
bool ShouldCheckGCCard(DataType::Type type,
HInstruction* value,
WriteBarrierKind write_barrier_kind) const;
// Get the ScopedArenaAllocator used for codegen memory allocation.
ScopedArenaAllocator* GetScopedAllocator();
void AddSlowPath(SlowPathCode* slow_path);
ScopedArenaVector<uint8_t> BuildStackMaps(const dex::CodeItem* code_item_for_osr_check);
size_t GetNumberOfJitRoots() const;
// Fills the `literals` array with literals collected during code generation.
// Also emits literal patches.
// NOTE(review): the comment above mentions a `literals` array, but the output parameter is
// named `roots` - confirm which wording is current.
void EmitJitRoots(uint8_t* code,
const uint8_t* roots_data,
/*out*/std::vector<Handle<mirror::Object>>* roots)
REQUIRES_SHARED(Locks::mutator_lock_);
// Whether the method is still considered a leaf method.
bool IsLeafMethod() const {
return is_leaf_;
}
// Marks the method as non-leaf; this also sets the "requires current method" flag.
void MarkNotLeaf() {
is_leaf_ = false;
requires_current_method_ = true;
}
// Whether MarkNeedsSuspendCheckEntry() has been called.
bool NeedsSuspendCheckEntry() const {
return needs_suspend_check_entry_;
}
void MarkNeedsSuspendCheckEntry() {
needs_suspend_check_entry_ = true;
}
// The "requires current method" flag can only be set, never cleared, through this interface.
void SetRequiresCurrentMethod() {
requires_current_method_ = true;
}
bool RequiresCurrentMethod() const {
return requires_current_method_;
}
// Clears the spill slots taken by loop phis in the `LocationSummary` of the
// suspend check. This is called when the code generator generates code
// for the suspend check at the back edge (instead of where the suspend check
// is, which is the loop entry). At this point, the spill slots for the phis
// have not been written to.
void ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check,
HParallelMove* spills) const;
// Raw access to the blocked-register arrays; the pointed-to flags are mutable even though the
// accessors are const.
bool* GetBlockedCoreRegisters() const { return blocked_core_registers_; }
bool* GetBlockedFloatingPointRegisters() const { return blocked_fpu_registers_; }
// Whether register `i` is marked as blocked. No bounds check is performed here.
bool IsBlockedCoreRegister(size_t i) { return blocked_core_registers_[i]; }
bool IsBlockedFloatingPointRegister(size_t i) { return blocked_fpu_registers_[i]; }
// Helper that returns the offset of the array's length field.
// Note: Besides the normal arrays, we also use the HArrayLength for
// accessing the String's `count` field in String intrinsics.
static uint32_t GetArrayLengthOffset(HArrayLength* array_length);
// Helper that returns the offset of the array's data.
// Note: Besides the normal arrays, we also use the HArrayGet for
// accessing the String's `value` field in String intrinsics.
static uint32_t GetArrayDataOffset(HArrayGet* array_get);
// Takes two (from, to, type) move descriptions.
// NOTE(review): presumably both moves are resolved together as one parallel move - confirm in
// the definition.
void EmitParallelMoves(Location from1,
Location to1,
DataType::Type type1,
Location from2,
Location to2,
DataType::Type type2);
// Tells whether `instance_of` needs a read barrier: read barriers must be emitted and the
// target class must not be in the boot image.
bool InstanceOfNeedsReadBarrier(HInstanceOf* instance_of) {
  // Used only for kExactCheck, kAbstractClassCheck, kClassHierarchyCheck and kArrayObjectCheck.
  DCHECK(instance_of->GetTypeCheckKind() == TypeCheckKind::kExactCheck ||
         instance_of->GetTypeCheckKind() == TypeCheckKind::kAbstractClassCheck ||
         instance_of->GetTypeCheckKind() == TypeCheckKind::kClassHierarchyCheck ||
         instance_of->GetTypeCheckKind() == TypeCheckKind::kArrayObjectCheck)
      << instance_of->GetTypeCheckKind();
  if (!EmitReadBarrier()) {
    return false;
  }
  // If the target class is in the boot image, it's non-moveable and it doesn't matter
  // if we compare it with a from-space or to-space reference, the result is the same.
  // It's OK to traverse a class hierarchy jumping between from-space and to-space.
  return !instance_of->GetTargetClass()->IsInBootImage();
}
// Maps the result of InstanceOfNeedsReadBarrier() to the matching ReadBarrierOption.
ReadBarrierOption ReadBarrierOptionForInstanceOf(HInstanceOf* instance_of) {
  if (InstanceOfNeedsReadBarrier(instance_of)) {
    return kWithReadBarrier;
  }
  return kWithoutReadBarrier;
}
// Tells whether the slow path of `check_cast` is fatal (non-returning), based on its type
// check kind.
bool IsTypeCheckSlowPathFatal(HCheckCast* check_cast) {
  switch (check_cast->GetTypeCheckKind()) {
    case TypeCheckKind::kArrayCheck:
    case TypeCheckKind::kUnresolvedCheck:
      return false;
    case TypeCheckKind::kBitstringCheck:
      return true;
    case TypeCheckKind::kExactCheck:
    case TypeCheckKind::kAbstractClassCheck:
    case TypeCheckKind::kClassHierarchyCheck:
    case TypeCheckKind::kArrayObjectCheck:
    case TypeCheckKind::kInterfaceCheck:
      // A read barrier would be needed when read barriers are emitted and the target class
      // is not in the boot image.
      // We do not emit read barriers for HCheckCast, so we can get false negatives
      // and the slow path shall re-check and simply return if the cast is actually OK.
      // Hence the slow path is fatal only when no read barrier would be needed.
      return !EmitReadBarrier() || check_cast->GetTargetClass()->IsInBootImage();
  }
  LOG(FATAL) << "Unreachable";
  UNREACHABLE();
}
// Call kind for `check_cast`: kNoCall when its slow path is fatal and it cannot throw into a
// catch block, kCallOnSlowPath otherwise.
LocationSummary::CallKind GetCheckCastCallKind(HCheckCast* check_cast) {
  const bool fatal_without_catch =
      IsTypeCheckSlowPathFatal(check_cast) && !check_cast->CanThrowIntoCatchBlock();
  if (fatal_without_catch) {
    // In fact, call on a fatal (non-returning) slow path.
    return LocationSummary::kNoCall;
  }
  return LocationSummary::kCallOnSlowPath;
}
// A store needs a write barrier when a reference is stored and the stored value is not the
// null constant.
static bool StoreNeedsWriteBarrier(DataType::Type type, HInstruction* value) {
  // Check that null value is not represented as an integer constant.
  DCHECK_IMPLIES(type == DataType::Type::kReference, !value->IsIntConstant());
  if (type != DataType::Type::kReference) {
    return false;
  }
  return !value->IsNullConstant();
}
// If we are compiling a graph with the WBE pass enabled, we want to honor the WriteBarrierKind
// set during the WBE pass.
// Overload of the static StoreNeedsWriteBarrier() that also takes the write barrier kind.
bool StoreNeedsWriteBarrier(DataType::Type type,
HInstruction* value,
WriteBarrierKind write_barrier_kind) const;
// Performs checks pertaining to an InvokeRuntime call.
void ValidateInvokeRuntime(QuickEntrypointEnum entrypoint,
HInstruction* instruction,
SlowPathCode* slow_path);
// Performs checks pertaining to an InvokeRuntimeWithoutRecordingPcInfo call.
static void ValidateInvokeRuntimeWithoutRecordingPcInfo(HInstruction* instruction,
SlowPathCode* slow_path);
// Adds `location` to the set of allocated registers.
void AddAllocatedRegister(Location location) {
allocated_registers_.Add(location);
}
// Whether register `reg` is in the set of allocated registers; `is_core` selects between the
// core and the floating-point registers.
bool HasAllocatedRegister(bool is_core, int reg) const {
return is_core
? allocated_registers_.ContainsCoreRegister(reg)
: allocated_registers_.ContainsFloatingPointRegister(reg);
}
// Defined out of line.
void AllocateLocations(HInstruction* instruction);
// Tells whether the stack frame of the compiled method is
// considered "empty", that is either actually having a size of zero,
// or just containing the saved return address register.
bool HasEmptyFrame() const {
return GetFrameSize() == (CallPushesPC() ? GetWordSize() : 0);
}
// Returns the value of an int constant converted to int8_t. `constant` must be an
// HIntConstant.
static int8_t GetInt8ValueOf(HConstant* constant) {
  DCHECK(constant->IsIntConstant());
  return static_cast<int8_t>(constant->AsIntConstant()->GetValue());
}
// Returns the value of an int constant converted to int16_t. `constant` must be an
// HIntConstant.
static int16_t GetInt16ValueOf(HConstant* constant) {
  DCHECK(constant->IsIntConstant());
  return static_cast<int16_t>(constant->AsIntConstant()->GetValue());
}
// Returns the 32-bit representation of `constant`: the value of an int constant, zero for the
// null constant, or the raw bits of a float constant.
static int32_t GetInt32ValueOf(HConstant* constant) {
  if (constant->IsIntConstant()) {
    return constant->AsIntConstant()->GetValue();
  }
  if (constant->IsNullConstant()) {
    return 0;
  }
  // Anything else is expected to be a float constant.
  DCHECK(constant->IsFloatConstant());
  return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
}
// Returns the 64-bit representation of `constant`: the value of an int or long constant, zero
// for the null constant, the raw 32 bits of a float constant (widened from int32_t), or the
// raw 64 bits of a double constant.
static int64_t GetInt64ValueOf(HConstant* constant) {
  if (constant->IsIntConstant()) {
    return constant->AsIntConstant()->GetValue();
  }
  if (constant->IsNullConstant()) {
    return 0;
  }
  if (constant->IsFloatConstant()) {
    return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
  }
  if (constant->IsLongConstant()) {
    return constant->AsLongConstant()->GetValue();
  }
  // Anything else is expected to be a double constant.
  DCHECK(constant->IsDoubleConstant());
  return bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue());
}
// Returns the stored `first_register_slot_in_slow_path_` value.
size_t GetFirstRegisterSlotInSlowPath() const {
return first_register_slot_in_slow_path_;
}
// Total size of the registers spilled at frame entry: floating-point spill size plus core
// spill size.
uint32_t FrameEntrySpillSize() const {
return GetFpuSpillSize() + GetCoreSpillSize();
}
virtual ParallelMoveResolver* GetMoveResolver() = 0;
// NOTE(review): presumably builds the LocationSummary of `invoke` using the locations handed
// out by `visitor` - confirm in the definition.
static void CreateCommonInvokeLocationSummary(
HInvoke* invoke, InvokeDexCallingConventionVisitor* visitor);
// Prepares the argument moves for a critical native call of a non-intrinsified `invoke` and
// returns the size of the outgoing frame, rounded up to `kNativeStackAlignment`. The frame
// setup is finished only when that size is non-zero. In debug builds the size is cross-checked
// against `GetCriticalNativeDirectCallFrameSize` for the method's shorty.
template <typename CriticalNativeCallingConventionVisitor,
          size_t kNativeStackAlignment,
          size_t GetCriticalNativeDirectCallFrameSize(const char* shorty, uint32_t shorty_len)>
size_t PrepareCriticalNativeCall(HInvokeStaticOrDirect* invoke) {
  DCHECK(!invoke->GetLocations()->Intrinsified());
  CriticalNativeCallingConventionVisitor visitor(/*for_register_allocation=*/ false);
  HParallelMove argument_moves(GetGraph()->GetAllocator());
  PrepareCriticalNativeArgumentMoves(invoke, &visitor, &argument_moves);
  const size_t out_frame_size = RoundUp(visitor.GetStackOffset(), kNativeStackAlignment);
  if (kIsDebugBuild) {
    uint32_t shorty_len;
    const char* shorty = GetCriticalNativeShorty(invoke, &shorty_len);
    CHECK_EQ(GetCriticalNativeDirectCallFrameSize(shorty, shorty_len), out_frame_size);
  }
  if (out_frame_size == 0u) {
    // Nothing goes on the stack, so there is no frame to set up.
    return out_frame_size;
  }
  FinishCriticalNativeFrameSetup(out_frame_size, &argument_moves);
  return out_frame_size;
}
// Helpers for invokes that go through the runtime, defined out of line.
void GenerateInvokeStaticOrDirectRuntimeCall(
HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path);
void GenerateInvokeUnresolvedRuntimeCall(HInvokeUnresolved* invoke);
void GenerateInvokePolymorphicCall(HInvokePolymorphic* invoke, SlowPathCode* slow_path = nullptr);
void GenerateInvokeCustomCall(HInvokeCustom* invoke);
void CreateStringBuilderAppendLocations(HStringBuilderAppend* instruction, Location out);
// Unresolved field accesses: location summary creation and code generation, both
// parameterized by the backend's field access calling convention.
void CreateUnresolvedFieldLocationSummary(
HInstruction* field_access,
DataType::Type field_type,
const FieldAccessCallingConvention& calling_convention);
void GenerateUnresolvedFieldAccess(
HInstruction* field_access,
DataType::Type field_type,
uint32_t field_index,
uint32_t dex_pc,
const FieldAccessCallingConvention& calling_convention);
// Runtime-call variants of class, method handle and method type loads. Each comes as a pair:
// a static location summary builder and a code generator.
static void CreateLoadClassRuntimeCallLocationSummary(HLoadClass* cls,
Location runtime_type_index_location,
Location runtime_return_location);
void GenerateLoadClassRuntimeCall(HLoadClass* cls);
static void CreateLoadMethodHandleRuntimeCallLocationSummary(HLoadMethodHandle* method_handle,
Location runtime_handle_index_location,
Location runtime_return_location);
void GenerateLoadMethodHandleRuntimeCall(HLoadMethodHandle* method_handle);
static void CreateLoadMethodTypeRuntimeCallLocationSummary(HLoadMethodType* method_type,
Location runtime_type_index_location,
Location runtime_return_location);
void GenerateLoadMethodTypeRuntimeCall(HLoadMethodType* method_type);
// Boot image offset lookups for the various kinds of entities. The ObjPtr overload requires
// the mutator lock to be held (shared).
static uint32_t GetBootImageOffset(ObjPtr<mirror::Object> object)
REQUIRES_SHARED(Locks::mutator_lock_);
static uint32_t GetBootImageOffset(HLoadClass* load_class);
static uint32_t GetBootImageOffset(HLoadString* load_string);
static uint32_t GetBootImageOffset(HInvoke* invoke);
static uint32_t GetBootImageOffset(ClassRoot class_root);
static uint32_t GetBootImageOffsetOfIntrinsicDeclaringClass(HInvoke* invoke);
// `length_threshold` defaults to -1 and `num_temps` to 3.
// NOTE(review): the meaning of a negative threshold is not visible here - confirm in the
// definition.
static LocationSummary* CreateSystemArrayCopyLocationSummary(
HInvoke* invoke, int32_t length_threshold = -1, size_t num_temps = 3);
// Accessors for the (non-owned) disassembly information pointer.
void SetDisassemblyInformation(DisassemblyInformation* info) { disasm_info_ = info; }
DisassemblyInformation* GetDisassemblyInformation() const { return disasm_info_; }
// Backend hook for calling the runtime `entrypoint` on behalf of `instruction`.
virtual void InvokeRuntime(QuickEntrypointEnum entrypoint,
HInstruction* instruction,
uint32_t dex_pc,
SlowPathCode* slow_path = nullptr) = 0;
// Check if the desired_string_load_kind is supported. If it is, return it,
// otherwise return a fall-back kind that should be used instead.
virtual HLoadString::LoadKind GetSupportedLoadStringKind(
HLoadString::LoadKind desired_string_load_kind) = 0;
// Check if the desired_class_load_kind is supported. If it is, return it,
// otherwise return a fall-back kind that should be used instead.
virtual HLoadClass::LoadKind GetSupportedLoadClassKind(
HLoadClass::LoadKind desired_class_load_kind) = 0;
// Returns the call kind of `load` according to its load kind. The DCHECKs document whether
// each load kind is expected to need an environment.
LocationSummary::CallKind GetLoadStringCallKind(HLoadString* load) {
  switch (load->GetLoadKind()) {
    case HLoadString::LoadKind::kBssEntry:
      DCHECK(load->NeedsEnvironment());
      return LocationSummary::kCallOnSlowPath;
    case HLoadString::LoadKind::kRuntimeCall:
      DCHECK(load->NeedsEnvironment());
      return LocationSummary::kCallOnMainOnly;
    case HLoadString::LoadKind::kJitTableAddress:
      DCHECK(!load->NeedsEnvironment());
      // A slow path is only involved when read barriers are emitted.
      if (EmitReadBarrier()) {
        return LocationSummary::kCallOnSlowPath;
      }
      return LocationSummary::kNoCall;
    default:
      DCHECK(!load->NeedsEnvironment());
      return LocationSummary::kNoCall;
  }
}
// Check if the desired_dispatch_info is supported. If it is, return it,
// otherwise return a fall-back info that should be used instead.
virtual HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
ArtMethod* method) = 0;
// Generate a call to a static or direct method.
virtual void GenerateStaticOrDirectCall(
HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) = 0;
// Generate a call to a virtual method.
virtual void GenerateVirtualCall(
HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) = 0;
// Copy the result of a call into the given target.
virtual void MoveFromReturnRegister(Location trg, DataType::Type type) = 0;
// Backend hooks taking a frame size `adjustment`.
// NOTE(review): presumably the adjustment is in bytes - confirm in the backends.
virtual void IncreaseFrame(size_t adjustment) = 0;
virtual void DecreaseFrame(size_t adjustment) = 0;
virtual void GenerateNop() = 0;
// Static helpers, defined out of line.
static QuickEntrypointEnum GetArrayAllocationEntrypoint(HNewArray* new_array);
static ScaleFactor ScaleFactorForType(DataType::Type type);
// Returns a read-only view of the assembler's code buffer: its base address and code size.
ArrayRef<const uint8_t> GetCode() const {
  const Assembler& assembler = GetAssembler();
  return ArrayRef<const uint8_t>(assembler.CodeBufferBaseAddress(), assembler.CodeSize());
}
protected:
// Patch info used for recording locations of required linker patches and their targets,
// i.e. target method, string, type or code identified by their dex file and index,
// or .data.bimg.rel.ro entries identified by the boot image offset.
template <typename LabelType>
struct PatchInfo {
// Stores the target and value-initializes the label.
PatchInfo(const DexFile* dex_file, uint32_t off_or_idx)
: target_dex_file(dex_file), offset_or_index(off_or_idx), label() { }
// Target dex file or null for .data.bimg.rel.ro patches.
const DexFile* target_dex_file;
// Either the boot image offset (to write to .data.bimg.rel.ro) or string/type/method index.
uint32_t offset_or_index;
// Label for the instruction to patch.
LabelType label;
};
// Protected constructor: instances are created through concrete backends.
// `unimplemented_intrinsics` is the table consulted by IsImplementedIntrinsic().
CodeGenerator(HGraph* graph,
size_t number_of_core_registers,
size_t number_of_fpu_registers,
size_t number_of_register_pairs,
uint32_t core_callee_save_mask,
uint32_t fpu_callee_save_mask,
const CompilerOptions& compiler_options,
OptimizingCompilerStats* stats,
const art::ArrayRef<const bool>& unimplemented_intrinsics);
// Backend-provided graph visitors.
// NOTE(review): presumably the first builds LocationSummary objects and the second emits the
// code for each instruction - confirm in the backends.
virtual HGraphVisitor* GetLocationBuilder() = 0;
virtual HGraphVisitor* GetInstructionVisitor() = 0;
// Builds a 32-bit mask with one bit set for each register number in `registers`.
//
// `registers` points to `length` register numbers, each of which must be a valid bit index of
// the 32-bit mask. An empty array yields 0.
template <typename RegType>
static uint32_t ComputeRegisterMask(const RegType* registers, size_t length) {
  uint32_t mask = 0u;
  for (size_t i = 0; i != length; ++i) {
    // Shift an unsigned one: a signed `1 << 31` would shift into the sign bit of an int
    // before being converted to the unsigned mask type.
    mask |= (1u << registers[i]);
  }
  return mask;
}
// Returns the location of the first spilled entry for floating point registers,
// relative to the stack pointer.
// Computed as the frame size minus the total frame-entry spill size.
uint32_t GetFpuSpillStart() const {
return GetFrameSize() - FrameEntrySpillSize();
}
// Size of the floating-point spill area: number of registers in the FPU spill mask times the
// callee-preserved FP register width.
uint32_t GetFpuSpillSize() const {
return POPCOUNT(fpu_spill_mask_) * GetCalleePreservedFPWidth();
}
// Size of the core spill area: number of registers in the core spill mask times the word size.
uint32_t GetCoreSpillSize() const {
return POPCOUNT(core_spill_mask_) * GetWordSize();
}
// Tells whether any callee-save register, other than the one holding the return PC, has been
// allocated.
virtual bool HasAllocatedCalleeSaveRegisters() const {
  // We check the core registers against 1 because it always comprises the return PC.
  const auto callee_save_core = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
  if (POPCOUNT(callee_save_core) != 1) {
    return true;
  }
  const auto callee_save_fpu =
      allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
  return POPCOUNT(callee_save_fpu) != 0;
}
bool CallPushesPC() const {
InstructionSet instruction_set = GetInstructionSet();
return instruction_set == InstructionSet::kX86 || instruction_set == InstructionSet::kX86_64;
}
// Arm64 has its own type for a label, so we need to templatize these methods
// to share the logic.
template <typename LabelType>
LabelType* CommonInitializeLabels() {
// We use raw array allocations instead of ArenaVector<> because Labels are
// non-constructible and non-movable and as such cannot be held in a vector.
size_t size = GetGraph()->GetBlocks().size();
LabelType* labels =
GetGraph()->GetAllocator()->AllocArray<LabelType>(size, kArenaAllocCodeGenerator);
for (size_t i = 0; i != size; ++i) {
new(labels + i) LabelType();
}
return labels;
}
template <typename LabelType>
LabelType* CommonGetLabelOf(LabelType* raw_pointer_to_labels_array, HBasicBlock* block) const {
block = FirstNonEmptyBlock(block);
return raw_pointer_to_labels_array + block->GetBlockId();
}
// Returns the stored `current_slow_path_` pointer.
SlowPathCode* GetCurrentSlowPath() {
return current_slow_path_;
}
StackMapStream* GetStackMapStream();
// Non-owning access to the code generation data held by this object.
CodeGenerationData* GetCodeGenerationData() {
return code_generation_data_.get();
}
// JIT root bookkeeping for strings and classes: reserve a root, then query its index.
void ReserveJitStringRoot(StringReference string_reference, Handle<mirror::String> string);
uint64_t GetJitStringRootIndex(StringReference string_reference);
void ReserveJitClassRoot(TypeReference type_reference, Handle<mirror::Class> klass);
uint64_t GetJitClassRootIndex(TypeReference type_reference);
// Emit the patches associated with JIT roots. Only applies to JIT compiled code.
virtual void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data);
// Frame size required for this method.
uint32_t frame_size_;
// Masks of the registers spilled at frame entry (see ComputeSpillMask()).
uint32_t core_spill_mask_;
uint32_t fpu_spill_mask_;
uint32_t first_register_slot_in_slow_path_;
// Registers that were allocated during linear scan.
RegisterSet allocated_registers_;
// Arrays used when doing register allocation to know which
// registers we can allocate. `SetupBlockedRegisters` updates the
// arrays.
bool* const blocked_core_registers_;
bool* const blocked_fpu_registers_;
size_t number_of_core_registers_;
size_t number_of_fpu_registers_;
size_t number_of_register_pairs_;
// Callee-save register masks, fixed at construction.
const uint32_t core_callee_save_mask_;
const uint32_t fpu_callee_save_mask_;
// The order to use for code generation.
const ArenaVector<HBasicBlock*>* block_order_;
// Set through SetDisassemblyInformation().
DisassemblyInformation* disasm_info_;
private:
// Private helpers. Only the declarations are visible in this header; the definitions
// are out of line.
void InitializeCodeGenerationData();
size_t GetStackOffsetOfSavedRegister(size_t index);
void GenerateSlowPaths();
// `is_out` defaults to false.
void BlockIfInRegister(Location location, bool is_out = false) const;
// Defaults: vreg info is requested, the environment is not for a catch handler, and the
// environment is treated as the innermost one.
void EmitEnvironment(HEnvironment* environment,
SlowPathCode* slow_path,
bool needs_vreg_info = true,
bool is_for_catch_handler = false,
bool innermost_environment = true);
void EmitVRegInfo(HEnvironment* environment, SlowPathCode* slow_path, bool is_for_catch_handler);
void EmitVRegInfoOnlyCatchPhis(HEnvironment* environment);
// Static helper: `visitor` is read and updated, `parallel_move` is an output
// (see the parameter annotations).
static void PrepareCriticalNativeArgumentMoves(
HInvokeStaticOrDirect* invoke,
/*inout*/InvokeDexCallingConventionVisitor* visitor,
/*out*/HParallelMove* parallel_move);
// `parallel_move` is read and updated (see the parameter annotation).
void FinishCriticalNativeFrameSetup(size_t out_frame_size, /*inout*/HParallelMove* parallel_move);
// NOTE(review): `shorty_len` presumably receives the length of the returned shorty -
// confirm against the definition.
static const char* GetCriticalNativeShorty(HInvokeStaticOrDirect* invoke, uint32_t* shorty_len);
// Compilation statistics sink.
// NOTE(review): whether this may be null is not visible here - confirm at the use sites.
OptimizingCompilerStats* stats_;
// The graph being compiled; the pointer is const, so it cannot be re-seated.
HGraph* const graph_;
// Compiler options, held by reference (not owned).
const CompilerOptions& compiler_options_;
// The current slow-path that we're generating code for.
SlowPathCode* current_slow_path_;
// The current block index in `block_order_` of the block
// we are generating code for.
size_t current_block_index_;
// Whether the method is a leaf method.
bool is_leaf_;
// Whether the method has to emit a SuspendCheck at entry.
bool needs_suspend_check_entry_;
// Whether an instruction in the graph accesses the current method.
// TODO: Rename: this actually indicates that some instruction in the method
// needs the environment including a valid stack frame.
bool requires_current_method_;
// The CodeGenerationData contains a ScopedArenaAllocator intended for reusing the
// ArenaStack memory allocated in previous passes instead of adding to the memory
// held by the ArenaAllocator. This ScopedArenaAllocator is created in
// CodeGenerator::Compile() and remains alive until the CodeGenerator is destroyed.
std::unique_ptr<CodeGenerationData> code_generation_data_;
// Which intrinsics we don't have handcrafted code for.
// This is a non-owning view (ArrayRef) over an array of bool flags.
art::ArrayRef<const bool> unimplemented_intrinsics_;
friend class OptimizingCFITest;
ART_FRIEND_TEST(CodegenTest, ARM64FrameSizeSIMD);
ART_FRIEND_TEST(CodegenTest, ARM64FrameSizeNoSIMD);
DISALLOW_COPY_AND_ASSIGN(CodeGenerator);
};
// Describes the registers a calling convention uses and where parameters live on the stack.
//
// `C` is the core register type and `F` the floating-point register type. The register
// tables are not owned: only the pointers and element counts passed to the constructor
// are stored.
template <typename C, typename F>
class CallingConvention {
 public:
  CallingConvention(const C* registers,
                    size_t number_of_registers,
                    const F* fpu_registers,
                    size_t number_of_fpu_registers,
                    PointerSize pointer_size)
      : registers_(registers),
        number_of_registers_(number_of_registers),
        fpu_registers_(fpu_registers),
        number_of_fpu_registers_(number_of_fpu_registers),
        pointer_size_(pointer_size) {}

  // Number of entries in the core register table.
  size_t GetNumberOfRegisters() const {
    return number_of_registers_;
  }

  // Number of entries in the floating-point register table.
  size_t GetNumberOfFpuRegisters() const {
    return number_of_fpu_registers_;
  }

  // Returns the core register at `index`; the index is bounds-checked with DCHECK_LT.
  C GetRegisterAt(size_t index) const {
    DCHECK_LT(index, number_of_registers_);
    return *(registers_ + index);
  }

  // Returns the floating-point register at `index`; the index is bounds-checked with
  // DCHECK_LT.
  F GetFpuRegisterAt(size_t index) const {
    DCHECK_LT(index, number_of_fpu_registers_);
    return *(fpu_registers_ + index);
  }

  // Returns the stack offset of the parameter at `index`.
  size_t GetStackOffsetOf(size_t index) const {
    // The method pointer comes first, so every parameter is shifted by its size.
    const size_t method_pointer_size = static_cast<size_t>(pointer_size_);
    // Space is still reserved for parameters passed in registers, hence the plain
    // `index * kVRegSize` with no adjustment for register arguments.
    const size_t vreg_offset = index * kVRegSize;
    return method_pointer_size + vreg_offset;
  }

 private:
  const C* registers_;
  const size_t number_of_registers_;
  const F* fpu_registers_;
  const size_t number_of_fpu_registers_;
  const PointerSize pointer_size_;

  DISALLOW_COPY_AND_ASSIGN(CallingConvention);
};
/**
* A templated class SlowPathGenerator with a templated method NewSlowPath()
* that can be used by any code generator to share equivalent slow-paths with
* the objective of reducing generated code size.
*
* InstructionType: instruction that requires SlowPathCodeType
* SlowPathCodeType: subclass of SlowPathCode, with constructor SlowPathCodeType(InstructionType *)
*/
template <typename InstructionType>
class SlowPathGenerator {
static_assert(std::is_base_of<HInstruction, InstructionType>::value,
"InstructionType is not a subclass of art::HInstruction");
public:
SlowPathGenerator(HGraph* graph, CodeGenerator* codegen)
: graph_(graph),
codegen_(codegen),
slow_path_map_(std::less<uint32_t>(),
graph->GetAllocator()->Adapter(kArenaAllocSlowPaths)) {}
// Creates and adds a new slow-path, if needed, or returns existing one otherwise.
// Templating the method (rather than the whole class) on the slow-path type enables
// keeping this code at a generic, non architecture-specific place.
//
// NOTE: This approach assumes each InstructionType only generates one SlowPathCodeType.
// To relax this requirement, we would need some RTTI on the stored slow-paths,
// or template the class as a whole on SlowPathType.
template <typename SlowPathCodeType>
SlowPathCodeType* NewSlowPath(InstructionType* instruction) {
static_assert(std::is_base_of<SlowPathCode, SlowPathCodeType>::value,
"SlowPathCodeType is not a subclass of art::SlowPathCode");
static_assert(std::is_constructible<SlowPathCodeType, InstructionType*>::value,
"SlowPathCodeType is not constructible from InstructionType*");
// Iterate over potential candidates for sharing. Currently, only same-typed
// slow-paths with exactly the same dex-pc are viable candidates.
// TODO: pass dex-pc/slow-path-type to run-time to allow even more sharing?
const uint32_t dex_pc = instruction->GetDexPc();
auto iter = slow_path_map_.find(dex_pc);
if (iter != slow_path_map_.end()) {
const ArenaVector<std::pair<InstructionType*, SlowPathCode*>>& candidates = iter->second;
for (const auto& it : candidates) {
InstructionType* other_instruction = it.first;
SlowPathCodeType* other_slow_path = down_cast<SlowPathCodeType*>(it.second);
// Determine if the instructions allow for slow-path sharing.
if (HaveSameLiveRegisters(instruction, other_instruction) &&
HaveSameStackMap(instruction, other_instruction)) {
// Can share: reuse existing one.
return other_slow_path;
}
}
} else {
// First time this dex-pc is seen.
iter = slow_path_map_.Put(dex_pc,
{{}, {graph_->GetAllocator()->Adapter(kArenaAllocSlowPaths)}});
}
// Cannot share: create and add new slow-path for this particular dex-pc.
SlowPathCodeType* slow_path =
new (codegen_->GetScopedAllocator()) SlowPathCodeType(instruction);
iter->second.emplace_back(std::make_pair(instruction, slow_path));
codegen_->AddSlowPath(slow_path);
return slow_path;
}
private:
// Tests if both instructions have same set of live physical registers. This ensures
// the slow-path has exactly the same preamble on saving these registers to stack.
bool HaveSameLiveRegisters(const InstructionType* i1, const InstructionType* i2) const {
const uint32_t core_spill = ~codegen_->GetCoreSpillMask();
const uint32_t fpu_spill = ~codegen_->GetFpuSpillMask();
RegisterSet* live1 = i1->GetLocations()->GetLiveRegisters();
RegisterSet* live2 = i2->GetLocations()->GetLiveRegisters();
return (((live1->GetCoreRegisters() & core_spill) ==
(live2->GetCoreRegisters() & core_spill)) &&
((live1->GetFloatingPointRegisters() & fpu_spill) ==
(live2->GetFloatingPointRegisters() & fpu_spill)));
}
// Tests if both instructions have the same stack map. This ensures the interpreter
// will find exactly the same dex-registers at the same entries.
bool HaveSameStackMap(const InstructionType* i1, const InstructionType* i2) const {
DCHECK(i1->HasEnvironment());
DCHECK(i2->HasEnvironment());
// We conservatively test if the two instructions find exactly the same instructions
// and location in each dex-register. This guarantees they will have the same stack map.
HEnvironment* e1 = i1->GetEnvironment();
HEnvironment* e2 = i2->GetEnvironment();
if (e1->GetParent() != e2->GetParent() || e1->Size() != e2->Size()) {
return false;
}
for (size_t i = 0, sz = e1->Size(); i < sz; ++i) {
if (e1->GetInstructionAt(i) != e2->GetInstructionAt(i) ||
!e1->GetLocationAt(i).Equals(e2->GetLocationAt(i))) {
return false;
}
}
return true;
}
HGraph* const graph_;
CodeGenerator* const codegen_;
// Map from dex-pc to vector of already existing instruction/slow-path pairs.
ArenaSafeMap<uint32_t, ArenaVector<std::pair<InstructionType*, SlowPathCode*>>> slow_path_map_;
DISALLOW_COPY_AND_ASSIGN(SlowPathGenerator);
};
// Graph visitor that additionally owns the slow-path generators used for sharing
// slow-paths between instructions.
class InstructionCodeGenerator : public HGraphVisitor {
 public:
  InstructionCodeGenerator(HGraph* graph, CodeGenerator* codegen)
      : HGraphVisitor(graph), deopt_slow_paths_(graph, codegen) {}

 protected:
  // One slow-path generator per instruction/slow-path combination that desires sharing.
  // TODO: under current regime, only deopt sharing makes sense; extend later.
  SlowPathGenerator<HDeoptimize> deopt_slow_paths_;
};
} // namespace art
#endif // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_