compiler/optimizing/code_generator.h - LeafOS-Project/android_art - Gitiles

 /*
  * Copyright (C) 2014 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 #ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
 #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_

 #include "arch/instruction_set.h"
 #include "arch/instruction_set_features.h"
 #include "base/arena_containers.h"
 #include "base/arena_object.h"
 #include "base/array_ref.h"
 #include "base/bit_field.h"
 #include "base/bit_utils.h"
 #include "base/enums.h"
 #include "base/globals.h"
 #include "base/macros.h"
 #include "base/memory_region.h"
 #include "class_root.h"
 #include "dex/string_reference.h"
 #include "dex/type_reference.h"
 #include "graph_visualizer.h"
 #include "locations.h"
 #include "nodes.h"
 #include "oat_quick_method_header.h"
 #include "optimizing_compiler_stats.h"
 #include "read_barrier_option.h"
 #include "stack.h"
 #include "subtype_check.h"
 #include "utils/assembler.h"
 #include "utils/label.h"

 namespace art HIDDEN {

 // Bit pattern of the double value 2^32, held in a 64-bit integer.
 static constexpr int64_t k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000);
 // Bit pattern of the double value 2^31, held in a 64-bit integer.
 static constexpr int64_t k2Pow31EncodingForDouble = INT64_C(0x41E0000000000000);

 // Minimum value for a primitive integer.
 // Spelled as `-max - 1`: the literal 0x80000000 has an unsigned type, so using it directly
 // would depend on an out-of-range unsigned-to-signed conversion.
 static constexpr int32_t kPrimIntMin = -0x7fffffff - 1;
 // Minimum value for a primitive long (same `-max - 1` form, for the same reason).
 static constexpr int64_t kPrimLongMin = -INT64_C(0x7fffffffffffffff) - 1;

 // Maximum value for a primitive integer.
 static constexpr int32_t kPrimIntMax = 0x7fffffff;
 // Maximum value for a primitive long.
 static constexpr int64_t kPrimLongMax = INT64_C(0x7fffffffffffffff);

 // Read barrier option derived from `gUseReadBarrier`: `kWithReadBarrier` when it is set,
 // `kWithoutReadBarrier` otherwise.
 static const ReadBarrierOption gCompilerReadBarrierOption =
     gUseReadBarrier ? kWithReadBarrier : kWithoutReadBarrier;

 // Bit position of the least significant bit of the class status; equal to the bit size of
 // SubtypeCheckBits.
 // NOTE(review): presumably the status is packed right above the subtype-check bits -- confirm.
 constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
 // Offset, within mirror::Class, of the byte that contains the status' least significant bit.
 constexpr size_t status_byte_offset =
     mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
 // ClassStatus values shifted to the bit position the status occupies within that byte
 // (`status_lsb_position % kBitsPerByte`).
 constexpr uint32_t shifted_visibly_initialized_value =
     enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte);
 constexpr uint32_t shifted_initializing_value =
     enum_cast<uint32_t>(ClassStatus::kInitializing) << (status_lsb_position % kBitsPerByte);
 constexpr uint32_t shifted_initialized_value =
     enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte);

 // Forward declarations of types that this header only uses by pointer or reference.
 class Assembler;
 class CodeGenerator;
 class CompilerOptions;
 class StackMapStream;
 class ParallelMoveResolver;

 namespace linker {
 class LinkerPatch;
 }  // namespace linker

 // Abstract, non-copyable interface handed to the code generator (see CodeGenerator::Compile
 // and CodeGenerator::Finalize) to obtain memory for the generated code.
 class CodeAllocator {
  public:
   CodeAllocator() = default;
   virtual ~CodeAllocator() = default;

   // Returns a buffer for `size` bytes.
   // NOTE(review): ownership and lifetime of the buffer are defined by the implementation.
   virtual uint8_t* Allocate(size_t size) = 0;

   // Returns a read-only view of the allocator's memory.
   virtual ArrayRef<const uint8_t> GetMemory() const = 0;

  private:
   DISALLOW_COPY_AND_ASSIGN(CodeAllocator);
 };

 // Base class for slow path code attached to an instruction. Subclasses provide the code in
 // EmitNativeCode(); this class holds the entry/exit labels and, per register, the stack
 // offset at which the register was saved (or kRegisterNotSaved).
 class SlowPathCode : public DeletableArenaObject<kArenaAllocSlowPaths> {
  public:
   explicit SlowPathCode(HInstruction* instruction) : instruction_(instruction) {
     // Initially no register is recorded as saved.
     for (uint32_t& core_offset : saved_core_stack_offsets_) {
       core_offset = kRegisterNotSaved;
     }
     for (uint32_t& fpu_offset : saved_fpu_stack_offsets_) {
       fpu_offset = kRegisterNotSaved;
     }
   }

   virtual ~SlowPathCode() = default;

   // Emits the native code of this slow path using `codegen`.
   virtual void EmitNativeCode(CodeGenerator* codegen) = 0;

   // Save live core and floating-point caller-save registers and
   // update the stack mask in `locations` for registers holding object
   // references.
   virtual void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
   // Restore live core and floating-point caller-save registers.
   virtual void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);

   // Whether a stack offset has been recorded for core register `reg`.
   // `reg` is used as an index and is not range-checked here.
   bool IsCoreRegisterSaved(int reg) const {
     return kRegisterNotSaved != saved_core_stack_offsets_[reg];
   }

   // Whether a stack offset has been recorded for floating-point register `reg`.
   bool IsFpuRegisterSaved(int reg) const {
     return kRegisterNotSaved != saved_fpu_stack_offsets_[reg];
   }

   // Recorded stack offset of core register `reg` (kRegisterNotSaved if none).
   uint32_t GetStackOffsetOfCoreRegister(int reg) const {
     return saved_core_stack_offsets_[reg];
   }

   // Recorded stack offset of floating-point register `reg` (kRegisterNotSaved if none).
   uint32_t GetStackOffsetOfFpuRegister(int reg) const {
     return saved_fpu_stack_offsets_[reg];
   }

   // Defaults to false; subclasses may override.
   virtual bool IsFatal() const { return false; }

   // Human-readable name of the slow path.
   virtual const char* GetDescription() const = 0;

   Label* GetEntryLabel() { return &entry_label_; }
   Label* GetExitLabel() { return &exit_label_; }

   HInstruction* GetInstruction() const {
     return instruction_;
   }

   // Dex pc of the associated instruction, or kNoDexPc when there is no instruction.
   uint32_t GetDexPc() const {
     if (instruction_ == nullptr) {
       return kNoDexPc;
     }
     return instruction_->GetDexPc();
   }

  protected:
   // Capacity of the per-register offset tables below.
   static constexpr size_t kMaximumNumberOfExpectedRegisters = 32;
   // Sentinel offset meaning "this register was not saved" (all bits set).
   static constexpr uint32_t kRegisterNotSaved = -1;
   // The instruction where this slow path is happening.
   HInstruction* instruction_;
   // Stack offsets of saved registers, indexed by register number.
   uint32_t saved_core_stack_offsets_[kMaximumNumberOfExpectedRegisters];
   uint32_t saved_fpu_stack_offsets_[kMaximumNumberOfExpectedRegisters];

  private:
   Label entry_label_;
   Label exit_label_;

   DISALLOW_COPY_AND_ASSIGN(SlowPathCode);
 };

 // Abstract visitor that hands out argument locations for an invoke, one argument at a time.
 // It keeps running indices for core registers, floating-point registers and stack slots;
 // how they advance is up to the concrete calling convention.
 class InvokeDexCallingConventionVisitor {
  public:
   // Location for the next argument, which has type `type`.
   virtual Location GetNextLocation(DataType::Type type) = 0;
   // Location of a return value of type `type`.
   virtual Location GetReturnLocation(DataType::Type type) const = 0;
   // Location of the method being invoked.
   virtual Location GetMethodLocation() const = 0;

  protected:
   InvokeDexCallingConventionVisitor() = default;
   virtual ~InvokeDexCallingConventionVisitor() = default;

   // The current index for core registers.
   uint32_t gp_index_ = 0u;
   // The current index for floating-point registers.
   uint32_t float_index_ = 0u;
   // The current stack index.
   uint32_t stack_index_ = 0u;

  private:
   DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
 };

 // Abstract description of where the operands and result of a field access call live.
 // Used by CodeGenerator::CreateUnresolvedFieldLocationSummary and
 // CodeGenerator::GenerateUnresolvedFieldAccess.
 class FieldAccessCallingConvention {
  public:
   virtual ~FieldAccessCallingConvention() = default;

   // Location of the object whose field is accessed.
   virtual Location GetObjectLocation() const = 0;
   // Location of the field index.
   virtual Location GetFieldIndexLocation() const = 0;
   // Location of a returned value of type `type`.
   virtual Location GetReturnLocation(DataType::Type type) const = 0;
   // Location of the value to store, for a field of type `type`; `is_instance`
   // distinguishes instance from static field accesses.
   virtual Location GetSetValueLocation(DataType::Type type, bool is_instance) const = 0;
   // Floating-point location for a value of type `type`.
   virtual Location GetFpuLocation(DataType::Type type) const = 0;

  protected:
   FieldAccessCallingConvention() = default;

  private:
   DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConvention);
 };

 class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
  public:
   // Compiles the graph to executable instructions.
   void Compile(CodeAllocator* allocator);
   // Factory: returns an owning pointer to a code generator for `graph`.
   // `stats` is optional and defaults to null.
   // NOTE(review): presumably the concrete backend is picked from the target instruction set
   // and the result may be null for unsupported targets -- confirm in the implementation.
   static std::unique_ptr<CodeGenerator> Create(HGraph* graph,
                                                const CompilerOptions& compiler_options,
                                                OptimizingCompilerStats* stats = nullptr);
   virtual ~CodeGenerator();

   // Get the graph. This is the outermost graph, never the graph of a method being inlined.
   HGraph* GetGraph() const { return graph_; }

   // Block-order queries; all three are defined out of line.
   // NOTE(review): the names suggest they skip over empty blocks and detect fall-through to
   // the next emitted block -- confirm in the implementation.
   HBasicBlock* GetNextBlockToEmit() const;
   HBasicBlock* FirstNonEmptyBlock(HBasicBlock* block) const;
   bool GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const;

   // Returns the stack offset of `parameter`: the frame size, plus one target pointer for
   // the ArtMethod slot, plus the parameter's index scaled by the vreg size.
   size_t GetStackSlotOfParameter(HParameterValue* parameter) const {
     // Note that this follows the current calling convention.
     const size_t art_method_size =
         static_cast<size_t>(InstructionSetPointerSize(GetInstructionSet()));
     const size_t offset_past_art_method = GetFrameSize() + art_method_size;
     return offset_past_art_method + parameter->GetIndex() * kVRegSize;
   }

   // Hooks implemented by each backend. Finalize, EmitLinkerPatches, NeedsThunkCode and
   // EmitThunkCode have base implementations defined out of line; the others are pure virtual.
   virtual void Initialize() = 0;
   virtual void Finalize(CodeAllocator* allocator);
   virtual void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches);
   virtual bool NeedsThunkCode(const linker::LinkerPatch& patch) const;
   // `code` and `debug_name` are output parameters.
   virtual void EmitThunkCode(const linker::LinkerPatch& patch,
                              /*out*/ ArenaVector<uint8_t>* code,
                              /*out*/ std::string* debug_name);
   virtual void GenerateFrameEntry() = 0;
   virtual void GenerateFrameExit() = 0;
   virtual void Bind(HBasicBlock* block) = 0;
   virtual void MoveConstant(Location destination, int32_t value) = 0;
   virtual void MoveLocation(Location dst, Location src, DataType::Type dst_type) = 0;
   virtual void AddLocationAsTemp(Location location, LocationSummary* locations) = 0;

   // Mutable and read-only access to the backend's assembler.
   virtual Assembler* GetAssembler() = 0;
   virtual const Assembler& GetAssembler() const = 0;
   // Word size of the target.
   // NOTE(review): used as a byte count by GetCoreSpillSize() and HasEmptyFrame().
   virtual size_t GetWordSize() const = 0;

   // Returns whether the target supports predicated SIMD instructions.
   virtual bool SupportsPredicatedSIMD() const { return false; }

   // Get FP register width in bytes for spilling/restoring in the slow paths.
   //
   // Note: In SIMD graphs this should return SIMD register width as all FP and SIMD registers
   // alias and live SIMD registers are forced to be spilled in full size in the slow paths.
   virtual size_t GetSlowPathFPWidth() const {
     // Default implementation.
     return GetCalleePreservedFPWidth();
   }

   // Get FP register width required to be preserved by the target ABI.
   virtual size_t GetCalleePreservedFPWidth() const  = 0;

   // Get the size of the target SIMD register in bytes.
   virtual size_t GetSIMDRegisterWidth() const = 0;
   // Address associated with `block`.
   // NOTE(review): presumably the bound code address of the block's label -- confirm.
   virtual uintptr_t GetAddressOf(HBasicBlock* block) = 0;
   // Defined out of line.
   // NOTE(review): presumably computes the frame layout from the given slot counts and
   // records `block_order` for emission -- confirm in the implementation.
   void InitializeCodeGeneration(size_t number_of_spill_slots,
                                 size_t maximum_safepoint_spill_size,
                                 size_t number_of_out_slots,
                                 const ArenaVector<HBasicBlock*>& block_order);
   // Backends can override this as necessary. For most, no special alignment is required.
   virtual uint32_t GetPreferredSlotsAlignment() const { return 1; }

   // Plain accessors for the frame size and the spill masks (see ComputeSpillMask()).
   uint32_t GetFrameSize() const { return frame_size_; }
   void SetFrameSize(uint32_t size) { frame_size_ = size; }
   uint32_t GetCoreSpillMask() const { return core_spill_mask_; }
   uint32_t GetFpuSpillMask() const { return fpu_spill_mask_; }

   // Register counts as stored in this code generator.
   size_t GetNumberOfCoreRegisters() const { return number_of_core_registers_; }
   size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; }
   // Backend hook for marking registers as blocked. It is a const method.
   // NOTE(review): presumably writes through the blocked-register arrays -- confirm.
   virtual void SetupBlockedRegisters() const = 0;

   // Sets both spill masks to the allocated registers that are also callee-save.
   virtual void ComputeSpillMask() {
     const auto allocated_core = allocated_registers_.GetCoreRegisters();
     core_spill_mask_ = allocated_core & core_callee_save_mask_;
     DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";

     const auto allocated_fpu = allocated_registers_.GetFloatingPointRegisters();
     fpu_spill_mask_ = allocated_fpu & fpu_callee_save_mask_;
   }

   // Returns a mask with bit `registers[i]` set for each of the first `length` entries.
   // Every register number must be in [0, 31]. The shift is done on an unsigned 1 so that
   // register 31 does not shift into the sign bit of a signed int.
   static uint32_t ComputeRegisterMask(const int* registers, size_t length) {
     uint32_t mask = 0u;
     for (size_t i = 0; i != length; ++i) {
       mask |= (1u << registers[i]);
     }
     return mask;
   }

   // Write a textual form of register `reg` to `stream`.
   virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0;
   virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0;
   // Instruction set this backend generates code for.
   virtual InstructionSet GetInstructionSet() const = 0;

   const CompilerOptions& GetCompilerOptions() const { return compiler_options_; }

   // Saves the register in the stack. Returns the size taken on stack.
   virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
   // Restores the register from the stack. Returns the size taken on stack.
   virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) = 0;

   // Floating-point counterparts of the two methods above.
   virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;
   virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;

   // Whether a value of `type` needs two registers on this target.
   virtual bool NeedsTwoRegisters(DataType::Type type) const = 0;
   // Returns whether we should split long moves in parallel moves.
   virtual bool ShouldSplitLongMoves() const { return false; }

   // Returns true if `invoke` is an intrinsic that this codegen's arch implements, i.e. it is
   // an intrinsic and its entry in `unimplemented_intrinsics_` is not set.
   bool IsImplementedIntrinsic(HInvoke* invoke) const {
     if (!invoke->IsIntrinsic()) {
       return false;
     }
     const size_t intrinsic_index = static_cast<size_t>(invoke->GetIntrinsic());
     return !unimplemented_intrinsics_[intrinsic_index];
   }

   // Number of core callee-save registers: the count of bits set in the callee-save mask.
   size_t GetNumberOfCoreCalleeSaveRegisters() const {
     return POPCOUNT(core_callee_save_mask_);
   }

   // Number of core registers that are not callee-save.
   size_t GetNumberOfCoreCallerSaveRegisters() const {
     const size_t all_core_registers = GetNumberOfCoreRegisters();
     const size_t callee_save_registers = GetNumberOfCoreCalleeSaveRegisters();
     DCHECK_GE(all_core_registers, callee_save_registers);
     return all_core_registers - callee_save_registers;
   }

   // Whether core register `reg` is in the callee-save mask. `reg` must be in [0, 31].
   // The bit is built from an unsigned 1 so that register 31 does not shift into the sign
   // bit of a signed int.
   bool IsCoreCalleeSaveRegister(int reg) const {
     return (core_callee_save_mask_ & (1u << reg)) != 0u;
   }

   // Whether floating-point register `reg` is in the callee-save mask. `reg` must be in
   // [0, 31]. The bit is built from an unsigned 1 so that register 31 does not shift into
   // the sign bit of a signed int.
   bool IsFloatingPointCalleeSaveRegister(int reg) const {
     return (fpu_callee_save_mask_ & (1u << reg)) != 0u;
   }

   // Returns the mask of live registers that a slow path must spill. `core_registers` selects
   // between the core and the floating-point register file.
   uint32_t GetSlowPathSpills(LocationSummary* locations, bool core_registers) const {
     DCHECK(locations->OnlyCallsOnSlowPath() ||
            (locations->Intrinsified() && locations->CallsOnMainAndSlowPath() &&
                !locations->HasCustomSlowPathCallingConvention()));
     uint32_t live_registers;
     if (core_registers) {
       live_registers = locations->GetLiveRegisters()->GetCoreRegisters();
     } else {
       live_registers = locations->GetLiveRegisters()->GetFloatingPointRegisters();
     }

     if (locations->HasCustomSlowPathCallingConvention()) {
       // Save only the live registers that the custom calling convention wants us to save.
       uint32_t caller_saves;
       if (core_registers) {
         caller_saves = locations->GetCustomSlowPathCallerSaves().GetCoreRegisters();
       } else {
         caller_saves = locations->GetCustomSlowPathCallerSaves().GetFloatingPointRegisters();
       }
       return live_registers & caller_saves;
     }

     // Default ABI, we need to spill non-callee-save live registers.
     uint32_t callee_saves = fpu_callee_save_mask_;
     if (core_registers) {
       callee_saves = core_callee_save_mask_;
     }
     return live_registers & ~callee_saves;
   }

   // Number of registers in the mask returned by GetSlowPathSpills() for the same arguments.
   size_t GetNumberOfSlowPathSpills(LocationSummary* locations, bool core_registers) const {
     const uint32_t spill_mask = GetSlowPathSpills(locations, core_registers);
     return POPCOUNT(spill_mask);
   }

   // Returns the stack offset of the should-deoptimize flag: the frame size minus the
   // frame-entry spill area minus the size of the flag itself.
   size_t GetStackOffsetOfShouldDeoptimizeFlag() const {
     // Only valid when the graph has the flag.
     DCHECK(GetGraph()->HasShouldDeoptimizeFlag());
     // The frame must be able to hold the spill area and the flag, otherwise the
     // subtraction below would wrap around.
     DCHECK_GE(GetFrameSize(), FrameEntrySpillSize() + kShouldDeoptimizeFlagSize);
     return GetFrameSize() - FrameEntrySpillSize() - kShouldDeoptimizeFlagSize;
   }

   // Record native to dex mapping for a suspend point. Required by runtime.
   // This overload takes the native pc explicitly.
   void RecordPcInfo(HInstruction* instruction,
                     uint32_t dex_pc,
                     uint32_t native_pc,
                     SlowPathCode* slow_path = nullptr,
                     bool native_debug_info = false);

   // Record native to dex mapping for a suspend point.
   // The native_pc is used from Assembler::CodePosition.
   //
   // Note: As Assembler::CodePosition is target dependent, it does not guarantee the exact native_pc
   // for the instruction. If the exact native_pc is required it must be provided explicitly.
   void RecordPcInfo(HInstruction* instruction,
                     uint32_t dex_pc,
                     SlowPathCode* slow_path = nullptr,
                     bool native_debug_info = false);

   // Check whether we have already recorded mapping at this PC.
   bool HasStackMapAtCurrentPc();

   // Record extra stack maps if we support native debugging.
   //
   // ARM specific behaviour: The recorded native PC might be a branch over pools to instructions
   // corresponding to the dex PC.
   void MaybeRecordNativeDebugInfo(HInstruction* instruction,
                                   uint32_t dex_pc,
                                   SlowPathCode* slow_path = nullptr);

   // Null-check support. GenerateImplicitNullCheck and GenerateExplicitNullCheck are the
   // backend-specific parts; the others are defined out of line.
   // NOTE(review): presumably GenerateNullCheck picks between the implicit and explicit
   // forms -- confirm in the implementation.
   bool CanMoveNullCheckToUser(HNullCheck* null_check);
   virtual void MaybeRecordImplicitNullCheck(HInstruction* instruction);
   // `caller_saves` defaults to the empty register set.
   LocationSummary* CreateThrowingSlowPathLocations(
       HInstruction* instruction, RegisterSet caller_saves = RegisterSet::Empty());
   void GenerateNullCheck(HNullCheck* null_check);
   virtual void GenerateImplicitNullCheck(HNullCheck* null_check) = 0;
   virtual void GenerateExplicitNullCheck(HNullCheck* null_check) = 0;

   // Records a stack map which the runtime might use to set catch phi values
   // during exception delivery.
   // TODO: Replace with a catch-entering instruction that records the environment.
   void RecordCatchBlockInfo();

   // Get the ScopedArenaAllocator used for codegen memory allocation.
   ScopedArenaAllocator* GetScopedAllocator();

   // Registers `slow_path` with this code generator.
   // NOTE(review): presumably its code is emitted after the main code -- confirm.
   void AddSlowPath(SlowPathCode* slow_path);

   // Builds and returns the encoded stack maps.
   // NOTE(review): `code_item_for_osr_check` is presumably only used for debug checks of
   // OSR entries -- confirm in the implementation.
   ScopedArenaVector<uint8_t> BuildStackMaps(const dex::CodeItem* code_item_for_osr_check);
   size_t GetNumberOfJitRoots() const;

   // Fills the `literals` array with literals collected during code generation.
   // Also emits literal patches.
   // NOTE(review): the signature has no `literals` parameter; the comment above presumably
   // refers to the `roots` output parameter -- confirm and update the wording.
   void EmitJitRoots(uint8_t* code,
                     const uint8_t* roots_data,
                     /*out*/std::vector<Handle<mirror::Object>>* roots)
       REQUIRES_SHARED(Locks::mutator_lock_);

   // Whether the method is still considered a leaf (MarkNotLeaf() has not been called).
   bool IsLeafMethod() const {
     return is_leaf_;
   }

   // Marks the method as non-leaf. This also marks the current method as required.
   void MarkNotLeaf() {
     is_leaf_ = false;
     requires_current_method_ = true;
   }

   bool NeedsSuspendCheckEntry() const {
     return needs_suspend_check_entry_;
   }

   void MarkNeedsSuspendCheckEntry() {
     needs_suspend_check_entry_ = true;
   }

   // Records that the generated code needs the current method.
   void SetRequiresCurrentMethod() {
     requires_current_method_ = true;
   }

   bool RequiresCurrentMethod() const {
     return requires_current_method_;
   }

   // Clears the spill slots taken by loop phis in the `LocationSummary` of the
   // suspend check. This is called when the code generator generates code
   // for the suspend check at the back edge (instead of where the suspend check
   // is, which is the loop entry). At this point, the spill slots for the phis
   // have not been written to.
   void ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check,
                                              HParallelMove* spills) const;

   // Return the arrays of per-register "blocked" flags. The arrays themselves are mutable
   // even though these accessors are const.
   bool* GetBlockedCoreRegisters() const { return blocked_core_registers_; }
   bool* GetBlockedFloatingPointRegisters() const { return blocked_fpu_registers_; }

   bool IsBlockedCoreRegister(size_t i) { return blocked_core_registers_[i]; }
   bool IsBlockedFloatingPointRegister(size_t i) { return blocked_fpu_registers_[i]; }

   // Helper that returns the offset of the array's length field.
   // Note: Besides the normal arrays, we also use the HArrayLength for
   // accessing the String's `count` field in String intrinsics.
   static uint32_t GetArrayLengthOffset(HArrayLength* array_length);

   // Helper that returns the offset of the array's data.
   // Note: Besides the normal arrays, we also use the HArrayGet for
   // accessing the String's `value` field in String intrinsics.
   static uint32_t GetArrayDataOffset(HArrayGet* array_get);

   // Emits two moves, `from1` -> `to1` and `from2` -> `to2`, with the given value types.
   // NOTE(review): presumably both go through the parallel move resolver so that overlapping
   // sources and destinations are handled -- confirm in the implementation.
   void EmitParallelMoves(Location from1,
                          Location to1,
                          DataType::Type type1,
                          Location from2,
                          Location to2,
                          DataType::Type type2);

   // Whether `instance_of` needs a read barrier: read barriers are in use and the target
   // class is not in the boot image.
   static bool InstanceOfNeedsReadBarrier(HInstanceOf* instance_of) {
     // Used only for kExactCheck, kAbstractClassCheck, kClassHierarchyCheck and kArrayObjectCheck.
     DCHECK(instance_of->GetTypeCheckKind() == TypeCheckKind::kExactCheck ||
            instance_of->GetTypeCheckKind() == TypeCheckKind::kAbstractClassCheck ||
            instance_of->GetTypeCheckKind() == TypeCheckKind::kClassHierarchyCheck ||
            instance_of->GetTypeCheckKind() == TypeCheckKind::kArrayObjectCheck)
         << instance_of->GetTypeCheckKind();
     if (!gUseReadBarrier) {
       return false;
     }
     // If the target class is in the boot image, it's non-moveable and it doesn't matter
     // if we compare it with a from-space or to-space reference, the result is the same.
     // It's OK to traverse a class hierarchy jumping between from-space and to-space.
     return !instance_of->GetTargetClass()->IsInBootImage();
   }

   // Maps InstanceOfNeedsReadBarrier() to the corresponding ReadBarrierOption.
   static ReadBarrierOption ReadBarrierOptionForInstanceOf(HInstanceOf* instance_of) {
     if (InstanceOfNeedsReadBarrier(instance_of)) {
       return kWithReadBarrier;
     }
     return kWithoutReadBarrier;
   }

   // Whether the slow path of `check_cast` is fatal (non-returning), based on the kind of
   // type check and on whether the inline check could have produced a false negative.
   static bool IsTypeCheckSlowPathFatal(HCheckCast* check_cast) {
     switch (check_cast->GetTypeCheckKind()) {
       case TypeCheckKind::kBitstringCheck:
         return true;
       case TypeCheckKind::kArrayCheck:
       case TypeCheckKind::kUnresolvedCheck:
         return false;
       case TypeCheckKind::kExactCheck:
       case TypeCheckKind::kAbstractClassCheck:
       case TypeCheckKind::kClassHierarchyCheck:
       case TypeCheckKind::kArrayObjectCheck:
       case TypeCheckKind::kInterfaceCheck: {
         // We do not emit read barriers for HCheckCast, so we can get false negatives
         // and the slow path shall re-check and simply return if the cast is actually OK.
         // The slow path is therefore fatal only when no read barrier would be needed:
         // read barriers are off, or the target class is in the boot image.
         return !gUseReadBarrier || check_cast->GetTargetClass()->IsInBootImage();
       }
     }
     LOG(FATAL) << "Unreachable";
     UNREACHABLE();
   }

   // Call kind for the LocationSummary of `check_cast`: kNoCall when the slow path is fatal
   // and the instruction cannot throw into a catch block, kCallOnSlowPath otherwise.
   static LocationSummary::CallKind GetCheckCastCallKind(HCheckCast* check_cast) {
     if (IsTypeCheckSlowPathFatal(check_cast) && !check_cast->CanThrowIntoCatchBlock()) {
       // In fact, call on a fatal (non-returning) slow path.
       return LocationSummary::kNoCall;
     }
     return LocationSummary::kCallOnSlowPath;
   }

   // A store needs a write barrier when it stores a reference that is not the null constant.
   static bool StoreNeedsWriteBarrier(DataType::Type type, HInstruction* value) {
     // Check that null value is not represented as an integer constant.
     DCHECK_IMPLIES(type == DataType::Type::kReference, !value->IsIntConstant());
     if (type != DataType::Type::kReference) {
       return false;
     }
     return !value->IsNullConstant();
   }


   // Performs checks pertaining to an InvokeRuntime call.
   // `slow_path` is the slow path the call is made from, if any.
   void ValidateInvokeRuntime(QuickEntrypointEnum entrypoint,
                              HInstruction* instruction,
                              SlowPathCode* slow_path);

   // Performs checks pertaining to an InvokeRuntimeWithoutRecordingPcInfo call.
   static void ValidateInvokeRuntimeWithoutRecordingPcInfo(HInstruction* instruction,
                                                           SlowPathCode* slow_path);

   // Adds `location` to the set of allocated registers, which ComputeSpillMask() and
   // HasAllocatedRegister() consult.
   void AddAllocatedRegister(Location location) {
     allocated_registers_.Add(location);
   }

   // Whether register `reg` of the selected file (`is_core`: core, otherwise floating-point)
   // is in the set of allocated registers.
   bool HasAllocatedRegister(bool is_core, int reg) const {
     if (is_core) {
       return allocated_registers_.ContainsCoreRegister(reg);
     }
     return allocated_registers_.ContainsFloatingPointRegister(reg);
   }

   // Defined out of line.
   // NOTE(review): presumably sets up the LocationSummary of `instruction` via the backend's
   // location builder -- confirm in the implementation.
   void AllocateLocations(HInstruction* instruction);

   // Tells whether the stack frame of the compiled method is
   // considered "empty", that is either actually having a size of zero,
   // or just containing the saved return address register.
   bool HasEmptyFrame() const {
     // On targets where the call instruction pushes the PC, one word is always present.
     const size_t empty_frame_size = CallPushesPC() ? GetWordSize() : 0;
     return GetFrameSize() == empty_frame_size;
   }

   // Value of an int constant, narrowed to 8 bits.
   static int8_t GetInt8ValueOf(HConstant* constant) {
     DCHECK(constant->IsIntConstant());
     return static_cast<int8_t>(constant->AsIntConstant()->GetValue());
   }

   // Value of an int constant, narrowed to 16 bits.
   static int16_t GetInt16ValueOf(HConstant* constant) {
     DCHECK(constant->IsIntConstant());
     return static_cast<int16_t>(constant->AsIntConstant()->GetValue());
   }

   // 32-bit representation of `constant`: the value of an int constant, 0 for the null
   // constant, or the raw bits of a float constant.
   static int32_t GetInt32ValueOf(HConstant* constant) {
     if (constant->IsIntConstant()) {
       return constant->AsIntConstant()->GetValue();
     }
     if (constant->IsNullConstant()) {
       return 0;
     }
     // Anything else must be a float; return its bit pattern.
     DCHECK(constant->IsFloatConstant());
     return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
   }

   // 64-bit representation of `constant`: the value of an int or long constant, 0 for the
   // null constant, or the raw bits of a float (32-bit pattern, then widened) or double.
   static int64_t GetInt64ValueOf(HConstant* constant) {
     if (constant->IsIntConstant()) {
       return constant->AsIntConstant()->GetValue();
     }
     if (constant->IsNullConstant()) {
       return 0;
     }
     if (constant->IsFloatConstant()) {
       return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
     }
     if (constant->IsLongConstant()) {
       return constant->AsLongConstant()->GetValue();
     }
     // Anything else must be a double; return its bit pattern.
     DCHECK(constant->IsDoubleConstant());
     return bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue());
   }

   // Accessor for the stored first register slot used by slow paths.
   size_t GetFirstRegisterSlotInSlowPath() const {
     return first_register_slot_in_slow_path_;
   }

   // Total size of the registers spilled at frame entry: FPU spills plus core spills.
   uint32_t FrameEntrySpillSize() const {
     return GetFpuSpillSize() + GetCoreSpillSize();
   }

   // Parallel move resolver of the backend.
   virtual ParallelMoveResolver* GetMoveResolver() = 0;

   // Defined out of line.
   // NOTE(review): presumably builds the LocationSummary of `invoke`, taking argument and
   // return locations from `visitor` -- confirm in the implementation.
   static void CreateCommonInvokeLocationSummary(
       HInvoke* invoke, InvokeDexCallingConventionVisitor* visitor);

   // Prepares the outgoing arguments of a critical native call and returns the size of the
   // out frame (the visitor's stack offset rounded up to `kNativeStackAlignment`).
   // In debug builds the size is cross-checked against
   // `GetCriticalNativeDirectCallFrameSize` computed from the method's shorty.
   // The frame setup is finished only when the out frame is non-empty.
   template <typename CriticalNativeCallingConventionVisitor,
             size_t kNativeStackAlignment,
             size_t GetCriticalNativeDirectCallFrameSize(const char* shorty, uint32_t shorty_len)>
   size_t PrepareCriticalNativeCall(HInvokeStaticOrDirect* invoke) {
     DCHECK(!invoke->GetLocations()->Intrinsified());
     CriticalNativeCallingConventionVisitor visitor(/*for_register_allocation=*/ false);
     HParallelMove argument_moves(GetGraph()->GetAllocator());
     PrepareCriticalNativeArgumentMoves(invoke, &visitor, &argument_moves);
     size_t frame_size = RoundUp(visitor.GetStackOffset(), kNativeStackAlignment);
     if (kIsDebugBuild) {
       uint32_t shorty_len;
       const char* shorty = GetCriticalNativeShorty(invoke, &shorty_len);
       DCHECK_EQ(GetCriticalNativeDirectCallFrameSize(shorty, shorty_len), frame_size);
     }
     if (frame_size != 0u) {
       FinishCriticalNativeFrameSetup(frame_size, &argument_moves);
     }
     return frame_size;
   }

   // Helpers that generate invokes going through the runtime; all defined out of line.
   void GenerateInvokeStaticOrDirectRuntimeCall(
       HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path);

   void GenerateInvokeUnresolvedRuntimeCall(HInvokeUnresolved* invoke);

   void GenerateInvokePolymorphicCall(HInvokePolymorphic* invoke, SlowPathCode* slow_path = nullptr);

   void GenerateInvokeCustomCall(HInvokeCustom* invoke);

   // Creates the locations for `instruction`, with `out` as its output location.
   void CreateStringBuilderAppendLocations(HStringBuilderAppend* instruction, Location out);

   // Unresolved field accesses: location setup and code generation, both driven by the
   // locations that `calling_convention` provides.
   void CreateUnresolvedFieldLocationSummary(
       HInstruction* field_access,
       DataType::Type field_type,
       const FieldAccessCallingConvention& calling_convention);

   void GenerateUnresolvedFieldAccess(
       HInstruction* field_access,
       DataType::Type field_type,
       uint32_t field_index,
       uint32_t dex_pc,
       const FieldAccessCallingConvention& calling_convention);

   // Location setup / code generation pairs for loads of classes, method handles and
   // method types that go through the runtime. The caller supplies the location of the
   // index argument and of the returned value.
   static void CreateLoadClassRuntimeCallLocationSummary(HLoadClass* cls,
                                                         Location runtime_type_index_location,
                                                         Location runtime_return_location);
   void GenerateLoadClassRuntimeCall(HLoadClass* cls);

   static void CreateLoadMethodHandleRuntimeCallLocationSummary(HLoadMethodHandle* method_handle,
                                                              Location runtime_handle_index_location,
                                                              Location runtime_return_location);
   void GenerateLoadMethodHandleRuntimeCall(HLoadMethodHandle* method_handle);

   static void CreateLoadMethodTypeRuntimeCallLocationSummary(HLoadMethodType* method_type,
                                                              Location runtime_type_index_location,
                                                              Location runtime_return_location);
   void GenerateLoadMethodTypeRuntimeCall(HLoadMethodType* method_type);

   // Boot image offset of the given object, or of the entity referenced by the given
   // instruction / class root. The ObjPtr overload requires the mutator lock (shared).
   static uint32_t GetBootImageOffset(ObjPtr<mirror::Object> object)
       REQUIRES_SHARED(Locks::mutator_lock_);
   static uint32_t GetBootImageOffset(HLoadClass* load_class);
   static uint32_t GetBootImageOffset(HLoadString* load_string);
   static uint32_t GetBootImageOffset(HInvoke* invoke);
   static uint32_t GetBootImageOffset(ClassRoot class_root);
   static uint32_t GetBootImageOffsetOfIntrinsicDeclaringClass(HInvoke* invoke);

   static void CreateSystemArrayCopyLocationSummary(HInvoke* invoke);

   // Setter/getter for the disassembly information pointer; it is stored as given.
   void SetDisassemblyInformation(DisassemblyInformation* info) { disasm_info_ = info; }
   DisassemblyInformation* GetDisassemblyInformation() const { return disasm_info_; }

   // Backend hook that emits a call to the runtime entrypoint `entrypoint` on behalf of
   // `instruction`. `slow_path` is the slow path the call is made from, if any.
   virtual void InvokeRuntime(QuickEntrypointEnum entrypoint,
                              HInstruction* instruction,
                              uint32_t dex_pc,
                              SlowPathCode* slow_path = nullptr) = 0;

   // Check if the desired_string_load_kind is supported. If it is, return it,
   // otherwise return a fall-back kind that should be used instead.
   virtual HLoadString::LoadKind GetSupportedLoadStringKind(
       HLoadString::LoadKind desired_string_load_kind) = 0;

   // Check if the desired_class_load_kind is supported. If it is, return it,
   // otherwise return a fall-back kind that should be used instead.
   virtual HLoadClass::LoadKind GetSupportedLoadClassKind(
       HLoadClass::LoadKind desired_class_load_kind) = 0;

   // Returns the call kind for the LocationSummary of `load`, based on its load kind:
   //  - kBssEntry: call on the slow path only;
   //  - kRuntimeCall: call on the main path only;
   //  - kJitTableAddress: call on the slow path when read barriers are in use, else no call;
   //  - any other kind: no call.
   // The DCHECKs verify that the instruction needs an environment exactly for the first two
   // kinds.
   static LocationSummary::CallKind GetLoadStringCallKind(HLoadString* load) {
     switch (load->GetLoadKind()) {
       case HLoadString::LoadKind::kBssEntry:
         DCHECK(load->NeedsEnvironment());
         return LocationSummary::kCallOnSlowPath;
       case HLoadString::LoadKind::kRuntimeCall:
         DCHECK(load->NeedsEnvironment());
         return LocationSummary::kCallOnMainOnly;
       case HLoadString::LoadKind::kJitTableAddress:
         DCHECK(!load->NeedsEnvironment());
         return gUseReadBarrier
             ? LocationSummary::kCallOnSlowPath
             : LocationSummary::kNoCall;
       default:
         DCHECK(!load->NeedsEnvironment());
         return LocationSummary::kNoCall;
     }
   }

   // Check if the desired_dispatch_info is supported. If it is, return it,
   // otherwise return a fall-back info that should be used instead.
   virtual HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
       ArtMethod* method) = 0;

   // Generate a call to a static or direct method.
   virtual void GenerateStaticOrDirectCall(
       HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) = 0;
   // Generate a call to a virtual method.
   virtual void GenerateVirtualCall(
       HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) = 0;

   // Copy the result of a call into the given target.
   virtual void MoveFromReturnRegister(Location trg, DataType::Type type) = 0;

   // Grow / shrink the current frame by `adjustment`.
   // NOTE(review): presumably in bytes, matching the out frame size computed by
   // PrepareCriticalNativeCall() -- confirm.
   virtual void IncreaseFrame(size_t adjustment) = 0;
   virtual void DecreaseFrame(size_t adjustment) = 0;

   // Emit a no-op instruction.
   virtual void GenerateNop() = 0;

   // Static lookups defined out of line: the allocation entrypoint for `new_array`, and the
   // addressing scale factor for elements of `type`.
   static QuickEntrypointEnum GetArrayAllocationEntrypoint(HNewArray* new_array);
   static ScaleFactor ScaleFactorForType(DataType::Type type);

  protected:
   // Patch info used for recording locations of required linker patches and their targets,
   // i.e. target method, string, type or code identified by their dex file and index,
   // or .data.bimg.rel.ro entries identified by the boot image offset.
   template <typename LabelType>
   struct PatchInfo {
     // The label starts out value-initialized; it is bound later by the code that emits
     // the instruction to patch.
     PatchInfo(const DexFile* dex_file, uint32_t off_or_idx)
         : target_dex_file(dex_file),
           offset_or_index(off_or_idx),
           label() {}

     // Target dex file or null for .data.bimg.rel.ro patches.
     const DexFile* target_dex_file;
     // Either the boot image offset (to write to .data.bimg.rel.ro) or string/type/method index.
     uint32_t offset_or_index;
     // Label for the instruction to patch.
     LabelType label;
   };

   // Protected constructor: a CodeGenerator can only be created through a subclass.
   // NOTE(review): the definition is not in this header; the register counts and
   // callee-save masks are presumably stored in the same-named members declared below,
   // and `unimplemented_intrinsics` presumably holds one flag per intrinsic - confirm.
   CodeGenerator(HGraph* graph,
                 size_t number_of_core_registers,
                 size_t number_of_fpu_registers,
                 size_t number_of_register_pairs,
                 uint32_t core_callee_save_mask,
                 uint32_t fpu_callee_save_mask,
                 const CompilerOptions& compiler_options,
                 OptimizingCompilerStats* stats,
                 const art::ArrayRef<const bool>& unimplemented_intrinsics);

   // Pure virtual accessors for the graph visitors supplied by the subclass.
   // NOTE(review): presumably the visitor that builds instruction locations and the
   // visitor that emits the code, respectively - confirm against the subclasses.
   virtual HGraphVisitor* GetLocationBuilder() = 0;
   virtual HGraphVisitor* GetInstructionVisitor() = 0;

   // Location, relative to the stack pointer, of the first spilled entry for floating
   // point registers: the frame size minus the size of the frame entry spill area.
   uint32_t GetFpuSpillStart() const {
     const auto frame_bytes = GetFrameSize();
     const auto entry_spill_bytes = FrameEntrySpillSize();
     return frame_bytes - entry_spill_bytes;
   }

   // Size of the floating point spill area: one slot of `GetCalleePreservedFPWidth()`
   // for every register set in `fpu_spill_mask_`.
   uint32_t GetFpuSpillSize() const {
     const auto spilled_fpu_count = POPCOUNT(fpu_spill_mask_);
     const auto fpu_slot_width = GetCalleePreservedFPWidth();
     return spilled_fpu_count * fpu_slot_width;
   }

   // Size of the core spill area: one slot of `GetWordSize()` for every register set
   // in `core_spill_mask_`.
   uint32_t GetCoreSpillSize() const {
     const auto spilled_core_count = POPCOUNT(core_spill_mask_);
     const auto core_slot_width = GetWordSize();
     return spilled_core_count * core_slot_width;
   }

   // Whether any callee-save register, core or floating point, has been allocated.
   virtual bool HasAllocatedCalleeSaveRegisters() const {
     // The core count is compared against 1 rather than 0 because the core set always
     // comprises the return PC.
     const auto allocated_core_callee_saves =
         allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
     if (POPCOUNT(allocated_core_callee_saves) != 1) {
       return true;
     }
     const auto allocated_fpu_callee_saves =
         allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
     return POPCOUNT(allocated_fpu_callee_saves) != 0;
   }

   bool CallPushesPC() const {
     InstructionSet instruction_set = GetInstructionSet();
     return instruction_set == InstructionSet::kX86 || instruction_set == InstructionSet::kX86_64;
   }

   // Arm64 has its own type for a label, so we need to templatize these methods
   // to share the logic.

   // Allocates one `LabelType` per block of the graph from the graph's allocator,
   // value-initializes each of them in place and returns the start of the array.
   template <typename LabelType>
   LabelType* CommonInitializeLabels() {
     // A raw array is used instead of ArenaVector<> because Labels are
     // non-constructible and non-movable and as such cannot be held in a vector.
     const size_t block_count = GetGraph()->GetBlocks().size();
     LabelType* const first_label =
         GetGraph()->GetAllocator()->AllocArray<LabelType>(block_count, kArenaAllocCodeGenerator);
     LabelType* const past_last_label = first_label + block_count;
     for (LabelType* slot = first_label; slot != past_last_label; ++slot) {
       new (slot) LabelType();
     }
     return first_label;
   }

   template <typename LabelType>
   LabelType* CommonGetLabelOf(LabelType* raw_pointer_to_labels_array, HBasicBlock* block) const {
     block = FirstNonEmptyBlock(block);
     return raw_pointer_to_labels_array + block->GetBlockId();
   }

   // Returns `current_slow_path_`, the slow path that code is currently being
   // generated for.
   SlowPathCode* GetCurrentSlowPath() {
     return current_slow_path_;
   }

   // Defined out of line.
   StackMapStream* GetStackMapStream();

   // Bookkeeping for string and class roots referenced by JIT-compiled code.
   // NOTE(review): presumably a root must be reserved with the matching Reserve* call
   // before its index is queried with Get*Index - confirm against the definitions.
   void ReserveJitStringRoot(StringReference string_reference, Handle<mirror::String> string);
   uint64_t GetJitStringRootIndex(StringReference string_reference);
   void ReserveJitClassRoot(TypeReference type_reference, Handle<mirror::Class> klass);
   uint64_t GetJitClassRootIndex(TypeReference type_reference);

   // Emit the patches associated with JIT roots. Only applies to JIT compiled code.
   virtual void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data);

   // Frame size required for this method.
   uint32_t frame_size_;
   // Bit masks of the spilled core / floating point registers. The number of set bits
   // determines the sizes returned by GetCoreSpillSize() and GetFpuSpillSize().
   uint32_t core_spill_mask_;
   uint32_t fpu_spill_mask_;
   // NOTE(review): presumably the first stack slot used for saving registers in slow
   // paths - confirm against the code that sets it.
   uint32_t first_register_slot_in_slow_path_;

   // Registers that were allocated during linear scan.
   RegisterSet allocated_registers_;

   // Arrays used when doing register allocation to know which
   // registers we can allocate. `SetupBlockedRegisters` updates the
   // arrays.
   bool* const blocked_core_registers_;
   bool* const blocked_fpu_registers_;
   // Register counts; they mirror the same-named constructor parameters.
   size_t number_of_core_registers_;
   size_t number_of_fpu_registers_;
   size_t number_of_register_pairs_;
   // Masks of the callee-save registers; HasAllocatedCalleeSaveRegisters() intersects
   // them with `allocated_registers_`.
   const uint32_t core_callee_save_mask_;
   const uint32_t fpu_callee_save_mask_;

   // The order to use for code generation.
   const ArenaVector<HBasicBlock*>* block_order_;

   // NOTE(review): presumably only set when disassembly information is being
   // collected - confirm.
   DisassemblyInformation* disasm_info_;

  private:
   // Defined out of line; an instance is held in `code_generation_data_`.
   class CodeGenerationData;

   // Private helpers, all defined out of line.
   void InitializeCodeGenerationData();
   size_t GetStackOffsetOfSavedRegister(size_t index);
   void GenerateSlowPaths();
   void BlockIfInRegister(Location location, bool is_out = false) const;
   // Environment / vreg info emission. `needs_vreg_info` and `innermost_environment`
   // default to true, `is_for_catch_handler` defaults to false.
   void EmitEnvironment(HEnvironment* environment,
                        SlowPathCode* slow_path,
                        bool needs_vreg_info = true,
                        bool is_for_catch_handler = false,
                        bool innermost_environment = true);
   void EmitVRegInfo(HEnvironment* environment, SlowPathCode* slow_path, bool is_for_catch_handler);
   void EmitVRegInfoOnlyCatchPhis(HEnvironment* environment);

   // Helpers for invokes of critical native methods. The inline /*inout*/ and /*out*/
   // markers show which arguments the callee modifies.
   static void PrepareCriticalNativeArgumentMoves(
       HInvokeStaticOrDirect* invoke,
       /*inout*/InvokeDexCallingConventionVisitor* visitor,
       /*out*/HParallelMove* parallel_move);

   void FinishCriticalNativeFrameSetup(size_t out_frame_size, /*inout*/HParallelMove* parallel_move);

   // NOTE(review): presumably returns the shorty of the invoked method and stores its
   // length in `*shorty_len` - confirm against the definition.
   static const char* GetCriticalNativeShorty(HInvokeStaticOrDirect* invoke, uint32_t* shorty_len);

   // Statistics object; mirrors the `stats` constructor parameter.
   // NOTE(review): may presumably be null - confirm before dereferencing.
   OptimizingCompilerStats* stats_;

   // The graph being compiled and the options it is compiled with.
   HGraph* const graph_;
   const CompilerOptions& compiler_options_;

   // The current slow-path that we're generating code for.
   SlowPathCode* current_slow_path_;

   // The current block index in `block_order_` of the block
   // we are generating code for.
   size_t current_block_index_;

   // Whether the method is a leaf method.
   bool is_leaf_;

   // Whether the method has to emit a SuspendCheck at entry.
   bool needs_suspend_check_entry_;

   // Whether an instruction in the graph accesses the current method.
   // TODO: Rename: this actually indicates that some instruction in the method
   // needs the environment including a valid stack frame.
   bool requires_current_method_;

   // The CodeGenerationData contains a ScopedArenaAllocator intended for reusing the
   // ArenaStack memory allocated in previous passes instead of adding to the memory
   // held by the ArenaAllocator. This ScopedArenaAllocator is created in
   // CodeGenerator::Compile() and remains alive until the CodeGenerator is destroyed.
   std::unique_ptr<CodeGenerationData> code_generation_data_;

   // Which intrinsics we don't have handcrafted code for.
   art::ArrayRef<const bool> unimplemented_intrinsics_;

   // Tests that need access to non-public members.
   friend class OptimizingCFITest;
   ART_FRIEND_TEST(CodegenTest, ARM64FrameSizeSIMD);
   ART_FRIEND_TEST(CodegenTest, ARM64FrameSizeNoSIMD);

   DISALLOW_COPY_AND_ASSIGN(CodeGenerator);
 };

 // Describes a calling convention through a table of core registers (of type `C`) and a
 // table of floating point registers (of type `F`), plus the pointer size used when
 // computing stack offsets. The tables are referenced through raw pointers and are not
 // released by this class.
 template <typename C, typename F>
 class CallingConvention {
  public:
   CallingConvention(const C* registers,
                     size_t number_of_registers,
                     const F* fpu_registers,
                     size_t number_of_fpu_registers,
                     PointerSize pointer_size)
       : registers_(registers),
         number_of_registers_(number_of_registers),
         fpu_registers_(fpu_registers),
         number_of_fpu_registers_(number_of_fpu_registers),
         pointer_size_(pointer_size) {}

   size_t GetNumberOfRegisters() const {
     return number_of_registers_;
   }

   size_t GetNumberOfFpuRegisters() const {
     return number_of_fpu_registers_;
   }

   // Core register at position `index`; the index is checked with a DCHECK.
   C GetRegisterAt(size_t index) const {
     DCHECK_LT(index, number_of_registers_);
     const C* const entry = registers_ + index;
     return *entry;
   }

   // Floating point register at position `index`; the index is checked with a DCHECK.
   F GetFpuRegisterAt(size_t index) const {
     DCHECK_LT(index, number_of_fpu_registers_);
     const F* const entry = fpu_registers_ + index;
     return *entry;
   }

   // Stack offset of the vreg-sized slot number `index`.
   size_t GetStackOffsetOf(size_t index) const {
     // The method pointer comes first, hence the pointer-sized base offset.
     const size_t method_pointer_size = static_cast<size_t>(pointer_size_);
     // Space is still reserved for the parameters passed by registers, so the slot
     // index is used as is.
     const size_t offset_in_slots = index * kVRegSize;
     return method_pointer_size + offset_in_slots;
   }

  private:
   const C* registers_;
   const size_t number_of_registers_;
   const F* fpu_registers_;
   const size_t number_of_fpu_registers_;
   const PointerSize pointer_size_;

   DISALLOW_COPY_AND_ASSIGN(CallingConvention);
 };

 /**
  * A templated class SlowPathGenerator with a templated method NewSlowPath()
  * that can be used by any code generator to share equivalent slow-paths with
  * the objective of reducing generated code size.
  *
  * InstructionType:  instruction that requires SlowPathCodeType
  * SlowPathCodeType: subclass of SlowPathCode, with constructor SlowPathCodeType(InstructionType *)
  */
 template <typename InstructionType>
 class SlowPathGenerator {
   static_assert(std::is_base_of<HInstruction, InstructionType>::value,
                 "InstructionType is not a subclass of art::HInstruction");

  public:
   SlowPathGenerator(HGraph* graph, CodeGenerator* codegen)
       : graph_(graph),
         codegen_(codegen),
         slow_path_map_(std::less<uint32_t>(),
                        graph->GetAllocator()->Adapter(kArenaAllocSlowPaths)) {}

   // Creates and adds a new slow-path, if needed, or returns existing one otherwise.
   // Templating the method (rather than the whole class) on the slow-path type enables
   // keeping this code at a generic, non architecture-specific place.
   //
   // NOTE: This approach assumes each InstructionType only generates one SlowPathCodeType.
   //       To relax this requirement, we would need some RTTI on the stored slow-paths,
   //       or template the class as a whole on SlowPathType.
   template <typename SlowPathCodeType>
   SlowPathCodeType* NewSlowPath(InstructionType* instruction) {
     static_assert(std::is_base_of<SlowPathCode, SlowPathCodeType>::value,
                   "SlowPathCodeType is not a subclass of art::SlowPathCode");
     static_assert(std::is_constructible<SlowPathCodeType, InstructionType*>::value,
                   "SlowPathCodeType is not constructible from InstructionType*");
     // Iterate over potential candidates for sharing. Currently, only same-typed
     // slow-paths with exactly the same dex-pc are viable candidates.
     // TODO: pass dex-pc/slow-path-type to run-time to allow even more sharing?
     const uint32_t dex_pc = instruction->GetDexPc();
     auto iter = slow_path_map_.find(dex_pc);
     if (iter != slow_path_map_.end()) {
       const ArenaVector<std::pair<InstructionType*, SlowPathCode*>>& candidates = iter->second;
       for (const auto& it : candidates) {
         InstructionType* other_instruction = it.first;
         SlowPathCodeType* other_slow_path = down_cast<SlowPathCodeType*>(it.second);
         // Determine if the instructions allow for slow-path sharing.
         if (HaveSameLiveRegisters(instruction, other_instruction) &&
             HaveSameStackMap(instruction, other_instruction)) {
           // Can share: reuse existing one.
           return other_slow_path;
         }
       }
     } else {
       // First time this dex-pc is seen.
       iter = slow_path_map_.Put(dex_pc,
                                 {{}, {graph_->GetAllocator()->Adapter(kArenaAllocSlowPaths)}});
     }
     // Cannot share: create and add new slow-path for this particular dex-pc.
     SlowPathCodeType* slow_path =
         new (codegen_->GetScopedAllocator()) SlowPathCodeType(instruction);
     iter->second.emplace_back(std::make_pair(instruction, slow_path));
     codegen_->AddSlowPath(slow_path);
     return slow_path;
   }

  private:
   // Tests if both instructions have same set of live physical registers. This ensures
   // the slow-path has exactly the same preamble on saving these registers to stack.
   bool HaveSameLiveRegisters(const InstructionType* i1, const InstructionType* i2) const {
     const uint32_t core_spill = ~codegen_->GetCoreSpillMask();
     const uint32_t fpu_spill = ~codegen_->GetFpuSpillMask();
     RegisterSet* live1 = i1->GetLocations()->GetLiveRegisters();
     RegisterSet* live2 = i2->GetLocations()->GetLiveRegisters();
     return (((live1->GetCoreRegisters() & core_spill) ==
              (live2->GetCoreRegisters() & core_spill)) &&
             ((live1->GetFloatingPointRegisters() & fpu_spill) ==
              (live2->GetFloatingPointRegisters() & fpu_spill)));
   }

   // Tests if both instructions have the same stack map. This ensures the interpreter
   // will find exactly the same dex-registers at the same entries.
   bool HaveSameStackMap(const InstructionType* i1, const InstructionType* i2) const {
     DCHECK(i1->HasEnvironment());
     DCHECK(i2->HasEnvironment());
     // We conservatively test if the two instructions find exactly the same instructions
     // and location in each dex-register. This guarantees they will have the same stack map.
     HEnvironment* e1 = i1->GetEnvironment();
     HEnvironment* e2 = i2->GetEnvironment();
     if (e1->GetParent() != e2->GetParent() || e1->Size() != e2->Size()) {
       return false;
     }
     for (size_t i = 0, sz = e1->Size(); i < sz; ++i) {
       if (e1->GetInstructionAt(i) != e2->GetInstructionAt(i) ||
           !e1->GetLocationAt(i).Equals(e2->GetLocationAt(i))) {
         return false;
       }
     }
     return true;
   }

   HGraph* const graph_;
   CodeGenerator* const codegen_;

   // Map from dex-pc to vector of already existing instruction/slow-path pairs.
   ArenaSafeMap<uint32_t, ArenaVector<std::pair<InstructionType*, SlowPathCode*>>> slow_path_map_;

   DISALLOW_COPY_AND_ASSIGN(SlowPathGenerator);
 };

 // Graph visitor that owns the slow-path generators used to share slow-paths between
 // instructions. Both constructor arguments are forwarded to `deopt_slow_paths_`;
 // `graph` is also passed to the HGraphVisitor base.
 class InstructionCodeGenerator : public HGraphVisitor {
  public:
   InstructionCodeGenerator(HGraph* graph, CodeGenerator* codegen)
       : HGraphVisitor(graph),
         deopt_slow_paths_(graph, codegen) {}

  protected:
   // Add slow-path generator for each instruction/slow-path combination that desires sharing.
   // TODO: under current regime, only deopt sharing makes sense; extend later.
   SlowPathGenerator<HDeoptimize> deopt_slow_paths_;
 };

 }  // namespace art

 #endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_