// compiler/optimizing/scheduler_arm64.h - LeafOS-Project/android_art - Gitiles

 /*
  * Copyright (C) 2016 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 #ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_ARM64_H_
 #define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM64_H_

 #include "scheduler.h"

 namespace art {
 namespace arm64 {

 // AArch64 instruction latency table.
 // For now a single table is assumed to be valid for every arm64 CPU.

 // Memory accesses and calls.
 static constexpr uint32_t kArm64MemoryStoreLatency = 3;
 static constexpr uint32_t kArm64MemoryLoadLatency = 5;
 static constexpr uint32_t kArm64CallLatency = 5;
 static constexpr uint32_t kArm64CallInternalLatency = 10;

 // Generic scalar operations. Branches reuse the generic integer latency.
 static constexpr uint32_t kArm64IntegerOpLatency = 2;
 static constexpr uint32_t kArm64BranchLatency = kArm64IntegerOpLatency;
 static constexpr uint32_t kArm64FloatingPointOpLatency = 5;

 // Scalar operations that have their own entry.
 static constexpr uint32_t kArm64DataProcWithShifterOpLatency = 3;
 static constexpr uint32_t kArm64DivIntegerLatency = 5;
 static constexpr uint32_t kArm64DivFloatLatency = 15;
 static constexpr uint32_t kArm64DivDoubleLatency = 30;
 static constexpr uint32_t kArm64MulIntegerLatency = 6;
 static constexpr uint32_t kArm64MulFloatingPointLatency = 6;
 static constexpr uint32_t kArm64TypeConversionFloatingPointIntegerLatency = 5;
 static constexpr uint32_t kArm64LoadStringInternalLatency = 7;

 // SIMD operations.
 static constexpr uint32_t kArm64SIMDIntegerOpLatency = 6;
 static constexpr uint32_t kArm64SIMDFloatingPointOpLatency = 10;
 static constexpr uint32_t kArm64SIMDMemoryStoreLatency = 6;
 static constexpr uint32_t kArm64SIMDMemoryLoadLatency = 10;
 static constexpr uint32_t kArm64SIMDMulIntegerLatency = 12;
 static constexpr uint32_t kArm64SIMDMulFloatingPointLatency = 12;
 static constexpr uint32_t kArm64SIMDDivFloatLatency = 30;
 static constexpr uint32_t kArm64SIMDDivDoubleLatency = 60;
 static constexpr uint32_t kArm64SIMDReplicateOpLatency = 16;
 static constexpr uint32_t kArm64SIMDTypeConversionInt2FPLatency = 10;

 // ARM64-specific latency visitor for the instruction scheduler.
 //
 // Visit methods for the instruction kinds listed in the macros below are only declared
 // here; their definitions live outside this header. Any other instruction falls back to
 // VisitInstruction(), which records `kArm64IntegerOpLatency`.
 class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor {
  public:
   // Default visitor for instructions not handled specifically below.
   // Marked OVERRIDE like every other Visit method of this class so that a signature
   // drift in the base visitor is caught at compile time.
   void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) OVERRIDE {
     last_visited_latency_ = kArm64IntegerOpLatency;
   }

 // Common instruction kinds that get a dedicated visitor in this class.
 // We add a second unused parameter to be able to use this macro like the others
 // defined in `nodes.h`.
 // This list is expanded again by HSchedulerARM64::IsSchedulable(), so it is deliberately
 // left defined after the class.
 #define FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(M)     \
   M(ArrayGet             , unused)                   \
   M(ArrayLength          , unused)                   \
   M(ArraySet             , unused)                   \
   M(BinaryOperation      , unused)                   \
   M(BoundsCheck          , unused)                   \
   M(Div                  , unused)                   \
   M(InstanceFieldGet     , unused)                   \
   M(InstanceOf           , unused)                   \
   M(Invoke               , unused)                   \
   M(LoadString           , unused)                   \
   M(Mul                  , unused)                   \
   M(NewArray             , unused)                   \
   M(NewInstance          , unused)                   \
   M(Rem                  , unused)                   \
   M(StaticFieldGet       , unused)                   \
   M(SuspendCheck         , unused)                   \
   M(TypeConversion       , unused)                   \
   M(VecReplicateScalar   , unused)                   \
   M(VecExtractScalar     , unused)                   \
   M(VecReduce            , unused)                   \
   M(VecCnv               , unused)                   \
   M(VecNeg               , unused)                   \
   M(VecAbs               , unused)                   \
   M(VecNot               , unused)                   \
   M(VecAdd               , unused)                   \
   M(VecHalvingAdd        , unused)                   \
   M(VecSub               , unused)                   \
   M(VecMul               , unused)                   \
   M(VecDiv               , unused)                   \
   M(VecMin               , unused)                   \
   M(VecMax               , unused)                   \
   M(VecAnd               , unused)                   \
   M(VecAndNot            , unused)                   \
   M(VecOr                , unused)                   \
   M(VecXor               , unused)                   \
   M(VecShl               , unused)                   \
   M(VecShr               , unused)                   \
   M(VecUShr              , unused)                   \
   M(VecSetScalars        , unused)                   \
   M(VecMultiplyAccumulate, unused)                   \
   M(VecLoad              , unused)                   \
   M(VecStore             , unused)

 // Shared instruction kinds that get a dedicated visitor in this class.
 // Also expanded by HSchedulerARM64::IsSchedulable(), hence not #undef'd here.
 #define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \
   M(BitwiseNegatedRight, unused)                 \
   M(MultiplyAccumulate, unused)                  \
   M(IntermediateAddress, unused)                 \
   M(IntermediateAddressIndex, unused)            \
   M(DataProcWithShifterOp, unused)

 // Declares `void Visit<type>(H<type>*)` as an override; the second argument is ignored.
 #define DECLARE_VISIT_INSTRUCTION(type, unused)  \
   void Visit##type(H##type* instruction) OVERRIDE;

   FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
   FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
   FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION)

 #undef DECLARE_VISIT_INSTRUCTION

  private:
   // Private helpers defined outside this header.
   // NOTE(review): presumably shared by the Vec* visitors above - confirm in the
   // implementation file.
   void HandleSimpleArithmeticSIMD(HVecOperation* instr);
   void HandleVecAddress(HVecMemoryOperation* instruction, size_t size);
 };

 // HScheduler specialisation for ARM64 that owns its own SchedulingLatencyVisitorARM64.
 class HSchedulerARM64 : public HScheduler {
  public:
   // Forwards `allocator` and `selector` to HScheduler together with the address of the
   // embedded ARM64 latency visitor.
   HSchedulerARM64(ScopedArenaAllocator* allocator, SchedulingNodeSelector* selector)
       : HScheduler(allocator, &arm64_latency_visitor_, selector) {}
   ~HSchedulerARM64() OVERRIDE {}

   // An instruction is schedulable when its kind appears in the shared, the ARM64
   // concrete, or the common scheduled-instruction list. Every other kind is left to
   // HScheduler::IsSchedulable().
   bool IsSchedulable(const HInstruction* instruction) const OVERRIDE {
 #define CASE_INSTRUCTION_KIND(type, unused) \
   case HInstruction::InstructionKind::k##type:
     switch (instruction->GetKind()) {
       FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(CASE_INSTRUCTION_KIND)
       FOR_EACH_CONCRETE_INSTRUCTION_ARM64(CASE_INSTRUCTION_KIND)
       FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(CASE_INSTRUCTION_KIND)
         return true;
       default:
         break;
     }
 #undef CASE_INSTRUCTION_KIND
     return HScheduler::IsSchedulable(instruction);
   }

   // Vector instructions whose live ranges extend past the vectorized loop are treated as
   // scheduling barriers. The compiler has no notion of a SIMD register, so around a call
   // all live SIMD&FP registers must be saved/restored (only the lower 64 bits of SIMD&FP
   // registers are callee saved); such vector instructions are therefore never reordered.
   //
   // TODO: remove this when a proper support of SIMD registers is introduced to the compiler.
   bool IsSchedulingBarrier(const HInstruction* instr) const OVERRIDE {
     if (HScheduler::IsSchedulingBarrier(instr)) {
       return true;
     }
     const bool is_barrier_vec_op = instr->IsVecReduce() ||
                                    instr->IsVecExtractScalar() ||
                                    instr->IsVecSetScalars() ||
                                    instr->IsVecReplicateScalar();
     return is_barrier_vec_op;
   }

  private:
   SchedulingLatencyVisitorARM64 arm64_latency_visitor_;
   DISALLOW_COPY_AND_ASSIGN(HSchedulerARM64);
 };

 }  // namespace arm64
 }  // namespace art

 #endif  // ART_COMPILER_OPTIMIZING_SCHEDULER_ARM64_H_
	/*
	* Copyright (C) 2016 The Android Open Source Project
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	#ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_ARM64_H_
	#define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM64_H_

	#include "scheduler.h"

	namespace art {
	namespace arm64 {

	// AArch64 instruction latency table.
	// For now a single table is assumed to be valid for every arm64 CPU.

	// Memory accesses and calls.
	static constexpr uint32_t kArm64MemoryStoreLatency = 3;
	static constexpr uint32_t kArm64MemoryLoadLatency = 5;
	static constexpr uint32_t kArm64CallLatency = 5;
	static constexpr uint32_t kArm64CallInternalLatency = 10;

	// Generic scalar operations. Branches reuse the generic integer latency.
	static constexpr uint32_t kArm64IntegerOpLatency = 2;
	static constexpr uint32_t kArm64BranchLatency = kArm64IntegerOpLatency;
	static constexpr uint32_t kArm64FloatingPointOpLatency = 5;

	// Scalar operations that have their own entry.
	static constexpr uint32_t kArm64DataProcWithShifterOpLatency = 3;
	static constexpr uint32_t kArm64DivIntegerLatency = 5;
	static constexpr uint32_t kArm64DivFloatLatency = 15;
	static constexpr uint32_t kArm64DivDoubleLatency = 30;
	static constexpr uint32_t kArm64MulIntegerLatency = 6;
	static constexpr uint32_t kArm64MulFloatingPointLatency = 6;
	static constexpr uint32_t kArm64TypeConversionFloatingPointIntegerLatency = 5;
	static constexpr uint32_t kArm64LoadStringInternalLatency = 7;

	// SIMD operations.
	static constexpr uint32_t kArm64SIMDIntegerOpLatency = 6;
	static constexpr uint32_t kArm64SIMDFloatingPointOpLatency = 10;
	static constexpr uint32_t kArm64SIMDMemoryStoreLatency = 6;
	static constexpr uint32_t kArm64SIMDMemoryLoadLatency = 10;
	static constexpr uint32_t kArm64SIMDMulIntegerLatency = 12;
	static constexpr uint32_t kArm64SIMDMulFloatingPointLatency = 12;
	static constexpr uint32_t kArm64SIMDDivFloatLatency = 30;
	static constexpr uint32_t kArm64SIMDDivDoubleLatency = 60;
	static constexpr uint32_t kArm64SIMDReplicateOpLatency = 16;
	static constexpr uint32_t kArm64SIMDTypeConversionInt2FPLatency = 10;

	// ARM64-specific latency visitor for the instruction scheduler.
	//
	// Visit methods for the instruction kinds listed in the macros below are only declared
	// here; their definitions live outside this header. Any other instruction falls back to
	// VisitInstruction(), which records `kArm64IntegerOpLatency`.
	class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor {
	 public:
	  // Default visitor for instructions not handled specifically below.
	  // Marked OVERRIDE like every other Visit method of this class so that a signature
	  // drift in the base visitor is caught at compile time.
	  void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) OVERRIDE {
	    last_visited_latency_ = kArm64IntegerOpLatency;
	  }

	// Common instruction kinds that get a dedicated visitor in this class.
	// We add a second unused parameter to be able to use this macro like the others
	// defined in `nodes.h`.
	// This list is expanded again by HSchedulerARM64::IsSchedulable(), so it is deliberately
	// left defined after the class.
	#define FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(M)     \
	  M(ArrayGet             , unused)                   \
	  M(ArrayLength          , unused)                   \
	  M(ArraySet             , unused)                   \
	  M(BinaryOperation      , unused)                   \
	  M(BoundsCheck          , unused)                   \
	  M(Div                  , unused)                   \
	  M(InstanceFieldGet     , unused)                   \
	  M(InstanceOf           , unused)                   \
	  M(Invoke               , unused)                   \
	  M(LoadString           , unused)                   \
	  M(Mul                  , unused)                   \
	  M(NewArray             , unused)                   \
	  M(NewInstance          , unused)                   \
	  M(Rem                  , unused)                   \
	  M(StaticFieldGet       , unused)                   \
	  M(SuspendCheck         , unused)                   \
	  M(TypeConversion       , unused)                   \
	  M(VecReplicateScalar   , unused)                   \
	  M(VecExtractScalar     , unused)                   \
	  M(VecReduce            , unused)                   \
	  M(VecCnv               , unused)                   \
	  M(VecNeg               , unused)                   \
	  M(VecAbs               , unused)                   \
	  M(VecNot               , unused)                   \
	  M(VecAdd               , unused)                   \
	  M(VecHalvingAdd        , unused)                   \
	  M(VecSub               , unused)                   \
	  M(VecMul               , unused)                   \
	  M(VecDiv               , unused)                   \
	  M(VecMin               , unused)                   \
	  M(VecMax               , unused)                   \
	  M(VecAnd               , unused)                   \
	  M(VecAndNot            , unused)                   \
	  M(VecOr                , unused)                   \
	  M(VecXor               , unused)                   \
	  M(VecShl               , unused)                   \
	  M(VecShr               , unused)                   \
	  M(VecUShr              , unused)                   \
	  M(VecSetScalars        , unused)                   \
	  M(VecMultiplyAccumulate, unused)                   \
	  M(VecLoad              , unused)                   \
	  M(VecStore             , unused)

	// Shared instruction kinds that get a dedicated visitor in this class.
	// Also expanded by HSchedulerARM64::IsSchedulable(), hence not #undef'd here.
	#define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \
	  M(BitwiseNegatedRight, unused)                 \
	  M(MultiplyAccumulate, unused)                  \
	  M(IntermediateAddress, unused)                 \
	  M(IntermediateAddressIndex, unused)            \
	  M(DataProcWithShifterOp, unused)

	// Declares `void Visit<type>(H<type>*)` as an override; the second argument is ignored.
	#define DECLARE_VISIT_INSTRUCTION(type, unused)  \
	  void Visit##type(H##type* instruction) OVERRIDE;

	  FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
	  FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
	  FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION)

	#undef DECLARE_VISIT_INSTRUCTION

	 private:
	  // Private helpers defined outside this header.
	  // NOTE(review): presumably shared by the Vec* visitors above - confirm in the
	  // implementation file.
	  void HandleSimpleArithmeticSIMD(HVecOperation* instr);
	  void HandleVecAddress(HVecMemoryOperation* instruction, size_t size);
	};

	// HScheduler specialisation for ARM64 that owns its own SchedulingLatencyVisitorARM64.
	class HSchedulerARM64 : public HScheduler {
	 public:
	  // Forwards `allocator` and `selector` to HScheduler together with the address of the
	  // embedded ARM64 latency visitor.
	  HSchedulerARM64(ScopedArenaAllocator* allocator, SchedulingNodeSelector* selector)
	      : HScheduler(allocator, &arm64_latency_visitor_, selector) {}
	  ~HSchedulerARM64() OVERRIDE {}

	  // An instruction is schedulable when its kind appears in the shared, the ARM64
	  // concrete, or the common scheduled-instruction list. Every other kind is left to
	  // HScheduler::IsSchedulable().
	  bool IsSchedulable(const HInstruction* instruction) const OVERRIDE {
	#define CASE_INSTRUCTION_KIND(type, unused) case \
	  HInstruction::InstructionKind::k##type:
	    switch (instruction->GetKind()) {
	      FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(CASE_INSTRUCTION_KIND)
	        return true;
	      FOR_EACH_CONCRETE_INSTRUCTION_ARM64(CASE_INSTRUCTION_KIND)
	        return true;
	      FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(CASE_INSTRUCTION_KIND)
	        return true;
	      default:
	        return HScheduler::IsSchedulable(instruction);
	    }
	#undef CASE_INSTRUCTION_KIND
	  }

	  // Treat as scheduling barriers those vector instructions whose live ranges exceed the vectorized
	  // loop boundaries. This is a workaround for the lack of notion of SIMD register in the compiler;
	  // around a call we have to save/restore all live SIMD&FP registers (only lower 64 bits of
	  // SIMD&FP registers are callee saved) so don't reorder such vector instructions.
	  //
	  // TODO: remove this when a proper support of SIMD registers is introduced to the compiler.
	  bool IsSchedulingBarrier(const HInstruction* instr) const OVERRIDE {
	    // The operators below must be plain `||`; a backslash-escaped `\|\|` is not valid C++.
	    return HScheduler::IsSchedulingBarrier(instr) ||
	           instr->IsVecReduce() ||
	           instr->IsVecExtractScalar() ||
	           instr->IsVecSetScalars() ||
	           instr->IsVecReplicateScalar();
	  }

	 private:
	  SchedulingLatencyVisitorARM64 arm64_latency_visitor_;
	  DISALLOW_COPY_AND_ASSIGN(HSchedulerARM64);
	};

	} // namespace arm64
	} // namespace art

	#endif // ART_COMPILER_OPTIMIZING_SCHEDULER_ARM64_H_