Fix i-cache misses for GetKind on x86, x86_64
GetKind() takes about 2.6% of total compilation time on x86_64.
The primary reason is that the call target, GetKindInternal(), often lies
beyond the page boundary, causing frequent i-cache misses.
This patch removes the virtual call to GetKindInternal() and instead
stores the InstructionKind in each constructed instruction.
Since we have about 121 instruction kinds in total as of now,
this takes about 7 extra bits in each instruction (2^7 = 128 >= 121).
dex2oat runs about 12% faster with --compiler-filter=everything on an
APK of 25MB.
Test: Tested the patch by running host art tests.
Rebased.
Change-Id: Ia7bbcd67180151e4565507164a718acbb6284885
Signed-off-by: Gupta Kumar, Sanjiv <sanjiv.kumar.gupta@intel.com>
diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h
index ecabdf3..0d38d57 100644
--- a/compiler/optimizing/nodes_vector.h
+++ b/compiler/optimizing/nodes_vector.h
@@ -71,13 +71,15 @@
// TODO: we could introduce SIMD types in HIR.
static constexpr DataType::Type kSIMDType = DataType::Type::kFloat64;
- HVecOperation(ArenaAllocator* allocator,
+ HVecOperation(InstructionKind kind,
+ ArenaAllocator* allocator,
DataType::Type packed_type,
SideEffects side_effects,
size_t number_of_inputs,
size_t vector_length,
uint32_t dex_pc)
- : HVariableInputSizeInstruction(side_effects,
+ : HVariableInputSizeInstruction(kind,
+ side_effects,
dex_pc,
allocator,
number_of_inputs,
@@ -196,12 +198,14 @@
// Abstraction of a unary vector operation.
class HVecUnaryOperation : public HVecOperation {
public:
- HVecUnaryOperation(ArenaAllocator* allocator,
+ HVecUnaryOperation(InstructionKind kind,
+ ArenaAllocator* allocator,
HInstruction* input,
DataType::Type packed_type,
size_t vector_length,
uint32_t dex_pc)
- : HVecOperation(allocator,
+ : HVecOperation(kind,
+ allocator,
packed_type,
SideEffects::None(),
/* number_of_inputs */ 1,
@@ -221,13 +225,15 @@
// Abstraction of a binary vector operation.
class HVecBinaryOperation : public HVecOperation {
public:
- HVecBinaryOperation(ArenaAllocator* allocator,
+ HVecBinaryOperation(InstructionKind kind,
+ ArenaAllocator* allocator,
HInstruction* left,
HInstruction* right,
DataType::Type packed_type,
size_t vector_length,
uint32_t dex_pc)
- : HVecOperation(allocator,
+ : HVecOperation(kind,
+ allocator,
packed_type,
SideEffects::None(),
/* number_of_inputs */ 2,
@@ -250,13 +256,15 @@
// The Android runtime guarantees elements have at least natural alignment.
class HVecMemoryOperation : public HVecOperation {
public:
- HVecMemoryOperation(ArenaAllocator* allocator,
+ HVecMemoryOperation(InstructionKind kind,
+ ArenaAllocator* allocator,
DataType::Type packed_type,
SideEffects side_effects,
size_t number_of_inputs,
size_t vector_length,
uint32_t dex_pc)
- : HVecOperation(allocator,
+ : HVecOperation(kind,
+ allocator,
packed_type,
side_effects,
number_of_inputs,
@@ -315,7 +323,8 @@
DataType::Type packed_type,
size_t vector_length,
uint32_t dex_pc)
- : HVecUnaryOperation(allocator, scalar, packed_type, vector_length, dex_pc) {
+ : HVecUnaryOperation(
+ kVecReplicateScalar, allocator, scalar, packed_type, vector_length, dex_pc) {
DCHECK(!scalar->IsVecOperation());
}
@@ -341,7 +350,8 @@
size_t vector_length,
size_t index,
uint32_t dex_pc)
- : HVecUnaryOperation(allocator, input, packed_type, vector_length, dex_pc) {
+ : HVecUnaryOperation(
+ kVecExtractScalar, allocator, input, packed_type, vector_length, dex_pc) {
DCHECK(HasConsistentPackedTypes(input, packed_type));
DCHECK_LT(index, vector_length);
DCHECK_EQ(index, 0u);
@@ -379,7 +389,7 @@
size_t vector_length,
ReductionKind kind,
uint32_t dex_pc)
- : HVecUnaryOperation(allocator, input, packed_type, vector_length, dex_pc),
+ : HVecUnaryOperation(kVecReduce, allocator, input, packed_type, vector_length, dex_pc),
kind_(kind) {
DCHECK(HasConsistentPackedTypes(input, packed_type));
}
@@ -412,7 +422,7 @@
DataType::Type packed_type,
size_t vector_length,
uint32_t dex_pc)
- : HVecUnaryOperation(allocator, input, packed_type, vector_length, dex_pc) {
+ : HVecUnaryOperation(kVecCnv, allocator, input, packed_type, vector_length, dex_pc) {
DCHECK(input->IsVecOperation());
DCHECK_NE(GetInputType(), GetResultType()); // actual convert
}
@@ -437,7 +447,7 @@
DataType::Type packed_type,
size_t vector_length,
uint32_t dex_pc)
- : HVecUnaryOperation(allocator, input, packed_type, vector_length, dex_pc) {
+ : HVecUnaryOperation(kVecNeg, allocator, input, packed_type, vector_length, dex_pc) {
DCHECK(HasConsistentPackedTypes(input, packed_type));
}
@@ -459,7 +469,7 @@
DataType::Type packed_type,
size_t vector_length,
uint32_t dex_pc)
- : HVecUnaryOperation(allocator, input, packed_type, vector_length, dex_pc) {
+ : HVecUnaryOperation(kVecAbs, allocator, input, packed_type, vector_length, dex_pc) {
DCHECK(HasConsistentPackedTypes(input, packed_type));
}
@@ -481,7 +491,7 @@
DataType::Type packed_type,
size_t vector_length,
uint32_t dex_pc)
- : HVecUnaryOperation(allocator, input, packed_type, vector_length, dex_pc) {
+ : HVecUnaryOperation(kVecNot, allocator, input, packed_type, vector_length, dex_pc) {
DCHECK(input->IsVecOperation());
}
@@ -507,7 +517,7 @@
DataType::Type packed_type,
size_t vector_length,
uint32_t dex_pc)
- : HVecBinaryOperation(allocator, left, right, packed_type, vector_length, dex_pc) {
+ : HVecBinaryOperation(kVecAdd, allocator, left, right, packed_type, vector_length, dex_pc) {
DCHECK(HasConsistentPackedTypes(left, packed_type));
DCHECK(HasConsistentPackedTypes(right, packed_type));
}
@@ -533,7 +543,8 @@
size_t vector_length,
bool is_rounded,
uint32_t dex_pc)
- : HVecBinaryOperation(allocator, left, right, packed_type, vector_length, dex_pc) {
+ : HVecBinaryOperation(
+ kVecHalvingAdd, allocator, left, right, packed_type, vector_length, dex_pc) {
DCHECK(HasConsistentPackedTypes(left, packed_type));
DCHECK(HasConsistentPackedTypes(right, packed_type));
SetPackedFlag<kFieldHAddIsRounded>(is_rounded);
@@ -571,7 +582,7 @@
DataType::Type packed_type,
size_t vector_length,
uint32_t dex_pc)
- : HVecBinaryOperation(allocator, left, right, packed_type, vector_length, dex_pc) {
+ : HVecBinaryOperation(kVecSub, allocator, left, right, packed_type, vector_length, dex_pc) {
DCHECK(HasConsistentPackedTypes(left, packed_type));
DCHECK(HasConsistentPackedTypes(right, packed_type));
}
@@ -594,7 +605,7 @@
DataType::Type packed_type,
size_t vector_length,
uint32_t dex_pc)
- : HVecBinaryOperation(allocator, left, right, packed_type, vector_length, dex_pc) {
+ : HVecBinaryOperation(kVecMul, allocator, left, right, packed_type, vector_length, dex_pc) {
DCHECK(HasConsistentPackedTypes(left, packed_type));
DCHECK(HasConsistentPackedTypes(right, packed_type));
}
@@ -617,7 +628,7 @@
DataType::Type packed_type,
size_t vector_length,
uint32_t dex_pc)
- : HVecBinaryOperation(allocator, left, right, packed_type, vector_length, dex_pc) {
+ : HVecBinaryOperation(kVecDiv, allocator, left, right, packed_type, vector_length, dex_pc) {
DCHECK(HasConsistentPackedTypes(left, packed_type));
DCHECK(HasConsistentPackedTypes(right, packed_type));
}
@@ -641,7 +652,7 @@
DataType::Type packed_type,
size_t vector_length,
uint32_t dex_pc)
- : HVecBinaryOperation(allocator, left, right, packed_type, vector_length, dex_pc) {
+ : HVecBinaryOperation(kVecMin, allocator, left, right, packed_type, vector_length, dex_pc) {
DCHECK(HasConsistentPackedTypes(left, packed_type));
DCHECK(HasConsistentPackedTypes(right, packed_type));
}
@@ -665,7 +676,7 @@
DataType::Type packed_type,
size_t vector_length,
uint32_t dex_pc)
- : HVecBinaryOperation(allocator, left, right, packed_type, vector_length, dex_pc) {
+ : HVecBinaryOperation(kVecMax, allocator, left, right, packed_type, vector_length, dex_pc) {
DCHECK(HasConsistentPackedTypes(left, packed_type));
DCHECK(HasConsistentPackedTypes(right, packed_type));
}
@@ -688,7 +699,7 @@
DataType::Type packed_type,
size_t vector_length,
uint32_t dex_pc)
- : HVecBinaryOperation(allocator, left, right, packed_type, vector_length, dex_pc) {
+ : HVecBinaryOperation(kVecAnd, allocator, left, right, packed_type, vector_length, dex_pc) {
DCHECK(left->IsVecOperation() && right->IsVecOperation());
}
@@ -710,7 +721,8 @@
DataType::Type packed_type,
size_t vector_length,
uint32_t dex_pc)
- : HVecBinaryOperation(allocator, left, right, packed_type, vector_length, dex_pc) {
+ : HVecBinaryOperation(
+ kVecAndNot, allocator, left, right, packed_type, vector_length, dex_pc) {
DCHECK(left->IsVecOperation() && right->IsVecOperation());
}
@@ -732,7 +744,7 @@
DataType::Type packed_type,
size_t vector_length,
uint32_t dex_pc)
- : HVecBinaryOperation(allocator, left, right, packed_type, vector_length, dex_pc) {
+ : HVecBinaryOperation(kVecOr, allocator, left, right, packed_type, vector_length, dex_pc) {
DCHECK(left->IsVecOperation() && right->IsVecOperation());
}
@@ -754,7 +766,7 @@
DataType::Type packed_type,
size_t vector_length,
uint32_t dex_pc)
- : HVecBinaryOperation(allocator, left, right, packed_type, vector_length, dex_pc) {
+ : HVecBinaryOperation(kVecXor, allocator, left, right, packed_type, vector_length, dex_pc) {
DCHECK(left->IsVecOperation() && right->IsVecOperation());
}
@@ -776,7 +788,7 @@
DataType::Type packed_type,
size_t vector_length,
uint32_t dex_pc)
- : HVecBinaryOperation(allocator, left, right, packed_type, vector_length, dex_pc) {
+ : HVecBinaryOperation(kVecShl, allocator, left, right, packed_type, vector_length, dex_pc) {
DCHECK(HasConsistentPackedTypes(left, packed_type));
}
@@ -798,7 +810,7 @@
DataType::Type packed_type,
size_t vector_length,
uint32_t dex_pc)
- : HVecBinaryOperation(allocator, left, right, packed_type, vector_length, dex_pc) {
+ : HVecBinaryOperation(kVecShr, allocator, left, right, packed_type, vector_length, dex_pc) {
DCHECK(HasConsistentPackedTypes(left, packed_type));
}
@@ -820,7 +832,7 @@
DataType::Type packed_type,
size_t vector_length,
uint32_t dex_pc)
- : HVecBinaryOperation(allocator, left, right, packed_type, vector_length, dex_pc) {
+ : HVecBinaryOperation(kVecUShr, allocator, left, right, packed_type, vector_length, dex_pc) {
DCHECK(HasConsistentPackedTypes(left, packed_type));
}
@@ -847,7 +859,8 @@
size_t vector_length,
size_t number_of_scalars,
uint32_t dex_pc)
- : HVecOperation(allocator,
+ : HVecOperation(kVecSetScalars,
+ allocator,
packed_type,
SideEffects::None(),
number_of_scalars,
@@ -881,7 +894,8 @@
DataType::Type packed_type,
size_t vector_length,
uint32_t dex_pc)
- : HVecOperation(allocator,
+ : HVecOperation(kVecMultiplyAccumulate,
+ allocator,
packed_type,
SideEffects::None(),
/* number_of_inputs */ 3,
@@ -931,7 +945,8 @@
DataType::Type packed_type,
size_t vector_length,
uint32_t dex_pc)
- : HVecOperation(allocator,
+ : HVecOperation(kVecSADAccumulate,
+ allocator,
packed_type,
SideEffects::None(),
/* number_of_inputs */ 3,
@@ -965,7 +980,8 @@
size_t vector_length,
bool is_string_char_at,
uint32_t dex_pc)
- : HVecMemoryOperation(allocator,
+ : HVecMemoryOperation(kVecLoad,
+ allocator,
packed_type,
side_effects,
/* number_of_inputs */ 2,
@@ -1010,7 +1026,8 @@
SideEffects side_effects,
size_t vector_length,
uint32_t dex_pc)
- : HVecMemoryOperation(allocator,
+ : HVecMemoryOperation(kVecStore,
+ allocator,
packed_type,
side_effects,
/* number_of_inputs */ 3,