ARM64: Share address computation across SIMD LDRs/STRs.
For array accesses the element address has the following structure:
Address = CONST_OFFSET + base_addr + (index << ELEM_SHIFT)
Taking into account the ARM64 LDR/STR addressing modes, the address part
(CONST_OFFSET + (index << ELEM_SHIFT)) can be shared across array
accesses with the same data type and index.
For example, in the following loop 5 accesses can share the address
computation:
void foo(int[] a, int[] b, int[] c) {
for (i...) {
a[i] = a[i] + 5;
b[i] = b[i] + c[i];
}
}
Test: test-art-host, test-art-target
Change-Id: I46af3b4e4a55004336672cdba3296b7622d815ca
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index d7cc577..7601125 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -6260,6 +6260,15 @@
}
}
+void LocationsBuilderARM::VisitIntermediateAddressIndex(HIntermediateAddressIndex* instruction) {
+ LOG(FATAL) << "Unreachable " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARM::VisitIntermediateAddressIndex(
+ HIntermediateAddressIndex* instruction) {
+ LOG(FATAL) << "Unreachable " << instruction->GetId();
+}
+
void LocationsBuilderARM::VisitBoundsCheck(HBoundsCheck* instruction) {
RegisterSet caller_saves = RegisterSet::Empty();
InvokeRuntimeCallingConvention calling_convention;
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index eee832a..9f2272b 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -2644,6 +2644,38 @@
Operand(InputOperandAt(instruction, 1)));
}
+void LocationsBuilderARM64::VisitIntermediateAddressIndex(HIntermediateAddressIndex* instruction) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+
+ HIntConstant* shift = instruction->GetShift()->AsIntConstant();
+
+ locations->SetInAt(0, Location::RequiresRegister());
+ // For the byte case we don't need to shift the index variable, so we can encode the data offset
+ // into the ADD instruction. For other cases we prefer the data offset to be in a register; that
+ // hoists the data offset constant generation out of the loop and reduces the critical path
+ // length in the loop.
+ locations->SetInAt(1, shift->GetValue() == 0
+ ? Location::ConstantLocation(instruction->GetOffset()->AsIntConstant())
+ : Location::RequiresRegister());
+ locations->SetInAt(2, Location::ConstantLocation(shift));
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorARM64::VisitIntermediateAddressIndex(
+ HIntermediateAddressIndex* instruction) {
+ Register index_reg = InputRegisterAt(instruction, 0);
+ uint32_t shift = Int64ConstantFrom(instruction->GetLocations()->InAt(2));
+ uint32_t offset = instruction->GetOffset()->AsIntConstant()->GetValue();
+
+ if (shift == 0) {
+ __ Add(OutputRegister(instruction), index_reg, offset);
+ } else {
+ Register offset_reg = InputRegisterAt(instruction, 1);
+ __ Add(OutputRegister(instruction), offset_reg, Operand(index_reg, LSL, shift));
+ }
+}
+
void LocationsBuilderARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(instr, LocationSummary::kNoCall);
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index b6678b0..23a3477 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -6299,6 +6299,16 @@
}
}
+void LocationsBuilderARMVIXL::VisitIntermediateAddressIndex(
+ HIntermediateAddressIndex* instruction) {
+ LOG(FATAL) << "Unreachable " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitIntermediateAddressIndex(
+ HIntermediateAddressIndex* instruction) {
+ LOG(FATAL) << "Unreachable " << instruction->GetId();
+}
+
void LocationsBuilderARMVIXL::VisitBoundsCheck(HBoundsCheck* instruction) {
RegisterSet caller_saves = RegisterSet::Empty();
InvokeRuntimeCallingConventionARMVIXL calling_convention;
diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc
index 57f7e6b..478bd24 100644
--- a/compiler/optimizing/code_generator_vector_arm64.cc
+++ b/compiler/optimizing/code_generator_vector_arm64.cc
@@ -783,6 +783,12 @@
/*out*/ Register* scratch) {
LocationSummary* locations = instruction->GetLocations();
Register base = InputRegisterAt(instruction, 0);
+
+ if (instruction->InputAt(1)->IsIntermediateAddressIndex()) {
+ DCHECK(!is_string_char_at);
+ return MemOperand(base.X(), InputRegisterAt(instruction, 1).X());
+ }
+
Location index = locations->InAt(1);
uint32_t offset = is_string_char_at
? mirror::String::ValueOffset().Uint32Value()
diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc
index f16e372..311be1f 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.cc
+++ b/compiler/optimizing/instruction_simplifier_arm64.cc
@@ -216,5 +216,18 @@
}
}
+void InstructionSimplifierArm64Visitor::VisitVecLoad(HVecLoad* instruction) {
+ if (!instruction->IsStringCharAt()
+ && TryExtractVecArrayAccessAddress(instruction, instruction->GetIndex())) {
+ RecordSimplification();
+ }
+}
+
+void InstructionSimplifierArm64Visitor::VisitVecStore(HVecStore* instruction) {
+ if (TryExtractVecArrayAccessAddress(instruction, instruction->GetIndex())) {
+ RecordSimplification();
+ }
+}
+
} // namespace arm64
} // namespace art
diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h
index eec4e49..8596f6a 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.h
+++ b/compiler/optimizing/instruction_simplifier_arm64.h
@@ -75,6 +75,8 @@
void VisitUShr(HUShr* instruction) OVERRIDE;
void VisitXor(HXor* instruction) OVERRIDE;
void VisitVecMul(HVecMul* instruction) OVERRIDE;
+ void VisitVecLoad(HVecLoad* instruction) OVERRIDE;
+ void VisitVecStore(HVecStore* instruction) OVERRIDE;
OptimizingCompilerStats* stats_;
};
diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc
index c39e5f4..e5a8499 100644
--- a/compiler/optimizing/instruction_simplifier_shared.cc
+++ b/compiler/optimizing/instruction_simplifier_shared.cc
@@ -16,6 +16,8 @@
#include "instruction_simplifier_shared.h"
+#include "mirror/array-inl.h"
+
namespace art {
namespace {
@@ -346,4 +348,59 @@
return false;
}
+bool TryExtractVecArrayAccessAddress(HVecMemoryOperation* access, HInstruction* index) {
+ if (index->IsConstant()) {
+ // If index is constant the whole address calculation often can be done by LDR/STR themselves.
+ // TODO: Treat the case with a non-embeddable constant.
+ return false;
+ }
+
+ HGraph* graph = access->GetBlock()->GetGraph();
+ ArenaAllocator* arena = graph->GetArena();
+ Primitive::Type packed_type = access->GetPackedType();
+ uint32_t data_offset = mirror::Array::DataOffset(
+ Primitive::ComponentSize(packed_type)).Uint32Value();
+ size_t component_shift = Primitive::ComponentSizeShift(packed_type);
+
+ bool is_extracting_beneficial = false;
+ // It is beneficial to extract index intermediate address only if there are at least 2 users.
+ for (const HUseListNode<HInstruction*>& use : index->GetUses()) {
+ HInstruction* user = use.GetUser();
+ if (user->IsVecMemoryOperation() && user != access) {
+ HVecMemoryOperation* another_access = user->AsVecMemoryOperation();
+ Primitive::Type another_packed_type = another_access->GetPackedType();
+ uint32_t another_data_offset = mirror::Array::DataOffset(
+ Primitive::ComponentSize(another_packed_type)).Uint32Value();
+ size_t another_component_shift = Primitive::ComponentSizeShift(another_packed_type);
+ if (another_data_offset == data_offset && another_component_shift == component_shift) {
+ is_extracting_beneficial = true;
+ break;
+ }
+ } else if (user->IsIntermediateAddressIndex()) {
+ HIntermediateAddressIndex* another_access = user->AsIntermediateAddressIndex();
+ uint32_t another_data_offset = another_access->GetOffset()->AsIntConstant()->GetValue();
+ size_t another_component_shift = another_access->GetShift()->AsIntConstant()->GetValue();
+ if (another_data_offset == data_offset && another_component_shift == component_shift) {
+ is_extracting_beneficial = true;
+ break;
+ }
+ }
+ }
+
+ if (!is_extracting_beneficial) {
+ return false;
+ }
+
+ // Proceed to extract the index + data_offset address computation.
+ HIntConstant* offset = graph->GetIntConstant(data_offset);
+ HIntConstant* shift = graph->GetIntConstant(component_shift);
+ HIntermediateAddressIndex* address =
+ new (arena) HIntermediateAddressIndex(index, offset, shift, kNoDexPc);
+
+ access->GetBlock()->InsertInstructionBefore(address, access);
+ access->ReplaceInput(address, 1);
+
+ return true;
+}
+
} // namespace art
diff --git a/compiler/optimizing/instruction_simplifier_shared.h b/compiler/optimizing/instruction_simplifier_shared.h
index 2ea103a..371619f 100644
--- a/compiler/optimizing/instruction_simplifier_shared.h
+++ b/compiler/optimizing/instruction_simplifier_shared.h
@@ -59,6 +59,7 @@
size_t data_offset);
bool TryCombineVecMultiplyAccumulate(HVecMul* mul, InstructionSet isa);
+bool TryExtractVecArrayAccessAddress(HVecMemoryOperation* access, HInstruction* index);
} // namespace art
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 36c7df7..00d2988 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1396,7 +1396,8 @@
M(BitwiseNegatedRight, Instruction) \
M(DataProcWithShifterOp, Instruction) \
M(MultiplyAccumulate, Instruction) \
- M(IntermediateAddress, Instruction)
+ M(IntermediateAddress, Instruction) \
+ M(IntermediateAddressIndex, Instruction)
#endif
#ifndef ART_ENABLE_CODEGEN_arm
diff --git a/compiler/optimizing/nodes_shared.h b/compiler/optimizing/nodes_shared.h
index c6bfbcc..075a816 100644
--- a/compiler/optimizing/nodes_shared.h
+++ b/compiler/optimizing/nodes_shared.h
@@ -150,6 +150,49 @@
DISALLOW_COPY_AND_ASSIGN(HIntermediateAddress);
};
+// This instruction computes part of the array access offset (data and index offset).
+//
+// For array accesses the element address has the following structure:
+// Address = CONST_OFFSET + base_addr + (index << ELEM_SHIFT). Given the LDR/STR addressing modes,
+// the address part (CONST_OFFSET + (index << ELEM_SHIFT)) can be shared across array accesses with
+// the same data type and index. For example, in the following loop 5 accesses can share the
+// address computation:
+//
+// void foo(int[] a, int[] b, int[] c) {
+// for (i...) {
+// a[i] = a[i] + 5;
+// b[i] = b[i] + c[i];
+// }
+// }
+//
+// Note: as the instruction doesn't involve the base array address in its computation, it has no
+// side effects (unlike HIntermediateAddress).
+class HIntermediateAddressIndex FINAL : public HExpression<3> {
+ public:
+ HIntermediateAddressIndex(
+ HInstruction* index, HInstruction* offset, HInstruction* shift, uint32_t dex_pc)
+ : HExpression(Primitive::kPrimInt, SideEffects::None(), dex_pc) {
+ SetRawInputAt(0, index);
+ SetRawInputAt(1, offset);
+ SetRawInputAt(2, shift);
+ }
+
+ bool CanBeMoved() const OVERRIDE { return true; }
+ bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+ return true;
+ }
+ bool IsActualObject() const OVERRIDE { return false; }
+
+ HInstruction* GetIndex() const { return InputAt(0); }
+ HInstruction* GetOffset() const { return InputAt(1); }
+ HInstruction* GetShift() const { return InputAt(2); }
+
+ DECLARE_INSTRUCTION(IntermediateAddressIndex);
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HIntermediateAddressIndex);
+};
+
class HDataProcWithShifterOp FINAL : public HExpression<2> {
public:
enum OpKind {
diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h
index 52c247b..92fe9bf 100644
--- a/compiler/optimizing/nodes_vector.h
+++ b/compiler/optimizing/nodes_vector.h
@@ -178,12 +178,17 @@
size_t vector_length,
uint32_t dex_pc)
: HVecOperation(arena, packed_type, side_effects, number_of_inputs, vector_length, dex_pc),
- alignment_(Primitive::ComponentSize(packed_type), 0) { }
+ alignment_(Primitive::ComponentSize(packed_type), 0) {
+ DCHECK_GE(number_of_inputs, 2u);
+ }
void SetAlignment(Alignment alignment) { alignment_ = alignment; }
Alignment GetAlignment() const { return alignment_; }
+ HInstruction* GetArray() const { return InputAt(0); }
+ HInstruction* GetIndex() const { return InputAt(1); }
+
DECLARE_ABSTRACT_INSTRUCTION(VecMemoryOperation);
private:
diff --git a/test/527-checker-array-access-simd/expected.txt b/test/527-checker-array-access-simd/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/527-checker-array-access-simd/expected.txt
diff --git a/test/527-checker-array-access-simd/info.txt b/test/527-checker-array-access-simd/info.txt
new file mode 100644
index 0000000..f147943
--- /dev/null
+++ b/test/527-checker-array-access-simd/info.txt
@@ -0,0 +1 @@
+Test arm64-specific array access optimization for SIMD loops.
diff --git a/test/527-checker-array-access-simd/src/Main.java b/test/527-checker-array-access-simd/src/Main.java
new file mode 100644
index 0000000..8af5465
--- /dev/null
+++ b/test/527-checker-array-access-simd/src/Main.java
@@ -0,0 +1,223 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+ public static void assertIntEquals(int expected, int result) {
+ if (expected != result) {
+ throw new Error("Expected: " + expected + ", found: " + result);
+ }
+ }
+
+ /// CHECK-START-ARM64: void Main.checkIntCase(int[]) instruction_simplifier_arm64 (before)
+ /// CHECK-DAG: <<Array:l\d+>> ParameterValue
+ /// CHECK-DAG: <<Const5:i\d+>> IntConstant 5
+ /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Const5>>]
+ // -------------- Loop
+ /// CHECK-DAG: <<Index:i\d+>> Phi
+ /// CHECK-DAG: If
+ /// CHECK-DAG: <<Load:d\d+>> VecLoad [<<Array>>,<<Index>>]
+ /// CHECK-DAG: <<Add:d\d+>> VecAdd [<<Load>>,<<Repl>>]
+ /// CHECK-DAG: VecStore [<<Array>>,<<Index>>,<<Add>>]
+
+ /// CHECK-START-ARM64: void Main.checkIntCase(int[]) instruction_simplifier_arm64 (after)
+ /// CHECK-DAG: <<Array:l\d+>> ParameterValue
+ /// CHECK-DAG: <<Const5:i\d+>> IntConstant 5
+ /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant 12
+ /// CHECK-DAG: <<Const2:i\d+>> IntConstant 2
+ /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Const5>>]
+ // -------------- Loop
+ /// CHECK-DAG: <<Index:i\d+>> Phi
+ /// CHECK-DAG: If
+ /// CHECK-DAG: <<Address1:i\d+>> IntermediateAddressIndex [<<Index>>,<<DataOffset>>,<<Const2>>]
+ /// CHECK-DAG: <<Load:d\d+>> VecLoad [<<Array>>,<<Address1>>]
+ /// CHECK-DAG: <<Add:d\d+>> VecAdd [<<Load>>,<<Repl>>]
+ /// CHECK-DAG: <<Address2:i\d+>> IntermediateAddressIndex [<<Index>>,<<DataOffset>>,<<Const2>>]
+ /// CHECK-DAG: VecStore [<<Array>>,<<Address2>>,<<Add>>]
+
+ /// CHECK-START-ARM64: void Main.checkIntCase(int[]) GVN$after_arch (after)
+ /// CHECK-DAG: <<Array:l\d+>> ParameterValue
+ /// CHECK-DAG: <<Const5:i\d+>> IntConstant 5
+ /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant 12
+ /// CHECK-DAG: <<Const2:i\d+>> IntConstant 2
+ /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Const5>>]
+ // -------------- Loop
+ /// CHECK-DAG: <<Index:i\d+>> Phi
+ /// CHECK-DAG: If
+ /// CHECK-DAG: <<Address1:i\d+>> IntermediateAddressIndex [<<Index>>,<<DataOffset>>,<<Const2>>]
+ /// CHECK-DAG: <<Load:d\d+>> VecLoad [<<Array>>,<<Address1>>]
+ /// CHECK-DAG: <<Add:d\d+>> VecAdd [<<Load>>,<<Repl>>]
+ /// CHECK-NOT: IntermediateAddress
+ /// CHECK-DAG: VecStore [<<Array>>,<<Address1>>,<<Add>>]
+
+ /// CHECK-START-ARM64: void Main.checkIntCase(int[]) disassembly (after)
+ /// CHECK: IntermediateAddressIndex
+ /// CHECK-NEXT: add w{{[0-9]+}}, w{{[0-9]+}}, w{{[0-9]+}}, lsl #2
+ public static void checkIntCase(int[] a) {
+ for (int i = 0; i < 128; i++) {
+ a[i] += 5;
+ }
+ }
+
+ /// CHECK-START-ARM64: void Main.checkByteCase(byte[]) instruction_simplifier_arm64 (before)
+ /// CHECK-DAG: <<Array:l\d+>> ParameterValue
+ /// CHECK-DAG: <<Const5:i\d+>> IntConstant 5
+ /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Const5>>]
+ // -------------- Loop
+ /// CHECK-DAG: <<Index:i\d+>> Phi
+ /// CHECK-DAG: If
+ /// CHECK-DAG: <<Load:d\d+>> VecLoad [<<Array>>,<<Index>>]
+ /// CHECK-DAG: <<Add:d\d+>> VecAdd [<<Load>>,<<Repl>>]
+ /// CHECK-DAG: VecStore [<<Array>>,<<Index>>,<<Add>>]
+
+ /// CHECK-START-ARM64: void Main.checkByteCase(byte[]) instruction_simplifier_arm64 (after)
+ /// CHECK-DAG: <<Array:l\d+>> ParameterValue
+ /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0
+ /// CHECK-DAG: <<Const5:i\d+>> IntConstant 5
+ /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant 12
+ /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Const5>>]
+ // -------------- Loop
+ /// CHECK-DAG: <<Index:i\d+>> Phi
+ /// CHECK-DAG: If
+ /// CHECK-DAG: <<Address1:i\d+>> IntermediateAddressIndex [<<Index>>,<<DataOffset>>,<<Const0>>]
+ /// CHECK-DAG: <<Load:d\d+>> VecLoad [<<Array>>,<<Address1>>]
+ /// CHECK-DAG: <<Add:d\d+>> VecAdd [<<Load>>,<<Repl>>]
+ /// CHECK-DAG: <<Address2:i\d+>> IntermediateAddressIndex [<<Index>>,<<DataOffset>>,<<Const0>>]
+ /// CHECK-DAG: VecStore [<<Array>>,<<Address2>>,<<Add>>]
+
+ /// CHECK-START-ARM64: void Main.checkByteCase(byte[]) GVN$after_arch (after)
+ /// CHECK-DAG: <<Array:l\d+>> ParameterValue
+ /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0
+ /// CHECK-DAG: <<Const5:i\d+>> IntConstant 5
+ /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant 12
+ /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Const5>>]
+ // -------------- Loop
+ /// CHECK-DAG: <<Index:i\d+>> Phi
+ /// CHECK-DAG: If
+ /// CHECK-DAG: <<Address1:i\d+>> IntermediateAddressIndex [<<Index>>,<<DataOffset>>,<<Const0>>]
+ /// CHECK-DAG: <<Load:d\d+>> VecLoad [<<Array>>,<<Address1>>]
+ /// CHECK-DAG: <<Add:d\d+>> VecAdd [<<Load>>,<<Repl>>]
+ /// CHECK-NOT: IntermediateAddress
+ /// CHECK-DAG: VecStore [<<Array>>,<<Address1>>,<<Add>>]
+
+ /// CHECK-START-ARM64: void Main.checkByteCase(byte[]) disassembly (after)
+ /// CHECK: IntermediateAddressIndex
+ /// CHECK-NEXT: add w{{[0-9]+}}, w{{[0-9]+}}, #0x{{[0-9a-fA-F]+}}
+ /// CHECK: VecLoad
+ /// CHECK-NEXT: ldr q{{[0-9]+}}, [x{{[0-9]+}}, x{{[0-9]+}}]
+ /// CHECK: VecStore
+ /// CHECK-NEXT: str q{{[0-9]+}}, [x{{[0-9]+}}, x{{[0-9]+}}]
+ public static void checkByteCase(byte[] a) {
+ for (int i = 0; i < 128; i++) {
+ a[i] += 5;
+ }
+ }
+
+ /// CHECK-START-ARM64: void Main.checkSingleAccess(int[]) instruction_simplifier_arm64 (before)
+ /// CHECK-DAG: <<Array:l\d+>> ParameterValue
+ /// CHECK-DAG: <<Const5:i\d+>> IntConstant 5
+ /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Const5>>]
+ // -------------- Loop
+ /// CHECK-DAG: <<Index:i\d+>> Phi
+ /// CHECK-DAG: If
+ /// CHECK-DAG: VecStore [<<Array>>,<<Index>>,<<Repl>>]
+
+ /// CHECK-START-ARM64: void Main.checkSingleAccess(int[]) instruction_simplifier_arm64 (after)
+ /// CHECK-DAG: <<Array:l\d+>> ParameterValue
+ /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0
+ /// CHECK-DAG: <<Const5:i\d+>> IntConstant 5
+ /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Const5>>]
+ // -------------- Loop
+ /// CHECK-DAG: <<Index:i\d+>> Phi
+ /// CHECK-DAG: If
+ /// CHECK-DAG: VecStore [<<Array>>,<<Index>>,<<Repl>>]
+ /// CHECK-NOT: IntermediateAddress
+ public static void checkSingleAccess(int[] a) {
+ for (int i = 0; i < 128; i++) {
+ a[i] = 5;
+ }
+ }
+
+ /// CHECK-START-ARM64: void Main.checkInt2Float(int[], float[]) instruction_simplifier_arm64 (before)
+ /// CHECK-DAG: <<Array1:l\d+>> ParameterValue
+ /// CHECK-DAG: <<Array2:l\d+>> ParameterValue
+ // -------------- Loop
+ /// CHECK-DAG: <<Index:i\d+>> Phi
+ /// CHECK-DAG: If
+ /// CHECK-DAG: <<Load:d\d+>> VecLoad [<<Array1>>,<<Index>>]
+ /// CHECK-DAG: <<Cnv:d\d+>> VecCnv [<<Load>>]
+ /// CHECK-DAG: VecStore [<<Array2>>,<<Index>>,<<Cnv>>]
+
+ /// CHECK-START-ARM64: void Main.checkInt2Float(int[], float[]) instruction_simplifier_arm64 (after)
+ /// CHECK-DAG: <<Array1:l\d+>> ParameterValue
+ /// CHECK-DAG: <<Array2:l\d+>> ParameterValue
+ /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant 12
+ /// CHECK-DAG: <<Const2:i\d+>> IntConstant 2
+ // -------------- Loop
+ /// CHECK-DAG: <<Index:i\d+>> Phi
+ /// CHECK-DAG: If
+ /// CHECK-DAG: <<Address1:i\d+>> IntermediateAddressIndex [<<Index>>,<<DataOffset>>,<<Const2>>]
+ /// CHECK-DAG: <<Load:d\d+>> VecLoad [<<Array1>>,<<Address1>>]
+ /// CHECK-DAG: <<Cnv:d\d+>> VecCnv [<<Load>>]
+ /// CHECK-DAG: <<Address2:i\d+>> IntermediateAddressIndex [<<Index>>,<<DataOffset>>,<<Const2>>]
+ /// CHECK-DAG: VecStore [<<Array2>>,<<Address2>>,<<Cnv>>]
+
+ /// CHECK-START-ARM64: void Main.checkInt2Float(int[], float[]) GVN$after_arch (after)
+ /// CHECK-DAG: <<Array1:l\d+>> ParameterValue
+ /// CHECK-DAG: <<Array2:l\d+>> ParameterValue
+ /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant 12
+ /// CHECK-DAG: <<Const2:i\d+>> IntConstant 2
+ // -------------- Loop
+ /// CHECK-DAG: <<Index:i\d+>> Phi
+ /// CHECK-DAG: If
+ /// CHECK-DAG: <<Address1:i\d+>> IntermediateAddressIndex [<<Index>>,<<DataOffset>>,<<Const2>>]
+ /// CHECK-DAG: <<Load:d\d+>> VecLoad [<<Array1>>,<<Address1>>]
+ /// CHECK-DAG: <<Cnv:d\d+>> VecCnv [<<Load>>]
+ /// CHECK-NOT: IntermediateAddress
+ /// CHECK-DAG: VecStore [<<Array2>>,<<Address1>>,<<Cnv>>]
+
+ /// CHECK-START-ARM64: void Main.checkInt2Float(int[], float[]) disassembly (after)
+ /// CHECK: IntermediateAddressIndex
+ /// CHECK-NEXT: add w{{[0-9]+}}, w{{[0-9]+}}, w{{[0-9]+}}, lsl #2
+ public static void checkInt2Float(int[] a, float[] b) {
+ for (int i = 0; i < 128; i++) {
+ b[i] = (float) a[i];
+ }
+ }
+
+ public static final int ARRAY_SIZE = 1024;
+
+ public static int calcArraySum(int[] a, byte[] b, float[] c) {
+ int sum = 0;
+ for (int i = 0; i < 128; i++) {
+ sum += a[i] + b[i] + (int) c[i];
+ }
+ return sum;
+ }
+
+ public static void main(String[] args) {
+ byte[] ba = new byte[ARRAY_SIZE];
+ int[] ia = new int[ARRAY_SIZE];
+ float[] fa = new float[ARRAY_SIZE];
+
+ checkSingleAccess(ia);
+ checkIntCase(ia);
+ checkByteCase(ba);
+ checkInt2Float(ia, fa);
+
+ assertIntEquals(3200, calcArraySum(ia, ba, fa));
+ }
+}