summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Konstantin Baladurin <konstantin.baladurin@arm.com> 2024-09-14 01:26:02 +0100
committer Santiago Aboy Solanes <solanes@google.com> 2024-10-15 16:29:45 +0000
commit 6127341b747ac26b361fd1779e155d940d49e39f (patch)
tree e1c40de3dda7f6bc5048fff6ea5fb4deaffe6bbc
parent b50d59f4ca5557a7719dc26157f8f2fd9006913a (diff)
Arm64: fix VecPredToBoolean code generation for SVE
This patch fixes code generation for VecPredToBoolean so it updates
conditional flags itself based on its predicate input. Prior to this
patch, code generation for VecPredToBoolean (incorrectly) implicitly
assumed that the conditional flags were always updated by its input HIR
(VecPredWhile) and that it immediately followed that HIR.

Authors: Konstantin Baladurin <konstantin.baladurin@arm.com>
         Chris Jones <christopher.jones@arm.com>

Test: env ART_FORCE_TRY_PREDICATED_SIMD=true art/test.py --target --optimizing
Test: art/tools/run-gtests.sh
Change-Id: Id4c2494cdefd008509f9039e36081151aaf0e4a6
-rw-r--r-- compiler/common_compiler_test.cc 10
-rw-r--r-- compiler/common_compiler_test.h 8
-rw-r--r-- compiler/optimizing/code_generator_arm64.cc 10
-rw-r--r-- compiler/optimizing/code_generator_arm64.h 2
-rw-r--r-- compiler/optimizing/code_generator_vector_arm64_sve.cc 8
-rw-r--r-- compiler/optimizing/codegen_test.cc 44
-rw-r--r-- compiler/optimizing/load_store_analysis_test.cc 27
-rw-r--r-- compiler/optimizing/nodes_vector.h 5
-rw-r--r-- compiler/optimizing/optimizing_unit_test.h 47
-rw-r--r-- simulator/code_simulator_arm64.cc 1
10 files changed, 144 insertions, 18 deletions
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
index 392a0d1001..d627af8158 100644
--- a/compiler/common_compiler_test.cc
+++ b/compiler/common_compiler_test.cc
@@ -186,7 +186,9 @@ class CommonCompilerTestImpl::OneCompiledMethodStorage final : public CompiledCo
};
std::unique_ptr<CompilerOptions> CommonCompilerTestImpl::CreateCompilerOptions(
- InstructionSet instruction_set, const std::string& variant) {
+ InstructionSet instruction_set,
+ const std::string& variant,
+ const std::optional<std::string>& extra_features) {
std::unique_ptr<CompilerOptions> compiler_options = std::make_unique<CompilerOptions>();
compiler_options->emit_read_barrier_ = gUseReadBarrier;
compiler_options->instruction_set_ = instruction_set;
@@ -194,6 +196,12 @@ std::unique_ptr<CompilerOptions> CommonCompilerTestImpl::CreateCompilerOptions(
compiler_options->instruction_set_features_ =
InstructionSetFeatures::FromVariant(instruction_set, variant, &error_msg);
CHECK(compiler_options->instruction_set_features_ != nullptr) << error_msg;
+ if (extra_features) {
+ compiler_options->instruction_set_features_ =
+ compiler_options->instruction_set_features_->AddFeaturesFromString(*extra_features,
+ &error_msg);
+ CHECK_NE(compiler_options->instruction_set_features_, nullptr) << error_msg;
+ }
return compiler_options;
}
diff --git a/compiler/common_compiler_test.h b/compiler/common_compiler_test.h
index 32a5234797..80b26eefe9 100644
--- a/compiler/common_compiler_test.h
+++ b/compiler/common_compiler_test.h
@@ -44,8 +44,12 @@ template<class T> class Handle;
// Export all symbols in `CommonCompilerTestImpl` for dex2oat tests.
class EXPORT CommonCompilerTestImpl {
public:
- static std::unique_ptr<CompilerOptions> CreateCompilerOptions(InstructionSet instruction_set,
- const std::string& variant);
+ // Create compiler options from the given instruction set and variant. Optionally use a string of
+ // instruction set features in addition to the features from the variant.
+ static std::unique_ptr<CompilerOptions> CreateCompilerOptions(
+ InstructionSet instruction_set,
+ const std::string& variant,
+ const std::optional<std::string>& extra_features = std::nullopt);
CommonCompilerTestImpl();
virtual ~CommonCompilerTestImpl();
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index d56146eb0e..13d126d3d4 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -138,6 +138,16 @@ inline Condition ARM64FPCondition(IfCondition cond, bool gt_bias) {
}
}
+Condition ARM64PCondition(HVecPredToBoolean::PCondKind cond) {
+ switch (cond) {
+ case HVecPredToBoolean::PCondKind::kFirst: return mi;
+ case HVecPredToBoolean::PCondKind::kNFirst: return pl;
+ default:
+ LOG(FATAL) << "Unsupported condition type: " << enum_cast<uint32_t>(cond);
+ UNREACHABLE();
+ }
+}
+
Location ARM64ReturnLocation(DataType::Type return_type) {
// Note that in practice, `LocationFrom(x0)` and `LocationFrom(w0)` create the
// same Location object, and so do `LocationFrom(d0)` and `LocationFrom(s0)`,
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index c6427b2da3..11c130ce2d 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -130,6 +130,8 @@ const vixl::aarch64::CPURegList callee_saved_fp_registers(vixl::aarch64::CPURegi
vixl::aarch64::d15.GetCode());
Location ARM64ReturnLocation(DataType::Type return_type);
+vixl::aarch64::Condition ARM64PCondition(HVecPredToBoolean::PCondKind cond);
+
#define UNIMPLEMENTED_INTRINSIC_LIST_ARM64(V) \
V(MathSignumFloat) \
V(MathSignumDouble) \
diff --git a/compiler/optimizing/code_generator_vector_arm64_sve.cc b/compiler/optimizing/code_generator_vector_arm64_sve.cc
index ef79932899..ccff02e6b6 100644
--- a/compiler/optimizing/code_generator_vector_arm64_sve.cc
+++ b/compiler/optimizing/code_generator_vector_arm64_sve.cc
@@ -1335,10 +1335,10 @@ void InstructionCodeGeneratorARM64Sve::VisitVecPredToBoolean(HVecPredToBoolean*
// Instruction is not predicated, see nodes_vector.h
DCHECK(!instruction->IsPredicated());
Register reg = OutputRegister(instruction);
- // Currently VecPredToBoolean is only used as part of vectorized loop check condition
- // evaluation.
- DCHECK(instruction->GetPCondKind() == HVecPredToBoolean::PCondKind::kNFirst);
- __ Cset(reg, pl);
+ HInstruction *input = instruction->InputAt(0);
+ const PRegister output_p_reg = GetVecPredSetFixedOutPReg(input->AsVecPredSetOperation());
+ __ Ptest(output_p_reg, output_p_reg.VnB());
+ __ Cset(reg, ARM64PCondition(instruction->GetPCondKind()));
}
Location InstructionCodeGeneratorARM64Sve::AllocateSIMDScratchLocation(
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index 75b0ea2a08..4f8551ab87 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -932,6 +932,50 @@ TEST_F(CodegenTest, ARM64FrameSizeNoSIMD) {
EXPECT_EQ(codegen.GetFpuSpillSize(), kExpectedFPSpillSize);
}
+// This test checks that the result of the VecPredToBoolean instruction doesn't depend on
+// conditional flags that can be updated by other instructions. For example:
+//
+// VecPredWhile p0, opa, opb
+// Below opb, opa
+// VecPredToBoolean p0
+//
+// where Below updates condition flags after VecPredWhile.
+TEST_F(CodegenTest, ARM64SvePredicateToBoolean) {
+ std::unique_ptr<CompilerOptions> compiler_options =
+ CommonCompilerTest::CreateCompilerOptions(InstructionSet::kArm64, "default", "sve");
+ for (int i = 0; i < 2; i++) {
+ for (int j = 0; j < 2; j++) {
+ HBasicBlock* block = InitEntryMainExitGraph();
+ TestCodeGeneratorARM64 codegen(graph_, *compiler_options);
+ if (!codegen.SupportsPredicatedSIMD()) {
+ GTEST_SKIP() << "Predicated SIMD is not supported.";
+ }
+
+ HInstruction *opa = graph_->GetIntConstant(i);
+ HInstruction *opb = graph_->GetIntConstant(j);
+ HVecPredWhile *pred_while = MakeVecPredWhile(block,
+ opa,
+ opb,
+ HVecPredWhile::CondKind::kLO,
+ DataType::Type::kInt32);
+ // Update condition flags by using Below instruction.
+ MakeCondition(block, IfCondition::kCondB, opb, opa);
+ HVecPredToBoolean *boolean = MakeVecPredToBoolean(block,
+ pred_while,
+ HVecPredToBoolean::PCondKind::kNFirst,
+ DataType::Type::kInt32);
+ MakeReturn(block, boolean);
+
+ graph_->SetHasPredicatedSIMD(true);
+ graph_->BuildDominatorTree();
+
+ if (CanExecute(codegen)) {
+ RunCode(&codegen, graph_, [](HGraph*) {}, true, i >= j);
+ }
+ }
+ }
+}
+
#endif
} // namespace art
diff --git a/compiler/optimizing/load_store_analysis_test.cc b/compiler/optimizing/load_store_analysis_test.cc
index b6a7cd9c76..7c2e918d23 100644
--- a/compiler/optimizing/load_store_analysis_test.cc
+++ b/compiler/optimizing/load_store_analysis_test.cc
@@ -226,6 +226,9 @@ TEST_F(LoadStoreAnalysisTest, ArrayIndexAliasingTest) {
}
TEST_F(LoadStoreAnalysisTest, ArrayAliasingTest) {
+ constexpr size_t vlen1 = kDefaultTestVectorSize;
+ constexpr size_t vlen2 = vlen1 / 2;
+
HBasicBlock* main = InitEntryMainExitGraphWithReturnVoid();
HInstruction* array = MakeParam(DataType::Type::kReference);
@@ -242,26 +245,32 @@ TEST_F(LoadStoreAnalysisTest, ArrayAliasingTest) {
HVecOperation* v1 = new (GetAllocator()) HVecReplicateScalar(GetAllocator(),
c1,
DataType::Type::kInt32,
- 4,
+ vlen1,
kNoDexPc);
AddOrInsertInstruction(main, v1);
HVecOperation* v2 = new (GetAllocator()) HVecReplicateScalar(GetAllocator(),
c1,
DataType::Type::kInt32,
- 2,
+ vlen2,
kNoDexPc);
AddOrInsertInstruction(main, v2);
HInstruction* i_add6 = MakeBinOp<HAdd>(main, DataType::Type::kInt32, index, c6);
HInstruction* i_add8 = MakeBinOp<HAdd>(main, DataType::Type::kInt32, index, c8);
- HInstruction* vstore_0 = MakeVecStore(main, array, c0, v1, DataType::Type::kInt32);
- HInstruction* vstore_1 = MakeVecStore(main, array, c1, v1, DataType::Type::kInt32);
- HInstruction* vstore_8 = MakeVecStore(main, array, c8, v1, DataType::Type::kInt32);
- HInstruction* vstore_i = MakeVecStore(main, array, index, v1, DataType::Type::kInt32);
- HInstruction* vstore_i_add6 = MakeVecStore(main, array, i_add6, v1, DataType::Type::kInt32);
- HInstruction* vstore_i_add8 = MakeVecStore(main, array, i_add8, v1, DataType::Type::kInt32);
+ HInstruction* vstore_0 =
+ MakeVecStore(main, array, c0, v1, DataType::Type::kInt32, vlen1);
+ HInstruction* vstore_1 =
+ MakeVecStore(main, array, c1, v1, DataType::Type::kInt32, vlen1);
+ HInstruction* vstore_8 =
+ MakeVecStore(main, array, c8, v1, DataType::Type::kInt32, vlen1);
+ HInstruction* vstore_i =
+ MakeVecStore(main, array, index, v1, DataType::Type::kInt32, vlen1);
+ HInstruction* vstore_i_add6 =
+ MakeVecStore(main, array, i_add6, v1, DataType::Type::kInt32, vlen1);
+ HInstruction* vstore_i_add8 =
+ MakeVecStore(main, array, i_add8, v1, DataType::Type::kInt32, vlen1);
HInstruction* vstore_i_add6_vlen2 =
- MakeVecStore(main, array, i_add6, v2, DataType::Type::kInt32, /*vector_lengt=*/ 2);
+ MakeVecStore(main, array, i_add6, v2, DataType::Type::kInt32, vlen2);
graph_->BuildDominatorTree();
ScopedArenaAllocator allocator(graph_->GetArenaStack());
diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h
index 6a60d6be01..a5f5720f7e 100644
--- a/compiler/optimizing/nodes_vector.h
+++ b/compiler/optimizing/nodes_vector.h
@@ -1529,6 +1529,11 @@ class HVecPredNot final : public HVecPredSetOperation {
DEFAULT_COPY_CONSTRUCTOR(VecPredNot);
};
+// Return the number of elements of the given type that will fit into a vector of given size.
+inline size_t GetNumberOfElementsInVector(size_t vector_size_in_bytes, DataType::Type type) {
+ return vector_size_in_bytes / DataType::Size(type);
+}
+
} // namespace art
#endif // ART_COMPILER_OPTIMIZING_NODES_VECTOR_H_
diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h
index a7c3558c5f..e2f3e0a510 100644
--- a/compiler/optimizing/optimizing_unit_test.h
+++ b/compiler/optimizing/optimizing_unit_test.h
@@ -632,15 +632,54 @@ class OptimizingUnitTestHelper {
HInstruction* index,
HInstruction* value,
DataType::Type packed_type,
- size_t vector_length = 4,
+ size_t vector_size_in_bytes = kDefaultTestVectorSize,
uint32_t dex_pc = kNoDexPc) {
+ size_t num_of_elements = GetNumberOfElementsInVector(vector_size_in_bytes, packed_type);
SideEffects side_effects = SideEffects::ArrayWriteOfType(packed_type);
HVecStore* vec_store = new (GetAllocator()) HVecStore(
- GetAllocator(), base, index, value, packed_type, side_effects, vector_length, dex_pc);
+ GetAllocator(), base, index, value, packed_type, side_effects, num_of_elements, dex_pc);
AddOrInsertInstruction(block, vec_store);
return vec_store;
}
+ HVecPredToBoolean* MakeVecPredToBoolean(HBasicBlock* block,
+ HInstruction* input,
+ HVecPredToBoolean::PCondKind pred_cond,
+ DataType::Type packed_type,
+ size_t vector_size_in_bytes = kDefaultTestVectorSize,
+ uint32_t dex_pc = kNoDexPc) {
+ size_t num_of_elements = GetNumberOfElementsInVector(vector_size_in_bytes, packed_type);
+ HVecPredToBoolean* vec_pred_to_boolean = new (GetAllocator()) HVecPredToBoolean(
+ GetAllocator(),
+ input,
+ pred_cond,
+ packed_type,
+ num_of_elements,
+ dex_pc);
+ AddOrInsertInstruction(block, vec_pred_to_boolean);
+ return vec_pred_to_boolean;
+ }
+
+ HVecPredWhile* MakeVecPredWhile(HBasicBlock* block,
+ HInstruction* left,
+ HInstruction* right,
+ HVecPredWhile::CondKind cond,
+ DataType::Type packed_type,
+ size_t vector_size_in_bytes = kDefaultTestVectorSize,
+ uint32_t dex_pc = kNoDexPc) {
+ size_t num_of_elements = GetNumberOfElementsInVector(vector_size_in_bytes, packed_type);
+ HVecPredWhile* vec_pred_while = new (GetAllocator()) HVecPredWhile(
+ GetAllocator(),
+ left,
+ right,
+ cond,
+ packed_type,
+ num_of_elements,
+ dex_pc);
+ AddOrInsertInstruction(block, vec_pred_while);
+ return vec_pred_while;
+ }
+
HInvokeStaticOrDirect* MakeInvokeStatic(HBasicBlock* block,
DataType::Type return_type,
const std::vector<HInstruction*>& args,
@@ -836,6 +875,10 @@ class OptimizingUnitTestHelper {
size_t class_idx_ = 42;
uint32_t method_idx_ = 100;
+ // The default size of vectors to use for tests, in bytes. 16 bytes (128 bits) is used as it is
+ // commonly the smallest size of vector used in vector extensions.
+ static constexpr size_t kDefaultTestVectorSize = 16;
+
ScopedNullHandle<mirror::Class> null_klass_;
};
diff --git a/simulator/code_simulator_arm64.cc b/simulator/code_simulator_arm64.cc
index 32ca005a76..08a5deae1f 100644
--- a/simulator/code_simulator_arm64.cc
+++ b/simulator/code_simulator_arm64.cc
@@ -47,6 +47,7 @@ CodeSimulatorArm64::CodeSimulatorArm64()
SimStack::Allocated stack = stack_builder.Allocate();
simulator_ = new Simulator(decoder_, stdout, std::move(stack));
+ simulator_->SetVectorLengthInBits(kArm64DefaultSVEVectorLength);
}
CodeSimulatorArm64::~CodeSimulatorArm64() {