author Artem Serov <artem.serov@arm.com> 2024-10-07 13:48:54 +0100
committer Treehugger Robot <android-test-infra-autosubmit@system.gserviceaccount.com> 2024-10-15 09:27:57 +0000
commit b544a1017753bfef7b91a2ea3024ffbde6cd5bb9 (patch)
tree 0b54d507a49e9980337893949ed51e681b82fc83
parent d3af812dd1c95e9f87c55cfa996354a115c08414 (diff)
Arm64: Fix PackedSwitch codegen for large methods.
This patch fixes a bug in arm64 PackedSwitch code generation for very large
methods, where the range of the Adr instruction was exceeded because jump
tables were emitted at the very end of the method. Instead, we now emit the
jump table in place, as part of the PackedSwitch visitor, in the same way as
it is done in the arm32 backend.

This patch also removes an incorrect assumption that the size of a method
depends linearly on the number of its HIR instructions. This was used to
decide whether to emit a jump table for a PackedSwitch.

Test: art/test.py --target --host --optimizing
Test: art/test.py --gtest art_compiler_tests

Change-Id: I0795811a6408a25021879ab6be9e23ef5f1f50e4
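At the instruction level, the fix keeps the dispatch code and its jump table in one
contiguous block, so the Adr target is trivially in range. The sequence now emitted for
a jump-table PackedSwitch, condensed from the hunks below (a sketch, not a verbatim
excerpt; register names follow the surrounding code):

  // Sketch only, condensed from this patch. Everything below sits under a single
  // ExactAssemblyScope of kInstructionSize * 4 + num_entries * sizeof(int32_t) bytes.
  __ adr(table_base, jump_table->GetTableStartLabel());           // table starts right after the br,
                                                                  // so adr is always in range
  __ ldr(jump_offset, MemOperand(table_base, index, UXTW, 2));    // 32-bit offset for this case
  __ add(target_address, table_base, Operand(jump_offset, SXTW));
  __ br(target_address);
  jump_table->EmitTable(codegen_);   // binds table_start_ and places one zero-valued
                                     // Literal<int32_t> per entry; patched later by FixTable()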
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc | 51
-rw-r--r--  compiler/optimizing/code_generator_arm64.h  | 22
-rw-r--r--  compiler/optimizing/codegen_test.cc         | 67
-rw-r--r--  compiler/optimizing/optimizing_unit_test.h  |  2
4 files changed, 117 insertions, 25 deletions
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 42d955ef9e..d56146eb0e 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -554,11 +554,19 @@ void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) {
// We are about to use the assembler to place literals directly. Make sure we have enough
// underlying code buffer and we have generated the jump table with right size.
- EmissionCheckScope scope(codegen->GetVIXLAssembler(),
+ ExactAssemblyScope scope(codegen->GetVIXLAssembler(),
num_entries * sizeof(int32_t),
CodeBufferCheckScope::kExactSize);
+ codegen->GetVIXLAssembler()->bind(&table_start_);
+ for (uint32_t i = 0; i < num_entries; i++) {
+ codegen->GetVIXLAssembler()->place(jump_targets_[i].get());
+ }
+}
+
+void JumpTableARM64::FixTable(CodeGeneratorARM64* codegen) {
+ uint32_t num_entries = switch_instr_->GetNumEntries();
+ DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold);
- __ Bind(&table_start_);
const ArenaVector<HBasicBlock*>& successors = switch_instr_->GetBlock()->GetSuccessors();
for (uint32_t i = 0; i < num_entries; i++) {
vixl::aarch64::Label* target_label = codegen->GetLabelOf(successors[i]);
@@ -566,8 +574,7 @@ void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) {
ptrdiff_t jump_offset = target_label->GetLocation() - table_start_.GetLocation();
DCHECK_GT(jump_offset, std::numeric_limits<int32_t>::min());
DCHECK_LE(jump_offset, std::numeric_limits<int32_t>::max());
- Literal<int32_t> literal(jump_offset);
- __ place(&literal);
+ jump_targets_[i].get()->UpdateValue(jump_offset, codegen->GetVIXLAssembler());
}
}
@@ -1072,14 +1079,14 @@ size_t CodeGeneratorARM64::GetSIMDRegisterWidth() const {
#define __ GetVIXLAssembler()->
-void CodeGeneratorARM64::EmitJumpTables() {
+void CodeGeneratorARM64::FixJumpTables() {
for (auto&& jump_table : jump_tables_) {
- jump_table->EmitTable(this);
+ jump_table->FixTable(this);
}
}
void CodeGeneratorARM64::Finalize() {
- EmitJumpTables();
+ FixJumpTables();
// Emit JIT baker read barrier slow paths.
DCHECK(GetCompilerOptions().IsJitCompiler() || jit_baker_read_barrier_slow_paths_.empty());
@@ -6709,17 +6716,7 @@ void InstructionCodeGeneratorARM64::VisitPackedSwitch(HPackedSwitch* switch_inst
Register value_reg = InputRegisterAt(switch_instr, 0);
HBasicBlock* default_block = switch_instr->GetDefaultBlock();
- // Roughly set 16 as max average assemblies generated per HIR in a graph.
- static constexpr int32_t kMaxExpectedSizePerHInstruction = 16 * kInstructionSize;
- // ADR has a limited range(+/-1MB), so we set a threshold for the number of HIRs in the graph to
- // make sure we don't emit it if the target may run out of range.
- // TODO: Instead of emitting all jump tables at the end of the code, we could keep track of ADR
- // ranges and emit the tables only as required.
- static constexpr int32_t kJumpTableInstructionThreshold = 1* MB / kMaxExpectedSizePerHInstruction;
-
- if (num_entries <= kPackedSwitchCompareJumpThreshold ||
- // Current instruction id is an upper bound of the number of HIRs in the graph.
- GetGraph()->GetCurrentInstructionId() > kJumpTableInstructionThreshold) {
+ if (num_entries <= kPackedSwitchCompareJumpThreshold) {
// Create a series of compare/jumps.
UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
Register temp = temps.AcquireW();
@@ -6771,15 +6768,25 @@ void InstructionCodeGeneratorARM64::VisitPackedSwitch(HPackedSwitch* switch_inst
// immediate value for Adr. So we are free to use both VIXL blocked registers to reduce the
// register pressure.
Register table_base = temps.AcquireX();
+
+ const size_t jump_size = switch_instr->GetNumEntries() * sizeof(int32_t);
+ ExactAssemblyScope scope(codegen_->GetVIXLAssembler(),
+ kInstructionSize * 4 + jump_size,
+ CodeBufferCheckScope::kExactSize);
+
// Load jump offset from the table.
- __ Adr(table_base, jump_table->GetTableStartLabel());
+ // Note: the table start address is always in range as the table is emitted immediately
+ // after these 4 instructions.
+ __ adr(table_base, jump_table->GetTableStartLabel());
Register jump_offset = temp_w;
- __ Ldr(jump_offset, MemOperand(table_base, index, UXTW, 2));
+ __ ldr(jump_offset, MemOperand(table_base, index, UXTW, 2));
// Jump to target block by branching to table_base(pc related) + offset.
Register target_address = table_base;
- __ Add(target_address, table_base, Operand(jump_offset, SXTW));
- __ Br(target_address);
+ __ add(target_address, table_base, Operand(jump_offset, SXTW));
+ __ br(target_address);
+
+ jump_table->EmitTable(codegen_);
}
}
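A quick, illustrative expansion of the ExactAssemblyScope size used above (not part of
the patch; it just evaluates kInstructionSize * 4 + jump_size for the 10-entry switch
the new test builds, with the fixed A64 instruction width of 4 bytes):

  #include <cstddef>
  #include <cstdint>

  // Illustration only: scope size for a 10-entry PackedSwitch jump table.
  constexpr size_t kInstructionSize = 4;   // fixed-width A64 instructions
  constexpr size_t kNumEntries = 10;
  constexpr size_t kScopeSize = kInstructionSize * 4 + kNumEntries * sizeof(int32_t);
  static_assert(kScopeSize == 56, "16 bytes of dispatch code (adr/ldr/add/br) + 40 bytes of offsets");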
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 9f226e9e63..c6427b2da3 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -183,17 +183,35 @@ class SlowPathCodeARM64 : public SlowPathCode {
class JumpTableARM64 : public DeletableArenaObject<kArenaAllocSwitchTable> {
public:
+ using VIXLInt32Literal = vixl::aarch64::Literal<int32_t>;
+
explicit JumpTableARM64(HPackedSwitch* switch_instr)
- : switch_instr_(switch_instr), table_start_() {}
+ : switch_instr_(switch_instr),
+ table_start_(),
+ jump_targets_(switch_instr->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
+ uint32_t num_entries = switch_instr_->GetNumEntries();
+ for (uint32_t i = 0; i < num_entries; i++) {
+ VIXLInt32Literal* lit = new VIXLInt32Literal(0);
+ jump_targets_.emplace_back(lit);
+ }
+ }
vixl::aarch64::Label* GetTableStartLabel() { return &table_start_; }
+ // Emits the jump table into the code buffer; jump target offsets are not yet known.
void EmitTable(CodeGeneratorARM64* codegen);
+ // Updates the offsets in the jump table; to be called once the addresses of the
+ // jump target basic blocks are resolved.
+ void FixTable(CodeGeneratorARM64* codegen);
+
private:
HPackedSwitch* const switch_instr_;
vixl::aarch64::Label table_start_;
+ // Contains literals for the switch's jump targets.
+ ArenaVector<std::unique_ptr<VIXLInt32Literal>> jump_targets_;
+
DISALLOW_COPY_AND_ASSIGN(JumpTableARM64);
};
@@ -1144,7 +1162,7 @@ class CodeGeneratorARM64 : public CodeGenerator {
vixl::aarch64::Label* adrp_label,
ArenaDeque<PcRelativePatchInfo>* patches);
- void EmitJumpTables();
+ void FixJumpTables();
template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
static void EmitPcRelativeLinkerPatches(const ArenaDeque<PcRelativePatchInfo>& infos,
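Taken together with the .cc hunk above, the header changes give JumpTableARM64 a
two-phase protocol; a minimal sketch of who calls what and when (shapes assumed from
the hunks, bodies elided, not the verbatim sources):

  // Phase 1 - while generating code for the switch itself:
  void InstructionCodeGeneratorARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
    // ... emit the adr/ldr/add/br dispatch sequence ...
    jump_table->EmitTable(codegen_);   // bind table_start_, place placeholder literals in line
  }

  // Phase 2 - once every basic block label has a location:
  void CodeGeneratorARM64::Finalize() {
    FixJumpTables();   // calls FixTable() on each table, which patches the real
                       // label-relative offsets via Literal<int32_t>::UpdateValue()
    // ...
  }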
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index 7365f0fb7f..14a419fcd8 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -73,6 +73,7 @@ class CodegenTest : public CommonCompilerTest, public OptimizingUnitTestHelper {
int64_t j,
DataType::Type type,
const CodegenTargetConfig target_config);
+ void TestPackedSwitch(const CodegenTargetConfig target_config);
};
void CodegenTest::TestCode(const std::vector<uint16_t>& data, bool has_result, int32_t expected) {
@@ -682,6 +683,72 @@ TEST_F(CodegenTest, ComparisonsLong) {
}
}
+// Tests a PackedSwitch in a very large HGraph; validates that the switch jump table is in
+// range for the PC-relative load in the codegen visitor.
+void CodegenTest::TestPackedSwitch(const CodegenTargetConfig target_config) {
+ HBasicBlock* return_block = InitEntryMainExitGraph();
+ constexpr DataType::Type data_type = DataType::Type::kInt32;
+
+ // Number of switch entries; chosen so that the jump table implementation is exercised.
+ constexpr size_t kNumSwitchEntries = 10;
+
+ // Number of jump targets (including a 'default' case).
+ constexpr size_t kNumBB = kNumSwitchEntries + 1;
+ // Some arbitrary value to be used as input.
+ constexpr int kInputValue = kNumBB - 4;
+
+ HInstruction* input = graph_->GetIntConstant(kInputValue);
+ HIntConstant* constant_1 = graph_->GetIntConstant(1);
+
+ HBasicBlock* switch_block = AddNewBlock();
+ entry_block_->ReplaceSuccessor(return_block, switch_block);
+
+ HPackedSwitch* hswitch = new (GetAllocator()) HPackedSwitch(0, kNumSwitchEntries, input);
+ switch_block->AddInstruction(hswitch);
+
+ std::vector<HInstruction*> phi_inputs {};
+
+ // Add switch jump target blocks.
+ for (int i = 0; i < kNumBB; i++) {
+ HBasicBlock* case_block = AddNewBlock();
+ case_block->AddPredecessor(switch_block);
+ case_block->AddSuccessor(return_block);
+
+ HIntConstant* case_value = graph_->GetIntConstant(i);
+ HAdd* add = MakeBinOp<HAdd>(case_block, data_type, input, case_value);
+ phi_inputs.emplace_back(add);
+
+ MakeGoto(case_block);
+ }
+
+ HPhi* phi = MakePhi(return_block, phi_inputs);
+ HInstruction* return_val = phi;
+
+ // Emit a huge number of HAdds - to simulate a very large HGraph.
+ constexpr int kNumOfAdds = 2 * 1024 * 1024;
+ for (int i = 0; i < kNumOfAdds; i++) {
+ return_val = MakeBinOp<HAdd>(return_block, data_type, return_val, constant_1);
+ }
+
+ MakeReturn(return_block, return_val);
+
+ graph_->BuildDominatorTree();
+ EXPECT_TRUE(CheckGraph());
+
+ std::unique_ptr<CompilerOptions> compiler_options =
+ CommonCompilerTest::CreateCompilerOptions(target_config.GetInstructionSet(), "default");
+ RunCode(target_config,
+ *compiler_options,
+ graph_,
+ [](HGraph*) {}, true, kNumOfAdds + 2 * kInputValue);
+}
+
+TEST_F(CodegenTest, PackedSwitchInHugeMethod) {
+ for (CodegenTargetConfig target_config : GetTargetConfigs()) {
+ TestPackedSwitch(target_config);
+ }
+}
+
#ifdef ART_ENABLE_CODEGEN_arm
TEST_F(CodegenTest, ARMVIXLParallelMoveResolver) {
std::unique_ptr<CompilerOptions> compiler_options =
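The expected value passed to RunCode() in the new test follows from its constants:
kInputValue is 7, so case block #7 is taken and its HAdd produces input + case_value
= 7 + 7, after which the chain of kNumOfAdds HAdds contributes 1 each. A standalone
check of that arithmetic (mirroring the test's constants; illustrative, not part of
the patch):

  constexpr int kNumSwitchEntries = 10;
  constexpr int kNumBB = kNumSwitchEntries + 1;   // 11, including the default case
  constexpr int kInputValue = kNumBB - 4;         // 7: selects case block #7
  constexpr int kNumOfAdds = 2 * 1024 * 1024;
  // Phi value out of case block #7 is 7 + 7; the add chain then adds kNumOfAdds:
  constexpr int kExpected = (kInputValue + kInputValue) + kNumOfAdds;
  static_assert(kExpected == kNumOfAdds + 2 * kInputValue, "matches the RunCode() expectation");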
diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h
index abd1ffe03e..a7c3558c5f 100644
--- a/compiler/optimizing/optimizing_unit_test.h
+++ b/compiler/optimizing/optimizing_unit_test.h
@@ -382,7 +382,7 @@ class OptimizingUnitTestHelper {
pre_header->AddSuccessor(loop);
loop->AddSuccessor(loop_exit); // true successor
- loop->AddSuccessor(loop); // fakse successor
+ loop->AddSuccessor(loop); // false successor
MakeGoto(pre_header);