Revert "Revert "ART: Reduce the instructions generated by packed switch.""
This reverts commit b4c137630fd2226ad07dfd178ab15725374220f1.
The underlying issue was fixed by https://android-review.googlesource.com/188271 .
Bug: 26121945
Change-Id: I58b08eb1a9f0a5c861f8cda93522af64bcf63920
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index c3979f3..ca71c32 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -90,8 +90,9 @@
static constexpr const char* kBuilderPassName = "builder";
- // The number of entries in a packed switch before we use a jump table.
- static constexpr uint16_t kSmallSwitchThreshold = 5;
+ // The number of entries in a packed switch before we use a jump table or a specified
+ // compare/jump series.
+ static constexpr uint16_t kSmallSwitchThreshold = 3;
private:
// Analyzes the dex instruction and adds HInstruction to the graph
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index ac6b5e8..0a26786 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -59,7 +59,7 @@
// S registers. Therefore there is no need to block it.
static constexpr DRegister DTMP = D31;
-static constexpr uint32_t kPackedSwitchJumpTableThreshold = 6;
+static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
#define __ down_cast<ArmAssembler*>(codegen->GetAssembler())->
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmWordSize, x).Int32Value()
@@ -6106,7 +6106,7 @@
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
locations->SetInAt(0, Location::RequiresRegister());
- if (switch_instr->GetNumEntries() >= kPackedSwitchJumpTableThreshold &&
+ if (switch_instr->GetNumEntries() > kPackedSwitchCompareJumpThreshold &&
codegen_->GetAssembler()->IsThumb()) {
locations->AddTemp(Location::RequiresRegister()); // We need a temp for the table base.
if (switch_instr->GetStartValue() != 0) {
@@ -6122,12 +6122,30 @@
Register value_reg = locations->InAt(0).AsRegister<Register>();
HBasicBlock* default_block = switch_instr->GetDefaultBlock();
- if (num_entries < kPackedSwitchJumpTableThreshold || !codegen_->GetAssembler()->IsThumb()) {
+ if (num_entries <= kPackedSwitchCompareJumpThreshold || !codegen_->GetAssembler()->IsThumb()) {
// Create a series of compare/jumps.
+ Register temp_reg = IP;
+ // Note: It is fine for the AddConstantSetFlags() below to use the IP register to temporarily
+ // store the immediate, because IP is its destination register. For the other
+ // AddConstantSetFlags() and GenerateCompareWithImmediate() calls, the immediate values are
+ // constant, so they can be encoded in the instruction without using the IP register.
+ __ AddConstantSetFlags(temp_reg, value_reg, -lower_bound);
+
const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
- for (uint32_t i = 0; i < num_entries; i++) {
- GenerateCompareWithImmediate(value_reg, lower_bound + i);
- __ b(codegen_->GetLabelOf(successors[i]), EQ);
+ // Jump to successors[0] if value == lower_bound.
+ __ b(codegen_->GetLabelOf(successors[0]), EQ);
+ int32_t last_index = 0;
+ for (; num_entries - last_index > 2; last_index += 2) {
+ __ AddConstantSetFlags(temp_reg, temp_reg, -2);
+ // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
+ __ b(codegen_->GetLabelOf(successors[last_index + 1]), LO);
+ // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
+ __ b(codegen_->GetLabelOf(successors[last_index + 2]), EQ);
+ }
+ if (num_entries - last_index == 2) {
+ // Handle the last remaining case_value.
+ GenerateCompareWithImmediate(temp_reg, 1);
+ __ b(codegen_->GetLabelOf(successors[last_index + 1]), EQ);
}
// And the default for any other value.
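[Note: a minimal standalone model of the series this hunk emits — illustrative only, not ART code; the function name and the -1-for-default convention are assumptions. It mirrors the branch order: one flag-setting subtraction rebases the value, then each loop iteration retires two cases with a single subtraction, using the unsigned LO condition for the odd case between two even ones.

#include <cstdint>

// Returns the successor index taken for `value`, or -1 for the default block.
int PackedSwitchTarget(int32_t value, int32_t lower_bound, uint32_t num_entries) {
  // AddConstantSetFlags(temp, value, -lower_bound); values below lower_bound wrap to
  // large unsigned numbers and fall through every branch to the default.
  uint32_t temp = static_cast<uint32_t>(value) - static_cast<uint32_t>(lower_bound);
  if (temp == 0) return 0;                   // b successors[0], EQ
  uint32_t last_index = 0;
  for (; num_entries - last_index > 2; last_index += 2) {
    bool lo = temp < 2;                      // unsigned borrow out of the subtraction below
    temp -= 2;                               // AddConstantSetFlags(temp, temp, -2)
    if (lo) return last_index + 1;           // b successors[last_index + 1], LO
    if (temp == 0) return last_index + 2;    // b successors[last_index + 2], EQ
  }
  if (num_entries - last_index == 2 && temp == 1) {
    return last_index + 1;                   // GenerateCompareWithImmediate(temp, 1); b ..., EQ
  }
  return -1;                                 // default block
}]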
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 04acd9d..227f4be 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -71,10 +71,10 @@
using helpers::ArtVixlRegCodeCoherentForRegSet;
static constexpr int kCurrentMethodStackOffset = 0;
-// The compare/jump sequence will generate about (2 * num_entries + 1) instructions, while the
+// The compare/jump sequence will generate about (1.5 * num_entries + 3) instructions, while the
// jump table version generates 7 instructions and num_entries literals. The compare/jump
// sequence generates less code/data for a small num_entries.
-static constexpr uint32_t kPackedSwitchJumpTableThreshold = 6;
+static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
inline Condition ARM64Condition(IfCondition cond) {
switch (cond) {
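[Note: the arithmetic behind the new constant, counting each jump-table literal as one unit of code/data: the table costs about num_entries + 7 units against 1.5 * num_entries + 3 for the series. Setting 1.5n + 3 = n + 7 gives n = 8, so the series is strictly smaller only below 8 entries, which is why kPackedSwitchCompareJumpThreshold is 7 — compare/jump for num_entries <= 7, jump table above.]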
@@ -546,7 +546,7 @@
void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) {
uint32_t num_entries = switch_instr_->GetNumEntries();
- DCHECK_GE(num_entries, kPackedSwitchJumpTableThreshold);
+ DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold);
// We are about to use the assembler to place literals directly. Make sure we have enough
// underlying code buffer and that the jump table has been generated with the right size.
@@ -4558,20 +4558,29 @@
// ranges and emit the tables only as required.
static constexpr int32_t kJumpTableInstructionThreshold = 1 * MB / kMaxExpectedSizePerHInstruction;
- if (num_entries < kPackedSwitchJumpTableThreshold ||
+ if (num_entries <= kPackedSwitchCompareJumpThreshold ||
// Current instruction id is an upper bound of the number of HIRs in the graph.
GetGraph()->GetCurrentInstructionId() > kJumpTableInstructionThreshold) {
// Create a series of compare/jumps.
+ UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
+ Register temp = temps.AcquireW();
+ __ Subs(temp, value_reg, Operand(lower_bound));
+
const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
- for (uint32_t i = 0; i < num_entries; i++) {
- int32_t case_value = lower_bound + i;
- vixl::Label* succ = codegen_->GetLabelOf(successors[i]);
- if (case_value == 0) {
- __ Cbz(value_reg, succ);
- } else {
- __ Cmp(value_reg, Operand(case_value));
- __ B(eq, succ);
- }
+ // Jump to successors[0] if value == lower_bound.
+ __ B(eq, codegen_->GetLabelOf(successors[0]));
+ int32_t last_index = 0;
+ for (; num_entries - last_index > 2; last_index += 2) {
+ __ Subs(temp, temp, Operand(2));
+ // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
+ __ B(lo, codegen_->GetLabelOf(successors[last_index + 1]));
+ // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
+ __ B(eq, codegen_->GetLabelOf(successors[last_index + 2]));
+ }
+ if (num_entries - last_index == 2) {
+ // Handle the last remaining case_value.
+ __ Cmp(temp, Operand(1));
+ __ B(eq, codegen_->GetLabelOf(successors[last_index + 1]));
}
// And the default for any other value.
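[Note: unlike the ARM back end, which hardcodes IP as its temporary, the arm64 path acquires a scratch W register through VIXL's UseScratchRegisterScope; the eq/lo branches then consume the flags set by the Subs instructions in the same pattern as the ARM series modeled above.]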
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 9dc9167..f872bfe 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -4248,19 +4248,31 @@
HBasicBlock* default_block = switch_instr->GetDefaultBlock();
// Create a set of compare/jumps.
+ Register temp_reg = TMP;
+ __ Addiu32(temp_reg, value_reg, -lower_bound);
+ // Jump to default if the index is negative.
+ // Note: We don't separately check the case where the index is positive but value < lower_bound,
+ // because then index >= num_entries must hold and the series falls through to the default
+ // anyway; this saves one branch instruction.
+ __ Bltz(temp_reg, codegen_->GetLabelOf(default_block));
+
const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
- for (int32_t i = 0; i < num_entries; ++i) {
- int32_t case_value = lower_bound + i;
- MipsLabel* successor_label = codegen_->GetLabelOf(successors[i]);
- if (case_value == 0) {
- __ Beqz(value_reg, successor_label);
- } else {
- __ LoadConst32(TMP, case_value);
- __ Beq(value_reg, TMP, successor_label);
- }
+ // Jump to successors[0] if value == lower_bound.
+ __ Beqz(temp_reg, codegen_->GetLabelOf(successors[0]));
+ int32_t last_index = 0;
+ for (; num_entries - last_index > 2; last_index += 2) {
+ __ Addiu(temp_reg, temp_reg, -2);
+ // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
+ __ Bltz(temp_reg, codegen_->GetLabelOf(successors[last_index + 1]));
+ // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
+ __ Beqz(temp_reg, codegen_->GetLabelOf(successors[last_index + 2]));
+ }
+ if (num_entries - last_index == 2) {
+ // Handle the last remaining case_value.
+ __ Addiu(temp_reg, temp_reg, -1);
+ __ Beqz(temp_reg, codegen_->GetLabelOf(successors[last_index + 1]));
}
- // Insert the default branch for every other value.
+ // And the default for any other value.
if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
__ B(codegen_->GetLabelOf(default_block));
}
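[Note: a matching sketch of the MIPS flavor — again illustrative, not ART code. With no condition flags, the rebased index lives in TMP and is branched on directly, so negative indices must be filtered by an explicit Bltz up front, and the wrapped-positive case falls through to the default as the comment above explains. Two's-complement wrap is assumed for the rebase.

#include <cstdint>

// Returns the successor index taken for `value`, or -1 for the default block.
int PackedSwitchTargetMips(int32_t value, int32_t lower_bound, uint32_t num_entries) {
  // Addiu32(temp, value, -lower_bound), computed with wraparound semantics.
  int32_t temp = static_cast<int32_t>(
      static_cast<uint32_t>(value) - static_cast<uint32_t>(lower_bound));
  if (temp < 0) return -1;                   // Bltz temp, default
  if (temp == 0) return 0;                   // Beqz temp, successors[0]
  uint32_t last_index = 0;
  for (; num_entries - last_index > 2; last_index += 2) {
    temp -= 2;                               // Addiu temp, temp, -2
    if (temp < 0) return last_index + 1;     // Bltz temp, successors[last_index + 1]
    if (temp == 0) return last_index + 2;    // Beqz temp, successors[last_index + 2]
  }
  if (num_entries - last_index == 2) {
    temp -= 1;                               // Addiu temp, temp, -1
    if (temp == 0) return last_index + 1;    // Beqz temp, successors[last_index + 1]
  }
  return -1;                                 // default block
}]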
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index bc5eb31..78f5644 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -3975,17 +3975,34 @@
GpuRegister value_reg = locations->InAt(0).AsRegister<GpuRegister>();
HBasicBlock* default_block = switch_instr->GetDefaultBlock();
- // Create a series of compare/jumps.
+ // Create a set of compare/jumps.
+ GpuRegister temp_reg = TMP;
+ if (IsInt<16>(-lower_bound)) {
+ __ Addiu(temp_reg, value_reg, -lower_bound);
+ } else {
+ __ LoadConst32(AT, -lower_bound);
+ __ Addu(temp_reg, value_reg, AT);
+ }
+ // Jump to default if the index is negative.
+ // Note: We don't separately check the case where the index is positive but value < lower_bound,
+ // because then index >= num_entries must hold and the series falls through to the default
+ // anyway; this saves one branch instruction.
+ __ Bltzc(temp_reg, codegen_->GetLabelOf(default_block));
+
const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
- for (int32_t i = 0; i < num_entries; i++) {
- int32_t case_value = lower_bound + i;
- Mips64Label* succ = codegen_->GetLabelOf(successors[i]);
- if (case_value == 0) {
- __ Beqzc(value_reg, succ);
- } else {
- __ LoadConst32(TMP, case_value);
- __ Beqc(value_reg, TMP, succ);
- }
+ // Jump to successors[0] if value == lower_bound.
+ __ Beqzc(temp_reg, codegen_->GetLabelOf(successors[0]));
+ int32_t last_index = 0;
+ for (; num_entries - last_index > 2; last_index += 2) {
+ __ Addiu(temp_reg, temp_reg, -2);
+ // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
+ __ Bltzc(temp_reg, codegen_->GetLabelOf(successors[last_index + 1]));
+ // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
+ __ Beqzc(temp_reg, codegen_->GetLabelOf(successors[last_index + 2]));
+ }
+ if (num_entries - last_index == 2) {
+ // Handle the last remaining case_value.
+ __ Addiu(temp_reg, temp_reg, -1);
+ __ Beqzc(temp_reg, codegen_->GetLabelOf(successors[last_index + 1]));
}
// And the default for any other value.
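[Note: the only mips64 wrinkle is the immediate width — Addiu takes a signed 16-bit immediate, so when -lower_bound does not satisfy IsInt<16> it is first materialized into AT with LoadConst32 and added with Addu. The branch series itself matches the mips sketch above, using the compact Bltzc/Beqzc forms.]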
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 2fb87d3..19f03df 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -42,7 +42,6 @@
static constexpr int kCurrentMethodStackOffset = 0;
static constexpr Register kMethodRegisterArgument = EAX;
-
static constexpr Register kCoreCalleeSaves[] = { EBP, ESI, EDI };
static constexpr int kC2ConditionMask = 0x400;
@@ -6426,31 +6425,67 @@
locations->SetInAt(0, Location::RequiresRegister());
}
-void InstructionCodeGeneratorX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
- int32_t lower_bound = switch_instr->GetStartValue();
- int32_t num_entries = switch_instr->GetNumEntries();
- LocationSummary* locations = switch_instr->GetLocations();
- Register value_reg = locations->InAt(0).AsRegister<Register>();
- HBasicBlock* default_block = switch_instr->GetDefaultBlock();
+void InstructionCodeGeneratorX86::GenPackedSwitchWithCompares(Register value_reg,
+ int32_t lower_bound,
+ uint32_t num_entries,
+ HBasicBlock* switch_block,
+ HBasicBlock* default_block) {
+ // Figure out the correct compare values and jump conditions.
+ // Handle the first compare/branch as a special case because it might
+ // jump to the default case.
+ DCHECK_GT(num_entries, 2u);
+ Condition first_condition;
+ uint32_t index;
+ const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors();
+ if (lower_bound != 0) {
+ first_condition = kLess;
+ __ cmpl(value_reg, Immediate(lower_bound));
+ __ j(first_condition, codegen_->GetLabelOf(default_block));
+ __ j(kEqual, codegen_->GetLabelOf(successors[0]));
- // Create a series of compare/jumps.
- const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
- for (int i = 0; i < num_entries; i++) {
- int32_t case_value = lower_bound + i;
- if (case_value == 0) {
- __ testl(value_reg, value_reg);
- } else {
- __ cmpl(value_reg, Immediate(case_value));
- }
- __ j(kEqual, codegen_->GetLabelOf(successors[i]));
+ index = 1;
+ } else {
+ // Handle all the compare/jumps below.
+ first_condition = kBelow;
+ index = 0;
+ }
+
+ // Handle the rest of the compare/jumps.
+ for (; index + 1 < num_entries; index += 2) {
+ int32_t compare_to_value = lower_bound + index + 1;
+ __ cmpl(value_reg, Immediate(compare_to_value));
+ // Jump to successors[index] if value < case_value[index].
+ __ j(first_condition, codegen_->GetLabelOf(successors[index]));
+ // Jump to successors[index + 1] if value == case_value[index + 1].
+ __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
+ }
+
+ if (index != num_entries) {
+ // There is an odd number of entries. Handle the last one.
+ DCHECK_EQ(index + 1, num_entries);
+ __ cmpl(value_reg, Immediate(lower_bound + index));
+ __ j(kEqual, codegen_->GetLabelOf(successors[index]));
}
// And the default for any other value.
- if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
- __ jmp(codegen_->GetLabelOf(default_block));
+ if (!codegen_->GoesToNextBlock(switch_block, default_block)) {
+ __ jmp(codegen_->GetLabelOf(default_block));
}
}
+void InstructionCodeGeneratorX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
+ int32_t lower_bound = switch_instr->GetStartValue();
+ uint32_t num_entries = switch_instr->GetNumEntries();
+ LocationSummary* locations = switch_instr->GetLocations();
+ Register value_reg = locations->InAt(0).AsRegister<Register>();
+
+ GenPackedSwitchWithCompares(value_reg,
+ lower_bound,
+ num_entries,
+ switch_instr->GetBlock(),
+ switch_instr->GetDefaultBlock());
+}
+
void LocationsBuilderX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
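[Note: a standalone sketch of the GenPackedSwitchWithCompares logic above — illustrative only; the function name and the -1-for-default convention are assumptions. x86 compares against the raw case values, so no rebasing subtraction is needed: a nonzero lower_bound gets one signed guard branch to the default, while lower_bound == 0 relies on the unsigned kBelow test, which can never fire for negative values.

#include <cstdint>

// Returns the successor index taken for `value`, or -1 for the default block.
int X86PackedSwitchTarget(int32_t value, int32_t lower_bound, uint32_t num_entries) {
  bool use_signed_less = (lower_bound != 0);  // first_condition: kLess vs. kBelow
  uint32_t index;
  if (lower_bound != 0) {
    if (value < lower_bound) return -1;       // j(kLess, default)
    if (value == lower_bound) return 0;       // j(kEqual, successors[0])
    index = 1;
  } else {
    index = 0;  // negatives compare as huge unsigned values and reach the default
  }
  for (; index + 1 < num_entries; index += 2) {
    int32_t compare_to = lower_bound + static_cast<int32_t>(index) + 1;
    bool less = use_signed_less
        ? value < compare_to
        : static_cast<uint32_t>(value) < static_cast<uint32_t>(compare_to);
    if (less) return index;                     // j(first_condition, successors[index])
    if (value == compare_to) return index + 1;  // j(kEqual, successors[index + 1])
  }
  if (index != num_entries) {                 // odd number of entries: one case left
    if (value == lower_bound + static_cast<int32_t>(index)) return index;
  }
  return -1;                                  // default block
}]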
@@ -6465,11 +6500,20 @@
void InstructionCodeGeneratorX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
int32_t lower_bound = switch_instr->GetStartValue();
- int32_t num_entries = switch_instr->GetNumEntries();
+ uint32_t num_entries = switch_instr->GetNumEntries();
LocationSummary* locations = switch_instr->GetLocations();
Register value_reg = locations->InAt(0).AsRegister<Register>();
HBasicBlock* default_block = switch_instr->GetDefaultBlock();
+ if (num_entries <= kPackedSwitchJumpTableThreshold) {
+ GenPackedSwitchWithCompares(value_reg,
+ lower_bound,
+ num_entries,
+ switch_instr->GetBlock(),
+ default_block);
+ return;
+ }
+
// Optimizing has a jump area.
Register temp_reg = locations->GetTemp(0).AsRegister<Register>();
Register constant_area = locations->InAt(1).AsRegister<Register>();
@@ -6481,7 +6525,7 @@
}
// Is the value in range?
- DCHECK_GE(num_entries, 1);
+ DCHECK_GE(num_entries, 1u);
__ cmpl(value_reg, Immediate(num_entries - 1));
__ j(kAbove, codegen_->GetLabelOf(default_block));
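[Note: the single unsigned check above does double duty — once the bias has been removed, any value below lower_bound wraps to a large unsigned number, so one cmpl plus j(kAbove, default) rejects both out-of-range directions with a single branch.]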
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 064051c..f9403a6 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -195,6 +195,11 @@
X86Assembler* GetAssembler() const { return assembler_; }
+ // The compare/jump sequence will generate about (1.5 * num_entries) instructions, while a jump
+ // table version generates 7 instructions and num_entries literals. The compare/jump sequence
+ // generates less code/data for a small num_entries.
+ static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;
+
private:
// Generate code for the given suspend check. If not null, `successor`
// is the block to branch to if the suspend check is not needed, and after
@@ -236,6 +241,11 @@
void GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label);
void GenerateLongComparesAndJumps(HCondition* cond, Label* true_label, Label* false_label);
void HandleGoto(HInstruction* got, HBasicBlock* successor);
+ void GenPackedSwitchWithCompares(Register value_reg,
+ int32_t lower_bound,
+ uint32_t num_entries,
+ HBasicBlock* switch_block,
+ HBasicBlock* default_block);
X86Assembler* const assembler_;
CodeGeneratorX86* const codegen_;
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 4618be9..44a51ea 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -41,6 +41,10 @@
static constexpr int kCurrentMethodStackOffset = 0;
static constexpr Register kMethodRegisterArgument = RDI;
+// The compare/jump sequence will generate about (1.5 * num_entries) instructions, while a jump
+// table version generates 7 instructions and num_entries literals. The compare/jump sequence
+// generates less code/data for a small num_entries.
+static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;
static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 };
static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 };
@@ -6021,11 +6025,58 @@
void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
int32_t lower_bound = switch_instr->GetStartValue();
- int32_t num_entries = switch_instr->GetNumEntries();
+ uint32_t num_entries = switch_instr->GetNumEntries();
LocationSummary* locations = switch_instr->GetLocations();
CpuRegister value_reg_in = locations->InAt(0).AsRegister<CpuRegister>();
CpuRegister temp_reg = locations->GetTemp(0).AsRegister<CpuRegister>();
CpuRegister base_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
+ HBasicBlock* default_block = switch_instr->GetDefaultBlock();
+
+ // Should we generate smaller inline compare/jumps?
+ if (num_entries <= kPackedSwitchJumpTableThreshold) {
+ // Figure out the correct compare values and jump conditions.
+ // Handle the first compare/branch as a special case because it might
+ // jump to the default case.
+ DCHECK_GT(num_entries, 2u);
+ Condition first_condition;
+ uint32_t index;
+ const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
+ if (lower_bound != 0) {
+ first_condition = kLess;
+ __ cmpl(value_reg_in, Immediate(lower_bound));
+ __ j(first_condition, codegen_->GetLabelOf(default_block));
+ __ j(kEqual, codegen_->GetLabelOf(successors[0]));
+
+ index = 1;
+ } else {
+ // Handle all the compare/jumps below.
+ first_condition = kBelow;
+ index = 0;
+ }
+
+ // Handle the rest of the compare/jumps.
+ for (; index + 1 < num_entries; index += 2) {
+ int32_t compare_to_value = lower_bound + index + 1;
+ __ cmpl(value_reg_in, Immediate(compare_to_value));
+ // Jump to successors[index] if value < case_value[index].
+ __ j(first_condition, codegen_->GetLabelOf(successors[index]));
+ // Jump to successors[index + 1] if value == case_value[index + 1].
+ __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
+ }
+
+ if (index != num_entries) {
+ // There is an odd number of entries. Handle the last one.
+ DCHECK_EQ(index + 1, num_entries);
+ __ cmpl(value_reg_in, Immediate(lower_bound + index));
+ __ j(kEqual, codegen_->GetLabelOf(successors[index]));
+ }
+
+ // And the default for any other value.
+ if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
+ __ jmp(codegen_->GetLabelOf(default_block));
+ }
+ return;
+ }
// Remove the bias, if needed.
Register value_reg_out = value_reg_in.AsRegister();
@@ -6036,7 +6087,6 @@
CpuRegister value_reg(value_reg_out);
// Is the value in range?
- HBasicBlock* default_block = switch_instr->GetDefaultBlock();
__ cmpl(value_reg, Immediate(num_entries - 1));
__ j(kAbove, codegen_->GetLabelOf(default_block));
diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc
index b383f1e..a385448 100644
--- a/compiler/optimizing/pc_relative_fixups_x86.cc
+++ b/compiler/optimizing/pc_relative_fixups_x86.cc
@@ -15,6 +15,7 @@
*/
#include "pc_relative_fixups_x86.h"
+#include "code_generator_x86.h"
namespace art {
namespace x86 {
@@ -79,6 +80,10 @@
}
void VisitPackedSwitch(HPackedSwitch* switch_insn) OVERRIDE {
+ if (switch_insn->GetNumEntries() <=
+ InstructionCodeGeneratorX86::kPackedSwitchJumpTableThreshold) {
+ return;
+ }
// We need to replace the HPackedSwitch with a HX86PackedSwitch in order to
// address the constant area.
InitializePCRelativeBasePointer();
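[Note: with the early return above, switches at or below the threshold keep their plain HPackedSwitch form — they are never rewritten into an HX86PackedSwitch, and so never force the constant-area base pointer to be materialized for a switch that the code generator will lower to compares anyway.]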