Diffstat (limited to 'compiler/optimizing')
20 files changed, 864 insertions, 362 deletions
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index c3979f3dd1..ca71c32802 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -90,8 +90,9 @@ class HGraphBuilder : public ValueObject {
   static constexpr const char* kBuilderPassName = "builder";
 
-  // The number of entries in a packed switch before we use a jump table.
-  static constexpr uint16_t kSmallSwitchThreshold = 5;
+  // The number of entries in a packed switch before we use a jump table or a specified
+  // compare/jump series.
+  static constexpr uint16_t kSmallSwitchThreshold = 3;
 
  private:
   // Analyzes the dex instruction and adds HInstruction to the graph
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 3630dbec24..9fda83840c 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -59,7 +59,7 @@ static constexpr SRegister kFpuCalleeSaves[] =
 // S registers. Therefore there is no need to block it.
 static constexpr DRegister DTMP = D31;
 
-static constexpr uint32_t kPackedSwitchJumpTableThreshold = 6;
+static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
 
 #define __ down_cast<ArmAssembler*>(codegen->GetAssembler())->
 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmWordSize, x).Int32Value()
@@ -6250,7 +6250,7 @@ void LocationsBuilderARM::VisitPackedSwitch(HPackedSwitch* switch_instr) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
-  if (switch_instr->GetNumEntries() >= kPackedSwitchJumpTableThreshold &&
+  if (switch_instr->GetNumEntries() > kPackedSwitchCompareJumpThreshold &&
       codegen_->GetAssembler()->IsThumb()) {
     locations->AddTemp(Location::RequiresRegister());  // We need a temp for the table base.
     if (switch_instr->GetStartValue() != 0) {
@@ -6266,12 +6266,30 @@ void InstructionCodeGeneratorARM::VisitPackedSwitch(HPackedSwitch* switch_instr)
   Register value_reg = locations->InAt(0).AsRegister<Register>();
   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
 
-  if (num_entries < kPackedSwitchJumpTableThreshold || !codegen_->GetAssembler()->IsThumb()) {
+  if (num_entries <= kPackedSwitchCompareJumpThreshold || !codegen_->GetAssembler()->IsThumb()) {
     // Create a series of compare/jumps.
+    Register temp_reg = IP;
+    // Note: It is fine for the AddConstantSetFlags() below to use the IP register to temporarily
+    // store the immediate, because IP is used as its destination register. For the other
+    // AddConstantSetFlags() and GenerateCompareWithImmediate(), the immediate values are constant,
+    // and they can be encoded in the instruction without making use of the IP register.
+    __ AddConstantSetFlags(temp_reg, value_reg, -lower_bound);
+
     const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
-    for (uint32_t i = 0; i < num_entries; i++) {
-      GenerateCompareWithImmediate(value_reg, lower_bound + i);
-      __ b(codegen_->GetLabelOf(successors[i]), EQ);
+    // Jump to successors[0] if value == lower_bound.
+    __ b(codegen_->GetLabelOf(successors[0]), EQ);
+    int32_t last_index = 0;
+    for (; num_entries - last_index > 2; last_index += 2) {
+      __ AddConstantSetFlags(temp_reg, temp_reg, -2);
+      // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
+      __ b(codegen_->GetLabelOf(successors[last_index + 1]), LO);
+      // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
+      __ b(codegen_->GetLabelOf(successors[last_index + 2]), EQ);
+    }
+    if (num_entries - last_index == 2) {
+      // The last missing case_value.
+      GenerateCompareWithImmediate(temp_reg, 1);
+      __ b(codegen_->GetLabelOf(successors[last_index + 1]), EQ);
     }
 
     // And the default for any other value.
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 451470f271..52058302be 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -71,10 +71,10 @@ using helpers::ARM64EncodableConstantOrRegister;
 using helpers::ArtVixlRegCodeCoherentForRegSet;
 
 static constexpr int kCurrentMethodStackOffset = 0;
-// The compare/jump sequence will generate about (2 * num_entries + 1) instructions. While the jump
+// The compare/jump sequence will generate about (1.5 * num_entries + 3) instructions. While the jump
 // table version generates 7 instructions and num_entries literals, the compare/jump sequence
 // generates less code/data with a small num_entries.
-static constexpr uint32_t kPackedSwitchJumpTableThreshold = 6;
+static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
 
 inline Condition ARM64Condition(IfCondition cond) {
   switch (cond) {
@@ -546,7 +546,7 @@ class ArraySetSlowPathARM64 : public SlowPathCodeARM64 {
 
 void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) {
   uint32_t num_entries = switch_instr_->GetNumEntries();
-  DCHECK_GE(num_entries, kPackedSwitchJumpTableThreshold);
+  DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold);
 
   // We are about to use the assembler to place literals directly. Make sure we have enough
   // underlying code buffer and we have generated the jump table with right size.
@@ -4582,20 +4582,29 @@ void InstructionCodeGeneratorARM64::VisitPackedSwitch(HPackedSwitch* switch_inst
   // ranges and emit the tables only as required.
   static constexpr int32_t kJumpTableInstructionThreshold = 1 * MB / kMaxExpectedSizePerHInstruction;
 
-  if (num_entries < kPackedSwitchJumpTableThreshold ||
+  if (num_entries <= kPackedSwitchCompareJumpThreshold ||
       // Current instruction id is an upper bound of the number of HIRs in the graph.
       GetGraph()->GetCurrentInstructionId() > kJumpTableInstructionThreshold) {
     // Create a series of compare/jumps.
+    UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
+    Register temp = temps.AcquireW();
+    __ Subs(temp, value_reg, Operand(lower_bound));
+
     const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
-    for (uint32_t i = 0; i < num_entries; i++) {
-      int32_t case_value = lower_bound + i;
-      vixl::Label* succ = codegen_->GetLabelOf(successors[i]);
-      if (case_value == 0) {
-        __ Cbz(value_reg, succ);
-      } else {
-        __ Cmp(value_reg, Operand(case_value));
-        __ B(eq, succ);
-      }
+    // Jump to successors[0] if value == lower_bound.
+    __ B(eq, codegen_->GetLabelOf(successors[0]));
+    int32_t last_index = 0;
+    for (; num_entries - last_index > 2; last_index += 2) {
+      __ Subs(temp, temp, Operand(2));
+      // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
+      __ B(lo, codegen_->GetLabelOf(successors[last_index + 1]));
+      // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
+      __ B(eq, codegen_->GetLabelOf(successors[last_index + 2]));
+    }
+    if (num_entries - last_index == 2) {
+      // The last missing case_value.
+      __ Cmp(temp, Operand(1));
+      __ B(eq, codegen_->GetLabelOf(successors[last_index + 1]));
     }
 
     // And the default for any other value.
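The new lowering resolves two switch cases per compare: it rebases the value once with a flag-setting subtract, branches to successors[0] on EQ, and then, for each remaining pair of cases, subtracts 2 and branches on LO (unsigned borrow, i.e. the value falls below the next pair) and EQ. Below is a minimal host-side sketch of the control flow the emitted series encodes; `ResolvePackedSwitch` is a hypothetical illustration, not ART code, and returns `num_entries` to stand for the default block:

```cpp
#include <cstdint>
#include <cstdio>

// Which successor would the ARM/ARM64 compare/jump series branch to?
uint32_t ResolvePackedSwitch(int32_t value, int32_t lower_bound, uint32_t num_entries) {
  // Subs temp, value_reg, #lower_bound: rebase once; flags are live from here on.
  uint32_t temp = static_cast<uint32_t>(value) - static_cast<uint32_t>(lower_bound);
  if (temp == 0) return 0;  // B eq -> successors[0]
  uint32_t last_index = 0;
  for (; num_entries - last_index > 2; last_index += 2) {
    bool lo = temp < 2;  // LO after "Subs temp, temp, #2" means an unsigned borrow.
    temp -= 2;
    if (lo) return last_index + 1;         // B lo -> successors[last_index + 1]
    if (temp == 0) return last_index + 2;  // B eq -> successors[last_index + 2]
  }
  if (num_entries - last_index == 2 && temp == 1) {
    return last_index + 1;  // Cmp temp, #1; B eq -- the last missing case_value.
  }
  return num_entries;  // Fall through to the default block.
}

int main() {
  // Five cases starting at 10: inputs 9 and 15 must reach the default (printed as 5).
  for (int32_t v = 9; v <= 15; ++v) {
    std::printf("value %d -> successor %u\n", v, ResolvePackedSwitch(v, 10, 5));
  }
  return 0;
}
```

This is also why the cost estimate in the comment changes from about (2 * num_entries + 1) to about (1.5 * num_entries + 3) instructions: each loop iteration spends three instructions on two cases.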
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 5dc101b199..ae0f2c8935 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -4897,19 +4897,31 @@ void InstructionCodeGeneratorMIPS::VisitPackedSwitch(HPackedSwitch* switch_instr
   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
 
   // Create a set of compare/jumps.
-  const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
-  for (int32_t i = 0; i < num_entries; ++i) {
-    int32_t case_value = lower_bound + i;
-    MipsLabel* successor_label = codegen_->GetLabelOf(successors[i]);
-    if (case_value == 0) {
-      __ Beqz(value_reg, successor_label);
-    } else {
-      __ LoadConst32(TMP, case_value);
-      __ Beq(value_reg, TMP, successor_label);
-    }
-  }
+  Register temp_reg = TMP;
+  __ Addiu32(temp_reg, value_reg, -lower_bound);
+  // Jump to default if index is negative.
+  // Note: We don't check the case where the index is positive while value < lower_bound, because
+  // in that case index >= num_entries must be true, and thus we save one branch instruction.
+  __ Bltz(temp_reg, codegen_->GetLabelOf(default_block));
 
-  // Insert the default branch for every other value.
+  const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
+  // Jump to successors[0] if value == lower_bound.
+  __ Beqz(temp_reg, codegen_->GetLabelOf(successors[0]));
+  int32_t last_index = 0;
+  for (; num_entries - last_index > 2; last_index += 2) {
+    __ Addiu(temp_reg, temp_reg, -2);
+    // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
+    __ Bltz(temp_reg, codegen_->GetLabelOf(successors[last_index + 1]));
+    // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
+    __ Beqz(temp_reg, codegen_->GetLabelOf(successors[last_index + 2]));
+  }
+  if (num_entries - last_index == 2) {
+    // The last missing case_value.
+    __ Addiu(temp_reg, temp_reg, -1);
+    __ Beqz(temp_reg, codegen_->GetLabelOf(successors[last_index + 1]));
+  }
+
+  // And the default for any other value.
   if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
     __ B(codegen_->GetLabelOf(default_block));
   }
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 99f58dd2c5..1e428a06e1 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -1752,11 +1752,7 @@ void InstructionCodeGeneratorMIPS64::VisitClinitCheck(HClinitCheck* check) {
 void LocationsBuilderMIPS64::VisitCompare(HCompare* compare) {
   Primitive::Type in_type = compare->InputAt(0)->GetType();
 
-  LocationSummary::CallKind call_kind = Primitive::IsFloatingPointType(in_type) ?
LocationSummary::kCall - : LocationSummary::kNoCall; - - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare, call_kind); + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare); switch (in_type) { case Primitive::kPrimLong: @@ -1766,13 +1762,11 @@ void LocationsBuilderMIPS64::VisitCompare(HCompare* compare) { break; case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); - locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1))); - locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt)); + case Primitive::kPrimDouble: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; - } default: LOG(FATAL) << "Unexpected type for compare operation " << in_type; @@ -1781,14 +1775,15 @@ void LocationsBuilderMIPS64::VisitCompare(HCompare* compare) { void InstructionCodeGeneratorMIPS64::VisitCompare(HCompare* instruction) { LocationSummary* locations = instruction->GetLocations(); + GpuRegister res = locations->Out().AsRegister<GpuRegister>(); Primitive::Type in_type = instruction->InputAt(0)->GetType(); + bool gt_bias = instruction->IsGtBias(); // 0 if: left == right // 1 if: left > right // -1 if: left < right switch (in_type) { case Primitive::kPrimLong: { - GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); Location rhs_location = locations->InAt(1); bool use_imm = rhs_location.IsConstant(); @@ -1803,35 +1798,52 @@ void InstructionCodeGeneratorMIPS64::VisitCompare(HCompare* instruction) { rhs = rhs_location.AsRegister<GpuRegister>(); } __ Slt(TMP, lhs, rhs); - __ Slt(dst, rhs, lhs); - __ Subu(dst, dst, TMP); + __ Slt(res, rhs, lhs); + __ Subu(res, res, TMP); break; } - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { - int32_t entry_point_offset; - if (in_type == Primitive::kPrimFloat) { - entry_point_offset = instruction->IsGtBias() ? QUICK_ENTRY_POINT(pCmpgFloat) - : QUICK_ENTRY_POINT(pCmplFloat); + case Primitive::kPrimFloat: { + FpuRegister lhs = locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister rhs = locations->InAt(1).AsFpuRegister<FpuRegister>(); + Mips64Label done; + __ CmpEqS(FTMP, lhs, rhs); + __ LoadConst32(res, 0); + __ Bc1nez(FTMP, &done); + if (gt_bias) { + __ CmpLtS(FTMP, lhs, rhs); + __ LoadConst32(res, -1); + __ Bc1nez(FTMP, &done); + __ LoadConst32(res, 1); } else { - entry_point_offset = instruction->IsGtBias() ? 
QUICK_ENTRY_POINT(pCmpgDouble) - : QUICK_ENTRY_POINT(pCmplDouble); + __ CmpLtS(FTMP, rhs, lhs); + __ LoadConst32(res, 1); + __ Bc1nez(FTMP, &done); + __ LoadConst32(res, -1); } - codegen_->InvokeRuntime(entry_point_offset, instruction, instruction->GetDexPc(), nullptr); - if (in_type == Primitive::kPrimFloat) { - if (instruction->IsGtBias()) { - CheckEntrypointTypes<kQuickCmpgFloat, int32_t, float, float>(); - } else { - CheckEntrypointTypes<kQuickCmplFloat, int32_t, float, float>(); - } + __ Bind(&done); + break; + } + + case Primitive::kPrimDouble: { + FpuRegister lhs = locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister rhs = locations->InAt(1).AsFpuRegister<FpuRegister>(); + Mips64Label done; + __ CmpEqD(FTMP, lhs, rhs); + __ LoadConst32(res, 0); + __ Bc1nez(FTMP, &done); + if (gt_bias) { + __ CmpLtD(FTMP, lhs, rhs); + __ LoadConst32(res, -1); + __ Bc1nez(FTMP, &done); + __ LoadConst32(res, 1); } else { - if (instruction->IsGtBias()) { - CheckEntrypointTypes<kQuickCmpgDouble, int32_t, double, double>(); - } else { - CheckEntrypointTypes<kQuickCmplDouble, int32_t, double, double>(); - } + __ CmpLtD(FTMP, rhs, lhs); + __ LoadConst32(res, 1); + __ Bc1nez(FTMP, &done); + __ LoadConst32(res, -1); } + __ Bind(&done); break; } @@ -1842,8 +1854,19 @@ void InstructionCodeGeneratorMIPS64::VisitCompare(HCompare* instruction) { void LocationsBuilderMIPS64::VisitCondition(HCondition* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + switch (instruction->InputAt(0)->GetType()) { + default: + case Primitive::kPrimLong: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + break; + + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + break; + } if (instruction->NeedsMaterialization()) { locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } @@ -1854,129 +1877,42 @@ void InstructionCodeGeneratorMIPS64::VisitCondition(HCondition* instruction) { return; } - // TODO: generalize to long - DCHECK_NE(instruction->InputAt(0)->GetType(), Primitive::kPrimLong); - + Primitive::Type type = instruction->InputAt(0)->GetType(); LocationSummary* locations = instruction->GetLocations(); - GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); - GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); - Location rhs_location = locations->InAt(1); + Mips64Label true_label; - GpuRegister rhs_reg = ZERO; - int64_t rhs_imm = 0; - bool use_imm = rhs_location.IsConstant(); - if (use_imm) { - rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant()); - } else { - rhs_reg = rhs_location.AsRegister<GpuRegister>(); - } - - IfCondition if_cond = instruction->GetCondition(); - - switch (if_cond) { - case kCondEQ: - case kCondNE: - if (use_imm && IsUint<16>(rhs_imm)) { - __ Xori(dst, lhs, rhs_imm); - } else { - if (use_imm) { - rhs_reg = TMP; - __ LoadConst32(rhs_reg, rhs_imm); - } - __ Xor(dst, lhs, rhs_reg); - } - if (if_cond == kCondEQ) { - __ Sltiu(dst, dst, 1); - } else { - __ Sltu(dst, ZERO, dst); - } - break; + switch (type) { + default: + // Integer case. 
+ GenerateIntLongCompare(instruction->GetCondition(), /* is64bit */ false, locations); + return; + case Primitive::kPrimLong: + GenerateIntLongCompare(instruction->GetCondition(), /* is64bit */ true, locations); + return; - case kCondLT: - case kCondGE: - if (use_imm && IsInt<16>(rhs_imm)) { - __ Slti(dst, lhs, rhs_imm); - } else { - if (use_imm) { - rhs_reg = TMP; - __ LoadConst32(rhs_reg, rhs_imm); - } - __ Slt(dst, lhs, rhs_reg); - } - if (if_cond == kCondGE) { - // Simulate lhs >= rhs via !(lhs < rhs) since there's - // only the slt instruction but no sge. - __ Xori(dst, dst, 1); - } + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + // TODO: don't use branches. + GenerateFpCompareAndBranch(instruction->GetCondition(), + instruction->IsGtBias(), + type, + locations, + &true_label); break; + } - case kCondLE: - case kCondGT: - if (use_imm && IsInt<16>(rhs_imm + 1)) { - // Simulate lhs <= rhs via lhs < rhs + 1. - __ Slti(dst, lhs, rhs_imm + 1); - if (if_cond == kCondGT) { - // Simulate lhs > rhs via !(lhs <= rhs) since there's - // only the slti instruction but no sgti. - __ Xori(dst, dst, 1); - } - } else { - if (use_imm) { - rhs_reg = TMP; - __ LoadConst32(rhs_reg, rhs_imm); - } - __ Slt(dst, rhs_reg, lhs); - if (if_cond == kCondLE) { - // Simulate lhs <= rhs via !(rhs < lhs) since there's - // only the slt instruction but no sle. - __ Xori(dst, dst, 1); - } - } - break; + // Convert the branches into the result. + Mips64Label done; - case kCondB: - case kCondAE: - if (use_imm && 0 <= rhs_imm && rhs_imm <= 0x7fff) { - __ Sltiu(dst, lhs, rhs_imm); - } else { - if (use_imm) { - rhs_reg = TMP; - __ LoadConst32(rhs_reg, rhs_imm); - } - __ Sltu(dst, lhs, rhs_reg); - } - if (if_cond == kCondAE) { - // Simulate lhs >= rhs via !(lhs < rhs) since there's - // only the sltu instruction but no sgeu. - __ Xori(dst, dst, 1); - } - break; + // False case: result = 0. + __ LoadConst32(dst, 0); + __ Bc(&done); - case kCondBE: - case kCondA: - if (use_imm && 0 <= rhs_imm && rhs_imm <= 0x7ffe) { - // Simulate lhs <= rhs via lhs < rhs + 1. - __ Sltiu(dst, lhs, rhs_imm + 1); - if (if_cond == kCondA) { - // Simulate lhs > rhs via !(lhs <= rhs) since there's - // only the sltiu instruction but no sgtiu. - __ Xori(dst, dst, 1); - } - } else { - if (use_imm) { - rhs_reg = TMP; - __ LoadConst32(rhs_reg, rhs_imm); - } - __ Sltu(dst, rhs_reg, lhs); - if (if_cond == kCondBE) { - // Simulate lhs <= rhs via !(rhs < lhs) since there's - // only the sltu instruction but no sleu. - __ Xori(dst, dst, 1); - } - } - break; - } + // True case: result = 1. 
+ __ Bind(&true_label); + __ LoadConst32(dst, 1); + __ Bind(&done); } void InstructionCodeGeneratorMIPS64::DivRemOneOrMinusOne(HBinaryOperation* instruction) { @@ -2375,6 +2311,329 @@ void InstructionCodeGeneratorMIPS64::VisitTryBoundary(HTryBoundary* try_boundary } } +void InstructionCodeGeneratorMIPS64::GenerateIntLongCompare(IfCondition cond, + bool is64bit, + LocationSummary* locations) { + GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); + GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); + Location rhs_location = locations->InAt(1); + GpuRegister rhs_reg = ZERO; + int64_t rhs_imm = 0; + bool use_imm = rhs_location.IsConstant(); + if (use_imm) { + if (is64bit) { + rhs_imm = CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()); + } else { + rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant()); + } + } else { + rhs_reg = rhs_location.AsRegister<GpuRegister>(); + } + int64_t rhs_imm_plus_one = rhs_imm + UINT64_C(1); + + switch (cond) { + case kCondEQ: + case kCondNE: + if (use_imm && IsUint<16>(rhs_imm)) { + __ Xori(dst, lhs, rhs_imm); + } else { + if (use_imm) { + rhs_reg = TMP; + __ LoadConst64(rhs_reg, rhs_imm); + } + __ Xor(dst, lhs, rhs_reg); + } + if (cond == kCondEQ) { + __ Sltiu(dst, dst, 1); + } else { + __ Sltu(dst, ZERO, dst); + } + break; + + case kCondLT: + case kCondGE: + if (use_imm && IsInt<16>(rhs_imm)) { + __ Slti(dst, lhs, rhs_imm); + } else { + if (use_imm) { + rhs_reg = TMP; + __ LoadConst64(rhs_reg, rhs_imm); + } + __ Slt(dst, lhs, rhs_reg); + } + if (cond == kCondGE) { + // Simulate lhs >= rhs via !(lhs < rhs) since there's + // only the slt instruction but no sge. + __ Xori(dst, dst, 1); + } + break; + + case kCondLE: + case kCondGT: + if (use_imm && IsInt<16>(rhs_imm_plus_one)) { + // Simulate lhs <= rhs via lhs < rhs + 1. + __ Slti(dst, lhs, rhs_imm_plus_one); + if (cond == kCondGT) { + // Simulate lhs > rhs via !(lhs <= rhs) since there's + // only the slti instruction but no sgti. + __ Xori(dst, dst, 1); + } + } else { + if (use_imm) { + rhs_reg = TMP; + __ LoadConst64(rhs_reg, rhs_imm); + } + __ Slt(dst, rhs_reg, lhs); + if (cond == kCondLE) { + // Simulate lhs <= rhs via !(rhs < lhs) since there's + // only the slt instruction but no sle. + __ Xori(dst, dst, 1); + } + } + break; + + case kCondB: + case kCondAE: + if (use_imm && IsInt<16>(rhs_imm)) { + // Sltiu sign-extends its 16-bit immediate operand before + // the comparison and thus lets us compare directly with + // unsigned values in the ranges [0, 0x7fff] and + // [0x[ffffffff]ffff8000, 0x[ffffffff]ffffffff]. + __ Sltiu(dst, lhs, rhs_imm); + } else { + if (use_imm) { + rhs_reg = TMP; + __ LoadConst64(rhs_reg, rhs_imm); + } + __ Sltu(dst, lhs, rhs_reg); + } + if (cond == kCondAE) { + // Simulate lhs >= rhs via !(lhs < rhs) since there's + // only the sltu instruction but no sgeu. + __ Xori(dst, dst, 1); + } + break; + + case kCondBE: + case kCondA: + if (use_imm && (rhs_imm_plus_one != 0) && IsInt<16>(rhs_imm_plus_one)) { + // Simulate lhs <= rhs via lhs < rhs + 1. + // Note that this only works if rhs + 1 does not overflow + // to 0, hence the check above. + // Sltiu sign-extends its 16-bit immediate operand before + // the comparison and thus lets us compare directly with + // unsigned values in the ranges [0, 0x7fff] and + // [0x[ffffffff]ffff8000, 0x[ffffffff]ffffffff]. + __ Sltiu(dst, lhs, rhs_imm_plus_one); + if (cond == kCondA) { + // Simulate lhs > rhs via !(lhs <= rhs) since there's + // only the sltiu instruction but no sgtiu. 
+ __ Xori(dst, dst, 1); + } + } else { + if (use_imm) { + rhs_reg = TMP; + __ LoadConst64(rhs_reg, rhs_imm); + } + __ Sltu(dst, rhs_reg, lhs); + if (cond == kCondBE) { + // Simulate lhs <= rhs via !(rhs < lhs) since there's + // only the sltu instruction but no sleu. + __ Xori(dst, dst, 1); + } + } + break; + } +} + +void InstructionCodeGeneratorMIPS64::GenerateIntLongCompareAndBranch(IfCondition cond, + bool is64bit, + LocationSummary* locations, + Mips64Label* label) { + GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); + Location rhs_location = locations->InAt(1); + GpuRegister rhs_reg = ZERO; + int64_t rhs_imm = 0; + bool use_imm = rhs_location.IsConstant(); + if (use_imm) { + if (is64bit) { + rhs_imm = CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()); + } else { + rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant()); + } + } else { + rhs_reg = rhs_location.AsRegister<GpuRegister>(); + } + + if (use_imm && rhs_imm == 0) { + switch (cond) { + case kCondEQ: + case kCondBE: // <= 0 if zero + __ Beqzc(lhs, label); + break; + case kCondNE: + case kCondA: // > 0 if non-zero + __ Bnezc(lhs, label); + break; + case kCondLT: + __ Bltzc(lhs, label); + break; + case kCondGE: + __ Bgezc(lhs, label); + break; + case kCondLE: + __ Blezc(lhs, label); + break; + case kCondGT: + __ Bgtzc(lhs, label); + break; + case kCondB: // always false + break; + case kCondAE: // always true + __ Bc(label); + break; + } + } else { + if (use_imm) { + rhs_reg = TMP; + __ LoadConst64(rhs_reg, rhs_imm); + } + switch (cond) { + case kCondEQ: + __ Beqc(lhs, rhs_reg, label); + break; + case kCondNE: + __ Bnec(lhs, rhs_reg, label); + break; + case kCondLT: + __ Bltc(lhs, rhs_reg, label); + break; + case kCondGE: + __ Bgec(lhs, rhs_reg, label); + break; + case kCondLE: + __ Bgec(rhs_reg, lhs, label); + break; + case kCondGT: + __ Bltc(rhs_reg, lhs, label); + break; + case kCondB: + __ Bltuc(lhs, rhs_reg, label); + break; + case kCondAE: + __ Bgeuc(lhs, rhs_reg, label); + break; + case kCondBE: + __ Bgeuc(rhs_reg, lhs, label); + break; + case kCondA: + __ Bltuc(rhs_reg, lhs, label); + break; + } + } +} + +void InstructionCodeGeneratorMIPS64::GenerateFpCompareAndBranch(IfCondition cond, + bool gt_bias, + Primitive::Type type, + LocationSummary* locations, + Mips64Label* label) { + FpuRegister lhs = locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister rhs = locations->InAt(1).AsFpuRegister<FpuRegister>(); + if (type == Primitive::kPrimFloat) { + switch (cond) { + case kCondEQ: + __ CmpEqS(FTMP, lhs, rhs); + __ Bc1nez(FTMP, label); + break; + case kCondNE: + __ CmpEqS(FTMP, lhs, rhs); + __ Bc1eqz(FTMP, label); + break; + case kCondLT: + if (gt_bias) { + __ CmpLtS(FTMP, lhs, rhs); + } else { + __ CmpUltS(FTMP, lhs, rhs); + } + __ Bc1nez(FTMP, label); + break; + case kCondLE: + if (gt_bias) { + __ CmpLeS(FTMP, lhs, rhs); + } else { + __ CmpUleS(FTMP, lhs, rhs); + } + __ Bc1nez(FTMP, label); + break; + case kCondGT: + if (gt_bias) { + __ CmpUltS(FTMP, rhs, lhs); + } else { + __ CmpLtS(FTMP, rhs, lhs); + } + __ Bc1nez(FTMP, label); + break; + case kCondGE: + if (gt_bias) { + __ CmpUleS(FTMP, rhs, lhs); + } else { + __ CmpLeS(FTMP, rhs, lhs); + } + __ Bc1nez(FTMP, label); + break; + default: + LOG(FATAL) << "Unexpected non-floating-point condition"; + } + } else { + DCHECK_EQ(type, Primitive::kPrimDouble); + switch (cond) { + case kCondEQ: + __ CmpEqD(FTMP, lhs, rhs); + __ Bc1nez(FTMP, label); + break; + case kCondNE: + __ CmpEqD(FTMP, lhs, rhs); + __ Bc1eqz(FTMP, label); + break; + 
case kCondLT: + if (gt_bias) { + __ CmpLtD(FTMP, lhs, rhs); + } else { + __ CmpUltD(FTMP, lhs, rhs); + } + __ Bc1nez(FTMP, label); + break; + case kCondLE: + if (gt_bias) { + __ CmpLeD(FTMP, lhs, rhs); + } else { + __ CmpUleD(FTMP, lhs, rhs); + } + __ Bc1nez(FTMP, label); + break; + case kCondGT: + if (gt_bias) { + __ CmpUltD(FTMP, rhs, lhs); + } else { + __ CmpLtD(FTMP, rhs, lhs); + } + __ Bc1nez(FTMP, label); + break; + case kCondGE: + if (gt_bias) { + __ CmpUleD(FTMP, rhs, lhs); + } else { + __ CmpLeD(FTMP, rhs, lhs); + } + __ Bc1nez(FTMP, label); + break; + default: + LOG(FATAL) << "Unexpected non-floating-point condition"; + } + } +} + void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruction, size_t condition_input_index, Mips64Label* true_target, @@ -2420,97 +2679,27 @@ void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruc // The condition instruction has not been materialized, use its inputs as // the comparison and its condition as the branch condition. HCondition* condition = cond->AsCondition(); + Primitive::Type type = condition->InputAt(0)->GetType(); + LocationSummary* locations = cond->GetLocations(); + IfCondition if_cond = condition->GetCondition(); + Mips64Label* branch_target = true_target; - GpuRegister lhs = condition->GetLocations()->InAt(0).AsRegister<GpuRegister>(); - Location rhs_location = condition->GetLocations()->InAt(1); - GpuRegister rhs_reg = ZERO; - int32_t rhs_imm = 0; - bool use_imm = rhs_location.IsConstant(); - if (use_imm) { - rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant()); - } else { - rhs_reg = rhs_location.AsRegister<GpuRegister>(); - } - - IfCondition if_cond; - Mips64Label* non_fallthrough_target; if (true_target == nullptr) { if_cond = condition->GetOppositeCondition(); - non_fallthrough_target = false_target; - } else { - if_cond = condition->GetCondition(); - non_fallthrough_target = true_target; - } - - if (use_imm && rhs_imm == 0) { - switch (if_cond) { - case kCondEQ: - __ Beqzc(lhs, non_fallthrough_target); - break; - case kCondNE: - __ Bnezc(lhs, non_fallthrough_target); - break; - case kCondLT: - __ Bltzc(lhs, non_fallthrough_target); - break; - case kCondGE: - __ Bgezc(lhs, non_fallthrough_target); - break; - case kCondLE: - __ Blezc(lhs, non_fallthrough_target); - break; - case kCondGT: - __ Bgtzc(lhs, non_fallthrough_target); - break; - case kCondB: - break; // always false - case kCondBE: - __ Beqzc(lhs, non_fallthrough_target); // <= 0 if zero - break; - case kCondA: - __ Bnezc(lhs, non_fallthrough_target); // > 0 if non-zero - break; - case kCondAE: - __ Bc(non_fallthrough_target); // always true - break; - } - } else { - if (use_imm) { - rhs_reg = TMP; - __ LoadConst32(rhs_reg, rhs_imm); - } - switch (if_cond) { - case kCondEQ: - __ Beqc(lhs, rhs_reg, non_fallthrough_target); - break; - case kCondNE: - __ Bnec(lhs, rhs_reg, non_fallthrough_target); - break; - case kCondLT: - __ Bltc(lhs, rhs_reg, non_fallthrough_target); - break; - case kCondGE: - __ Bgec(lhs, rhs_reg, non_fallthrough_target); - break; - case kCondLE: - __ Bgec(rhs_reg, lhs, non_fallthrough_target); - break; - case kCondGT: - __ Bltc(rhs_reg, lhs, non_fallthrough_target); - break; - case kCondB: - __ Bltuc(lhs, rhs_reg, non_fallthrough_target); - break; - case kCondAE: - __ Bgeuc(lhs, rhs_reg, non_fallthrough_target); - break; - case kCondBE: - __ Bgeuc(rhs_reg, lhs, non_fallthrough_target); - break; - case kCondA: - __ Bltuc(rhs_reg, lhs, non_fallthrough_target); - break; - } + 
+      branch_target = false_target;
+  }
+
+  switch (type) {
+    default:
+      GenerateIntLongCompareAndBranch(if_cond, /* is64bit */ false, locations, branch_target);
+      break;
+    case Primitive::kPrimLong:
+      GenerateIntLongCompareAndBranch(if_cond, /* is64bit */ true, locations, branch_target);
+      break;
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      GenerateFpCompareAndBranch(if_cond, condition->IsGtBias(), type, locations, branch_target);
+      break;
   }
 }
 
@@ -3991,17 +4180,34 @@ void InstructionCodeGeneratorMIPS64::VisitPackedSwitch(HPackedSwitch* switch_ins
   GpuRegister value_reg = locations->InAt(0).AsRegister<GpuRegister>();
   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
 
-  // Create a series of compare/jumps.
+  // Create a set of compare/jumps.
+  GpuRegister temp_reg = TMP;
+  if (IsInt<16>(-lower_bound)) {
+    __ Addiu(temp_reg, value_reg, -lower_bound);
+  } else {
+    __ LoadConst32(AT, -lower_bound);
+    __ Addu(temp_reg, value_reg, AT);
+  }
+  // Jump to default if index is negative.
+  // Note: We don't check the case where the index is positive while value < lower_bound, because
+  // in that case index >= num_entries must be true, and thus we save one branch instruction.
+  __ Bltzc(temp_reg, codegen_->GetLabelOf(default_block));
+
   const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
-  for (int32_t i = 0; i < num_entries; i++) {
-    int32_t case_value = lower_bound + i;
-    Mips64Label* succ = codegen_->GetLabelOf(successors[i]);
-    if (case_value == 0) {
-      __ Beqzc(value_reg, succ);
-    } else {
-      __ LoadConst32(TMP, case_value);
-      __ Beqc(value_reg, TMP, succ);
-    }
+  // Jump to successors[0] if value == lower_bound.
+  __ Beqzc(temp_reg, codegen_->GetLabelOf(successors[0]));
+  int32_t last_index = 0;
+  for (; num_entries - last_index > 2; last_index += 2) {
+    __ Addiu(temp_reg, temp_reg, -2);
+    // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
+    __ Bltzc(temp_reg, codegen_->GetLabelOf(successors[last_index + 1]));
+    // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
+    __ Beqzc(temp_reg, codegen_->GetLabelOf(successors[last_index + 2]));
+  }
+  if (num_entries - last_index == 2) {
+    // The last missing case_value.
+    __ Addiu(temp_reg, temp_reg, -1);
+    __ Beqzc(temp_reg, codegen_->GetLabelOf(successors[last_index + 1]));
   }
 
   // And the default for any other value.
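The MIPS64 VisitCompare/VisitCondition rewrite replaces the pCmpgFloat/pCmplFloat (and double) runtime calls with in-line compares, and IsGtBias() picks how an unordered (NaN) operand resolves. Here is a small sketch of the result the CmpEqS/CmpLtS + Bc1nez sequence computes, assuming the usual cmpg/cmpl semantics; `FpCompare` is an illustrative stand-in, not ART code:

```cpp
#include <cmath>
#include <cstdio>

// HCompare on floats: 0 if equal, -1 if lhs < rhs, 1 if lhs > rhs; the bias
// decides the result when either operand is NaN (all ordered compares are false).
int FpCompare(float lhs, float rhs, bool gt_bias) {
  if (lhs == rhs) return 0;  // CmpEqS taken -> result stays 0.
  if (gt_bias) {
    // cmpg: CmpLtS(lhs, rhs) is false for NaN, so NaN falls through to +1.
    return (lhs < rhs) ? -1 : 1;
  }
  // cmpl: CmpLtS(rhs, lhs) is false for NaN, so NaN falls through to -1.
  return (rhs < lhs) ? 1 : -1;
}

int main() {
  float nan = std::nanf("");
  std::printf("%d %d %d %d\n",
              FpCompare(1.0f, 2.0f, true),    // -1
              FpCompare(2.0f, 1.0f, false),   //  1
              FpCompare(nan, 1.0f, true),     //  1 (gt bias)
              FpCompare(nan, 1.0f, false));   // -1 (lt bias)
  return 0;
}
```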
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h index 85e3a4a3ce..1593cec2a6 100644 --- a/compiler/optimizing/code_generator_mips64.h +++ b/compiler/optimizing/code_generator_mips64.h @@ -237,6 +237,16 @@ class InstructionCodeGeneratorMIPS64 : public HGraphVisitor { void DivRemByPowerOfTwo(HBinaryOperation* instruction); void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); void GenerateDivRemIntegral(HBinaryOperation* instruction); + void GenerateIntLongCompare(IfCondition cond, bool is64bit, LocationSummary* locations); + void GenerateIntLongCompareAndBranch(IfCondition cond, + bool is64bit, + LocationSummary* locations, + Mips64Label* label); + void GenerateFpCompareAndBranch(IfCondition cond, + bool gt_bias, + Primitive::Type type, + LocationSummary* locations, + Mips64Label* label); void HandleGoto(HInstruction* got, HBasicBlock* successor); Mips64Assembler* const assembler_; diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index bc3256ec8c..7a5b8dbe46 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -42,7 +42,6 @@ namespace x86 { static constexpr int kCurrentMethodStackOffset = 0; static constexpr Register kMethodRegisterArgument = EAX; - static constexpr Register kCoreCalleeSaves[] = { EBP, ESI, EDI }; static constexpr int kC2ConditionMask = 0x400; @@ -4157,7 +4156,7 @@ void CodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) { */ switch (kind) { case MemBarrierKind::kAnyAny: { - __ mfence(); + MemoryFence(); break; } case MemBarrierKind::kAnyStore: @@ -6752,31 +6751,67 @@ void LocationsBuilderX86::VisitPackedSwitch(HPackedSwitch* switch_instr) { locations->SetInAt(0, Location::RequiresRegister()); } -void InstructionCodeGeneratorX86::VisitPackedSwitch(HPackedSwitch* switch_instr) { - int32_t lower_bound = switch_instr->GetStartValue(); - int32_t num_entries = switch_instr->GetNumEntries(); - LocationSummary* locations = switch_instr->GetLocations(); - Register value_reg = locations->InAt(0).AsRegister<Register>(); - HBasicBlock* default_block = switch_instr->GetDefaultBlock(); +void InstructionCodeGeneratorX86::GenPackedSwitchWithCompares(Register value_reg, + int32_t lower_bound, + uint32_t num_entries, + HBasicBlock* switch_block, + HBasicBlock* default_block) { + // Figure out the correct compare values and jump conditions. + // Handle the first compare/branch as a special case because it might + // jump to the default case. + DCHECK_GT(num_entries, 2u); + Condition first_condition; + uint32_t index; + const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors(); + if (lower_bound != 0) { + first_condition = kLess; + __ cmpl(value_reg, Immediate(lower_bound)); + __ j(first_condition, codegen_->GetLabelOf(default_block)); + __ j(kEqual, codegen_->GetLabelOf(successors[0])); - // Create a series of compare/jumps. - const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors(); - for (int i = 0; i < num_entries; i++) { - int32_t case_value = lower_bound + i; - if (case_value == 0) { - __ testl(value_reg, value_reg); - } else { - __ cmpl(value_reg, Immediate(case_value)); - } - __ j(kEqual, codegen_->GetLabelOf(successors[i])); + index = 1; + } else { + // Handle all the compare/jumps below. + first_condition = kBelow; + index = 0; + } + + // Handle the rest of the compare/jumps. 
+ for (; index + 1 < num_entries; index += 2) { + int32_t compare_to_value = lower_bound + index + 1; + __ cmpl(value_reg, Immediate(compare_to_value)); + // Jump to successors[index] if value < case_value[index]. + __ j(first_condition, codegen_->GetLabelOf(successors[index])); + // Jump to successors[index + 1] if value == case_value[index + 1]. + __ j(kEqual, codegen_->GetLabelOf(successors[index + 1])); + } + + if (index != num_entries) { + // There are an odd number of entries. Handle the last one. + DCHECK_EQ(index + 1, num_entries); + __ cmpl(value_reg, Immediate(lower_bound + index)); + __ j(kEqual, codegen_->GetLabelOf(successors[index])); } // And the default for any other value. - if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) { - __ jmp(codegen_->GetLabelOf(default_block)); + if (!codegen_->GoesToNextBlock(switch_block, default_block)) { + __ jmp(codegen_->GetLabelOf(default_block)); } } +void InstructionCodeGeneratorX86::VisitPackedSwitch(HPackedSwitch* switch_instr) { + int32_t lower_bound = switch_instr->GetStartValue(); + uint32_t num_entries = switch_instr->GetNumEntries(); + LocationSummary* locations = switch_instr->GetLocations(); + Register value_reg = locations->InAt(0).AsRegister<Register>(); + + GenPackedSwitchWithCompares(value_reg, + lower_bound, + num_entries, + switch_instr->GetBlock(), + switch_instr->GetDefaultBlock()); +} + void LocationsBuilderX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall); @@ -6791,11 +6826,20 @@ void LocationsBuilderX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) { void InstructionCodeGeneratorX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) { int32_t lower_bound = switch_instr->GetStartValue(); - int32_t num_entries = switch_instr->GetNumEntries(); + uint32_t num_entries = switch_instr->GetNumEntries(); LocationSummary* locations = switch_instr->GetLocations(); Register value_reg = locations->InAt(0).AsRegister<Register>(); HBasicBlock* default_block = switch_instr->GetDefaultBlock(); + if (num_entries <= kPackedSwitchJumpTableThreshold) { + GenPackedSwitchWithCompares(value_reg, + lower_bound, + num_entries, + switch_instr->GetBlock(), + default_block); + return; + } + // Optimizing has a jump area. Register temp_reg = locations->GetTemp(0).AsRegister<Register>(); Register constant_area = locations->InAt(1).AsRegister<Register>(); @@ -6807,7 +6851,7 @@ void InstructionCodeGeneratorX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_ } // Is the value in range? - DCHECK_GE(num_entries, 1); + DCHECK_GE(num_entries, 1u); __ cmpl(value_reg, Immediate(num_entries - 1)); __ j(kAbove, codegen_->GetLabelOf(default_block)); diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 7c292fa103..f0ead0356d 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_H_ #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_H_ +#include "arch/x86/instruction_set_features_x86.h" #include "code_generator.h" #include "dex/compiler_enums.h" #include "driver/compiler_options.h" @@ -195,6 +196,11 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { X86Assembler* GetAssembler() const { return assembler_; } + // The compare/jump sequence will generate about (1.5 * num_entries) instructions. 
A jump
+  // table version generates 7 instructions and num_entries literals. Compare/jump sequence will
+  // generate less code/data with a small num_entries.
+  static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;
+
  private:
   // Generate code for the given suspend check. If not null, `successor`
   // is the block to branch to if the suspend check is not needed, and after
   //
@@ -269,6 +275,11 @@
   void GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label);
   void GenerateLongComparesAndJumps(HCondition* cond, Label* true_label, Label* false_label);
   void HandleGoto(HInstruction* got, HBasicBlock* successor);
+  void GenPackedSwitchWithCompares(Register value_reg,
+                                   int32_t lower_bound,
+                                   uint32_t num_entries,
+                                   HBasicBlock* switch_block,
+                                   HBasicBlock* default_block);
 
   X86Assembler* const assembler_;
   CodeGeneratorX86* const codegen_;
@@ -496,6 +507,19 @@ class CodeGeneratorX86 : public CodeGenerator {
   // artReadBarrierForRootSlow.
   void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);
 
+  // Ensure that prior stores complete to memory before subsequent loads.
+  // The locked add implementation will avoid serializing device memory, but will
+  // touch (but not change) the top of the stack.
+  // The 'non_temporal' parameter should be used to ensure ordering of non-temporal stores.
+  void MemoryFence(bool non_temporal = false) {
+    if (!non_temporal && isa_features_.PrefersLockedAddSynchronization()) {
+      assembler_.lock()->addl(Address(ESP, 0), Immediate(0));
+    } else {
+      assembler_.mfence();
+    }
+  }
+
  private:
   // Factored implementation of GenerateFieldLoadWithBakerReadBarrier
   // and GenerateArrayLoadWithBakerReadBarrier.
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 92cef5f226..1e6d50610b 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -41,6 +41,10 @@ namespace x86_64 {
 
 static constexpr int kCurrentMethodStackOffset = 0;
 static constexpr Register kMethodRegisterArgument = RDI;
+// The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump
+// table version generates 7 instructions and num_entries literals. Compare/jump sequence will
+// generate less code/data with a small num_entries.
+static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5; static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 }; static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 }; @@ -4029,7 +4033,7 @@ void CodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) { */ switch (kind) { case MemBarrierKind::kAnyAny: { - __ mfence(); + MemoryFence(); break; } case MemBarrierKind::kAnyStore: @@ -6331,11 +6335,58 @@ void LocationsBuilderX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) { void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) { int32_t lower_bound = switch_instr->GetStartValue(); - int32_t num_entries = switch_instr->GetNumEntries(); + uint32_t num_entries = switch_instr->GetNumEntries(); LocationSummary* locations = switch_instr->GetLocations(); CpuRegister value_reg_in = locations->InAt(0).AsRegister<CpuRegister>(); CpuRegister temp_reg = locations->GetTemp(0).AsRegister<CpuRegister>(); CpuRegister base_reg = locations->GetTemp(1).AsRegister<CpuRegister>(); + HBasicBlock* default_block = switch_instr->GetDefaultBlock(); + + // Should we generate smaller inline compare/jumps? + if (num_entries <= kPackedSwitchJumpTableThreshold) { + // Figure out the correct compare values and jump conditions. + // Handle the first compare/branch as a special case because it might + // jump to the default case. + DCHECK_GT(num_entries, 2u); + Condition first_condition; + uint32_t index; + const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors(); + if (lower_bound != 0) { + first_condition = kLess; + __ cmpl(value_reg_in, Immediate(lower_bound)); + __ j(first_condition, codegen_->GetLabelOf(default_block)); + __ j(kEqual, codegen_->GetLabelOf(successors[0])); + + index = 1; + } else { + // Handle all the compare/jumps below. + first_condition = kBelow; + index = 0; + } + + // Handle the rest of the compare/jumps. + for (; index + 1 < num_entries; index += 2) { + int32_t compare_to_value = lower_bound + index + 1; + __ cmpl(value_reg_in, Immediate(compare_to_value)); + // Jump to successors[index] if value < case_value[index]. + __ j(first_condition, codegen_->GetLabelOf(successors[index])); + // Jump to successors[index + 1] if value == case_value[index + 1]. + __ j(kEqual, codegen_->GetLabelOf(successors[index + 1])); + } + + if (index != num_entries) { + // There are an odd number of entries. Handle the last one. + DCHECK_EQ(index + 1, num_entries); + __ cmpl(value_reg_in, Immediate(lower_bound + index)); + __ j(kEqual, codegen_->GetLabelOf(successors[index])); + } + + // And the default for any other value. + if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) { + __ jmp(codegen_->GetLabelOf(default_block)); + } + return; + } // Remove the bias, if needed. Register value_reg_out = value_reg_in.AsRegister(); @@ -6346,7 +6397,6 @@ void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_ins CpuRegister value_reg(value_reg_out); // Is the value in range? 
- HBasicBlock* default_block = switch_instr->GetDefaultBlock(); __ cmpl(value_reg, Immediate(num_entries - 1)); __ j(kAbove, codegen_->GetLabelOf(default_block)); diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index dda9ea22d9..e5a487c761 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_ #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_ +#include "arch/x86_64/instruction_set_features_x86_64.h" #include "code_generator.h" #include "dex/compiler_enums.h" #include "driver/compiler_options.h" @@ -479,6 +480,18 @@ class CodeGeneratorX86_64 : public CodeGenerator { int64_t v, HInstruction* instruction); + // Ensure that prior stores complete to memory before subsequent loads. + // The locked add implementation will avoid serializing device memory, but will + // touch (but not change) the top of the stack. The locked add should not be used for + // ordering non-temporal stores. + void MemoryFence(bool force_mfence = false) { + if (!force_mfence && isa_features_.PrefersLockedAddSynchronization()) { + assembler_.lock()->addl(Address(CpuRegister(RSP), 0), Immediate(0)); + } else { + assembler_.mfence(); + } + } + private: // Factored implementation of GenerateFieldLoadWithBakerReadBarrier // and GenerateArrayLoadWithBakerReadBarrier. diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index 67097deaeb..c504ded54c 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -777,13 +777,6 @@ void InstructionSimplifierVisitor::VisitLessThanOrEqual(HLessThanOrEqual* condit void InstructionSimplifierVisitor::VisitCondition(HCondition* condition) { // Try to fold an HCompare into this HCondition. - // This simplification is currently supported on x86, x86_64, ARM and ARM64. - // TODO: Implement it for MIPS64. - InstructionSet instruction_set = GetGraph()->GetInstructionSet(); - if (instruction_set == kMips64) { - return; - } - HInstruction* left = condition->GetLeft(); HInstruction* right = condition->GetRight(); // We can only replace an HCondition which compares a Compare to 0. diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index e8181bbb06..4683aee603 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -825,8 +825,15 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat Label loop_head; __ Bind(&loop_head); + // TODO: When `type == Primitive::kPrimNot`, add a read barrier for + // the reference stored in the object before attempting the CAS, + // similar to the one in the art::Unsafe_compareAndSwapObject JNI + // implementation. + // + // Note that this code is not (yet) used when read barriers are + // enabled (see IntrinsicLocationsBuilderARM::VisitUnsafeCASObject). + DCHECK(!(type == Primitive::kPrimNot && kEmitCompilerReadBarrier)); __ ldrex(tmp_lo, tmp_ptr); - // TODO: Do we need a read barrier here when `type == Primitive::kPrimNot`? 
__ subs(tmp_lo, tmp_lo, ShifterOperand(expected_lo)); @@ -852,15 +859,17 @@ void IntrinsicLocationsBuilderARM::VisitUnsafeCASInt(HInvoke* invoke) { CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke); } void IntrinsicLocationsBuilderARM::VisitUnsafeCASObject(HInvoke* invoke) { - // The UnsafeCASObject intrinsic does not always work when heap - // poisoning is enabled (it breaks run-test 004-UnsafeTest); turn it - // off temporarily as a quick fix. + // The UnsafeCASObject intrinsic is missing a read barrier, and + // therefore sometimes does not work as expected (b/25883050). + // Turn it off temporarily as a quick fix, until the read barrier is + // implemented (see TODO in GenCAS below). // - // TODO(rpl): Fix it and turn it back on. + // Also, the UnsafeCASObject intrinsic does not always work when heap + // poisoning is enabled (it breaks run-test 004-UnsafeTest); turn it + // off temporarily as a quick fix (b/26204023). // - // TODO(rpl): Also, we should investigate whether we need a read - // barrier in the generated code. - if (kPoisonHeapReferences) { + // TODO(rpl): Fix these two issues and re-enable this intrinsic. + if (kEmitCompilerReadBarrier || kPoisonHeapReferences) { return; } diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 6b34daadf0..9f6863cf6e 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -1031,10 +1031,15 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat } else { __ Dmb(InnerShareable, BarrierWrites); __ Bind(&loop_head); - __ Ldxr(tmp_value, MemOperand(tmp_ptr)); - // TODO: Do we need a read barrier here when `type == Primitive::kPrimNot`? + // TODO: When `type == Primitive::kPrimNot`, add a read barrier for + // the reference stored in the object before attempting the CAS, + // similar to the one in the art::Unsafe_compareAndSwapObject JNI + // implementation. + // // Note that this code is not (yet) used when read barriers are // enabled (see IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject). + DCHECK(!(type == Primitive::kPrimNot && kEmitCompilerReadBarrier)); + __ Ldxr(tmp_value, MemOperand(tmp_ptr)); __ Cmp(tmp_value, expected); __ B(&exit_loop, ne); __ Stxr(tmp_32, value, MemOperand(tmp_ptr)); @@ -1057,15 +1062,17 @@ void IntrinsicLocationsBuilderARM64::VisitUnsafeCASLong(HInvoke* invoke) { CreateIntIntIntIntIntToInt(arena_, invoke); } void IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject(HInvoke* invoke) { - // The UnsafeCASObject intrinsic does not always work when heap - // poisoning is enabled (it breaks run-test 004-UnsafeTest); turn it - // off temporarily as a quick fix. + // The UnsafeCASObject intrinsic is missing a read barrier, and + // therefore sometimes does not work as expected (b/25883050). + // Turn it off temporarily as a quick fix, until the read barrier is + // implemented (see TODO in GenCAS below). // - // TODO(rpl): Fix it and turn it back on. + // Also, the UnsafeCASObject intrinsic does not always work when heap + // poisoning is enabled (it breaks run-test 004-UnsafeTest); turn it + // off temporarily as a quick fix (b/26204023). // - // TODO(rpl): Also, we should investigate whether we need a read - // barrier in the generated code. - if (kPoisonHeapReferences) { + // TODO(rpl): Fix these two issues and re-enable this intrinsic. 
+ if (kEmitCompilerReadBarrier || kPoisonHeapReferences) { return; } diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index 8aa7d9ff6f..8b45ea7c4f 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -1299,6 +1299,8 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat if (type == Primitive::kPrimLong) { __ Lld(out, TMP); } else { + // Note: We will need a read barrier here, when read barrier + // support is added to the MIPS64 back end. __ Ll(out, TMP); } __ Dsubu(out, out, expected); // If we didn't get the 'expected' diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index fd454d8322..80190629ee 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -2005,7 +2005,7 @@ static void GenUnsafePut(LocationSummary* locations, } if (is_volatile) { - __ mfence(); + codegen->MemoryFence(); } if (type == Primitive::kPrimNot) { @@ -2085,6 +2085,17 @@ void IntrinsicLocationsBuilderX86::VisitUnsafeCASLong(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) { + // The UnsafeCASObject intrinsic is missing a read barrier, and + // therefore sometimes does not work as expected (b/25883050). + // Turn it off temporarily as a quick fix, until the read barrier is + // implemented. + // + // TODO(rpl): Implement a read barrier in GenCAS below and re-enable + // this intrinsic. + if (kEmitCompilerReadBarrier) { + return; + } + CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke); } @@ -2136,6 +2147,13 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code __ PoisonHeapReference(value); } + // TODO: Add a read barrier for the reference stored in the object + // before attempting the CAS, similar to the one in the + // art::Unsafe_compareAndSwapObject JNI implementation. + // + // Note that this code is not (yet) used when read barriers are + // enabled (see IntrinsicLocationsBuilderX86::VisitUnsafeCASObject). + DCHECK(!kEmitCompilerReadBarrier); __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value); // LOCK CMPXCHG has full barrier semantics, and we don't need @@ -2145,11 +2163,8 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code __ setb(kZero, out.AsRegister<Register>()); __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>()); - // In the case of the `UnsafeCASObject` intrinsic, accessing an - // object in the heap with LOCK CMPXCHG does not require a read - // barrier, as we do not keep a reference to this heap location. - // However, if heap poisoning is enabled, we need to unpoison the - // values that were poisoned earlier. + // If heap poisoning is enabled, we need to unpoison the values + // that were poisoned earlier. 
if (kPoisonHeapReferences) { if (base_equals_value) { // `value` has been moved to a temporary register, no need to diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index ce737e3f7e..aa1c109738 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -2080,7 +2080,7 @@ static void GenUnsafePut(LocationSummary* locations, Primitive::Type type, bool } if (is_volatile) { - __ mfence(); + codegen->MemoryFence(); } if (type == Primitive::kPrimNot) { @@ -2150,6 +2150,17 @@ void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) { + // The UnsafeCASObject intrinsic is missing a read barrier, and + // therefore sometimes does not work as expected (b/25883050). + // Turn it off temporarily as a quick fix, until the read barrier is + // implemented. + // + // TODO(rpl): Implement a read barrier in GenCAS below and re-enable + // this intrinsic. + if (kEmitCompilerReadBarrier) { + return; + } + CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke); } @@ -2200,6 +2211,13 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* c __ PoisonHeapReference(CpuRegister(value_reg)); } + // TODO: Add a read barrier for the reference stored in the object + // before attempting the CAS, similar to the one in the + // art::Unsafe_compareAndSwapObject JNI implementation. + // + // Note that this code is not (yet) used when read barriers are + // enabled (see IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject). + DCHECK(!kEmitCompilerReadBarrier); __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), CpuRegister(value_reg)); // LOCK CMPXCHG has full barrier semantics, and we don't need @@ -2209,11 +2227,8 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* c __ setcc(kZero, out); __ movzxb(out, out); - // In the case of the `UnsafeCASObject` intrinsic, accessing an - // object in the heap with LOCK CMPXCHG does not require a read - // barrier, as we do not keep a reference to this heap location. - // However, if heap poisoning is enabled, we need to unpoison the - // values that were poisoned earlier. + // If heap poisoning is enabled, we need to unpoison the values + // that were poisoned earlier. if (kPoisonHeapReferences) { if (base_equals_value) { // `value_reg` has been moved to a temporary register, no need diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc index adde00464b..727f2bb717 100644 --- a/compiler/optimizing/load_store_elimination.cc +++ b/compiler/optimizing/load_store_elimination.cc @@ -119,10 +119,16 @@ class HeapLocation : public ArenaObject<kArenaAllocMisc> { : ref_info_(ref_info), offset_(offset), index_(index), - declaring_class_def_index_(declaring_class_def_index) { + declaring_class_def_index_(declaring_class_def_index), + value_killed_by_loop_side_effects_(true) { DCHECK(ref_info != nullptr); DCHECK((offset == kInvalidFieldOffset && index != nullptr) || (offset != kInvalidFieldOffset && index == nullptr)); + if (ref_info->IsSingleton() && !IsArrayElement()) { + // Assume this location's value cannot be killed by loop side effects + // until proven otherwise. 
+      value_killed_by_loop_side_effects_ = false;
+    }
   }
 
   ReferenceInfo* GetReferenceInfo() const { return ref_info_; }
@@ -139,11 +145,22 @@ class HeapLocation : public ArenaObject<kArenaAllocMisc> {
     return index_ != nullptr;
   }
 
+  bool IsValueKilledByLoopSideEffects() const {
+    return value_killed_by_loop_side_effects_;
+  }
+
+  void SetValueKilledByLoopSideEffects(bool val) {
+    value_killed_by_loop_side_effects_ = val;
+  }
+
  private:
   ReferenceInfo* const ref_info_;            // reference for instance/static field or array access.
   const size_t offset_;                      // offset of static/instance field.
   HInstruction* const index_;                // index of an array element.
   const int16_t declaring_class_def_index_;  // declaring class's def's dex index.
+  bool value_killed_by_loop_side_effects_;   // value of this location may be killed by loop
+                                             // side effects because this location is stored
+                                             // into inside a loop.
 
   DISALLOW_COPY_AND_ASSIGN(HeapLocation);
 };
@@ -370,13 +387,13 @@ class HeapLocationCollector : public HGraphVisitor {
     return heap_locations_[heap_location_idx];
   }
 
-  void VisitFieldAccess(HInstruction* ref, const FieldInfo& field_info) {
+  HeapLocation* VisitFieldAccess(HInstruction* ref, const FieldInfo& field_info) {
    if (field_info.IsVolatile()) {
      has_volatile_ = true;
    }
    const uint16_t declaring_class_def_index = field_info.GetDeclaringClassDefIndex();
    const size_t offset = field_info.GetFieldOffset().SizeValue();
-    GetOrCreateHeapLocation(ref, offset, nullptr, declaring_class_def_index);
+    return GetOrCreateHeapLocation(ref, offset, nullptr, declaring_class_def_index);
   }
 
   void VisitArrayAccess(HInstruction* array, HInstruction* index) {
@@ -390,8 +407,11 @@
   }
 
   void VisitInstanceFieldSet(HInstanceFieldSet* instruction) OVERRIDE {
-    VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
+    HeapLocation* location = VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
     has_heap_stores_ = true;
+    if (instruction->GetBlock()->GetLoopInformation() != nullptr) {
+      location->SetValueKilledByLoopSideEffects(true);
+    }
   }
 
   void VisitStaticFieldGet(HStaticFieldGet* instruction) OVERRIDE {
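The next hunk rewrites how heap values are merged at a loop header: instead of inheriting pre-header values only when the loop is write-free, they are now always inherited and then selectively killed per location. What this buys, in source terms: a field of a non-escaping (singleton) object that is never stored to inside the loop keeps its cached value across iterations. A hedged C++ analogue of the pattern (the real analysis runs on dex code; the types here are illustrative):

struct Point {
  int x;
  int y;
};

int SumSteps(int n) {
  Point p{3, 0};    // Non-escaping object: a singleton in LSE terms.
  for (int i = 0; i < n; ++i) {
    p.y += p.x;     // p.y is stored to inside the loop, so its cached value
                    // is killed at the header. p.x is not, so its load can
                    // be replaced by the pre-header value (3).
  }
  return p.y;
}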
@@ -565,23 +585,26 @@ class LSEVisitor : public HGraphVisitor {
     HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader();
     ArenaVector<HInstruction*>& pre_header_heap_values =
         heap_values_for_[pre_header->GetBlockId()];
+    // Inherit the values from pre-header.
+    for (size_t i = 0; i < heap_values.size(); i++) {
+      heap_values[i] = pre_header_heap_values[i];
+    }
+
     // We do a single pass in reverse post order. For loops, use the side effects as a hint
     // to see if the heap values should be killed.
     if (side_effects_.GetLoopEffects(block).DoesAnyWrite()) {
-      for (size_t i = 0; i < pre_header_heap_values.size(); i++) {
-        // heap value is killed by loop side effects, need to keep the last store.
-        KeepIfIsStore(pre_header_heap_values[i]);
-      }
-      if (kIsDebugBuild) {
-        // heap_values should all be kUnknownHeapValue that it is inited with.
-        for (size_t i = 0; i < heap_values.size(); i++) {
-          DCHECK_EQ(heap_values[i], kUnknownHeapValue);
-        }
-      }
-    } else {
-      // Inherit the values from pre-header.
       for (size_t i = 0; i < heap_values.size(); i++) {
-        heap_values[i] = pre_header_heap_values[i];
+        HeapLocation* location = heap_location_collector_.GetHeapLocation(i);
+        ReferenceInfo* ref_info = location->GetReferenceInfo();
+        if (!ref_info->IsSingleton() || location->IsValueKilledByLoopSideEffects()) {
+          // heap value is killed by loop side effects (stored into directly, or due to
+          // aliasing).
+          KeepIfIsStore(pre_header_heap_values[i]);
+          heap_values[i] = kUnknownHeapValue;
+        } else {
+          // A singleton's field that's not stored into inside a loop is invariant throughout
+          // the loop.
+        }
       }
     }
   }
@@ -655,6 +678,16 @@ class LSEVisitor : public HGraphVisitor {
     }
   }
 
+  static bool IsIntFloatAlias(Primitive::Type type1, Primitive::Type type2) {
+    return (type1 == Primitive::kPrimFloat && type2 == Primitive::kPrimInt) ||
+           (type2 == Primitive::kPrimFloat && type1 == Primitive::kPrimInt);
+  }
+
+  static bool IsLongDoubleAlias(Primitive::Type type1, Primitive::Type type2) {
+    return (type1 == Primitive::kPrimDouble && type2 == Primitive::kPrimLong) ||
+           (type2 == Primitive::kPrimDouble && type1 == Primitive::kPrimLong);
+  }
+
   void VisitGetLocation(HInstruction* instruction,
                         HInstruction* ref,
                         size_t offset,
@@ -686,7 +719,8 @@ class LSEVisitor : public HGraphVisitor {
       if ((heap_value != kUnknownHeapValue) &&
           // Keep the load due to possible I/F, J/D array aliasing.
          // See b/22538329 for details.
-          (heap_value->GetType() == instruction->GetType())) {
+          !IsIntFloatAlias(heap_value->GetType(), instruction->GetType()) &&
+          !IsLongDoubleAlias(heap_value->GetType(), instruction->GetType())) {
         removed_loads_.push_back(instruction);
         substitute_instructions_for_loads_.push_back(heap_value);
         TryRemovingNullCheck(instruction);
@@ -751,6 +785,9 @@ class LSEVisitor : public HGraphVisitor {
     if (loop_info != nullptr) {
       // instruction is a store in the loop so the loop must do writes.
       DCHECK(side_effects_.GetLoopEffects(loop_info->GetHeader()).DoesAnyWrite());
+      // If it's a singleton, IsValueKilledByLoopSideEffects() must be true.
+      DCHECK(!ref_info->IsSingleton() ||
+             heap_location_collector_.GetHeapLocation(idx)->IsValueKilledByLoopSideEffects());
       if (loop_info->IsDefinedOutOfTheLoop(original_ref)) {
         DCHECK(original_ref->GetBlock()->Dominates(loop_info->GetPreHeader()));
diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc
index 30bcf19c64..176c50ce21 100644
--- a/compiler/optimizing/parallel_move_resolver.cc
+++ b/compiler/optimizing/parallel_move_resolver.cc
@@ -169,7 +169,7 @@ MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) {
         // If `other_move` was swapped, we iterate again to find a new
         // potential cycle.
         required_swap = nullptr;
-        i = 0;
+        i = -1;
       } else if (required_swap != nullptr) {
         // A move is required to swap. We walk back the cycle to find the
         // move by just returning from this `PerformMove`.
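Stepping back to the VisitGetLocation change above: the old guard allowed a cached heap value to replace a load only when the two types were exactly equal; the new helpers reject just the int/float and long/double pairs, the cases where differently typed accesses can share the bits of one array slot (b/22538329). A self-contained restatement of the guard, with simplified types that are not the ART definitions:

enum class Prim { kInt, kFloat, kLong, kDouble };

bool IsIntFloatAlias(Prim a, Prim b) {
  return (a == Prim::kFloat && b == Prim::kInt) ||
         (b == Prim::kFloat && a == Prim::kInt);
}

bool IsLongDoubleAlias(Prim a, Prim b) {
  return (a == Prim::kDouble && b == Prim::kLong) ||
         (b == Prim::kDouble && a == Prim::kLong);
}

// A cached value may substitute for a load unless the two types form a
// bit-aliasing pair; substituting across such a pair would change the
// type of the value flowing through the graph.
bool CanSubstitute(Prim cached, Prim loaded) {
  return !IsIntFloatAlias(cached, loaded) && !IsLongDoubleAlias(cached, loaded);
}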
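The one-character parallel_move_resolver.cc fix above is easy to misread: the enclosing scan is a for loop that ends in ++i, so resetting i to 0 resumed the next iteration at index 1 and could skip a move that still needed processing; -1 is the value the increment brings back to index 0. The new CyclesWith64BitsMoves2 test below exercises such a cycle. A minimal model of the restart idiom, not ART code:

#include <cstddef>
#include <vector>

// Restarting a scan from inside `for (size_t i = 0; ...; ++i)`: assigning
// i = -1 (which wraps to SIZE_MAX for an unsigned index) lets the ++i at
// the end of the iteration bring i back to 0, so element 0 is re-examined.
void NormalizeWithRescan(std::vector<int>& v) {
  for (size_t i = 0; i < v.size(); ++i) {
    if (v[i] < 0) {
      v[i] = -v[i];                  // Resolve this entry...
      i = static_cast<size_t>(-1);   // ...then rescan from the beginning.
    }
  }
}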
diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc
index 46e6f3e5d0..5e8fe37669 100644
--- a/compiler/optimizing/parallel_move_test.cc
+++ b/compiler/optimizing/parallel_move_test.cc
@@ -609,4 +609,36 @@ TYPED_TEST(ParallelMoveTest, CyclesWith64BitsMoves) {
   }
 }
 
+TYPED_TEST(ParallelMoveTest, CyclesWith64BitsMoves2) {
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+
+  {
+    TypeParam resolver(&allocator);
+    HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
+    moves->AddMove(
+        Location::RegisterLocation(0),
+        Location::RegisterLocation(3),
+        Primitive::kPrimInt,
+        nullptr);
+    moves->AddMove(
+        Location::RegisterPairLocation(2, 3),
+        Location::RegisterPairLocation(0, 1),
+        Primitive::kPrimLong,
+        nullptr);
+    moves->AddMove(
+        Location::RegisterLocation(7),
+        Location::RegisterLocation(2),
+        Primitive::kPrimInt,
+        nullptr);
+    resolver.EmitNativeCode(moves);
+    if (TestFixture::has_swap) {
+      ASSERT_STREQ("(2,3 <-> 0,1) (2 -> 3) (7 -> 2)", resolver.GetMessage().c_str());
+    } else {
+      ASSERT_STREQ("(2,3 -> T0,T1) (0 -> 3) (T0,T1 -> 0,1) (7 -> 2)",
+                   resolver.GetMessage().c_str());
+    }
+  }
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc
index b383f1e1ad..a385448104 100644
--- a/compiler/optimizing/pc_relative_fixups_x86.cc
+++ b/compiler/optimizing/pc_relative_fixups_x86.cc
@@ -15,6 +15,7 @@
  */
 
 #include "pc_relative_fixups_x86.h"
+#include "code_generator_x86.h"
 
 namespace art {
 namespace x86 {
@@ -79,6 +80,10 @@ class PCRelativeHandlerVisitor : public HGraphVisitor {
   }
 
   void VisitPackedSwitch(HPackedSwitch* switch_insn) OVERRIDE {
+    if (switch_insn->GetNumEntries() <=
+        InstructionCodeGeneratorX86::kPackedSwitchJumpTableThreshold) {
+      return;
+    }
     // We need to replace the HPackedSwitch with a HX86PackedSwitch in order to
     // address the constant area.
     InitializePCRelativeBasePointer();
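The early return added to VisitPackedSwitch above keeps small switches out of the constant-area machinery entirely, consistent with the code generator lowering them as a compare/jump series instead of a jump table. In rough C++ terms, the two lowerings correspond to the following (illustrative only; the case values and sizes are made up):

// Small packed switch: a short compare/jump series, no table base needed,
// so the PC-relative fixup pass can skip the instruction.
int DispatchSmall(int value) {
  if (value == 0) return 10;
  if (value == 1) return 20;
  if (value == 2) return 30;
  return -1;  // default block
}

// Large packed switch: bounds check, then one indexed branch through a
// table; on x86 the table lives in the constant area, which is why the
// fixup pass must rewrite the instruction to address it PC-relatively.
int DispatchLarge(int value) {
  static const int kTargets[] = {10, 20, 30, 40, 50, 60, 70, 80};
  if (value < 0 || value >= 8) return -1;  // out of range: default block
  return kTargets[value];
}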