Diffstat (limited to 'compiler/optimizing')
 compiler/optimizing/bounds_check_elimination.cc |  10
 compiler/optimizing/builder.cc                  |   2
 compiler/optimizing/builder.h                   |   5
 compiler/optimizing/code_generator.cc           |  14
 compiler/optimizing/code_generator.h            |   9
 compiler/optimizing/code_generator_arm.cc       |  30
 compiler/optimizing/code_generator_arm64.cc     |  35
 compiler/optimizing/code_generator_mips.cc      | 228
 compiler/optimizing/code_generator_mips.h       |   4
 compiler/optimizing/code_generator_mips64.cc    |  37
 compiler/optimizing/code_generator_x86.cc       |  86
 compiler/optimizing/code_generator_x86.h        |  10
 compiler/optimizing/code_generator_x86_64.cc    |  54
 compiler/optimizing/induction_var_analysis.cc   |   3
 compiler/optimizing/load_store_elimination.cc   |   5
 compiler/optimizing/nodes.cc                    |  45
 compiler/optimizing/nodes.h                     |   7
 compiler/optimizing/optimizing_compiler.cc      |  64
 compiler/optimizing/pc_relative_fixups_x86.cc   |   5
19 files changed, 464 insertions(+), 189 deletions(-)
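Several hunks below (bounds_check_elimination, induction_var_analysis, load_store_elimination, nodes) replace HLoopInformation::IsLoopInvariant(instruction, must_dominate) with the simpler IsDefinedOutOfTheLoop(instruction). A minimal sketch of the new predicate's semantics follows; the standalone types and the free-function form are invented for illustration, the real method lives on HLoopInformation and tests an ArenaBitVector of block ids.

// Toy model: the predicate only checks loop membership. A definition placed
// *after* the loop therefore also returns true, which is why callers that
// additionally need the definition to dominate the pre-header (induction
// variable analysis, LSE) now DCHECK that dominance separately in the diff.
#include <unordered_set>

struct Block { int id; };
struct Instruction { const Block* block; };

bool IsDefinedOutOfTheLoop(const std::unordered_set<int>& loop_block_ids,
                           const Instruction& insn) {
  return loop_block_ids.count(insn.block->id) == 0;
}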
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index 7dbfd7c58e..4c3f66aa4f 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -1138,8 +1138,8 @@ class BCEVisitor : public HGraphVisitor { void VisitArrayGet(HArrayGet* array_get) OVERRIDE { if (!has_deoptimization_on_constant_subscripts_ && array_get->IsInLoop()) { HLoopInformation* loop = array_get->GetBlock()->GetLoopInformation(); - if (loop->IsLoopInvariant(array_get->InputAt(0), false) && - loop->IsLoopInvariant(array_get->InputAt(1), false)) { + if (loop->IsDefinedOutOfTheLoop(array_get->InputAt(0)) && + loop->IsDefinedOutOfTheLoop(array_get->InputAt(1))) { SideEffects loop_effects = side_effects_.GetLoopEffects(loop->GetHeader()); if (!array_get->GetSideEffects().MayDependOn(loop_effects)) { HoistToPreheaderOrDeoptBlock(loop, array_get); @@ -1349,7 +1349,7 @@ class BCEVisitor : public HGraphVisitor { * by handling the null check under the hood of the array length operation. */ bool CanHandleLength(HLoopInformation* loop, HInstruction* length, bool needs_taken_test) { - if (loop->IsLoopInvariant(length, false)) { + if (loop->IsDefinedOutOfTheLoop(length)) { return true; } else if (length->IsArrayLength() && length->GetBlock()->GetLoopInformation() == loop) { if (CanHandleNullCheck(loop, length->InputAt(0), needs_taken_test)) { @@ -1365,11 +1365,11 @@ class BCEVisitor : public HGraphVisitor { * by generating a deoptimization test. */ bool CanHandleNullCheck(HLoopInformation* loop, HInstruction* check, bool needs_taken_test) { - if (loop->IsLoopInvariant(check, false)) { + if (loop->IsDefinedOutOfTheLoop(check)) { return true; } else if (check->IsNullCheck() && check->GetBlock()->GetLoopInformation() == loop) { HInstruction* array = check->InputAt(0); - if (loop->IsLoopInvariant(array, false)) { + if (loop->IsDefinedOutOfTheLoop(array)) { // Generate: if (array == null) deoptimize; HBasicBlock* block = TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test); HInstruction* cond = diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index 8e75bdcdc9..b156d136ca 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -785,7 +785,7 @@ ArtMethod* HGraphBuilder::ResolveMethod(uint16_t method_idx, InvokeType invoke_t ArtMethod* actual_method = compiling_class->GetSuperClass()->GetVTableEntry( vtable_index, class_linker->GetImagePointerSize()); if (actual_method != resolved_method && - !IsSameDexFile(*resolved_method->GetDexFile(), *dex_compilation_unit_->GetDexFile())) { + !IsSameDexFile(*actual_method->GetDexFile(), *dex_compilation_unit_->GetDexFile())) { // TODO: The actual method could still be referenced in the current dex file, so we // could try locating it. // TODO: Remove the dex_file restriction. diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h index c3979f3dd1..ca71c32802 100644 --- a/compiler/optimizing/builder.h +++ b/compiler/optimizing/builder.h @@ -90,8 +90,9 @@ class HGraphBuilder : public ValueObject { static constexpr const char* kBuilderPassName = "builder"; - // The number of entries in a packed switch before we use a jump table. - static constexpr uint16_t kSmallSwitchThreshold = 5; + // The number of entries in a packed switch before we use a jump table or specified + // compare/jump series. 
+ static constexpr uint16_t kSmallSwitchThreshold = 3; private: // Analyzes the dex instruction and adds HInstruction to the graph diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 0baa0e30dc..53d3615a41 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -246,10 +246,7 @@ void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline) InitLocationsBaseline(current); } DCHECK(CheckTypeConsistency(current)); - uintptr_t native_pc_begin = GetAssembler()->CodeSize(); current->Accept(instruction_visitor); - uintptr_t native_pc_end = GetAssembler()->CodeSize(); - RecordNativeDebugInfo(current->GetDexPc(), native_pc_begin, native_pc_end); } } @@ -926,17 +923,6 @@ void CodeGenerator::BuildStackMaps(MemoryRegion region) { stack_map_stream_.FillIn(region); } -void CodeGenerator::RecordNativeDebugInfo(uint32_t dex_pc, - uintptr_t native_pc_begin, - uintptr_t native_pc_end) { - if (compiler_options_.GetGenerateDebugInfo() && - dex_pc != kNoDexPc && - native_pc_begin != native_pc_end) { - src_map_.push_back(SrcMapElem({static_cast<uint32_t>(native_pc_begin), - static_cast<int32_t>(dex_pc)})); - } -} - void CodeGenerator::RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path) { diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 114d97be94..eade05d7b6 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -269,8 +269,6 @@ class CodeGenerator { // Record native to dex mapping for a suspend point. Required by runtime. void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr); - // Record additional native to dex mappings for native debugging/profiling tools. - void RecordNativeDebugInfo(uint32_t dex_pc, uintptr_t native_pc_begin, uintptr_t native_pc_end); bool CanMoveNullCheckToUser(HNullCheck* null_check); void MaybeRecordImplicitNullCheck(HInstruction* instruction); @@ -452,10 +450,6 @@ class CodeGenerator { // Copy the result of a call into the given target. virtual void MoveFromReturnRegister(Location trg, Primitive::Type type) = 0; - const ArenaVector<SrcMapElem>& GetSrcMappingTable() const { - return src_map_; - } - protected: // Method patch info used for recording locations of required linker patches and // target methods. The target method can be used for various purposes, whether for @@ -498,7 +492,6 @@ class CodeGenerator { stats_(stats), graph_(graph), compiler_options_(compiler_options), - src_map_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), slow_paths_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), current_slow_path_(nullptr), current_block_index_(0), @@ -616,8 +609,6 @@ class CodeGenerator { HGraph* const graph_; const CompilerOptions& compiler_options_; - // Native to dex_pc map used for native debugging/profiling tools. - ArenaVector<SrcMapElem> src_map_; ArenaVector<SlowPathCode*> slow_paths_; // The current slow path that we're generating code for. diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index ac6b5e823a..0a26786a87 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -59,7 +59,7 @@ static constexpr SRegister kFpuCalleeSaves[] = // S registers. Therefore there is no need to block it. 
static constexpr DRegister DTMP = D31; -static constexpr uint32_t kPackedSwitchJumpTableThreshold = 6; +static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7; #define __ down_cast<ArmAssembler*>(codegen->GetAssembler())-> #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmWordSize, x).Int32Value() @@ -6106,7 +6106,7 @@ void LocationsBuilderARM::VisitPackedSwitch(HPackedSwitch* switch_instr) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); - if (switch_instr->GetNumEntries() >= kPackedSwitchJumpTableThreshold && + if (switch_instr->GetNumEntries() > kPackedSwitchCompareJumpThreshold && codegen_->GetAssembler()->IsThumb()) { locations->AddTemp(Location::RequiresRegister()); // We need a temp for the table base. if (switch_instr->GetStartValue() != 0) { @@ -6122,12 +6122,30 @@ void InstructionCodeGeneratorARM::VisitPackedSwitch(HPackedSwitch* switch_instr) Register value_reg = locations->InAt(0).AsRegister<Register>(); HBasicBlock* default_block = switch_instr->GetDefaultBlock(); - if (num_entries < kPackedSwitchJumpTableThreshold || !codegen_->GetAssembler()->IsThumb()) { + if (num_entries <= kPackedSwitchCompareJumpThreshold || !codegen_->GetAssembler()->IsThumb()) { // Create a series of compare/jumps. + Register temp_reg = IP; + // Note: It is fine for the below AddConstantSetFlags() using IP register to temporarily store + // the immediate, because IP is used as the destination register. For the other + // AddConstantSetFlags() and GenerateCompareWithImmediate(), the immediate values are constant, + // and they can be encoded in the instruction without making use of IP register. + __ AddConstantSetFlags(temp_reg, value_reg, -lower_bound); + const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors(); - for (uint32_t i = 0; i < num_entries; i++) { - GenerateCompareWithImmediate(value_reg, lower_bound + i); - __ b(codegen_->GetLabelOf(successors[i]), EQ); + // Jump to successors[0] if value == lower_bound. + __ b(codegen_->GetLabelOf(successors[0]), EQ); + int32_t last_index = 0; + for (; num_entries - last_index > 2; last_index += 2) { + __ AddConstantSetFlags(temp_reg, temp_reg, -2); + // Jump to successors[last_index + 1] if value < case_value[last_index + 2]. + __ b(codegen_->GetLabelOf(successors[last_index + 1]), LO); + // Jump to successors[last_index + 2] if value == case_value[last_index + 2]. + __ b(codegen_->GetLabelOf(successors[last_index + 2]), EQ); + } + if (num_entries - last_index == 2) { + // The last missing case_value. + GenerateCompareWithImmediate(temp_reg, 1); + __ b(codegen_->GetLabelOf(successors[last_index + 1]), EQ); } // And the default for any other value. diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 04acd9d32c..227f4be642 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -71,10 +71,10 @@ using helpers::ARM64EncodableConstantOrRegister; using helpers::ArtVixlRegCodeCoherentForRegSet; static constexpr int kCurrentMethodStackOffset = 0; -// The compare/jump sequence will generate about (2 * num_entries + 1) instructions. While jump +// The compare/jump sequence will generate about (1.5 * num_entries + 3) instructions. While jump // table version generates 7 instructions and num_entries literals. Compare/jump sequence will // generates less code/data with a small num_entries. 
-static constexpr uint32_t kPackedSwitchJumpTableThreshold = 6; +static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7; inline Condition ARM64Condition(IfCondition cond) { switch (cond) { @@ -546,7 +546,7 @@ class ArraySetSlowPathARM64 : public SlowPathCodeARM64 { void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) { uint32_t num_entries = switch_instr_->GetNumEntries(); - DCHECK_GE(num_entries, kPackedSwitchJumpTableThreshold); + DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold); // We are about to use the assembler to place literals directly. Make sure we have enough // underlying code buffer and we have generated the jump table with right size. @@ -4558,20 +4558,29 @@ void InstructionCodeGeneratorARM64::VisitPackedSwitch(HPackedSwitch* switch_inst // ranges and emit the tables only as required. static constexpr int32_t kJumpTableInstructionThreshold = 1* MB / kMaxExpectedSizePerHInstruction; - if (num_entries < kPackedSwitchJumpTableThreshold || + if (num_entries <= kPackedSwitchCompareJumpThreshold || // Current instruction id is an upper bound of the number of HIRs in the graph. GetGraph()->GetCurrentInstructionId() > kJumpTableInstructionThreshold) { // Create a series of compare/jumps. + UseScratchRegisterScope temps(codegen_->GetVIXLAssembler()); + Register temp = temps.AcquireW(); + __ Subs(temp, value_reg, Operand(lower_bound)); + const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors(); - for (uint32_t i = 0; i < num_entries; i++) { - int32_t case_value = lower_bound + i; - vixl::Label* succ = codegen_->GetLabelOf(successors[i]); - if (case_value == 0) { - __ Cbz(value_reg, succ); - } else { - __ Cmp(value_reg, Operand(case_value)); - __ B(eq, succ); - } + // Jump to successors[0] if value == lower_bound. + __ B(eq, codegen_->GetLabelOf(successors[0])); + int32_t last_index = 0; + for (; num_entries - last_index > 2; last_index += 2) { + __ Subs(temp, temp, Operand(2)); + // Jump to successors[last_index + 1] if value < case_value[last_index + 2]. + __ B(lo, codegen_->GetLabelOf(successors[last_index + 1])); + // Jump to successors[last_index + 2] if value == case_value[last_index + 2]. + __ B(eq, codegen_->GetLabelOf(successors[last_index + 2])); + } + if (num_entries - last_index == 2) { + // The last missing case_value. + __ Cmp(temp, Operand(1)); + __ B(eq, codegen_->GetLabelOf(successors[last_index + 1])); } // And the default for any other value. 
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 9dc9167824..d092de9421 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -2218,6 +2218,171 @@ void InstructionCodeGeneratorMIPS::VisitCondition(HCondition* instruction) { } } +void InstructionCodeGeneratorMIPS::DivRemOneOrMinusOne(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimInt); + + LocationSummary* locations = instruction->GetLocations(); + Location second = locations->InAt(1); + DCHECK(second.IsConstant()); + + Register out = locations->Out().AsRegister<Register>(); + Register dividend = locations->InAt(0).AsRegister<Register>(); + int32_t imm = second.GetConstant()->AsIntConstant()->GetValue(); + DCHECK(imm == 1 || imm == -1); + + if (instruction->IsRem()) { + __ Move(out, ZERO); + } else { + if (imm == -1) { + __ Subu(out, ZERO, dividend); + } else if (out != dividend) { + __ Move(out, dividend); + } + } +} + +void InstructionCodeGeneratorMIPS::DivRemByPowerOfTwo(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimInt); + + LocationSummary* locations = instruction->GetLocations(); + Location second = locations->InAt(1); + DCHECK(second.IsConstant()); + + Register out = locations->Out().AsRegister<Register>(); + Register dividend = locations->InAt(0).AsRegister<Register>(); + int32_t imm = second.GetConstant()->AsIntConstant()->GetValue(); + uint32_t abs_imm = static_cast<uint32_t>(std::abs(imm)); + DCHECK(IsPowerOfTwo(abs_imm)); + int ctz_imm = CTZ(abs_imm); + + if (instruction->IsDiv()) { + if (ctz_imm == 1) { + // Fast path for division by +/-2, which is very common. + __ Srl(TMP, dividend, 31); + } else { + __ Sra(TMP, dividend, 31); + __ Srl(TMP, TMP, 32 - ctz_imm); + } + __ Addu(out, dividend, TMP); + __ Sra(out, out, ctz_imm); + if (imm < 0) { + __ Subu(out, ZERO, out); + } + } else { + if (ctz_imm == 1) { + // Fast path for modulo +/-2, which is very common. 
+ __ Sra(TMP, dividend, 31); + __ Subu(out, dividend, TMP); + __ Andi(out, out, 1); + __ Addu(out, out, TMP); + } else { + __ Sra(TMP, dividend, 31); + __ Srl(TMP, TMP, 32 - ctz_imm); + __ Addu(out, dividend, TMP); + if (IsUint<16>(abs_imm - 1)) { + __ Andi(out, out, abs_imm - 1); + } else { + __ Sll(out, out, 32 - ctz_imm); + __ Srl(out, out, 32 - ctz_imm); + } + __ Subu(out, out, TMP); + } + } +} + +void InstructionCodeGeneratorMIPS::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimInt); + + LocationSummary* locations = instruction->GetLocations(); + Location second = locations->InAt(1); + DCHECK(second.IsConstant()); + + Register out = locations->Out().AsRegister<Register>(); + Register dividend = locations->InAt(0).AsRegister<Register>(); + int32_t imm = second.GetConstant()->AsIntConstant()->GetValue(); + + int64_t magic; + int shift; + CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift); + + bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); + + __ LoadConst32(TMP, magic); + if (isR6) { + __ MuhR6(TMP, dividend, TMP); + } else { + __ MultR2(dividend, TMP); + __ Mfhi(TMP); + } + if (imm > 0 && magic < 0) { + __ Addu(TMP, TMP, dividend); + } else if (imm < 0 && magic > 0) { + __ Subu(TMP, TMP, dividend); + } + + if (shift != 0) { + __ Sra(TMP, TMP, shift); + } + + if (instruction->IsDiv()) { + __ Sra(out, TMP, 31); + __ Subu(out, TMP, out); + } else { + __ Sra(AT, TMP, 31); + __ Subu(AT, TMP, AT); + __ LoadConst32(TMP, imm); + if (isR6) { + __ MulR6(TMP, AT, TMP); + } else { + __ MulR2(TMP, AT, TMP); + } + __ Subu(out, dividend, TMP); + } +} + +void InstructionCodeGeneratorMIPS::GenerateDivRemIntegral(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimInt); + + LocationSummary* locations = instruction->GetLocations(); + Register out = locations->Out().AsRegister<Register>(); + Location second = locations->InAt(1); + + if (second.IsConstant()) { + int32_t imm = second.GetConstant()->AsIntConstant()->GetValue(); + if (imm == 0) { + // Do not generate anything. DivZeroCheck would prevent any code to be executed. 
+ } else if (imm == 1 || imm == -1) { + DivRemOneOrMinusOne(instruction); + } else if (IsPowerOfTwo(std::abs(imm))) { + DivRemByPowerOfTwo(instruction); + } else { + DCHECK(imm <= -2 || imm >= 2); + GenerateDivRemWithAnyConstant(instruction); + } + } else { + Register dividend = locations->InAt(0).AsRegister<Register>(); + Register divisor = second.AsRegister<Register>(); + bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); + if (instruction->IsDiv()) { + if (isR6) { + __ DivR6(out, dividend, divisor); + } else { + __ DivR2(out, dividend, divisor); + } + } else { + if (isR6) { + __ ModR6(out, dividend, divisor); + } else { + __ ModR2(out, dividend, divisor); + } + } + } +} + void LocationsBuilderMIPS::VisitDiv(HDiv* div) { Primitive::Type type = div->GetResultType(); LocationSummary::CallKind call_kind = (type == Primitive::kPrimLong) @@ -2229,7 +2394,7 @@ void LocationsBuilderMIPS::VisitDiv(HDiv* div) { switch (type) { case Primitive::kPrimInt: locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1))); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; @@ -2258,20 +2423,11 @@ void LocationsBuilderMIPS::VisitDiv(HDiv* div) { void InstructionCodeGeneratorMIPS::VisitDiv(HDiv* instruction) { Primitive::Type type = instruction->GetType(); LocationSummary* locations = instruction->GetLocations(); - bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); switch (type) { - case Primitive::kPrimInt: { - Register dst = locations->Out().AsRegister<Register>(); - Register lhs = locations->InAt(0).AsRegister<Register>(); - Register rhs = locations->InAt(1).AsRegister<Register>(); - if (isR6) { - __ DivR6(dst, lhs, rhs); - } else { - __ DivR2(dst, lhs, rhs); - } + case Primitive::kPrimInt: + GenerateDivRemIntegral(instruction); break; - } case Primitive::kPrimLong: { codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLdiv), instruction, @@ -3666,7 +3822,7 @@ void LocationsBuilderMIPS::VisitRem(HRem* rem) { switch (type) { case Primitive::kPrimInt: locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1))); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; @@ -3696,21 +3852,11 @@ void LocationsBuilderMIPS::VisitRem(HRem* rem) { void InstructionCodeGeneratorMIPS::VisitRem(HRem* instruction) { Primitive::Type type = instruction->GetType(); - LocationSummary* locations = instruction->GetLocations(); - bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); switch (type) { - case Primitive::kPrimInt: { - Register dst = locations->Out().AsRegister<Register>(); - Register lhs = locations->InAt(0).AsRegister<Register>(); - Register rhs = locations->InAt(1).AsRegister<Register>(); - if (isR6) { - __ ModR6(dst, lhs, rhs); - } else { - __ ModR2(dst, lhs, rhs); - } + case Primitive::kPrimInt: + GenerateDivRemIntegral(instruction); break; - } case Primitive::kPrimLong: { codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLmod), instruction, @@ -4248,19 +4394,31 @@ void InstructionCodeGeneratorMIPS::VisitPackedSwitch(HPackedSwitch* switch_instr HBasicBlock* default_block = switch_instr->GetDefaultBlock(); // Create a set of compare/jumps. 
+ Register temp_reg = TMP; + __ Addiu32(temp_reg, value_reg, -lower_bound); + // Jump to default if index is negative + // Note: We don't check the case that index is positive while value < lower_bound, because in + // this case, index >= num_entries must be true. So that we can save one branch instruction. + __ Bltz(temp_reg, codegen_->GetLabelOf(default_block)); + const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors(); - for (int32_t i = 0; i < num_entries; ++i) { - int32_t case_value = lower_bound + i; - MipsLabel* successor_label = codegen_->GetLabelOf(successors[i]); - if (case_value == 0) { - __ Beqz(value_reg, successor_label); - } else { - __ LoadConst32(TMP, case_value); - __ Beq(value_reg, TMP, successor_label); - } + // Jump to successors[0] if value == lower_bound. + __ Beqz(temp_reg, codegen_->GetLabelOf(successors[0])); + int32_t last_index = 0; + for (; num_entries - last_index > 2; last_index += 2) { + __ Addiu(temp_reg, temp_reg, -2); + // Jump to successors[last_index + 1] if value < case_value[last_index + 2]. + __ Bltz(temp_reg, codegen_->GetLabelOf(successors[last_index + 1])); + // Jump to successors[last_index + 2] if value == case_value[last_index + 2]. + __ Beqz(temp_reg, codegen_->GetLabelOf(successors[last_index + 2])); + } + if (num_entries - last_index == 2) { + // The last missing case_value. + __ Addiu(temp_reg, temp_reg, -1); + __ Beqz(temp_reg, codegen_->GetLabelOf(successors[last_index + 1])); } - // Insert the default branch for every other value. + // And the default for any other value. if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) { __ B(codegen_->GetLabelOf(default_block)); } diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h index e3a2cb40ef..caf3174455 100644 --- a/compiler/optimizing/code_generator_mips.h +++ b/compiler/optimizing/code_generator_mips.h @@ -229,6 +229,10 @@ class InstructionCodeGeneratorMIPS : public HGraphVisitor { size_t condition_input_index, MipsLabel* true_target, MipsLabel* false_target); + void DivRemOneOrMinusOne(HBinaryOperation* instruction); + void DivRemByPowerOfTwo(HBinaryOperation* instruction); + void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); + void GenerateDivRemIntegral(HBinaryOperation* instruction); void HandleGoto(HInstruction* got, HBasicBlock* successor); MipsAssembler* const assembler_; diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index bc5eb31405..78f5644cfb 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -3975,17 +3975,34 @@ void InstructionCodeGeneratorMIPS64::VisitPackedSwitch(HPackedSwitch* switch_ins GpuRegister value_reg = locations->InAt(0).AsRegister<GpuRegister>(); HBasicBlock* default_block = switch_instr->GetDefaultBlock(); - // Create a series of compare/jumps. + // Create a set of compare/jumps. + GpuRegister temp_reg = TMP; + if (IsInt<16>(-lower_bound)) { + __ Addiu(temp_reg, value_reg, -lower_bound); + } else { + __ LoadConst32(AT, -lower_bound); + __ Addu(temp_reg, value_reg, AT); + } + // Jump to default if index is negative + // Note: We don't check the case that index is positive while value < lower_bound, because in + // this case, index >= num_entries must be true. So that we can save one branch instruction. 
+ __ Bltzc(temp_reg, codegen_->GetLabelOf(default_block)); + const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors(); - for (int32_t i = 0; i < num_entries; i++) { - int32_t case_value = lower_bound + i; - Mips64Label* succ = codegen_->GetLabelOf(successors[i]); - if (case_value == 0) { - __ Beqzc(value_reg, succ); - } else { - __ LoadConst32(TMP, case_value); - __ Beqc(value_reg, TMP, succ); - } + // Jump to successors[0] if value == lower_bound. + __ Beqzc(temp_reg, codegen_->GetLabelOf(successors[0])); + int32_t last_index = 0; + for (; num_entries - last_index > 2; last_index += 2) { + __ Addiu(temp_reg, temp_reg, -2); + // Jump to successors[last_index + 1] if value < case_value[last_index + 2]. + __ Bltzc(temp_reg, codegen_->GetLabelOf(successors[last_index + 1])); + // Jump to successors[last_index + 2] if value == case_value[last_index + 2]. + __ Beqzc(temp_reg, codegen_->GetLabelOf(successors[last_index + 2])); + } + if (num_entries - last_index == 2) { + // The last missing case_value. + __ Addiu(temp_reg, temp_reg, -1); + __ Beqzc(temp_reg, codegen_->GetLabelOf(successors[last_index + 1])); } // And the default for any other value. diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 2fb87d3029..19f03df2a0 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -42,7 +42,6 @@ namespace x86 { static constexpr int kCurrentMethodStackOffset = 0; static constexpr Register kMethodRegisterArgument = EAX; - static constexpr Register kCoreCalleeSaves[] = { EBP, ESI, EDI }; static constexpr int kC2ConditionMask = 0x400; @@ -6426,31 +6425,67 @@ void LocationsBuilderX86::VisitPackedSwitch(HPackedSwitch* switch_instr) { locations->SetInAt(0, Location::RequiresRegister()); } -void InstructionCodeGeneratorX86::VisitPackedSwitch(HPackedSwitch* switch_instr) { - int32_t lower_bound = switch_instr->GetStartValue(); - int32_t num_entries = switch_instr->GetNumEntries(); - LocationSummary* locations = switch_instr->GetLocations(); - Register value_reg = locations->InAt(0).AsRegister<Register>(); - HBasicBlock* default_block = switch_instr->GetDefaultBlock(); +void InstructionCodeGeneratorX86::GenPackedSwitchWithCompares(Register value_reg, + int32_t lower_bound, + uint32_t num_entries, + HBasicBlock* switch_block, + HBasicBlock* default_block) { + // Figure out the correct compare values and jump conditions. + // Handle the first compare/branch as a special case because it might + // jump to the default case. + DCHECK_GT(num_entries, 2u); + Condition first_condition; + uint32_t index; + const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors(); + if (lower_bound != 0) { + first_condition = kLess; + __ cmpl(value_reg, Immediate(lower_bound)); + __ j(first_condition, codegen_->GetLabelOf(default_block)); + __ j(kEqual, codegen_->GetLabelOf(successors[0])); - // Create a series of compare/jumps. - const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors(); - for (int i = 0; i < num_entries; i++) { - int32_t case_value = lower_bound + i; - if (case_value == 0) { - __ testl(value_reg, value_reg); - } else { - __ cmpl(value_reg, Immediate(case_value)); - } - __ j(kEqual, codegen_->GetLabelOf(successors[i])); + index = 1; + } else { + // Handle all the compare/jumps below. + first_condition = kBelow; + index = 0; + } + + // Handle the rest of the compare/jumps. 
+ for (; index + 1 < num_entries; index += 2) { + int32_t compare_to_value = lower_bound + index + 1; + __ cmpl(value_reg, Immediate(compare_to_value)); + // Jump to successors[index] if value < case_value[index]. + __ j(first_condition, codegen_->GetLabelOf(successors[index])); + // Jump to successors[index + 1] if value == case_value[index + 1]. + __ j(kEqual, codegen_->GetLabelOf(successors[index + 1])); + } + + if (index != num_entries) { + // There are an odd number of entries. Handle the last one. + DCHECK_EQ(index + 1, num_entries); + __ cmpl(value_reg, Immediate(lower_bound + index)); + __ j(kEqual, codegen_->GetLabelOf(successors[index])); } // And the default for any other value. - if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) { - __ jmp(codegen_->GetLabelOf(default_block)); + if (!codegen_->GoesToNextBlock(switch_block, default_block)) { + __ jmp(codegen_->GetLabelOf(default_block)); } } +void InstructionCodeGeneratorX86::VisitPackedSwitch(HPackedSwitch* switch_instr) { + int32_t lower_bound = switch_instr->GetStartValue(); + uint32_t num_entries = switch_instr->GetNumEntries(); + LocationSummary* locations = switch_instr->GetLocations(); + Register value_reg = locations->InAt(0).AsRegister<Register>(); + + GenPackedSwitchWithCompares(value_reg, + lower_bound, + num_entries, + switch_instr->GetBlock(), + switch_instr->GetDefaultBlock()); +} + void LocationsBuilderX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall); @@ -6465,11 +6500,20 @@ void LocationsBuilderX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) { void InstructionCodeGeneratorX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) { int32_t lower_bound = switch_instr->GetStartValue(); - int32_t num_entries = switch_instr->GetNumEntries(); + uint32_t num_entries = switch_instr->GetNumEntries(); LocationSummary* locations = switch_instr->GetLocations(); Register value_reg = locations->InAt(0).AsRegister<Register>(); HBasicBlock* default_block = switch_instr->GetDefaultBlock(); + if (num_entries <= kPackedSwitchJumpTableThreshold) { + GenPackedSwitchWithCompares(value_reg, + lower_bound, + num_entries, + switch_instr->GetBlock(), + default_block); + return; + } + // Optimizing has a jump area. Register temp_reg = locations->GetTemp(0).AsRegister<Register>(); Register constant_area = locations->InAt(1).AsRegister<Register>(); @@ -6481,7 +6525,7 @@ void InstructionCodeGeneratorX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_ } // Is the value in range? - DCHECK_GE(num_entries, 1); + DCHECK_GE(num_entries, 1u); __ cmpl(value_reg, Immediate(num_entries - 1)); __ j(kAbove, codegen_->GetLabelOf(default_block)); diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 064051c7f4..f9403a67c0 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -195,6 +195,11 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { X86Assembler* GetAssembler() const { return assembler_; } + // The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump + // table version generates 7 instructions and num_entries literals. Compare/jump sequence will + // generates less code/data with a small num_entries. + static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5; + private: // Generate code for the given suspend check. 
If not null, `successor` // is the block to branch to if the suspend check is not needed, and after @@ -236,6 +241,11 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { void GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label); void GenerateLongComparesAndJumps(HCondition* cond, Label* true_label, Label* false_label); void HandleGoto(HInstruction* got, HBasicBlock* successor); + void GenPackedSwitchWithCompares(Register value_reg, + int32_t lower_bound, + uint32_t num_entries, + HBasicBlock* switch_block, + HBasicBlock* default_block); X86Assembler* const assembler_; CodeGeneratorX86* const codegen_; diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 4618be9cc3..44a51ea6e2 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -41,6 +41,10 @@ namespace x86_64 { static constexpr int kCurrentMethodStackOffset = 0; static constexpr Register kMethodRegisterArgument = RDI; +// The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump +// table version generates 7 instructions and num_entries literals. Compare/jump sequence will +// generates less code/data with a small num_entries. +static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5; static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 }; static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 }; @@ -6021,11 +6025,58 @@ void LocationsBuilderX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) { void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) { int32_t lower_bound = switch_instr->GetStartValue(); - int32_t num_entries = switch_instr->GetNumEntries(); + uint32_t num_entries = switch_instr->GetNumEntries(); LocationSummary* locations = switch_instr->GetLocations(); CpuRegister value_reg_in = locations->InAt(0).AsRegister<CpuRegister>(); CpuRegister temp_reg = locations->GetTemp(0).AsRegister<CpuRegister>(); CpuRegister base_reg = locations->GetTemp(1).AsRegister<CpuRegister>(); + HBasicBlock* default_block = switch_instr->GetDefaultBlock(); + + // Should we generate smaller inline compare/jumps? + if (num_entries <= kPackedSwitchJumpTableThreshold) { + // Figure out the correct compare values and jump conditions. + // Handle the first compare/branch as a special case because it might + // jump to the default case. + DCHECK_GT(num_entries, 2u); + Condition first_condition; + uint32_t index; + const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors(); + if (lower_bound != 0) { + first_condition = kLess; + __ cmpl(value_reg_in, Immediate(lower_bound)); + __ j(first_condition, codegen_->GetLabelOf(default_block)); + __ j(kEqual, codegen_->GetLabelOf(successors[0])); + + index = 1; + } else { + // Handle all the compare/jumps below. + first_condition = kBelow; + index = 0; + } + + // Handle the rest of the compare/jumps. + for (; index + 1 < num_entries; index += 2) { + int32_t compare_to_value = lower_bound + index + 1; + __ cmpl(value_reg_in, Immediate(compare_to_value)); + // Jump to successors[index] if value < case_value[index]. + __ j(first_condition, codegen_->GetLabelOf(successors[index])); + // Jump to successors[index + 1] if value == case_value[index + 1]. + __ j(kEqual, codegen_->GetLabelOf(successors[index + 1])); + } + + if (index != num_entries) { + // There are an odd number of entries. Handle the last one. 
+ DCHECK_EQ(index + 1, num_entries); + __ cmpl(value_reg_in, Immediate(lower_bound + index)); + __ j(kEqual, codegen_->GetLabelOf(successors[index])); + } + + // And the default for any other value. + if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) { + __ jmp(codegen_->GetLabelOf(default_block)); + } + return; + } // Remove the bias, if needed. Register value_reg_out = value_reg_in.AsRegister(); @@ -6036,7 +6087,6 @@ void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_ins CpuRegister value_reg(value_reg_out); // Is the value in range? - HBasicBlock* default_block = switch_instr->GetDefaultBlock(); __ cmpl(value_reg, Immediate(num_entries - 1)); __ j(kAbove, codegen_->GetLabelOf(default_block)); diff --git a/compiler/optimizing/induction_var_analysis.cc b/compiler/optimizing/induction_var_analysis.cc index fdf8cc9c1f..0b7fdf85ea 100644 --- a/compiler/optimizing/induction_var_analysis.cc +++ b/compiler/optimizing/induction_var_analysis.cc @@ -705,7 +705,8 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::LookupInfo(HLoopInf return loop_it->second; } } - if (loop->IsLoopInvariant(instruction, true)) { + if (loop->IsDefinedOutOfTheLoop(instruction)) { + DCHECK(instruction->GetBlock()->Dominates(loop->GetPreHeader())); InductionInfo* info = CreateInvariantFetch(instruction); AssignInfo(loop, instruction, info); return info; diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc index 5b89cfef5a..389ada7504 100644 --- a/compiler/optimizing/load_store_elimination.cc +++ b/compiler/optimizing/load_store_elimination.cc @@ -725,7 +725,7 @@ class LSEVisitor : public HGraphVisitor { // instruction is a store in the loop so the loop must does write. DCHECK(side_effects_.GetLoopEffects(loop_info->GetHeader()).DoesAnyWrite()); - if (loop_info->IsLoopInvariant(original_ref, false)) { + if (loop_info->IsDefinedOutOfTheLoop(original_ref)) { DCHECK(original_ref->GetBlock()->Dominates(loop_info->GetPreHeader())); // Keep the store since its value may be needed at the loop header. possibly_redundant = false; @@ -933,8 +933,9 @@ class LSEVisitor : public HGraphVisitor { }; void LoadStoreElimination::Run() { - if (graph_->IsDebuggable()) { + if (graph_->IsDebuggable() || graph_->HasTryCatch()) { // Debugger may set heap values or trigger deoptimization of callers. + // Try/catch support not implemented yet. // Skip this optimization. return; } diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 9b26de44fe..926bc156cf 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -587,15 +587,8 @@ bool HLoopInformation::IsIn(const HLoopInformation& other) const { return other.blocks_.IsBitSet(header_->GetBlockId()); } -bool HLoopInformation::IsLoopInvariant(HInstruction* instruction, bool must_dominate) const { - HLoopInformation* other_loop = instruction->GetBlock()->GetLoopInformation(); - if (other_loop != this && (other_loop == nullptr || !other_loop->IsIn(*this))) { - if (must_dominate) { - return instruction->GetBlock()->Dominates(GetHeader()); - } - return true; - } - return false; +bool HLoopInformation::IsDefinedOutOfTheLoop(HInstruction* instruction) const { + return !blocks_.IsBitSet(instruction->GetBlock()->GetBlockId()); } size_t HLoopInformation::GetLifetimeEnd() const { @@ -1467,6 +1460,24 @@ void HInstructionList::Add(const HInstructionList& instruction_list) { } } +// Should be called on instructions in a dead block in post order. 
This method +// assumes `insn` has been removed from all users with the exception of catch +// phis because of missing exceptional edges in the graph. It removes the +// instruction from catch phi uses, together with inputs of other catch phis in +// the catch block at the same index, as these must be dead too. +static void RemoveUsesOfDeadInstruction(HInstruction* insn) { + DCHECK(!insn->HasEnvironmentUses()); + while (insn->HasNonEnvironmentUses()) { + HUseListNode<HInstruction*>* use = insn->GetUses().GetFirst(); + size_t use_index = use->GetIndex(); + HBasicBlock* user_block = use->GetUser()->GetBlock(); + DCHECK(use->GetUser()->IsPhi() && user_block->IsCatchBlock()); + for (HInstructionIterator phi_it(user_block->GetPhis()); !phi_it.Done(); phi_it.Advance()) { + phi_it.Current()->AsPhi()->RemoveInputAt(use_index); + } + } +} + void HBasicBlock::DisconnectAndDelete() { // Dominators must be removed after all the blocks they dominate. This way // a loop header is removed last, a requirement for correct loop information @@ -1569,21 +1580,13 @@ void HBasicBlock::DisconnectAndDelete() { // graph will always remain consistent. for (HBackwardInstructionIterator it(GetInstructions()); !it.Done(); it.Advance()) { HInstruction* insn = it.Current(); - while (insn->HasUses()) { - DCHECK(IsTryBlock()); - HUseListNode<HInstruction*>* use = insn->GetUses().GetFirst(); - size_t use_index = use->GetIndex(); - HBasicBlock* user_block = use->GetUser()->GetBlock(); - DCHECK(use->GetUser()->IsPhi() && user_block->IsCatchBlock()); - for (HInstructionIterator phi_it(user_block->GetPhis()); !phi_it.Done(); phi_it.Advance()) { - phi_it.Current()->AsPhi()->RemoveInputAt(use_index); - } - } - + RemoveUsesOfDeadInstruction(insn); RemoveInstruction(insn); } for (HInstructionIterator it(GetPhis()); !it.Done(); it.Advance()) { - RemovePhi(it.Current()->AsPhi()); + HPhi* insn = it.Current()->AsPhi(); + RemoveUsesOfDeadInstruction(insn); + RemovePhi(insn); } // Disconnect from the dominator. diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 9d3c88c79e..3e38e9f2a4 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -564,11 +564,8 @@ class HLoopInformation : public ArenaObject<kArenaAllocLoopInfo> { // Note that `other` *must* be populated before entering this function. bool IsIn(const HLoopInformation& other) const; - // Returns true if instruction is not defined within this loop or any loop nested inside - // this loop. If must_dominate is set, only definitions that actually dominate the loop - // header can be invariant. Otherwise, any definition outside the loop, including - // definitions that appear after the loop, is invariant. - bool IsLoopInvariant(HInstruction* instruction, bool must_dominate) const; + // Returns true if instruction is not defined within this loop. 
+ bool IsDefinedOutOfTheLoop(HInstruction* instruction) const; const ArenaBitVector& GetBlocks() const { return blocks_; } diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 34956039db..831b626c4f 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -531,7 +531,6 @@ static void RunOptimizations(HGraph* graph, graph, stats, "instruction_simplifier_after_bce"); InstructionSimplifier* simplify4 = new (arena) InstructionSimplifier( graph, stats, "instruction_simplifier_before_codegen"); - IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, driver); HOptimization* optimizations1[] = { @@ -543,49 +542,30 @@ static void RunOptimizations(HGraph* graph, dce1, simplify2 }; - RunOptimizations(optimizations1, arraysize(optimizations1), pass_observer); MaybeRunInliner(graph, codegen, driver, stats, dex_compilation_unit, pass_observer, &handles); - // TODO: Update passes incompatible with try/catch so we have the same - // pipeline for all methods. - if (graph->HasTryCatch()) { - HOptimization* optimizations2[] = { - boolean_simplify, - side_effects, - gvn, - dce2, - // The codegen has a few assumptions that only the instruction simplifier - // can satisfy. For example, the code generator does not expect to see a - // HTypeConversion from a type to the same type. - simplify4, - }; - - RunOptimizations(optimizations2, arraysize(optimizations2), pass_observer); - } else { - HOptimization* optimizations2[] = { - // BooleanSimplifier depends on the InstructionSimplifier removing - // redundant suspend checks to recognize empty blocks. - boolean_simplify, - fold2, // TODO: if we don't inline we can also skip fold2. - side_effects, - gvn, - licm, - induction, - bce, - fold3, // evaluates code generated by dynamic bce - simplify3, - lse, - dce2, - // The codegen has a few assumptions that only the instruction simplifier - // can satisfy. For example, the code generator does not expect to see a - // HTypeConversion from a type to the same type. - simplify4, - }; - - RunOptimizations(optimizations2, arraysize(optimizations2), pass_observer); - } + HOptimization* optimizations2[] = { + // BooleanSimplifier depends on the InstructionSimplifier removing + // redundant suspend checks to recognize empty blocks. + boolean_simplify, + fold2, // TODO: if we don't inline we can also skip fold2. + side_effects, + gvn, + licm, + induction, + bce, + fold3, // evaluates code generated by dynamic bce + simplify3, + lse, + dce2, + // The codegen has a few assumptions that only the instruction simplifier + // can satisfy. For example, the code generator does not expect to see a + // HTypeConversion from a type to the same type. + simplify4, + }; + RunOptimizations(optimizations2, arraysize(optimizations2), pass_observer); RunArchOptimizations(driver->GetInstructionSet(), graph, stats, pass_observer); AllocateRegisters(graph, codegen, pass_observer); @@ -634,7 +614,7 @@ CompiledMethod* OptimizingCompiler::EmitOptimized(ArenaAllocator* arena, codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(), codegen->GetCoreSpillMask(), codegen->GetFpuSpillMask(), - ArrayRef<const SrcMapElem>(codegen->GetSrcMappingTable()), + ArrayRef<const SrcMapElem>(), ArrayRef<const uint8_t>(), // mapping_table. ArrayRef<const uint8_t>(stack_map), ArrayRef<const uint8_t>(), // native_gc_map. @@ -668,7 +648,7 @@ CompiledMethod* OptimizingCompiler::EmitBaseline( codegen->HasEmptyFrame() ? 
0 : codegen->GetFrameSize(), codegen->GetCoreSpillMask(), codegen->GetFpuSpillMask(), - ArrayRef<const SrcMapElem>(codegen->GetSrcMappingTable()), + ArrayRef<const SrcMapElem>(), AlignVectorSize(mapping_table), AlignVectorSize(vmap_table), AlignVectorSize(gc_map), diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc index b383f1e1ad..a385448104 100644 --- a/compiler/optimizing/pc_relative_fixups_x86.cc +++ b/compiler/optimizing/pc_relative_fixups_x86.cc @@ -15,6 +15,7 @@ */ #include "pc_relative_fixups_x86.h" +#include "code_generator_x86.h" namespace art { namespace x86 { @@ -79,6 +80,10 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { } void VisitPackedSwitch(HPackedSwitch* switch_insn) OVERRIDE { + if (switch_insn->GetNumEntries() <= + InstructionCodeGeneratorX86::kPackedSwitchJumpTableThreshold) { + return; + } // We need to replace the HPackedSwitch with a HX86PackedSwitch in order to // address the constant area. InitializePCRelativeBasePointer(); |
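The new MIPS helpers DivRemOneOrMinusOne, DivRemByPowerOfTwo and GenerateDivRemWithAnyConstant avoid the divide unit when the divisor is a compile-time constant. Below is a hedged, standalone C++ sketch of the arithmetic those instruction sequences compute; the function names are invented for illustration, and the magic/shift pair normally comes from CalculateMagicAndShiftForDivRem, so the divide-by-7 constants shown are just the well-known textbook values used as an example.

#include <cstdint>
#include <cstdlib>

// Division by +/-2^k: add a bias of (2^k - 1) only when the dividend is
// negative, shift arithmetically, then negate if the divisor is negative.
// Models the Sra/Srl/Addu/Sra sequence emitted by DivRemByPowerOfTwo.
int32_t DivByPowerOfTwo(int32_t dividend, int32_t imm) {
  uint32_t abs_imm = static_cast<uint32_t>(std::abs(imm));
  int ctz = __builtin_ctz(abs_imm);                       // imm is +/-2^ctz, ctz >= 1
  uint32_t bias = static_cast<uint32_t>(dividend < 0 ? -1 : 0) >> (32 - ctz);
  int32_t out = static_cast<int32_t>(static_cast<uint32_t>(dividend) + bias) >> ctz;
  return imm < 0 ? -out : out;                            // Subu(out, ZERO, out)
}

// Remainder by +/-2^k: same bias, mask with (2^k - 1), then remove the bias.
// The result keeps the sign of the dividend, as Java's % requires.
int32_t RemByPowerOfTwo(int32_t dividend, int32_t imm) {
  uint32_t abs_imm = static_cast<uint32_t>(std::abs(imm));
  int ctz = __builtin_ctz(abs_imm);
  uint32_t bias = static_cast<uint32_t>(dividend < 0 ? -1 : 0) >> (32 - ctz);
  uint32_t masked = (static_cast<uint32_t>(dividend) + bias) & (abs_imm - 1);
  return static_cast<int32_t>(masked - bias);
}

// Any other constant: multiply by a precomputed "magic" value, keep the high
// 32 bits (MultR2/Mfhi or MuhR6), correct for the signs of imm and magic,
// shift arithmetically (Sra), and add one when the intermediate is negative.
int32_t DivByConstant(int32_t dividend, int32_t magic, int shift, int32_t imm) {
  int64_t product = static_cast<int64_t>(dividend) * magic;
  int32_t hi = static_cast<int32_t>(product >> 32);
  if (imm > 0 && magic < 0) hi += dividend;
  if (imm < 0 && magic > 0) hi -= dividend;
  hi >>= shift;
  return hi + static_cast<int32_t>(static_cast<uint32_t>(hi) >> 31);
}

// Example with illustrative constants for dividing by 7 (magic 0x92492493, shift 2):
// DivByConstant(20, static_cast<int32_t>(0x92492493), 2, 7) == 2, and the remainder
// is then 20 - 2 * 7 == 6, matching the Subu(out, dividend, TMP) tail of the rem path.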