Diffstat (limited to 'compiler/optimizing')
| -rw-r--r-- | compiler/optimizing/code_generator_x86.cc     | 67 |
| -rw-r--r-- | compiler/optimizing/code_generator_x86.h      |  6 |
| -rw-r--r-- | compiler/optimizing/code_generator_x86_64.cc  | 92 |
| -rw-r--r-- | compiler/optimizing/code_generator_x86_64.h   |  5 |
| -rw-r--r-- | compiler/optimizing/gvn.cc                    |  7 |
| -rw-r--r-- | compiler/optimizing/instruction_simplifier.cc | 21 |
| -rw-r--r-- | compiler/optimizing/licm.cc                   | 27 |
| -rw-r--r-- | compiler/optimizing/nodes.cc                  | 14 |
| -rw-r--r-- | compiler/optimizing/nodes.h                   | 18 |
| -rw-r--r-- | compiler/optimizing/nodes_vector.h            | 10 |
| -rw-r--r-- | compiler/optimizing/optimizing_compiler.cc    | 18 |
| -rw-r--r-- | compiler/optimizing/scheduler.h               |  5 |
| -rw-r--r-- | compiler/optimizing/scheduler_arm64.h         | 14 |
13 files changed, 185 insertions, 119 deletions
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 9a7495b0ef..ba222fe532 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -5739,24 +5739,18 @@ X86Assembler* ParallelMoveResolverX86::GetAssembler() const {
   return codegen_->GetAssembler();
 }
 
-void ParallelMoveResolverX86::MoveMemoryToMemory32(int dst, int src) {
+void ParallelMoveResolverX86::MoveMemoryToMemory(int dst, int src, int number_of_words) {
   ScratchRegisterScope ensure_scratch(
       this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
   Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
   int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
-  __ movl(temp_reg, Address(ESP, src + stack_offset));
-  __ movl(Address(ESP, dst + stack_offset), temp_reg);
-}
-
-void ParallelMoveResolverX86::MoveMemoryToMemory64(int dst, int src) {
-  ScratchRegisterScope ensure_scratch(
-      this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
-  Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
-  int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
-  __ movl(temp_reg, Address(ESP, src + stack_offset));
-  __ movl(Address(ESP, dst + stack_offset), temp_reg);
-  __ movl(temp_reg, Address(ESP, src + stack_offset + kX86WordSize));
-  __ movl(Address(ESP, dst + stack_offset + kX86WordSize), temp_reg);
+  // Now that temp register is available (possibly spilled), move blocks of memory.
+  for (int i = 0; i < number_of_words; i++) {
+    __ movl(temp_reg, Address(ESP, src + stack_offset));
+    __ movl(Address(ESP, dst + stack_offset), temp_reg);
+    stack_offset += kX86WordSize;
+  }
 }
 
 void ParallelMoveResolverX86::EmitMove(size_t index) {
@@ -5807,7 +5801,7 @@ void ParallelMoveResolverX86::EmitMove(size_t index) {
       __ movss(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
     } else {
       DCHECK(destination.IsStackSlot());
-      MoveMemoryToMemory32(destination.GetStackIndex(), source.GetStackIndex());
+      MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 1);
     }
   } else if (source.IsDoubleStackSlot()) {
     if (destination.IsRegisterPair()) {
@@ -5818,11 +5812,15 @@ void ParallelMoveResolverX86::EmitMove(size_t index) {
       __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
     } else {
       DCHECK(destination.IsDoubleStackSlot()) << destination;
-      MoveMemoryToMemory64(destination.GetStackIndex(), source.GetStackIndex());
+      MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 2);
     }
   } else if (source.IsSIMDStackSlot()) {
-    DCHECK(destination.IsFpuRegister());
-    __ movups(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
+    if (destination.IsFpuRegister()) {
+      __ movups(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
+    } else {
+      DCHECK(destination.IsSIMDStackSlot());
+      MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 4);
+    }
   } else if (source.IsConstant()) {
     HConstant* constant = source.GetConstant();
     if (constant->IsIntConstant() || constant->IsNullConstant()) {
@@ -5922,7 +5920,16 @@ void ParallelMoveResolverX86::Exchange32(XmmRegister reg, int mem) {
   __ movd(reg, temp_reg);
 }
 
-void ParallelMoveResolverX86::Exchange(int mem1, int mem2) {
+void ParallelMoveResolverX86::Exchange128(XmmRegister reg, int mem) {
+  size_t extra_slot = 4 * kX86WordSize;
+  __ subl(ESP, Immediate(extra_slot));
+  __ movups(Address(ESP, 0), XmmRegister(reg));
+  ExchangeMemory(0, mem + extra_slot, 4);
+  __ movups(XmmRegister(reg), Address(ESP, 0));
+  __ addl(ESP, Immediate(extra_slot));
+}
+
+void ParallelMoveResolverX86::ExchangeMemory(int mem1, int mem2, int number_of_words) {
   ScratchRegisterScope ensure_scratch1(
       this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
 
@@ -5932,10 +5939,15 @@ void ParallelMoveResolverX86::Exchange(int mem1, int mem2) {
   int stack_offset = ensure_scratch1.IsSpilled() ? kX86WordSize : 0;
   stack_offset += ensure_scratch2.IsSpilled() ? kX86WordSize : 0;
-  __ movl(static_cast<Register>(ensure_scratch1.GetRegister()), Address(ESP, mem1 + stack_offset));
-  __ movl(static_cast<Register>(ensure_scratch2.GetRegister()), Address(ESP, mem2 + stack_offset));
-  __ movl(Address(ESP, mem2 + stack_offset), static_cast<Register>(ensure_scratch1.GetRegister()));
-  __ movl(Address(ESP, mem1 + stack_offset), static_cast<Register>(ensure_scratch2.GetRegister()));
+
+  // Now that temp registers are available (possibly spilled), exchange blocks of memory.
+  for (int i = 0; i < number_of_words; i++) {
+    __ movl(static_cast<Register>(ensure_scratch1.GetRegister()), Address(ESP, mem1 + stack_offset));
+    __ movl(static_cast<Register>(ensure_scratch2.GetRegister()), Address(ESP, mem2 + stack_offset));
+    __ movl(Address(ESP, mem2 + stack_offset), static_cast<Register>(ensure_scratch1.GetRegister()));
+    __ movl(Address(ESP, mem1 + stack_offset), static_cast<Register>(ensure_scratch2.GetRegister()));
+    stack_offset += kX86WordSize;
+  }
 }
 
 void ParallelMoveResolverX86::EmitSwap(size_t index) {
@@ -5954,7 +5966,7 @@ void ParallelMoveResolverX86::EmitSwap(size_t index) {
   } else if (source.IsStackSlot() && destination.IsRegister()) {
     Exchange(destination.AsRegister<Register>(), source.GetStackIndex());
   } else if (source.IsStackSlot() && destination.IsStackSlot()) {
-    Exchange(destination.GetStackIndex(), source.GetStackIndex());
+    ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 1);
  } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
    // Use XOR Swap algorithm to avoid a temporary.
    DCHECK_NE(source.reg(), destination.reg());
@@ -5990,8 +6002,13 @@ void ParallelMoveResolverX86::EmitSwap(size_t index) {
     // Move the high double to the low double.
     __ psrldq(reg, Immediate(8));
   } else if (destination.IsDoubleStackSlot() && source.IsDoubleStackSlot()) {
-    Exchange(destination.GetStackIndex(), source.GetStackIndex());
-    Exchange(destination.GetHighStackIndex(kX86WordSize), source.GetHighStackIndex(kX86WordSize));
+    ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 2);
+  } else if (source.IsSIMDStackSlot() && destination.IsSIMDStackSlot()) {
+    ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 4);
+  } else if (source.IsFpuRegister() && destination.IsSIMDStackSlot()) {
+    Exchange128(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
+  } else if (destination.IsFpuRegister() && source.IsSIMDStackSlot()) {
+    Exchange128(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
   } else {
     LOG(FATAL) << "Unimplemented: source: " << source << ", destination: " << destination;
   }
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 176e4dfda0..40b7e3c54f 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -139,10 +139,10 @@ class ParallelMoveResolverX86 : public ParallelMoveResolverWithSwap {
 
  private:
   void Exchange(Register reg, int mem);
-  void Exchange(int mem1, int mem2);
   void Exchange32(XmmRegister reg, int mem);
-  void MoveMemoryToMemory32(int dst, int src);
-  void MoveMemoryToMemory64(int dst, int src);
+  void Exchange128(XmmRegister reg, int mem);
+  void ExchangeMemory(int mem1, int mem2, int number_of_words);
+  void MoveMemoryToMemory(int dst, int src, int number_of_words);
 
   CodeGeneratorX86* const codegen_;
 
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 70ce522417..caad7885bd 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -5227,9 +5227,17 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) {
       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
     }
   } else if (source.IsSIMDStackSlot()) {
-    DCHECK(destination.IsFpuRegister());
-    __ movups(destination.AsFpuRegister<XmmRegister>(),
-              Address(CpuRegister(RSP), source.GetStackIndex()));
+    if (destination.IsFpuRegister()) {
+      __ movups(destination.AsFpuRegister<XmmRegister>(),
+                Address(CpuRegister(RSP), source.GetStackIndex()));
+    } else {
+      DCHECK(destination.IsSIMDStackSlot());
+      size_t high = kX86_64WordSize;
+      __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
+      __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
+      __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex() + high));
+      __ movq(Address(CpuRegister(RSP), destination.GetStackIndex() + high), CpuRegister(TMP));
+    }
   } else if (source.IsConstant()) {
     HConstant* constant = source.GetConstant();
     if (constant->IsIntConstant() || constant->IsNullConstant()) {
@@ -5297,19 +5305,6 @@ void ParallelMoveResolverX86_64::Exchange32(CpuRegister reg, int mem) {
   __ movl(reg, CpuRegister(TMP));
 }
 
-void ParallelMoveResolverX86_64::Exchange32(int mem1, int mem2) {
-  ScratchRegisterScope ensure_scratch(
-      this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
-
-  int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
-  __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
-  __ movl(CpuRegister(ensure_scratch.GetRegister()),
-          Address(CpuRegister(RSP), mem2 + stack_offset));
-  __ movl(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
-  __ movl(Address(CpuRegister(RSP), mem1 + stack_offset),
-          CpuRegister(ensure_scratch.GetRegister()));
-}
-
 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg1, CpuRegister reg2) {
   __ movq(CpuRegister(TMP), reg1);
   __ movq(reg1, reg2);
@@ -5322,19 +5317,6 @@ void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg, int mem) {
   __ movq(reg, CpuRegister(TMP));
 }
 
-void ParallelMoveResolverX86_64::Exchange64(int mem1, int mem2) {
-  ScratchRegisterScope ensure_scratch(
-      this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
-
-  int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
-  __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
-  __ movq(CpuRegister(ensure_scratch.GetRegister()),
-          Address(CpuRegister(RSP), mem2 + stack_offset));
-  __ movq(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
-  __ movq(Address(CpuRegister(RSP), mem1 + stack_offset),
-          CpuRegister(ensure_scratch.GetRegister()));
-}
-
 void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) {
   __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
   __ movss(Address(CpuRegister(RSP), mem), reg);
@@ -5347,6 +5329,48 @@ void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) {
   __ movd(reg, CpuRegister(TMP));
 }
 
+void ParallelMoveResolverX86_64::Exchange128(XmmRegister reg, int mem) {
+  size_t extra_slot = 2 * kX86_64WordSize;
+  __ subq(CpuRegister(RSP), Immediate(extra_slot));
+  __ movups(Address(CpuRegister(RSP), 0), XmmRegister(reg));
+  ExchangeMemory64(0, mem + extra_slot, 2);
+  __ movups(XmmRegister(reg), Address(CpuRegister(RSP), 0));
+  __ addq(CpuRegister(RSP), Immediate(extra_slot));
+}
+
+void ParallelMoveResolverX86_64::ExchangeMemory32(int mem1, int mem2) {
+  ScratchRegisterScope ensure_scratch(
+      this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
+
+  int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
+  __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
+  __ movl(CpuRegister(ensure_scratch.GetRegister()),
+          Address(CpuRegister(RSP), mem2 + stack_offset));
+  __ movl(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
+  __ movl(Address(CpuRegister(RSP), mem1 + stack_offset),
+          CpuRegister(ensure_scratch.GetRegister()));
+}
+
+void ParallelMoveResolverX86_64::ExchangeMemory64(int mem1, int mem2, int num_of_qwords) {
+  ScratchRegisterScope ensure_scratch(
+      this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
+
+  int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
+
+  // Now that temp registers are available (possibly spilled), exchange blocks of memory.
+  for (int i = 0; i < num_of_qwords; i++) {
+    __ movq(CpuRegister(TMP),
+            Address(CpuRegister(RSP), mem1 + stack_offset));
+    __ movq(CpuRegister(ensure_scratch.GetRegister()),
+            Address(CpuRegister(RSP), mem2 + stack_offset));
+    __ movq(Address(CpuRegister(RSP), mem2 + stack_offset),
+            CpuRegister(TMP));
+    __ movq(Address(CpuRegister(RSP), mem1 + stack_offset),
+            CpuRegister(ensure_scratch.GetRegister()));
+    stack_offset += kX86_64WordSize;
+  }
+}
+
 void ParallelMoveResolverX86_64::EmitSwap(size_t index) {
   MoveOperands* move = moves_[index];
   Location source = move->GetSource();
@@ -5359,13 +5383,13 @@ void ParallelMoveResolverX86_64::EmitSwap(size_t index) {
   } else if (source.IsStackSlot() && destination.IsRegister()) {
     Exchange32(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
   } else if (source.IsStackSlot() && destination.IsStackSlot()) {
-    Exchange32(destination.GetStackIndex(), source.GetStackIndex());
+    ExchangeMemory32(destination.GetStackIndex(), source.GetStackIndex());
   } else if (source.IsRegister() && destination.IsDoubleStackSlot()) {
     Exchange64(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
   } else if (source.IsDoubleStackSlot() && destination.IsRegister()) {
     Exchange64(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
   } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
-    Exchange64(destination.GetStackIndex(), source.GetStackIndex());
+    ExchangeMemory64(destination.GetStackIndex(), source.GetStackIndex(), 1);
   } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
     __ movd(CpuRegister(TMP), source.AsFpuRegister<XmmRegister>());
     __ movaps(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>());
@@ -5378,6 +5402,12 @@ void ParallelMoveResolverX86_64::EmitSwap(size_t index) {
     Exchange64(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
   } else if (source.IsDoubleStackSlot() && destination.IsFpuRegister()) {
     Exchange64(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
+  } else if (source.IsSIMDStackSlot() && destination.IsSIMDStackSlot()) {
+    ExchangeMemory64(destination.GetStackIndex(), source.GetStackIndex(), 2);
+  } else if (source.IsFpuRegister() && destination.IsSIMDStackSlot()) {
+    Exchange128(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
+  } else if (destination.IsFpuRegister() && source.IsSIMDStackSlot()) {
+    Exchange128(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
   } else {
     LOG(FATAL) << "Unimplemented swap between " << source << " and " << destination;
   }
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 00c5c27470..e86123ef01 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -139,11 +139,12 @@ class ParallelMoveResolverX86_64 : public ParallelMoveResolverWithSwap {
  private:
   void Exchange32(CpuRegister reg, int mem);
   void Exchange32(XmmRegister reg, int mem);
-  void Exchange32(int mem1, int mem2);
   void Exchange64(CpuRegister reg1, CpuRegister reg2);
   void Exchange64(CpuRegister reg, int mem);
   void Exchange64(XmmRegister reg, int mem);
-  void Exchange64(int mem1, int mem2);
+  void Exchange128(XmmRegister reg, int mem);
+  void ExchangeMemory32(int mem1, int mem2);
+  void ExchangeMemory64(int mem1, int mem2, int num_of_qwords);
 
   CodeGeneratorX86_64* const codegen_;
 
diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc
index 813772e9af..71c394ec1f 100644
--- a/compiler/optimizing/gvn.cc
+++ b/compiler/optimizing/gvn.cc
@@ -301,8 +301,11 @@ class ValueSet : public ArenaObject<kArenaAllocGvn> {
     // Pure instructions are put into odd buckets to speed up deletion. Note that in the
     // case of irreducible loops, we don't put pure instructions in odd buckets, as we
     // need to delete them when entering the loop.
-    if (instruction->GetSideEffects().HasDependencies() ||
-        instruction->GetBlock()->GetGraph()->HasIrreducibleLoops()) {
+    // ClinitCheck is treated as a pure instruction since it's only executed
+    // once.
+    bool pure = !instruction->GetSideEffects().HasDependencies() ||
+                instruction->IsClinitCheck();
+    if (!pure || instruction->GetBlock()->GetGraph()->HasIrreducibleLoops()) {
       return (hash_code << 1) | 0;
     } else {
       return (hash_code << 1) | 1;
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 53e449bbbe..a42a85dc1d 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -994,27 +994,6 @@ void InstructionSimplifierVisitor::VisitIf(HIf* instruction) {
     instruction->GetBlock()->SwapSuccessors();
     RecordSimplification();
   }
-  HInstruction* input = instruction->InputAt(0);
-
-  // If a condition 'cond' is evaluated in an HIf instruction then in the successors of the
-  // IF_BLOCK we statically know the value of the condition (TRUE in TRUE_SUCC, FALSE in
-  // FALSE_SUCC). Using that we can replace another evaluation (use) EVAL of the same 'cond'
-  // with TRUE value (FALSE value) if every path from the ENTRY_BLOCK to EVAL_BLOCK contains the
-  // edge HIF_BLOCK->TRUE_SUCC (HIF_BLOCK->FALSE_SUCC).
-  if (!input->IsConstant()) {
-    HBasicBlock* true_succ = instruction->IfTrueSuccessor();
-    HBasicBlock* false_succ = instruction->IfFalseSuccessor();
-
-    DCHECK_EQ(true_succ->GetPredecessors().size(), 1u);
-    input->ReplaceUsesDominatedBy(
-        true_succ->GetFirstInstruction(), GetGraph()->GetIntConstant(1), /* strictly */ false);
-    RecordSimplification();
-
-    DCHECK_EQ(false_succ->GetPredecessors().size(), 1u);
-    input->ReplaceUsesDominatedBy(
-        false_succ->GetFirstInstruction(), GetGraph()->GetIntConstant(0), /* strictly */ false);
-    RecordSimplification();
-  }
 }
 
 void InstructionSimplifierVisitor::VisitArrayLength(HArrayLength* instruction) {
diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc
index 7af1a20f98..d3a0376e9c 100644
--- a/compiler/optimizing/licm.cc
+++ b/compiler/optimizing/licm.cc
@@ -129,10 +129,25 @@ void LICM::Run() {
            !inst_it.Done();
            inst_it.Advance()) {
         HInstruction* instruction = inst_it.Current();
-        if (instruction->CanBeMoved()
-            && (!instruction->CanThrow() || !found_first_non_hoisted_visible_instruction_in_loop)
-            && !instruction->GetSideEffects().MayDependOn(loop_effects)
-            && InputsAreDefinedBeforeLoop(instruction)) {
+        bool can_move = false;
+        if (instruction->CanBeMoved() && InputsAreDefinedBeforeLoop(instruction)) {
+          if (instruction->CanThrow()) {
+            if (!found_first_non_hoisted_visible_instruction_in_loop) {
+              DCHECK(instruction->GetBlock()->IsLoopHeader());
+              if (instruction->IsClinitCheck()) {
+                // clinit is only done once, and since all visible instructions
+                // in the loop header so far have been hoisted out, we can hoist
+                // the clinit check out also.
+                can_move = true;
+              } else if (!instruction->GetSideEffects().MayDependOn(loop_effects)) {
+                can_move = true;
+              }
+            }
+          } else if (!instruction->GetSideEffects().MayDependOn(loop_effects)) {
+            can_move = true;
+          }
+        }
+        if (can_move) {
           // We need to update the environment if the instruction has a loop header
           // phi in it.
           if (instruction->NeedsEnvironment()) {
@@ -142,7 +157,9 @@ void LICM::Run() {
           }
           instruction->MoveBefore(pre_header->GetLastInstruction());
           MaybeRecordStat(stats_, MethodCompilationStat::kLoopInvariantMoved);
-        } else if (instruction->CanThrow() || instruction->DoesAnyWrite()) {
+        }
+
+        if (!can_move && (instruction->CanThrow() || instruction->DoesAnyWrite())) {
           // If `instruction` can do something visible (throw or write),
           // we cannot move further instructions that can throw.
           found_first_non_hoisted_visible_instruction_in_loop = true;
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index d39c2aded5..5f33ed6303 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -1111,10 +1111,10 @@ bool HInstructionList::FoundBefore(const HInstruction* instruction1,
   return true;
 }
 
-bool HInstruction::Dominates(HInstruction* other_instruction, bool strictly) const {
+bool HInstruction::StrictlyDominates(HInstruction* other_instruction) const {
   if (other_instruction == this) {
     // An instruction does not strictly dominate itself.
-    return !strictly;
+    return false;
   }
   HBasicBlock* block = GetBlock();
   HBasicBlock* other_block = other_instruction->GetBlock();
@@ -1148,10 +1148,6 @@ bool HInstruction::Dominates(HInstruction* other_instruction, bool strictly) con
   }
 }
 
-bool HInstruction::StrictlyDominates(HInstruction* other_instruction) const {
-  return Dominates(other_instruction, /* strictly */ true);
-}
-
 void HInstruction::RemoveEnvironment() {
   RemoveEnvironmentUses(this);
   environment_ = nullptr;
@@ -1174,16 +1170,14 @@ void HInstruction::ReplaceWith(HInstruction* other) {
   DCHECK(env_uses_.empty());
 }
 
-void HInstruction::ReplaceUsesDominatedBy(HInstruction* dominator,
-                                          HInstruction* replacement,
-                                          bool strictly) {
+void HInstruction::ReplaceUsesDominatedBy(HInstruction* dominator, HInstruction* replacement) {
   const HUseList<HInstruction*>& uses = GetUses();
   for (auto it = uses.begin(), end = uses.end(); it != end; /* ++it below */) {
     HInstruction* user = it->GetUser();
     size_t index = it->GetIndex();
     // Increment `it` now because `*it` may disappear thanks to user->ReplaceInput().
     ++it;
-    if (dominator->Dominates(user, strictly)) {
+    if (dominator->StrictlyDominates(user)) {
       user->ReplaceInput(replacement, index);
     }
   }
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 6527cd3b4c..8efe5e5e91 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -2098,13 +2098,9 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> {
     return IsRemovable() && !HasUses();
   }
 
-  // Does this instruction dominate (strictly or in regular sense depending on 'strictly')
-  // `other_instruction`?
-  // Returns '!strictly' if this instruction and `other_instruction` are the same.
+  // Does this instruction strictly dominate `other_instruction`?
+  // Returns false if this instruction and `other_instruction` are the same.
   // Aborts if this instruction and `other_instruction` are both phis.
-  bool Dominates(HInstruction* other_instruction, bool strictly) const;
-
-  // Return 'Dominates(other_instruction, /*strictly*/ true)'.
   bool StrictlyDominates(HInstruction* other_instruction) const;
 
   int GetId() const { return id_; }
@@ -2165,13 +2161,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> {
   void SetLocations(LocationSummary* locations) { locations_ = locations; }
 
   void ReplaceWith(HInstruction* instruction);
-
-  // Replace all uses of the instruction which are dominated by 'dominator' with 'replacement'.
-  // 'strictly' determines whether strict or regular domination relation should be checked.
-  void ReplaceUsesDominatedBy(HInstruction* dominator,
-                              HInstruction* replacement,
-                              bool strictly = true);
-
+  void ReplaceUsesDominatedBy(HInstruction* dominator, HInstruction* replacement);
   void ReplaceInput(HInstruction* replacement, size_t index);
 
   // This is almost the same as doing `ReplaceWith()`. But in this helper, the
@@ -6269,7 +6259,7 @@ class HClinitCheck FINAL : public HExpression<1> {
   HClinitCheck(HLoadClass* constant, uint32_t dex_pc)
       : HExpression(
             DataType::Type::kReference,
-            SideEffects::AllChanges(),  // Assume write/read on all fields/arrays.
+            SideEffects::AllExceptGCDependency(),  // Assume write/read on all fields/arrays.
             dex_pc) {
     SetRawInputAt(0, constant);
   }
diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h
index 096349fd73..87dff8403b 100644
--- a/compiler/optimizing/nodes_vector.h
+++ b/compiler/optimizing/nodes_vector.h
@@ -109,6 +109,16 @@ class HVecOperation : public HVariableInputSizeInstruction {
 
   // Assumes vector nodes cannot be moved by default. Each concrete implementation
   // that can be moved should override this method and return true.
+  //
+  // Note: similar approach is used for instruction scheduling (if it is turned on for the target):
+  // by default HScheduler::IsSchedulable returns false for a particular HVecOperation.
+  // HScheduler${ARCH}::IsSchedulable can be overridden to return true for an instruction (see
+  // scheduler_arm64.h for example) if it is safe to schedule it; in this case one *must* also
+  // look at/update HScheduler${ARCH}::IsSchedulingBarrier for this instruction.
+  //
+  // Note: For newly introduced vector instructions HScheduler${ARCH}::IsSchedulingBarrier must be
+  // altered to return true if the instruction might reside outside the SIMD loop body since SIMD
+  // registers are not kept alive across vector loop boundaries (yet).
   bool CanBeMoved() const OVERRIDE { return false; }
 
   // Tests if all data of a vector node (vector length and packed type) is equal.
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 73c72fc57a..24b1a123ee 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -1224,7 +1224,7 @@ bool OptimizingCompiler::JitCompile(Thread* self,
   }
 
   const CompilerOptions& compiler_options = GetCompilerDriver()->GetCompilerOptions();
-  if (compiler_options.GetGenerateDebugInfo()) {
+  if (compiler_options.GenerateAnyDebugInfo()) {
     const auto* method_header = reinterpret_cast<const OatQuickMethodHeader*>(code);
     const uintptr_t code_address = reinterpret_cast<uintptr_t>(method_header->GetCode());
     debug::MethodDebugInfo info = {};
@@ -1244,10 +1244,13 @@ bool OptimizingCompiler::JitCompile(Thread* self,
     info.frame_size_in_bytes = method_header->GetFrameSizeInBytes();
     info.code_info = nullptr;
     info.cfi = jni_compiled_method.GetCfi();
-    std::vector<uint8_t> elf_file = debug::WriteDebugElfFileForMethods(
+    // If both flags are passed, generate full debug info.
+    const bool mini_debug_info = !compiler_options.GetGenerateDebugInfo();
+    std::vector<uint8_t> elf_file = debug::MakeElfFileForJIT(
         GetCompilerDriver()->GetInstructionSet(),
         GetCompilerDriver()->GetInstructionSetFeatures(),
-        ArrayRef<const debug::MethodDebugInfo>(&info, 1));
+        mini_debug_info,
+        info);
     CreateJITCodeEntryForAddress(code_address, std::move(elf_file));
   }
 
@@ -1352,7 +1355,7 @@ bool OptimizingCompiler::JitCompile(Thread* self,
   }
 
   const CompilerOptions& compiler_options = GetCompilerDriver()->GetCompilerOptions();
-  if (compiler_options.GetGenerateDebugInfo()) {
+  if (compiler_options.GenerateAnyDebugInfo()) {
     const auto* method_header = reinterpret_cast<const OatQuickMethodHeader*>(code);
     const uintptr_t code_address = reinterpret_cast<uintptr_t>(method_header->GetCode());
     debug::MethodDebugInfo info = {};
@@ -1372,10 +1375,13 @@ bool OptimizingCompiler::JitCompile(Thread* self,
     info.frame_size_in_bytes = method_header->GetFrameSizeInBytes();
     info.code_info = stack_map_size == 0 ? nullptr : stack_map_data;
     info.cfi = ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data());
-    std::vector<uint8_t> elf_file = debug::WriteDebugElfFileForMethods(
+    // If both flags are passed, generate full debug info.
+    const bool mini_debug_info = !compiler_options.GetGenerateDebugInfo();
+    std::vector<uint8_t> elf_file = debug::MakeElfFileForJIT(
         GetCompilerDriver()->GetInstructionSet(),
         GetCompilerDriver()->GetInstructionSetFeatures(),
-        ArrayRef<const debug::MethodDebugInfo>(&info, 1));
+        mini_debug_info,
+        info);
     CreateJITCodeEntryForAddress(code_address, std::move(elf_file));
   }
 
diff --git a/compiler/optimizing/scheduler.h b/compiler/optimizing/scheduler.h
index bb7c353bc2..dfa077f7de 100644
--- a/compiler/optimizing/scheduler.h
+++ b/compiler/optimizing/scheduler.h
@@ -462,6 +462,11 @@ class HScheduler {
   // containing basic block from being scheduled.
   // This method is used to restrict scheduling to instructions that we know are
   // safe to handle.
+  //
+  // For newly introduced instructions by default HScheduler::IsSchedulable returns false.
+  // HScheduler${ARCH}::IsSchedulable can be overridden to return true for an instruction (see
+  // scheduler_arm64.h for example) if it is safe to schedule it; in this case one *must* also
+  // look at/update HScheduler${ARCH}::IsSchedulingBarrier for this instruction.
   virtual bool IsSchedulable(const HInstruction* instruction) const;
   bool IsSchedulable(const HBasicBlock* block) const;
 
diff --git a/compiler/optimizing/scheduler_arm64.h b/compiler/optimizing/scheduler_arm64.h
index 32f161f26a..f71cb5b784 100644
--- a/compiler/optimizing/scheduler_arm64.h
+++ b/compiler/optimizing/scheduler_arm64.h
@@ -151,6 +151,20 @@ class HSchedulerARM64 : public HScheduler {
 #undef CASE_INSTRUCTION_KIND
   }
 
+  // Treat as scheduling barriers those vector instructions whose live ranges exceed the vectorized
+  // loop boundaries. This is a workaround for the lack of notion of SIMD register in the compiler;
+  // around a call we have to save/restore all live SIMD&FP registers (only lower 64 bits of
+  // SIMD&FP registers are callee saved) so don't reorder such vector instructions.
+  //
+  // TODO: remove this when a proper support of SIMD registers is introduced to the compiler.
+  bool IsSchedulingBarrier(const HInstruction* instr) const OVERRIDE {
+    return HScheduler::IsSchedulingBarrier(instr) ||
+           instr->IsVecReduce() ||
+           instr->IsVecExtractScalar() ||
+           instr->IsVecSetScalars() ||
+           instr->IsVecReplicateScalar();
+  }
+
  private:
   SchedulingLatencyVisitorARM64 arm64_latency_visitor_;
   DISALLOW_COPY_AND_ASSIGN(HSchedulerARM64);