Diffstat (limited to 'compiler/optimizing')
-rw-r--r--   compiler/optimizing/code_generator_mips64.cc | 78
-rw-r--r--   compiler/optimizing/code_generator_mips64.h  | 18
-rw-r--r--   compiler/optimizing/inliner.cc               | 50
-rw-r--r--   compiler/optimizing/inliner.h                |  3
-rw-r--r--   compiler/optimizing/intrinsics_mips.cc       | 89
5 files changed, 207 insertions(+), 31 deletions(-)
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 5cf3c246cf..36690c0569 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -4517,27 +4517,20 @@ void LocationsBuilderMIPS64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
   locations->SetInAt(0, Location::RequiresRegister());
 }
 
-void InstructionCodeGeneratorMIPS64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
-  int32_t lower_bound = switch_instr->GetStartValue();
-  int32_t num_entries = switch_instr->GetNumEntries();
-  LocationSummary* locations = switch_instr->GetLocations();
-  GpuRegister value_reg = locations->InAt(0).AsRegister<GpuRegister>();
-  HBasicBlock* default_block = switch_instr->GetDefaultBlock();
-
+void InstructionCodeGeneratorMIPS64::GenPackedSwitchWithCompares(GpuRegister value_reg,
+                                                                 int32_t lower_bound,
+                                                                 uint32_t num_entries,
+                                                                 HBasicBlock* switch_block,
+                                                                 HBasicBlock* default_block) {
   // Create a set of compare/jumps.
   GpuRegister temp_reg = TMP;
-  if (IsInt<16>(-lower_bound)) {
-    __ Addiu(temp_reg, value_reg, -lower_bound);
-  } else {
-    __ LoadConst32(AT, -lower_bound);
-    __ Addu(temp_reg, value_reg, AT);
-  }
+  __ Addiu32(temp_reg, value_reg, -lower_bound);
   // Jump to default if index is negative
   // Note: We don't check the case that index is positive while value < lower_bound, because in
   // this case, index >= num_entries must be true. So that we can save one branch instruction.
   __ Bltzc(temp_reg, codegen_->GetLabelOf(default_block));
 
-  const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
+  const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors();
   // Jump to successors[0] if value == lower_bound.
   __ Beqzc(temp_reg, codegen_->GetLabelOf(successors[0]));
   int32_t last_index = 0;
@@ -4555,11 +4548,66 @@ void InstructionCodeGeneratorMIPS64::VisitPackedSwitch(HPackedSwitch* switch_ins
   }
 
   // And the default for any other value.
-  if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
+  if (!codegen_->GoesToNextBlock(switch_block, default_block)) {
     __ Bc(codegen_->GetLabelOf(default_block));
   }
 }
 
+void InstructionCodeGeneratorMIPS64::GenTableBasedPackedSwitch(GpuRegister value_reg,
+                                                               int32_t lower_bound,
+                                                               uint32_t num_entries,
+                                                               HBasicBlock* switch_block,
+                                                               HBasicBlock* default_block) {
+  // Create a jump table.
+  std::vector<Mips64Label*> labels(num_entries);
+  const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors();
+  for (uint32_t i = 0; i < num_entries; i++) {
+    labels[i] = codegen_->GetLabelOf(successors[i]);
+  }
+  JumpTable* table = __ CreateJumpTable(std::move(labels));
+
+  // Is the value in range?
+  __ Addiu32(TMP, value_reg, -lower_bound);
+  __ LoadConst32(AT, num_entries);
+  __ Bgeuc(TMP, AT, codegen_->GetLabelOf(default_block));
+
+  // We are in the range of the table.
+  // Load the target address from the jump table, indexing by the value.
+  __ LoadLabelAddress(AT, table->GetLabel());
+  __ Sll(TMP, TMP, 2);
+  __ Daddu(TMP, TMP, AT);
+  __ Lw(TMP, TMP, 0);
+  // Compute the absolute target address by adding the table start address
+  // (the table contains offsets to targets relative to its start).
+  __ Daddu(TMP, TMP, AT);
+  // And jump.
+  __ Jr(TMP);
+  __ Nop();
+}
+
+void InstructionCodeGeneratorMIPS64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
+  int32_t lower_bound = switch_instr->GetStartValue();
+  uint32_t num_entries = switch_instr->GetNumEntries();
+  LocationSummary* locations = switch_instr->GetLocations();
+  GpuRegister value_reg = locations->InAt(0).AsRegister<GpuRegister>();
+  HBasicBlock* switch_block = switch_instr->GetBlock();
+  HBasicBlock* default_block = switch_instr->GetDefaultBlock();
+
+  if (num_entries > kPackedSwitchJumpTableThreshold) {
+    GenTableBasedPackedSwitch(value_reg,
+                              lower_bound,
+                              num_entries,
+                              switch_block,
+                              default_block);
+  } else {
+    GenPackedSwitchWithCompares(value_reg,
+                                lower_bound,
+                                num_entries,
+                                switch_block,
+                                default_block);
+  }
+}
+
 void LocationsBuilderMIPS64::VisitClassTableGet(HClassTableGet*) {
   UNIMPLEMENTED(FATAL) << "ClassTableGet is unimplemented on mips64";
 }
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index d5811c20e3..8ac919f47e 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -217,6 +217,14 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator {
 
   Mips64Assembler* GetAssembler() const { return assembler_; }
 
+  // Compare-and-jump packed switch generates approx. 3 + 2.5 * N 32-bit
+  // instructions for N cases.
+  // Table-based packed switch generates approx. 11 32-bit instructions
+  // and N 32-bit data words for N cases.
+  // At N = 6 they come out as 18 and 17 32-bit words respectively.
+  // We switch to the table-based method starting with 7 cases.
+  static constexpr uint32_t kPackedSwitchJumpTableThreshold = 6;
+
  private:
   void GenerateClassInitializationCheck(SlowPathCodeMIPS64* slow_path, GpuRegister class_reg);
   void GenerateMemoryBarrier(MemBarrierKind kind);
@@ -260,6 +268,16 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator {
                          LocationSummary* locations,
                          Mips64Label* label);
   void HandleGoto(HInstruction* got, HBasicBlock* successor);
+  void GenPackedSwitchWithCompares(GpuRegister value_reg,
+                                   int32_t lower_bound,
+                                   uint32_t num_entries,
+                                   HBasicBlock* switch_block,
+                                   HBasicBlock* default_block);
+  void GenTableBasedPackedSwitch(GpuRegister value_reg,
+                                 int32_t lower_bound,
+                                 uint32_t num_entries,
+                                 HBasicBlock* switch_block,
+                                 HBasicBlock* default_block);
 
   Mips64Assembler* const assembler_;
   CodeGeneratorMIPS64* const codegen_;
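The kPackedSwitchJumpTableThreshold comment above encodes a small code-size model. A minimal stand-alone sketch of that arithmetic (not part of the change; the two helper names are made up for illustration) reproduces the numbers the comment quotes:

#include <cstdint>
#include <cstdio>

// Rough size model from the comment above, in 32-bit words for N cases:
// compare-and-jump form ~ 3 + 2.5 * N, table form ~ 11 words of code plus
// N words of table data.
constexpr double CompareFormWords(uint32_t n) { return 3.0 + 2.5 * n; }
constexpr uint32_t TableFormWords(uint32_t n) { return 11u + n; }

int main() {
  for (uint32_t n = 5; n <= 8; ++n) {
    std::printf("N=%u: compares ~%4.1f words, table ~%2u words\n",
                n, CompareFormWords(n), TableFormWords(n));
  }
  // Matches the comment: at N = 6 the two come out as 18 and 17 words, and the
  // generator switches to the table form once num_entries > 6.
  return 0;
}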
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index d84787984d..3b83e95071 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -344,6 +344,7 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) {
   if (actual_method != nullptr) {
     bool result = TryInlineAndReplace(invoke_instruction,
                                       actual_method,
+                                      ReferenceTypeInfo::CreateInvalid(),
                                       /* do_rtp */ true,
                                       cha_devirtualize);
     if (result && !invoke_instruction->IsInvokeStaticOrDirect()) {
@@ -471,9 +472,10 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction,
   HInstruction* receiver = invoke_instruction->InputAt(0);
   HInstruction* cursor = invoke_instruction->GetPrevious();
   HBasicBlock* bb_cursor = invoke_instruction->GetBlock();
-
+  Handle<mirror::Class> handle = handles_->NewHandle(GetMonomorphicType(classes));
   if (!TryInlineAndReplace(invoke_instruction,
                            resolved_method,
+                           ReferenceTypeInfo::Create(handle, /* is_exact */ true),
                            /* do_rtp */ false,
                            /* cha_devirtualize */ false)) {
     return false;
   }
@@ -591,13 +593,13 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction,
       break;
     }
     ArtMethod* method = nullptr;
+
+    Handle<mirror::Class> handle = handles_->NewHandle(classes->Get(i));
     if (invoke_instruction->IsInvokeInterface()) {
-      method = classes->Get(i)->FindVirtualMethodForInterface(
-          resolved_method, pointer_size);
+      method = handle->FindVirtualMethodForInterface(resolved_method, pointer_size);
     } else {
       DCHECK(invoke_instruction->IsInvokeVirtual());
-      method = classes->Get(i)->FindVirtualMethodForVirtual(
-          resolved_method, pointer_size);
+      method = handle->FindVirtualMethodForVirtual(resolved_method, pointer_size);
     }
 
     HInstruction* receiver = invoke_instruction->InputAt(0);
@@ -605,10 +607,13 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction,
     HBasicBlock* bb_cursor = invoke_instruction->GetBlock();
 
     dex::TypeIndex class_index = FindClassIndexIn(
-        classes->Get(i), caller_dex_file, caller_compilation_unit_.GetDexCache());
+        handle.Get(), caller_dex_file, caller_compilation_unit_.GetDexCache());
     HInstruction* return_replacement = nullptr;
     if (!class_index.IsValid() ||
-        !TryBuildAndInline(invoke_instruction, method, &return_replacement)) {
+        !TryBuildAndInline(invoke_instruction,
+                           method,
+                           ReferenceTypeInfo::Create(handle, /* is_exact */ true),
+                           &return_replacement)) {
       all_targets_inlined = false;
     } else {
       one_target_inlined = true;
@@ -627,7 +632,7 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction,
           cursor,
           bb_cursor,
           class_index,
-          classes->Get(i),
+          handle.Get(),
           invoke_instruction,
           deoptimize);
       if (deoptimize) {
@@ -792,7 +797,10 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget(
   HBasicBlock* bb_cursor = invoke_instruction->GetBlock();
 
   HInstruction* return_replacement = nullptr;
-  if (!TryBuildAndInline(invoke_instruction, actual_method, &return_replacement)) {
+  if (!TryBuildAndInline(invoke_instruction,
+                         actual_method,
+                         ReferenceTypeInfo::CreateInvalid(),
+                         &return_replacement)) {
     return false;
   }
@@ -857,13 +865,14 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget(
 
 bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction,
                                    ArtMethod* method,
+                                   ReferenceTypeInfo receiver_type,
                                    bool do_rtp,
                                    bool cha_devirtualize) {
   HInstruction* return_replacement = nullptr;
   uint32_t dex_pc = invoke_instruction->GetDexPc();
   HInstruction* cursor = invoke_instruction->GetPrevious();
   HBasicBlock* bb_cursor = invoke_instruction->GetBlock();
-  if (!TryBuildAndInline(invoke_instruction, method, &return_replacement)) {
+  if (!TryBuildAndInline(invoke_instruction, method, receiver_type, &return_replacement)) {
     if (invoke_instruction->IsInvokeInterface()) {
       // Turn an invoke-interface into an invoke-virtual. An invoke-virtual is always
       // better than an invoke-interface because:
@@ -921,6 +930,7 @@ bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction,
 
 bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction,
                                  ArtMethod* method,
+                                 ReferenceTypeInfo receiver_type,
                                  HInstruction** return_replacement) {
   if (method->IsProxyMethod()) {
     VLOG(compiler) << "Method " << method->PrettyMethod()
@@ -997,7 +1007,8 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction,
     return false;
   }
 
-  if (!TryBuildAndInlineHelper(invoke_instruction, method, same_dex_file, return_replacement)) {
+  if (!TryBuildAndInlineHelper(
+          invoke_instruction, method, receiver_type, same_dex_file, return_replacement)) {
     return false;
   }
 
@@ -1194,8 +1205,10 @@ HInstanceFieldSet* HInliner::CreateInstanceFieldSet(Handle<mirror::DexCache> dex
 
 bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
                                        ArtMethod* resolved_method,
+                                       ReferenceTypeInfo receiver_type,
                                        bool same_dex_file,
                                        HInstruction** return_replacement) {
+  DCHECK(!(resolved_method->IsStatic() && receiver_type.IsValid()));
   ScopedObjectAccess soa(Thread::Current());
   const DexFile::CodeItem* code_item = resolved_method->GetCodeItem();
   const DexFile& callee_dex_file = *resolved_method->GetDexFile();
@@ -1286,12 +1299,13 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
   }
 
   size_t parameter_index = 0;
+  bool run_rtp = false;
   for (HInstructionIterator instructions(callee_graph->GetEntryBlock()->GetInstructions());
        !instructions.Done();
        instructions.Advance()) {
     HInstruction* current = instructions.Current();
     if (current->IsParameterValue()) {
-      HInstruction* argument = invoke_instruction->InputAt(parameter_index++);
+      HInstruction* argument = invoke_instruction->InputAt(parameter_index);
       if (argument->IsNullConstant()) {
         current->ReplaceWith(callee_graph->GetNullConstant());
       } else if (argument->IsIntConstant()) {
@@ -1305,15 +1319,21 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
         current->ReplaceWith(
             callee_graph->GetDoubleConstant(argument->AsDoubleConstant()->GetValue()));
       } else if (argument->GetType() == Primitive::kPrimNot) {
-        current->SetReferenceTypeInfo(argument->GetReferenceTypeInfo());
+        if (!resolved_method->IsStatic() && parameter_index == 0 && receiver_type.IsValid()) {
+          run_rtp = true;
+          current->SetReferenceTypeInfo(receiver_type);
+        } else {
+          current->SetReferenceTypeInfo(argument->GetReferenceTypeInfo());
+        }
         current->AsParameterValue()->SetCanBeNull(argument->CanBeNull());
       }
+      ++parameter_index;
     }
   }
 
   // We have replaced formal arguments with actual arguments. If actual types
   // are more specific than the declared ones, run RTP again on the inner graph.
-  if (ArgumentTypesMoreSpecific(invoke_instruction, resolved_method)) {
+  if (run_rtp || ArgumentTypesMoreSpecific(invoke_instruction, resolved_method)) {
     ReferenceTypePropagation(callee_graph,
                              dex_compilation_unit.GetDexCache(),
                              handles_,
@@ -1502,7 +1522,7 @@ static bool IsReferenceTypeRefinement(ReferenceTypeInfo declared_rti,
   ReferenceTypeInfo actual_rti = actual_obj->GetReferenceTypeInfo();
   return (actual_rti.IsExact() && !declared_rti.IsExact()) ||
-      declared_rti.IsStrictSupertypeOf(actual_rti);
+          declared_rti.IsStrictSupertypeOf(actual_rti);
 }
 
 ReferenceTypeInfo HInliner::GetClassRTI(mirror::Class* klass) {
diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h
index 0c6436235f..4c0b990f26 100644
--- a/compiler/optimizing/inliner.h
+++ b/compiler/optimizing/inliner.h
@@ -66,17 +66,20 @@ class HInliner : public HOptimization {
   // a CHA guard needs to be added for the inlining.
   bool TryInlineAndReplace(HInvoke* invoke_instruction,
                            ArtMethod* resolved_method,
+                           ReferenceTypeInfo receiver_type,
                            bool do_rtp,
                            bool cha_devirtualize)
     REQUIRES_SHARED(Locks::mutator_lock_);
 
   bool TryBuildAndInline(HInvoke* invoke_instruction,
                          ArtMethod* resolved_method,
+                         ReferenceTypeInfo receiver_type,
                          HInstruction** return_replacement)
     REQUIRES_SHARED(Locks::mutator_lock_);
 
   bool TryBuildAndInlineHelper(HInvoke* invoke_instruction,
                                ArtMethod* resolved_method,
+                               ReferenceTypeInfo receiver_type,
                                bool same_dex_file,
                                HInstruction** return_replacement);
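What the new receiver_type parameter buys is easiest to see outside ART. The stand-alone C++ analogy below (illustrative only; Base, Exact and Callee are invented names, not ART code) mirrors the inliner's situation: once the receiver ParameterValue of the inlined body carries an exact type, a later pass can resolve virtual calls on it statically, which is why run_rtp forces another ReferenceTypePropagation pass.

#include <cstdio>

// A callee that makes a virtual call on its first ("receiver") parameter,
// mirroring ParameterValue 0 of the inlined graph.
struct Base {
  virtual ~Base() = default;
  virtual int Tag() const { return 1; }
};
struct Exact final : Base {
  int Tag() const override { return 2; }
};

// In general this is a virtual dispatch; if the caller knows the argument is
// exactly `Exact` (cf. ReferenceTypeInfo::Create(handle, /* is_exact */ true)),
// the call can be resolved statically after inlining.
static int Callee(const Base& receiver) { return receiver.Tag(); }

int main() {
  Exact obj;
  std::printf("%d\n", Callee(obj));  // an optimizer with exact type info folds this to 2
  return 0;
}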
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 1fb90e5113..e9c6615870 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -2466,6 +2466,94 @@ void IntrinsicCodeGeneratorMIPS::VisitMathRoundFloat(HInvoke* invoke) {
   __ Bind(&done);
 }
 
+// void java.lang.String.getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin)
+void IntrinsicLocationsBuilderMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCallOnMainOnly,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetInAt(2, Location::RequiresRegister());
+  locations->SetInAt(3, Location::RequiresRegister());
+  locations->SetInAt(4, Location::RequiresRegister());
+
+  // We will call memcpy() to do the actual work. Allocate the temporary
+  // registers to use the correct input registers, and output register.
+  // memcpy() uses the normal MIPS calling convention.
+  InvokeRuntimeCallingConvention calling_convention;
+
+  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+
+  Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimInt);
+  locations->AddTemp(Location::RegisterLocation(outLocation.AsRegister<Register>()));
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke) {
+  MipsAssembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  // Check assumption that sizeof(Char) is 2 (used in scaling below).
+  const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
+  DCHECK_EQ(char_size, 2u);
+  const size_t char_shift = Primitive::ComponentSizeShift(Primitive::kPrimChar);
+
+  Register srcObj = locations->InAt(0).AsRegister<Register>();
+  Register srcBegin = locations->InAt(1).AsRegister<Register>();
+  Register srcEnd = locations->InAt(2).AsRegister<Register>();
+  Register dstObj = locations->InAt(3).AsRegister<Register>();
+  Register dstBegin = locations->InAt(4).AsRegister<Register>();
+
+  Register dstPtr = locations->GetTemp(0).AsRegister<Register>();
+  DCHECK_EQ(dstPtr, A0);
+  Register srcPtr = locations->GetTemp(1).AsRegister<Register>();
+  DCHECK_EQ(srcPtr, A1);
+  Register numChrs = locations->GetTemp(2).AsRegister<Register>();
+  DCHECK_EQ(numChrs, A2);
+
+  Register dstReturn = locations->GetTemp(3).AsRegister<Register>();
+  DCHECK_EQ(dstReturn, V0);
+
+  MipsLabel done;
+
+  // Location of data in char array buffer.
+  const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
+
+  // Get offset of value field within a string object.
+  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
+
+  __ Beq(srcEnd, srcBegin, &done);  // No characters to move.
+
+  // Calculate number of characters to be copied.
+  __ Subu(numChrs, srcEnd, srcBegin);
+
+  // Calculate destination address.
+  __ Addiu(dstPtr, dstObj, data_offset);
+  if (IsR6()) {
+    __ Lsa(dstPtr, dstBegin, dstPtr, char_shift);
+  } else {
+    __ Sll(AT, dstBegin, char_shift);
+    __ Addu(dstPtr, dstPtr, AT);
+  }
+
+  // Calculate source address.
+  __ Addiu(srcPtr, srcObj, value_offset);
+  if (IsR6()) {
+    __ Lsa(srcPtr, srcBegin, srcPtr, char_shift);
+  } else {
+    __ Sll(AT, srcBegin, char_shift);
+    __ Addu(srcPtr, srcPtr, AT);
+  }
+
+  // Calculate number of bytes to copy from number of characters.
+  __ Sll(numChrs, numChrs, char_shift);
+
+  codegen_->InvokeRuntime(kQuickMemcpy, invoke, invoke->GetDexPc(), nullptr);
+
+  __ Bind(&done);
+}
+
 // Unimplemented intrinsics.
 
 UNIMPLEMENTED_INTRINSIC(MIPS, MathCeil)
@@ -2475,7 +2563,6 @@ UNIMPLEMENTED_INTRINSIC(MIPS, MathRoundDouble)
 UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeCASLong)
 UNIMPLEMENTED_INTRINSIC(MIPS, ReferenceGetReferent)
-UNIMPLEMENTED_INTRINSIC(MIPS, StringGetCharsNoCheck)
 UNIMPLEMENTED_INTRINSIC(MIPS, SystemArrayCopyChar)
 UNIMPLEMENTED_INTRINSIC(MIPS, SystemArrayCopy)
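For reference, the address arithmetic the new StringGetCharsNoCheck intrinsic sets up before tail-calling kQuickMemcpy can be cross-checked against this plain C++ sketch (raw UTF-16 buffers are assumed; the real code additionally adds the String value offset and the char[] data offset because it operates on managed object pointers):

#include <cstdint>
#include <cstring>
#include <cstdio>

// What the intrinsic places in the argument registers before calling memcpy:
//   A1 (src) = srcObj + value_offset + (srcBegin << 1)
//   A0 (dst) = dstObj + data_offset  + (dstBegin << 1)
//   A2 (len) = (srcEnd - srcBegin) << 1
static void GetCharsNoCheck(const uint16_t* src_value, int32_t src_begin, int32_t src_end,
                            uint16_t* dst_data, int32_t dst_begin) {
  if (src_end == src_begin) {  // Beq(srcEnd, srcBegin, &done): nothing to move.
    return;
  }
  size_t num_chars = static_cast<size_t>(src_end - src_begin);  // Subu(numChrs, ...)
  std::memcpy(dst_data + dst_begin, src_value + src_begin, num_chars * sizeof(uint16_t));
}

int main() {
  const uint16_t src[] = {'h', 'e', 'l', 'l', 'o'};
  uint16_t dst[8] = {};
  GetCharsNoCheck(src, 1, 4, dst, 2);  // copies 'e', 'l', 'l' into dst[2..4]
  std::printf("%c%c%c\n", dst[2], dst[3], dst[4]);
  return 0;
}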