Diffstat (limited to 'compiler')
23 files changed, 962 insertions, 287 deletions
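For orientation: the core of this change is the new cha_guard_optimization pass, which recognizes the guard sequence the inliner emits for CHA devirtualization (an HShouldDeoptimizeFlag followed by an HNotEqual against zero and an HDeoptimize) and either removes the guard or hoists it out of an inner loop. The standalone sketch below only models the simplest of those cases, removal when the receiver is a parameter, against a toy instruction list; the Kind/Instruction types and RemoveParameterGuards are illustrative stand-ins for this note, not ART's HInstruction API.

#include <iostream>
#include <iterator>
#include <list>

// Hypothetical stand-ins for the HIR instruction kinds involved in a CHA guard.
enum class Kind { kShouldDeoptimizeFlag, kNotEqual, kDeoptimize, kOther };

struct Instruction {
  Kind kind;
  bool receiver_is_parameter = false;  // stands in for receiver->IsParameterValue()
};

// Remove a well-formed guard triple (flag, compare, deoptimize) whenever the
// receiver is a parameter; this mirrors the idea behind OptimizeForParameter()
// and RemoveGuard() in cha_guard_optimization.cc, not their actual code.
void RemoveParameterGuards(std::list<Instruction>& block) {
  for (auto it = block.begin(); it != block.end();) {
    if (it->kind == Kind::kShouldDeoptimizeFlag && it->receiver_is_parameter) {
      auto flag = it;
      auto compare = std::next(flag);   // assumed to be the NotEqual
      auto deopt = std::next(compare);  // assumed to be the Deoptimize
      it = block.erase(flag, std::next(deopt));  // drop all three instructions
    } else {
      ++it;
    }
  }
}

int main() {
  std::list<Instruction> block = {
      {Kind::kOther},
      {Kind::kShouldDeoptimizeFlag, /*receiver_is_parameter=*/true},
      {Kind::kNotEqual},
      {Kind::kDeoptimize},
      {Kind::kOther},
  };
  RemoveParameterGuards(block);
  std::cout << "instructions left: " << block.size() << "\n";  // prints 2
  return 0;
}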
diff --git a/compiler/Android.bp b/compiler/Android.bp index 2eb6fba674..46f3358af1 100644 --- a/compiler/Android.bp +++ b/compiler/Android.bp @@ -49,6 +49,7 @@ art_cc_defaults { "optimizing/block_builder.cc", "optimizing/bounds_check_elimination.cc", "optimizing/builder.cc", + "optimizing/cha_guard_optimization.cc", "optimizing/code_generator.cc", "optimizing/code_generator_utils.cc", "optimizing/constant_folding.cc", diff --git a/compiler/image_test.cc b/compiler/image_test.cc index 1290379569..2686abc305 100644 --- a/compiler/image_test.cc +++ b/compiler/image_test.cc @@ -24,8 +24,10 @@ #include "base/unix_file/fd_file.h" #include "class_linker-inl.h" +#include "compiler_callbacks.h" #include "common_compiler_test.h" #include "debug/method_debug_info.h" +#include "dex/quick_compiler_callbacks.h" #include "driver/compiler_options.h" #include "elf_writer.h" #include "elf_writer_quick.h" @@ -76,6 +78,14 @@ class ImageTest : public CommonCompilerTest { const std::string& extra_dex = "", const std::string& image_class = ""); + void SetUpRuntimeOptions(RuntimeOptions* options) OVERRIDE { + CommonCompilerTest::SetUpRuntimeOptions(options); + callbacks_.reset(new QuickCompilerCallbacks( + verification_results_.get(), + CompilerCallbacks::CallbackMode::kCompileBootImage)); + options->push_back(std::make_pair("compilercallbacks", callbacks_.get())); + } + std::unordered_set<std::string>* GetImageClasses() OVERRIDE { return new std::unordered_set<std::string>(image_classes_); } diff --git a/compiler/linker/mips64/relative_patcher_mips64_test.cc b/compiler/linker/mips64/relative_patcher_mips64_test.cc index 9e37f6bbea..c3170584e4 100644 --- a/compiler/linker/mips64/relative_patcher_mips64_test.cc +++ b/compiler/linker/mips64/relative_patcher_mips64_test.cc @@ -45,7 +45,7 @@ class Mips64RelativePatcherTest : public RelativePatcherTest { const uint8_t Mips64RelativePatcherTest::kUnpatchedPcRelativeRawCode[] = { 0x34, 0x12, 0x5E, 0xEE, // auipc s2, high(diff); placeholder = 0x1234 - 0x78, 0x56, 0x52, 0x26, // addiu s2, s2, low(diff); placeholder = 0x5678 + 0x78, 0x56, 0x52, 0x66, // daddiu s2, s2, low(diff); placeholder = 0x5678 }; const uint8_t Mips64RelativePatcherTest::kUnpatchedPcRelativeCallRawCode[] = { 0x34, 0x12, 0x3E, 0xEC, // auipc at, high(diff); placeholder = 0x1234 @@ -71,7 +71,7 @@ void Mips64RelativePatcherTest::CheckPcRelativePatch(const ArrayRef<const Linker const uint8_t expected_code[] = { static_cast<uint8_t>(diff >> 16), static_cast<uint8_t>(diff >> 24), 0x5E, 0xEE, - static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8), 0x52, 0x26, + static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8), 0x52, 0x66, }; EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); } diff --git a/compiler/optimizing/cha_guard_optimization.cc b/compiler/optimizing/cha_guard_optimization.cc new file mode 100644 index 0000000000..fe423012ca --- /dev/null +++ b/compiler/optimizing/cha_guard_optimization.cc @@ -0,0 +1,253 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "cha_guard_optimization.h" + +namespace art { + +// Note we can only do CHA guard elimination/motion in a single pass, since +// if a guard is not removed, another guard might be removed due to +// the existence of the first guard. The first guard should not be further +// removed in another pass. For example, due to further optimizations, +// a receiver of a guard might turn out to be a parameter value, or defined at +// a different site, which makes the guard removable as a result. However +// it's not safe to remove the guard in another pass since another guard might +// have been removed due to the existence of this guard. +// +// As a consequence, we decided not to rely on other passes to remove them +// (such as GVN or instruction simplifier). + +class CHAGuardVisitor : HGraphVisitor { + public: + explicit CHAGuardVisitor(HGraph* graph) + : HGraphVisitor(graph), + block_has_cha_guard_(GetGraph()->GetBlocks().size(), + 0, + graph->GetArena()->Adapter(kArenaAllocCHA)) { + number_of_guards_to_visit_ = GetGraph()->GetNumberOfCHAGuards(); + DCHECK_NE(number_of_guards_to_visit_, 0u); + // Will recount number of guards during guard optimization. + GetGraph()->SetNumberOfCHAGuards(0); + } + + void VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) OVERRIDE; + + void VisitBasicBlock(HBasicBlock* block) OVERRIDE; + + private: + void RemoveGuard(HShouldDeoptimizeFlag* flag); + // Return true if `flag` is removed. + bool OptimizeForParameter(HShouldDeoptimizeFlag* flag, HInstruction* receiver); + // Return true if `flag` is removed. + bool OptimizeWithDominatingGuard(HShouldDeoptimizeFlag* flag, HInstruction* receiver); + // Return true if `flag` is hoisted. + bool HoistGuard(HShouldDeoptimizeFlag* flag, HInstruction* receiver); + + // Record if each block has any CHA guard. It's updated during the + // reverse post order visit. Use int instead of bool since ArenaVector + // does not support bool. + ArenaVector<int> block_has_cha_guard_; + + // The iterator that's being used for this visitor. Need it to manually + // advance the iterator due to removing/moving more than one instruction. + HInstructionIterator* instruction_iterator_; + + // Used to short-circuit the pass when there is no more guards left to visit. + uint32_t number_of_guards_to_visit_; + + DISALLOW_COPY_AND_ASSIGN(CHAGuardVisitor); +}; + +void CHAGuardVisitor::VisitBasicBlock(HBasicBlock* block) { + if (number_of_guards_to_visit_ == 0) { + return; + } + // Skip phis, just iterate through instructions. + HInstructionIterator it(block->GetInstructions()); + instruction_iterator_ = &it; + for (; !it.Done(); it.Advance()) { + DCHECK(it.Current()->IsInBlock()); + it.Current()->Accept(this); + } +} + +void CHAGuardVisitor::RemoveGuard(HShouldDeoptimizeFlag* flag) { + HBasicBlock* block = flag->GetBlock(); + HInstruction* compare = flag->GetNext(); + DCHECK(compare->IsNotEqual()); + HInstruction* deopt = compare->GetNext(); + DCHECK(deopt->IsDeoptimize()); + + // Advance instruction iterator first before we remove the guard. + // We need to do it twice since we remove three instructions and the + // visitor is responsible for advancing it once.
+ instruction_iterator_->Advance(); + instruction_iterator_->Advance(); + block->RemoveInstruction(deopt); + block->RemoveInstruction(compare); + block->RemoveInstruction(flag); +} + +bool CHAGuardVisitor::OptimizeForParameter(HShouldDeoptimizeFlag* flag, + HInstruction* receiver) { + // If some compiled code is invalidated by CHA due to class loading, the + // compiled code will not be entered anymore. So the very fact that the + // compiled code is invoked guarantees that a parameter receiver conforms + // to all the CHA devirtualization assumptions made by the compiled code, + // since all parameter receivers pre-exist any (potential) invalidation of + // the compiled code. + // + // TODO: allow more cases such as a phi whose inputs are all parameters. + if (receiver->IsParameterValue()) { + RemoveGuard(flag); + return true; + } + return false; +} + +bool CHAGuardVisitor::OptimizeWithDominatingGuard(HShouldDeoptimizeFlag* flag, + HInstruction* receiver) { + // If there is another guard that dominates the current guard, and + // that guard is dominated by receiver's definition, then the current + // guard can be eliminated, since receiver must pre-exist that other + // guard, and passing that guard guarantees that receiver conforms to + // all the CHA devirtualization assumptions. + HBasicBlock* dominator = flag->GetBlock(); + HBasicBlock* receiver_def_block = receiver->GetBlock(); + + // Complexity of the following algorithm: + // We potentially need to traverse the full dominator chain to receiver_def_block, + // plus a (partial) linear search within one block for each guard. + // So the worst case for each guard is bounded by the size of the + // biggest block plus the depth of the dominating tree. + + while (dominator != receiver_def_block) { + if (block_has_cha_guard_[dominator->GetBlockId()] == 1) { + RemoveGuard(flag); + return true; + } + dominator = dominator->GetDominator(); + } + + // At this point dominator is the block where receiver is defined. + // We do a linear search within dominator to see if there is a guard after + // receiver's definition. + HInstruction* instruction; + if (dominator == flag->GetBlock()) { + // Flag and receiver are defined in the same block. Search backward from + // the current guard. + instruction = flag->GetPrevious(); + } else { + // Search backward from the last instruction of that dominator. + instruction = dominator->GetLastInstruction(); + } + while (instruction != receiver) { + if (instruction == nullptr) { + // receiver must be defined in this block, we didn't find it + // in the instruction list, so it must be a Phi. + DCHECK(receiver->IsPhi()); + break; + } + if (instruction->IsShouldDeoptimizeFlag()) { + RemoveGuard(flag); + return true; + } + instruction = instruction->GetPrevious(); + } + return false; +} + +bool CHAGuardVisitor::HoistGuard(HShouldDeoptimizeFlag* flag, + HInstruction* receiver) { + // If receiver is loop invariant, we can hoist the guard out of the + // loop since passing a guard before entering the loop guarantees that + // receiver conforms to all the CHA devirtualization assumptions. + // We only hoist guards out of the inner loop since that offers most of the + // benefit and it might help remove other guards in the inner loop. 
+ HBasicBlock* block = flag->GetBlock(); + HLoopInformation* loop_info = block->GetLoopInformation(); + if (loop_info != nullptr && + !loop_info->IsIrreducible() && + loop_info->IsDefinedOutOfTheLoop(receiver)) { + HInstruction* compare = flag->GetNext(); + DCHECK(compare->IsNotEqual()); + HInstruction* deopt = compare->GetNext(); + DCHECK(deopt->IsDeoptimize()); + + // Advance instruction iterator first before we move the guard. + // We need to do it twice since we move three instructions and the + // visitor is responsible for advancing it once. + instruction_iterator_->Advance(); + instruction_iterator_->Advance(); + + HBasicBlock* pre_header = loop_info->GetPreHeader(); + flag->MoveBefore(pre_header->GetLastInstruction()); + compare->MoveBefore(pre_header->GetLastInstruction()); + + block->RemoveInstruction(deopt); + HInstruction* suspend = loop_info->GetSuspendCheck(); + // Need a new deoptimize instruction that copies the environment + // of the suspend instruction for the loop. + HDeoptimize* deoptimize = + new (GetGraph()->GetArena()) HDeoptimize(compare, suspend->GetDexPc()); + pre_header->InsertInstructionBefore(deoptimize, pre_header->GetLastInstruction()); + deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment( + suspend->GetEnvironment(), loop_info->GetHeader()); + block_has_cha_guard_[pre_header->GetBlockId()] = 1; + GetGraph()->IncrementNumberOfCHAGuards(); + return true; + } + return false; +} + +void CHAGuardVisitor::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) { + number_of_guards_to_visit_--; + HInstruction* receiver = flag->InputAt(0); + // Don't need the receiver anymore. + flag->RemoveInputAt(0); + if (receiver->IsNullCheck()) { + receiver = receiver->InputAt(0); + } + + if (OptimizeForParameter(flag, receiver)) { + DCHECK(!flag->IsInBlock()); + return; + } + if (OptimizeWithDominatingGuard(flag, receiver)) { + DCHECK(!flag->IsInBlock()); + return; + } + if (HoistGuard(flag, receiver)) { + DCHECK(flag->IsInBlock()); + return; + } + + // Need to keep the CHA guard in place. + block_has_cha_guard_[flag->GetBlock()->GetBlockId()] = 1; + GetGraph()->IncrementNumberOfCHAGuards(); +} + +void CHAGuardOptimization::Run() { + if (graph_->GetNumberOfCHAGuards() == 0) { + return; + } + CHAGuardVisitor visitor(graph_); + for (HBasicBlock* block : graph_->GetReversePostOrder()) { + visitor.VisitBasicBlock(block); + } +} + +} // namespace art diff --git a/compiler/optimizing/cha_guard_optimization.h b/compiler/optimizing/cha_guard_optimization.h new file mode 100644 index 0000000000..ba0cdb81fd --- /dev/null +++ b/compiler/optimizing/cha_guard_optimization.h @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_CHA_GUARD_OPTIMIZATION_H_ +#define ART_COMPILER_OPTIMIZING_CHA_GUARD_OPTIMIZATION_H_ + +#include "optimization.h" + +namespace art { + +/** + * Optimize CHA guards by removing/moving them. 
+ */ +class CHAGuardOptimization : public HOptimization { + public: + explicit CHAGuardOptimization(HGraph* graph) + : HOptimization(graph, kCHAGuardOptimizationPassName) {} + + void Run() OVERRIDE; + + static constexpr const char* kCHAGuardOptimizationPassName = "cha_guard_optimization"; + + private: + DISALLOW_COPY_AND_ASSIGN(CHAGuardOptimization); +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_CHA_GUARD_OPTIMIZATION_H_ diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index 1df00fff9a..517bdbf4c2 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -805,7 +805,7 @@ class ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL : public SlowPathCodeARMVIXL // as-is. vixl32::Label done; __ Cmp(temp1_, ref_reg); - __ B(eq, &done); + __ B(eq, &done, /* far_target */ false); // Update the the holder's field atomically. This may fail if // mutator updates before us, but it's OK. This is achieved @@ -857,11 +857,11 @@ class ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL : public SlowPathCodeARMVIXL __ clrex(ne); } - __ B(ne, &exit_loop); + __ B(ne, &exit_loop, /* far_target */ false); __ Strex(tmp, value, MemOperand(tmp_ptr)); __ Cmp(tmp, 1); - __ B(eq, &loop_head); + __ B(eq, &loop_head, /* far_target */ false); __ Bind(&exit_loop); @@ -3642,7 +3642,7 @@ void InstructionCodeGeneratorARMVIXL::HandleLongRotate(HRor* ror) { __ And(shift_right, RegisterFrom(rhs), 0x1F); __ Lsrs(shift_left, RegisterFrom(rhs), 6); __ Rsb(LeaveFlags, shift_left, shift_right, Operand::From(kArmBitsPerWord)); - __ B(cc, &shift_by_32_plus_shift_right); + __ B(cc, &shift_by_32_plus_shift_right, /* far_target */ false); // out_reg_hi = (reg_hi << shift_left) | (reg_lo >> shift_right). // out_reg_lo = (reg_lo << shift_left) | (reg_hi >> shift_right). @@ -4114,8 +4114,8 @@ void InstructionCodeGeneratorARMVIXL::VisitCompare(HCompare* compare) { } case Primitive::kPrimLong: { __ Cmp(HighRegisterFrom(left), HighRegisterFrom(right)); // Signed compare. - __ B(lt, &less); - __ B(gt, &greater); + __ B(lt, &less, /* far_target */ false); + __ B(gt, &greater, /* far_target */ false); // Emit move to `out` before the last `Cmp`, as `Mov` might affect the status flags. __ Mov(out, 0); __ Cmp(LowRegisterFrom(left), LowRegisterFrom(right)); // Unsigned compare. @@ -4136,8 +4136,8 @@ void InstructionCodeGeneratorARMVIXL::VisitCompare(HCompare* compare) { UNREACHABLE(); } - __ B(eq, &done); - __ B(less_cond, &less); + __ B(eq, &done, /* far_target */ false); + __ B(less_cond, &less, /* far_target */ false); __ Bind(&greater); __ Mov(out, 1); @@ -4934,7 +4934,7 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { __ Lsrs(length, length, 1u); // LSRS has a 16-bit encoding, TST (immediate) does not. static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, "Expecting 0=compressed, 1=uncompressed"); - __ B(cs, &uncompressed_load); + __ B(cs, &uncompressed_load, /* far_target */ false); GetAssembler()->LoadFromOffset(kLoadUnsignedByte, RegisterFrom(out_loc), obj, @@ -4973,7 +4973,7 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { __ Lsrs(length, length, 1u); // LSRS has a 16-bit encoding, TST (immediate) does not. 
static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, "Expecting 0=compressed, 1=uncompressed"); - __ B(cs, &uncompressed_load); + __ B(cs, &uncompressed_load, /* far_target */ false); __ Ldrb(RegisterFrom(out_loc), MemOperand(temp, RegisterFrom(index), vixl32::LSL, 0)); __ B(&done); __ Bind(&uncompressed_load); @@ -5272,7 +5272,7 @@ void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) { if (instruction->StaticTypeOfArrayIsObjectArray()) { vixl32::Label do_put; - __ B(eq, &do_put); + __ B(eq, &do_put, /* far_target */ false); // If heap poisoning is enabled, the `temp1` reference has // not been unpoisoned yet; unpoison it now. GetAssembler()->MaybeUnpoisonHeapReference(temp1); @@ -6213,7 +6213,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) kCompilerReadBarrierOption); __ Cmp(out, cls); // Classes must be equal for the instanceof to succeed. - __ B(ne, &zero); + __ B(ne, &zero, /* far_target */ false); __ Mov(out, 1); __ B(&done); break; @@ -6240,7 +6240,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) // If `out` is null, we use it for the result, and jump to `done`. __ CompareAndBranchIfZero(out, &done, /* far_target */ false); __ Cmp(out, cls); - __ B(ne, &loop); + __ B(ne, &loop, /* far_target */ false); __ Mov(out, 1); if (zero.IsReferenced()) { __ B(&done); @@ -6260,7 +6260,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) vixl32::Label loop, success; __ Bind(&loop); __ Cmp(out, cls); - __ B(eq, &success); + __ B(eq, &success, /* far_target */ false); // /* HeapReference<Class> */ out = out->super_class_ GenerateReferenceLoadOneRegister(instruction, out_loc, @@ -6289,7 +6289,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) // Do an exact check. vixl32::Label exact_check; __ Cmp(out, cls); - __ B(eq, &exact_check); + __ B(eq, &exact_check, /* far_target */ false); // Otherwise, we need to check that the object's class is a non-primitive array. // /* HeapReference<Class> */ out = out->component_type_ GenerateReferenceLoadOneRegister(instruction, @@ -6491,7 +6491,7 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) { // Otherwise, compare the classes. __ Cmp(temp, cls); - __ B(ne, &loop); + __ B(ne, &loop, /* far_target */ false); break; } @@ -6508,7 +6508,7 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) { vixl32::Label loop; __ Bind(&loop); __ Cmp(temp, cls); - __ B(eq, &done); + __ B(eq, &done, /* far_target */ false); // /* HeapReference<Class> */ temp = temp->super_class_ GenerateReferenceLoadOneRegister(instruction, @@ -6536,7 +6536,7 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) { // Do an exact check. __ Cmp(temp, cls); - __ B(eq, &done); + __ B(eq, &done, /* far_target */ false); // Otherwise, we need to check that the object's class is a non-primitive array. // /* HeapReference<Class> */ temp = temp->component_type_ @@ -6600,7 +6600,7 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) { __ Sub(RegisterFrom(maybe_temp2_loc), RegisterFrom(maybe_temp2_loc), 2); // Compare the classes and continue the loop if they do not match. 
__ Cmp(cls, RegisterFrom(maybe_temp3_loc)); - __ B(ne, &start_loop); + __ B(ne, &start_loop, /* far_target */ false); break; } } diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 456c5c6a92..897b719180 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -5689,11 +5689,7 @@ void InstructionCodeGeneratorMIPS::VisitClearException(HClearException* clear AT } void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) { - LocationSummary::CallKind call_kind = (load->NeedsEnvironment() || kEmitCompilerReadBarrier) - ? ((load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) - ? LocationSummary::kCallOnMainOnly - : LocationSummary::kCallOnSlowPath) - : LocationSummary::kNoCall; + LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); HLoadString::LoadKind load_kind = load->GetLoadKind(); switch (load_kind) { @@ -5743,14 +5739,12 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) { switch (load_kind) { case HLoadString::LoadKind::kBootImageLinkTimeAddress: - DCHECK(!kEmitCompilerReadBarrier); __ LoadLiteral(out, base_or_current_method_reg, codegen_->DeduplicateBootImageStringLiteral(load->GetDexFile(), load->GetStringIndex())); return; // No dex cache slow path. case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { - DCHECK(!kEmitCompilerReadBarrier); DCHECK(codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorMIPS::PcRelativePatchInfo* info = codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex().index_); @@ -5758,7 +5752,6 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) { return; // No dex cache slow path. } case HLoadString::LoadKind::kBootImageAddress: { - DCHECK(!kEmitCompilerReadBarrier); DCHECK_NE(load->GetAddress(), 0u); uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress()); __ LoadLiteral(out, diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 44d3759978..cf9f9d4b77 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -91,8 +91,6 @@ Location InvokeDexCallingConventionVisitorMIPS64::GetNextLocation(Primitive::Typ // Space on the stack is reserved for all arguments. stack_index_ += Primitive::Is64BitType(type) ? 2 : 1; - // TODO: review - // TODO: shouldn't we use a whole machine word per argument on the stack? // Implicit 4-byte method pointer (and such) will cause misalignment. @@ -235,6 +233,7 @@ class LoadStringSlowPathMIPS64 : public SlowPathCodeMIPS64 { SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; + HLoadString* load = instruction_->AsLoadString(); const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex().index_; __ LoadConst32(calling_convention.GetRegisterAt(0), string_index); mips64_codegen->InvokeRuntime(kQuickResolveString, @@ -248,6 +247,17 @@ class LoadStringSlowPathMIPS64 : public SlowPathCodeMIPS64 { type); RestoreLiveRegisters(codegen, locations); + + // Store the resolved String to the BSS entry. + // TODO: Change art_quick_resolve_string to kSaveEverything and use a temporary for the + // .bss entry address in the fast path, so that we can avoid another calculation here. 
+ GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + DCHECK_NE(out, AT); + CodeGeneratorMIPS64::PcRelativePatchInfo* info = + mips64_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index); + mips64_codegen->EmitPcRelativeAddressPlaceholderHigh(info, AT); + __ Sw(out, AT, /* placeholder */ 0x5678); + __ Bc(GetExitLabel()); } @@ -401,6 +411,8 @@ CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph, move_resolver_(graph->GetArena(), this), assembler_(graph->GetArena()), isa_features_(isa_features), + uint32_literals_(std::less<uint32_t>(), + graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), uint64_literals_(std::less<uint64_t>(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), method_patches_(MethodReferenceComparator(), @@ -408,7 +420,15 @@ CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph, call_patches_(MethodReferenceComparator(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { + relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + boot_image_string_patches_(StringReferenceValueComparator(), + graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + boot_image_type_patches_(TypeReferenceValueComparator(), + graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + boot_image_address_patches_(std::less<uint32_t>(), + graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { // Save RA (containing the return address) to mimic Quick. AddAllocatedRegister(Location::RegisterLocation(RA)); } @@ -907,7 +927,12 @@ void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pat method_patches_.size() + call_patches_.size() + pc_relative_dex_cache_patches_.size() + - relative_call_patches_.size(); + relative_call_patches_.size() + + pc_relative_string_patches_.size() + + pc_relative_type_patches_.size() + + boot_image_string_patches_.size() + + boot_image_type_patches_.size() + + boot_image_address_patches_.size(); linker_patches->reserve(size); for (const auto& entry : method_patches_) { const MethodReference& target_method = entry.first; @@ -937,6 +962,50 @@ void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pat linker_patches->push_back( LinkerPatch::RelativeCodePatch(pc_rel_offset, &dex_file, method_index)); } + if (!GetCompilerOptions().IsBootImage()) { + EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, + linker_patches); + } else { + EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_, + linker_patches); + } + EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_, + linker_patches); + for (const auto& entry : boot_image_string_patches_) { + const StringReference& target_string = entry.first; + Literal* literal = entry.second; + DCHECK(literal->GetLabel()->IsBound()); + uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel()); + linker_patches->push_back(LinkerPatch::StringPatch(literal_offset, + target_string.dex_file, + target_string.string_index.index_)); + } + for (const auto& entry : boot_image_type_patches_) { + const TypeReference& target_type = entry.first; + Literal* literal = entry.second; + 
DCHECK(literal->GetLabel()->IsBound()); + uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel()); + linker_patches->push_back(LinkerPatch::TypePatch(literal_offset, + target_type.dex_file, + target_type.type_index.index_)); + } + for (const auto& entry : boot_image_address_patches_) { + DCHECK(GetCompilerOptions().GetIncludePatchInformation()); + Literal* literal = entry.second; + DCHECK(literal->GetLabel()->IsBound()); + uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel()); + linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset)); + } +} + +CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeStringPatch( + const DexFile& dex_file, uint32_t string_index) { + return NewPcRelativePatch(dex_file, string_index, &pc_relative_string_patches_); +} + +CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeTypePatch( + const DexFile& dex_file, dex::TypeIndex type_index) { + return NewPcRelativePatch(dex_file, type_index.index_, &pc_relative_type_patches_); } CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeDexCacheArrayPatch( @@ -955,6 +1024,12 @@ CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativePatc return &patches->back(); } +Literal* CodeGeneratorMIPS64::DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map) { + return map->GetOrCreate( + value, + [this, value]() { return __ NewLiteral<uint32_t>(value); }); +} + Literal* CodeGeneratorMIPS64::DeduplicateUint64Literal(uint64_t value) { return uint64_literals_.GetOrCreate( value, @@ -976,13 +1051,33 @@ Literal* CodeGeneratorMIPS64::DeduplicateMethodCodeLiteral(MethodReference targe return DeduplicateMethodLiteral(target_method, &call_patches_); } +Literal* CodeGeneratorMIPS64::DeduplicateBootImageStringLiteral(const DexFile& dex_file, + dex::StringIndex string_index) { + return boot_image_string_patches_.GetOrCreate( + StringReference(&dex_file, string_index), + [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); }); +} + +Literal* CodeGeneratorMIPS64::DeduplicateBootImageTypeLiteral(const DexFile& dex_file, + dex::TypeIndex type_index) { + return boot_image_type_patches_.GetOrCreate( + TypeReference(&dex_file, type_index), + [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); }); +} + +Literal* CodeGeneratorMIPS64::DeduplicateBootImageAddressLiteral(uint64_t address) { + bool needs_patch = GetCompilerOptions().GetIncludePatchInformation(); + Uint32ToLiteralMap* map = needs_patch ? &boot_image_address_patches_ : &uint32_literals_; + return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), map); +} + void CodeGeneratorMIPS64::EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info, GpuRegister out) { __ Bind(&info->pc_rel_label); // Add the high half of a 32-bit offset to PC. __ Auipc(out, /* placeholder */ 0x1234); // The immediately following instruction will add the sign-extended low half of the 32-bit - // offset to `out` (e.g. ld, jialc, addiu). + // offset to `out` (e.g. ld, jialc, daddiu). } void CodeGeneratorMIPS64::SetupBlockedRegisters() const { @@ -1008,8 +1103,6 @@ void CodeGeneratorMIPS64::SetupBlockedRegisters() const { // Reserve T9 for function calls blocked_core_registers_[T9] = true; - // TODO: review; anything else? - if (GetGraph()->IsDebuggable()) { // Stubs do not save callee-save floating point registers. If the graph // is debuggable, we need to deal with these registers differently. 
For @@ -2929,6 +3022,31 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceFieldSet(HInstanceFieldSet* in HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); } +void InstructionCodeGeneratorMIPS64::GenerateGcRootFieldLoad( + HInstruction* instruction ATTRIBUTE_UNUSED, + Location root, + GpuRegister obj, + uint32_t offset) { + // When handling HLoadClass::LoadKind::kDexCachePcRelative, the caller calls + // EmitPcRelativeAddressPlaceholderHigh() and then GenerateGcRootFieldLoad(). + // The relative patcher expects the two methods to emit the following patchable + // sequence of instructions in this case: + // auipc reg1, 0x1234 // 0x1234 is a placeholder for offset_high. + // lwu reg2, 0x5678(reg1) // 0x5678 is a placeholder for offset_low. + // TODO: Adjust GenerateGcRootFieldLoad() and its caller when this method is + // extended (e.g. for read barriers) so as not to break the relative patcher. + GpuRegister root_reg = root.AsRegister<GpuRegister>(); + if (kEmitCompilerReadBarrier) { + UNIMPLEMENTED(FATAL) << "for read barrier"; + } else { + // Plain GC root load with no read barrier. + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + __ LoadFromOffset(kLoadUnsignedWord, root_reg, obj, offset); + // Note that GC roots are not affected by heap poisoning, thus we + // do not have to unpoison `root_reg` here. + } +} + void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary::CallKind call_kind = instruction->IsExactCheck() ? LocationSummary::kNoCall : LocationSummary::kCallOnSlowPath; @@ -3080,16 +3198,69 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorMIPS64* codeg } HLoadString::LoadKind CodeGeneratorMIPS64::GetSupportedLoadStringKind( - HLoadString::LoadKind desired_string_load_kind ATTRIBUTE_UNUSED) { - // TODO: Implement other kinds. - return HLoadString::LoadKind::kDexCacheViaMethod; + HLoadString::LoadKind desired_string_load_kind) { + if (kEmitCompilerReadBarrier) { + UNIMPLEMENTED(FATAL) << "for read barrier"; + } + bool fallback_load = false; + switch (desired_string_load_kind) { + case HLoadString::LoadKind::kBootImageLinkTimeAddress: + DCHECK(!GetCompilerOptions().GetCompilePic()); + break; + case HLoadString::LoadKind::kBootImageLinkTimePcRelative: + DCHECK(GetCompilerOptions().GetCompilePic()); + break; + case HLoadString::LoadKind::kBootImageAddress: + break; + case HLoadString::LoadKind::kBssEntry: + DCHECK(!Runtime::Current()->UseJitCompilation()); + break; + case HLoadString::LoadKind::kDexCacheViaMethod: + break; + case HLoadString::LoadKind::kJitTableAddress: + DCHECK(Runtime::Current()->UseJitCompilation()); + // TODO: implement. + fallback_load = true; + break; + } + if (fallback_load) { + desired_string_load_kind = HLoadString::LoadKind::kDexCacheViaMethod; + } + return desired_string_load_kind; } HLoadClass::LoadKind CodeGeneratorMIPS64::GetSupportedLoadClassKind( HLoadClass::LoadKind desired_class_load_kind) { - DCHECK_NE(desired_class_load_kind, HLoadClass::LoadKind::kReferrersClass); - // TODO: Implement other kinds. 
- return HLoadClass::LoadKind::kDexCacheViaMethod; + if (kEmitCompilerReadBarrier) { + UNIMPLEMENTED(FATAL) << "for read barrier"; + } + bool fallback_load = false; + switch (desired_class_load_kind) { + case HLoadClass::LoadKind::kReferrersClass: + break; + case HLoadClass::LoadKind::kBootImageLinkTimeAddress: + DCHECK(!GetCompilerOptions().GetCompilePic()); + break; + case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: + DCHECK(GetCompilerOptions().GetCompilePic()); + break; + case HLoadClass::LoadKind::kBootImageAddress: + break; + case HLoadClass::LoadKind::kJitTableAddress: + DCHECK(Runtime::Current()->UseJitCompilation()); + // TODO: implement. + fallback_load = true; + break; + case HLoadClass::LoadKind::kDexCachePcRelative: + DCHECK(!Runtime::Current()->UseJitCompilation()); + break; + case HLoadClass::LoadKind::kDexCacheViaMethod: + break; + } + if (fallback_load) { + desired_class_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod; + } + return desired_class_load_kind; } HInvokeStaticOrDirect::DispatchInfo CodeGeneratorMIPS64::GetSupportedInvokeStaticOrDirectDispatch( @@ -3271,11 +3442,26 @@ void InstructionCodeGeneratorMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke) } void LocationsBuilderMIPS64::VisitLoadClass(HLoadClass* cls) { - InvokeRuntimeCallingConvention calling_convention; - CodeGenerator::CreateLoadClassLocationSummary( - cls, - Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - calling_convention.GetReturnLocation(cls->GetType())); + if (cls->NeedsAccessCheck()) { + InvokeRuntimeCallingConvention calling_convention; + CodeGenerator::CreateLoadClassLocationSummary( + cls, + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + calling_convention.GetReturnLocation(Primitive::kPrimNot), + /* code_generator_supports_read_barrier */ false); + return; + } + + LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier) + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall; + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind); + HLoadClass::LoadKind load_kind = cls->GetLoadKind(); + if (load_kind == HLoadClass::LoadKind::kReferrersClass || + load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) { + locations->SetInAt(0, Location::RequiresRegister()); + } + locations->SetOut(Location::RequiresRegister()); } void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) { @@ -3287,35 +3473,90 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) { return; } - GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - GpuRegister current_method = locations->InAt(0).AsRegister<GpuRegister>(); - if (cls->IsReferrersClass()) { - DCHECK(!cls->CanCallRuntime()); - DCHECK(!cls->MustGenerateClinitCheck()); - __ LoadFromOffset(kLoadUnsignedWord, out, current_method, - ArtMethod::DeclaringClassOffset().Int32Value()); - } else { - __ LoadFromOffset(kLoadDoubleword, out, current_method, - ArtMethod::DexCacheResolvedTypesOffset(kMips64PointerSize).Int32Value()); - __ LoadFromOffset( - kLoadUnsignedWord, out, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex().index_)); - // TODO: We will need a read barrier here. 
- if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) { - DCHECK(cls->CanCallRuntime()); - SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS64( - cls, - cls, - cls->GetDexPc(), - cls->MustGenerateClinitCheck()); - codegen_->AddSlowPath(slow_path); - if (!cls->IsInDexCache()) { - __ Beqzc(out, slow_path->GetEntryLabel()); - } - if (cls->MustGenerateClinitCheck()) { - GenerateClassInitializationCheck(slow_path, out); - } else { - __ Bind(slow_path->GetExitLabel()); - } + HLoadClass::LoadKind load_kind = cls->GetLoadKind(); + Location out_loc = locations->Out(); + GpuRegister out = out_loc.AsRegister<GpuRegister>(); + GpuRegister current_method_reg = ZERO; + if (load_kind == HLoadClass::LoadKind::kReferrersClass || + load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) { + current_method_reg = locations->InAt(0).AsRegister<GpuRegister>(); + } + + bool generate_null_check = false; + switch (load_kind) { + case HLoadClass::LoadKind::kReferrersClass: + DCHECK(!cls->CanCallRuntime()); + DCHECK(!cls->MustGenerateClinitCheck()); + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + GenerateGcRootFieldLoad(cls, + out_loc, + current_method_reg, + ArtMethod::DeclaringClassOffset().Int32Value()); + break; + case HLoadClass::LoadKind::kBootImageLinkTimeAddress: + DCHECK(!kEmitCompilerReadBarrier); + __ LoadLiteral(out, + kLoadUnsignedWord, + codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(), + cls->GetTypeIndex())); + break; + case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: { + DCHECK(!kEmitCompilerReadBarrier); + CodeGeneratorMIPS64::PcRelativePatchInfo* info = + codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); + codegen_->EmitPcRelativeAddressPlaceholderHigh(info, AT); + __ Daddiu(out, AT, /* placeholder */ 0x5678); + break; + } + case HLoadClass::LoadKind::kBootImageAddress: { + DCHECK(!kEmitCompilerReadBarrier); + DCHECK_NE(cls->GetAddress(), 0u); + uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress()); + __ LoadLiteral(out, + kLoadUnsignedWord, + codegen_->DeduplicateBootImageAddressLiteral(address)); + break; + } + case HLoadClass::LoadKind::kJitTableAddress: { + LOG(FATAL) << "Unimplemented"; + break; + } + case HLoadClass::LoadKind::kDexCachePcRelative: { + uint32_t element_offset = cls->GetDexCacheElementOffset(); + CodeGeneratorMIPS64::PcRelativePatchInfo* info = + codegen_->NewPcRelativeDexCacheArrayPatch(cls->GetDexFile(), element_offset); + codegen_->EmitPcRelativeAddressPlaceholderHigh(info, AT); + // /* GcRoot<mirror::Class> */ out = *address /* PC-relative */ + GenerateGcRootFieldLoad(cls, out_loc, AT, /* placeholder */ 0x5678); + generate_null_check = !cls->IsInDexCache(); + break; + } + case HLoadClass::LoadKind::kDexCacheViaMethod: { + // /* GcRoot<mirror::Class>[] */ out = + // current_method.ptr_sized_fields_->dex_cache_resolved_types_ + __ LoadFromOffset(kLoadDoubleword, + out, + current_method_reg, + ArtMethod::DexCacheResolvedTypesOffset(kMips64PointerSize).Int32Value()); + // /* GcRoot<mirror::Class> */ out = out[type_index] + size_t offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex().index_); + GenerateGcRootFieldLoad(cls, out_loc, out, offset); + generate_null_check = !cls->IsInDexCache(); + } + } + + if (generate_null_check || cls->MustGenerateClinitCheck()) { + DCHECK(cls->CanCallRuntime()); + SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS64( + cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); + 
codegen_->AddSlowPath(slow_path); + if (generate_null_check) { + __ Beqzc(out, slow_path->GetEntryLabel()); + } + if (cls->MustGenerateClinitCheck()) { + GenerateClassInitializationCheck(slow_path, out); + } else { + __ Bind(slow_path->GetExitLabel()); } } } @@ -3344,20 +3585,68 @@ void InstructionCodeGeneratorMIPS64::VisitClearException(HClearException* clear } void LocationsBuilderMIPS64::VisitLoadString(HLoadString* load) { - LocationSummary::CallKind call_kind = load->NeedsEnvironment() - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall; + HLoadString::LoadKind load_kind = load->GetLoadKind(); + LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister()); + if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) { + InvokeRuntimeCallingConvention calling_convention; + locations->SetOut(calling_convention.GetReturnLocation(load->GetType())); + } else { + locations->SetOut(Location::RequiresRegister()); + } } void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) { + HLoadString::LoadKind load_kind = load->GetLoadKind(); + LocationSummary* locations = load->GetLocations(); + Location out_loc = locations->Out(); + GpuRegister out = out_loc.AsRegister<GpuRegister>(); + + switch (load_kind) { + case HLoadString::LoadKind::kBootImageLinkTimeAddress: + __ LoadLiteral(out, + kLoadUnsignedWord, + codegen_->DeduplicateBootImageStringLiteral(load->GetDexFile(), + load->GetStringIndex())); + return; // No dex cache slow path. + case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { + DCHECK(codegen_->GetCompilerOptions().IsBootImage()); + CodeGeneratorMIPS64::PcRelativePatchInfo* info = + codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex().index_); + codegen_->EmitPcRelativeAddressPlaceholderHigh(info, AT); + __ Daddiu(out, AT, /* placeholder */ 0x5678); + return; // No dex cache slow path. + } + case HLoadString::LoadKind::kBootImageAddress: { + DCHECK_NE(load->GetAddress(), 0u); + uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress()); + __ LoadLiteral(out, + kLoadUnsignedWord, + codegen_->DeduplicateBootImageAddressLiteral(address)); + return; // No dex cache slow path. + } + case HLoadString::LoadKind::kBssEntry: { + DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + CodeGeneratorMIPS64::PcRelativePatchInfo* info = + codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex().index_); + codegen_->EmitPcRelativeAddressPlaceholderHigh(info, AT); + __ Lwu(out, AT, /* placeholder */ 0x5678); + SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS64(load); + codegen_->AddSlowPath(slow_path); + __ Beqzc(out, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + return; + } + default: + break; + } + // TODO: Re-add the compiler code to do string dex cache lookup again. 
- SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS64(load); - codegen_->AddSlowPath(slow_path); - __ Bc(slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); + DCHECK(load_kind == HLoadString::LoadKind::kDexCacheViaMethod); + InvokeRuntimeCallingConvention calling_convention; + __ LoadConst32(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_); + codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc()); + CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); } void LocationsBuilderMIPS64::VisitLongConstant(HLongConstant* constant) { diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h index 067c1f940f..cbd4957e4a 100644 --- a/compiler/optimizing/code_generator_mips64.h +++ b/compiler/optimizing/code_generator_mips64.h @@ -22,6 +22,7 @@ #include "nodes.h" #include "parallel_move_resolver.h" #include "utils/mips64/assembler_mips64.h" +#include "utils/type_reference.h" namespace art { namespace mips64 { @@ -227,6 +228,15 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator { const FieldInfo& field_info, bool value_can_be_null); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + // Generate a GC root reference load: + // + // root <- *(obj + offset) + // + // while honoring read barriers (if any). + void GenerateGcRootFieldLoad(HInstruction* instruction, + Location root, + GpuRegister obj, + uint32_t offset); void GenerateTestAndBranch(HInstruction* instruction, size_t condition_input_index, Mips64Label* true_target, @@ -375,16 +385,31 @@ class CodeGeneratorMIPS64 : public CodeGenerator { Mips64Label pc_rel_label; }; + PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, uint32_t string_index); + PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, dex::TypeIndex type_index); PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset); PcRelativePatchInfo* NewPcRelativeCallPatch(const DexFile& dex_file, uint32_t method_index); + Literal* DeduplicateBootImageStringLiteral(const DexFile& dex_file, + dex::StringIndex string_index); + Literal* DeduplicateBootImageTypeLiteral(const DexFile& dex_file, dex::TypeIndex type_index); + Literal* DeduplicateBootImageAddressLiteral(uint64_t address); void EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info, GpuRegister out); private: + using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, Literal*>; using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, Literal*>; using MethodToLiteralMap = ArenaSafeMap<MethodReference, Literal*, MethodReferenceComparator>; + using BootStringToLiteralMap = ArenaSafeMap<StringReference, + Literal*, + StringReferenceValueComparator>; + using BootTypeToLiteralMap = ArenaSafeMap<TypeReference, + Literal*, + TypeReferenceValueComparator>; + + Literal* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map); Literal* DeduplicateUint64Literal(uint64_t value); Literal* DeduplicateMethodLiteral(MethodReference target_method, MethodToLiteralMap* map); Literal* DeduplicateMethodAddressLiteral(MethodReference target_method); @@ -407,6 +432,8 @@ class CodeGeneratorMIPS64 : public CodeGenerator { Mips64Assembler assembler_; const Mips64InstructionSetFeatures& isa_features_; + // Deduplication map for 32-bit literals, used for non-patchable boot image addresses. 
+ Uint32ToLiteralMap uint32_literals_; // Deduplication map for 64-bit literals, used for non-patchable method address or method code // address. Uint64ToLiteralMap uint64_literals_; @@ -416,6 +443,16 @@ class CodeGeneratorMIPS64 : public CodeGenerator { // PC-relative patch info. ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_; ArenaDeque<PcRelativePatchInfo> relative_call_patches_; + // Deduplication map for boot string literals for kBootImageLinkTimeAddress. + BootStringToLiteralMap boot_image_string_patches_; + // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC). + ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; + // Deduplication map for boot type literals for kBootImageLinkTimeAddress. + BootTypeToLiteralMap boot_image_type_patches_; + // PC-relative type patch info. + ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_; + // Deduplication map for patchable boot image addresses. + Uint32ToLiteralMap boot_image_address_patches_; DISALLOW_COPY_AND_ASSIGN(CodeGeneratorMIPS64); }; diff --git a/compiler/optimizing/induction_var_analysis.cc b/compiler/optimizing/induction_var_analysis.cc index 5456b1e9bf..88473f02e5 100644 --- a/compiler/optimizing/induction_var_analysis.cc +++ b/compiler/optimizing/induction_var_analysis.cc @@ -431,16 +431,17 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::TransferAddSub(Indu if (IsNarrowingLinear(a) || IsNarrowingLinear(b)) { return nullptr; // no transfer } else if (a->induction_class == kInvariant && b->induction_class == kInvariant) { - return CreateInvariantOp(op, a, b); + return CreateInvariantOp(op, a, b); // direct invariant } else if ((a->induction_class == kLinear && b->induction_class == kLinear) || (a->induction_class == kPolynomial && b->induction_class == kPolynomial)) { - return CreateInduction(a->induction_class, - a->operation, - TransferAddSub(a->op_a, b->op_a, op), - TransferAddSub(a->op_b, b->op_b, op), - /*fetch*/ nullptr, - type_); + // Rule induc(a, b) + induc(a', b') -> induc(a + a', b + b'). + InductionInfo* new_a = TransferAddSub(a->op_a, b->op_a, op); + InductionInfo* new_b = TransferAddSub(a->op_b, b->op_b, op); + if (new_a != nullptr && new_b != nullptr) { + return CreateInduction(a->induction_class, a->operation, new_a, new_b, a->fetch, type_); + } } else if (a->induction_class == kInvariant) { + // Rule a + induc(a', b') -> induc(a', a + b') or induc(a + a', a + b'). InductionInfo* new_a = b->op_a; InductionInfo* new_b = TransferAddSub(a, b->op_b, op); if (b->induction_class == kWrapAround || b->induction_class == kPeriodic) { @@ -448,14 +449,19 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::TransferAddSub(Indu } else if (op == kSub) { // Negation required. new_a = TransferNeg(new_a); } - return CreateInduction(b->induction_class, b->operation, new_a, new_b, b->fetch, type_); + if (new_a != nullptr && new_b != nullptr) { + return CreateInduction(b->induction_class, b->operation, new_a, new_b, b->fetch, type_); + } } else if (b->induction_class == kInvariant) { + // Rule induc(a, b) + b' -> induc(a, b + b') or induc(a + b', b + b'). 
InductionInfo* new_a = a->op_a; InductionInfo* new_b = TransferAddSub(a->op_b, b, op); if (a->induction_class == kWrapAround || a->induction_class == kPeriodic) { new_a = TransferAddSub(new_a, b, op); } - return CreateInduction(a->induction_class, a->operation, new_a, new_b, a->fetch, type_); + if (new_a != nullptr && new_b != nullptr) { + return CreateInduction(a->induction_class, a->operation, new_a, new_b, a->fetch, type_); + } } } return nullptr; @@ -468,14 +474,14 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::TransferNeg(Inducti if (IsNarrowingLinear(a)) { return nullptr; // no transfer } else if (a->induction_class == kInvariant) { - return CreateInvariantOp(kNeg, nullptr, a); + return CreateInvariantOp(kNeg, nullptr, a); // direct invariant } else if (a->induction_class != kGeometric || a->operation == kMul) { - return CreateInduction(a->induction_class, - a->operation, - TransferNeg(a->op_a), - TransferNeg(a->op_b), - a->fetch, - type_); + // Rule - induc(a, b) -> induc(-a, -b). + InductionInfo* new_a = TransferNeg(a->op_a); + InductionInfo* new_b = TransferNeg(a->op_b); + if (new_a != nullptr && new_b != nullptr) { + return CreateInduction(a->induction_class, a->operation, new_a, new_b, a->fetch, type_); + } } } return nullptr; @@ -490,23 +496,23 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::TransferMul(Inducti if (IsNarrowingLinear(a) || IsNarrowingLinear(b)) { return nullptr; // no transfer } else if (a->induction_class == kInvariant && b->induction_class == kInvariant) { - return CreateInvariantOp(kMul, a, b); + return CreateInvariantOp(kMul, a, b); // direct invariant } else if (a->induction_class == kInvariant && (b->induction_class != kGeometric || b->operation == kMul)) { - return CreateInduction(b->induction_class, - b->operation, - TransferMul(a, b->op_a), - TransferMul(a, b->op_b), - b->fetch, - type_); + // Rule a * induc(a', b') -> induc(a * a', b * b'). + InductionInfo* new_a = TransferMul(a, b->op_a); + InductionInfo* new_b = TransferMul(a, b->op_b); + if (new_a != nullptr && new_b != nullptr) { + return CreateInduction(b->induction_class, b->operation, new_a, new_b, b->fetch, type_); + } } else if (b->induction_class == kInvariant && (a->induction_class != kGeometric || a->operation == kMul)) { - return CreateInduction(a->induction_class, - a->operation, - TransferMul(a->op_a, b), - TransferMul(a->op_b, b), - a->fetch, - type_); + // Rule induc(a, b) * b' -> induc(a * b', b * b'). 
+ InductionInfo* new_a = TransferMul(a->op_a, b); + InductionInfo* new_b = TransferMul(a->op_b, b); + if (new_a != nullptr && new_b != nullptr) { + return CreateInduction(a->induction_class, a->operation, new_a, new_b, a->fetch, type_); + } } } return nullptr; @@ -522,7 +528,7 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::TransferConversion( if (IsNarrowingIntegralConversion(from, to) && a->induction_class == kLinear && (a->type == to || IsNarrowingIntegralConversion(a->type, to))) { - return CreateInduction(kLinear, kNop, a->op_a, a->op_b, /*fetch*/ nullptr, to); + return CreateInduction(kLinear, kNop, a->op_a, a->op_b, a->fetch, to); } } return nullptr; @@ -600,17 +606,15 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::SolveAddSub(HLoopIn return CreateInvariantOp(op, a, b); } } - } else if (b->induction_class == kLinear) { + } else if (b->induction_class == kLinear && b->type == type_) { // Solve within a tight cycle that adds a term that is already classified as a linear // induction for a polynomial induction k = k + i (represented as sum over linear terms). if (x == entry_phi && entry_phi->InputCount() == 2 && instruction == entry_phi->InputAt(1)) { InductionInfo* initial = LookupInfo(loop, entry_phi->InputAt(0)); - return CreateInduction(kPolynomial, - kNop, - op == kAdd ? b : TransferNeg(b), - initial, - /*fetch*/ nullptr, - type_); + InductionInfo* new_a = op == kAdd ? b : TransferNeg(b); + if (new_a != nullptr) { + return CreateInduction(kPolynomial, kNop, new_a, initial, /*fetch*/ nullptr, type_); + } } } } diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index fe4662abb1..d84787984d 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -480,13 +480,11 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction, } // We successfully inlined, now add a guard. - bool is_referrer = - (GetMonomorphicType(classes) == outermost_graph_->GetArtMethod()->GetDeclaringClass()); AddTypeGuard(receiver, cursor, bb_cursor, class_index, - is_referrer, + GetMonomorphicType(classes), invoke_instruction, /* with_deoptimization */ true); @@ -506,52 +504,62 @@ void HInliner::AddCHAGuard(HInstruction* invoke_instruction, uint32_t dex_pc, HInstruction* cursor, HBasicBlock* bb_cursor) { - HInstruction* deopt_flag = new (graph_->GetArena()) HShouldDeoptimizeFlag(dex_pc); - HInstruction* should_deopt = new (graph_->GetArena()) HNotEqual( + HShouldDeoptimizeFlag* deopt_flag = new (graph_->GetArena()) + HShouldDeoptimizeFlag(graph_->GetArena(), dex_pc); + HInstruction* compare = new (graph_->GetArena()) HNotEqual( deopt_flag, graph_->GetIntConstant(0, dex_pc)); - HInstruction* deopt = new (graph_->GetArena()) HDeoptimize(should_deopt, dex_pc); + HInstruction* deopt = new (graph_->GetArena()) HDeoptimize(compare, dex_pc); if (cursor != nullptr) { bb_cursor->InsertInstructionAfter(deopt_flag, cursor); } else { bb_cursor->InsertInstructionBefore(deopt_flag, bb_cursor->GetFirstInstruction()); } - bb_cursor->InsertInstructionAfter(should_deopt, deopt_flag); - bb_cursor->InsertInstructionAfter(deopt, should_deopt); + bb_cursor->InsertInstructionAfter(compare, deopt_flag); + bb_cursor->InsertInstructionAfter(deopt, compare); + + // Add receiver as input to aid CHA guard optimization later. 
+ deopt_flag->AddInput(invoke_instruction->InputAt(0)); + DCHECK_EQ(deopt_flag->InputCount(), 1u); deopt->CopyEnvironmentFrom(invoke_instruction->GetEnvironment()); + outermost_graph_->IncrementNumberOfCHAGuards(); } HInstruction* HInliner::AddTypeGuard(HInstruction* receiver, HInstruction* cursor, HBasicBlock* bb_cursor, dex::TypeIndex class_index, - bool is_referrer, + mirror::Class* klass, HInstruction* invoke_instruction, bool with_deoptimization) { + ScopedAssertNoThreadSuspension sants("Adding compiler type guard"); + ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker(); HInstanceFieldGet* receiver_class = BuildGetReceiverClass( class_linker, receiver, invoke_instruction->GetDexPc()); + if (cursor != nullptr) { + bb_cursor->InsertInstructionAfter(receiver_class, cursor); + } else { + bb_cursor->InsertInstructionBefore(receiver_class, bb_cursor->GetFirstInstruction()); + } const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile(); + bool is_referrer = (klass == outermost_graph_->GetArtMethod()->GetDeclaringClass()); // Note that we will just compare the classes, so we don't need Java semantics access checks. - // Also, the caller of `AddTypeGuard` must have guaranteed that the class is in the dex cache. + // Note that the type index and the dex file are relative to the method this type guard is + // inlined into. HLoadClass* load_class = new (graph_->GetArena()) HLoadClass(graph_->GetCurrentMethod(), class_index, caller_dex_file, is_referrer, invoke_instruction->GetDexPc(), - /* needs_access_check */ false, - /* is_in_dex_cache */ true, - /* is_in_boot_image */ false); + /* needs_access_check */ false); + bb_cursor->InsertInstructionAfter(load_class, receiver_class); + // Sharpen after adding the instruction, as the sharpening may remove inputs. + HSharpening::SharpenClass(load_class, klass, handles_, codegen_, compiler_driver_); - HNotEqual* compare = new (graph_->GetArena()) HNotEqual(load_class, receiver_class); // TODO: Extend reference type propagation to understand the guard. - if (cursor != nullptr) { - bb_cursor->InsertInstructionAfter(receiver_class, cursor); - } else { - bb_cursor->InsertInstructionBefore(receiver_class, bb_cursor->GetFirstInstruction()); - } - bb_cursor->InsertInstructionAfter(load_class, receiver_class); + HNotEqual* compare = new (graph_->GetArena()) HNotEqual(load_class, receiver_class); bb_cursor->InsertInstructionAfter(compare, load_class); if (with_deoptimization) { HDeoptimize* deoptimize = new (graph_->GetArena()) HDeoptimize( @@ -604,7 +612,6 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction, all_targets_inlined = false; } else { one_target_inlined = true; - bool is_referrer = (classes->Get(i) == outermost_graph_->GetArtMethod()->GetDeclaringClass()); // If we have inlined all targets before, and this receiver is the last seen, // we deoptimize instead of keeping the original invoke instruction. @@ -616,8 +623,13 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction, // We do not support HDeoptimize in OSR methods. 
deoptimize = false; } - HInstruction* compare = AddTypeGuard( - receiver, cursor, bb_cursor, class_index, is_referrer, invoke_instruction, deoptimize); + HInstruction* compare = AddTypeGuard(receiver, + cursor, + bb_cursor, + class_index, + classes->Get(i), + invoke_instruction, + deoptimize); if (deoptimize) { if (return_replacement != nullptr) { invoke_instruction->ReplaceWith(return_replacement); diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h index ffebd97cb8..0c6436235f 100644 --- a/compiler/optimizing/inliner.h +++ b/compiler/optimizing/inliner.h @@ -167,7 +167,7 @@ class HInliner : public HOptimization { HInstruction* cursor, HBasicBlock* bb_cursor, dex::TypeIndex class_index, - bool is_referrer, + mirror::Class* klass, HInstruction* invoke_instruction, bool with_deoptimization) REQUIRES_SHARED(Locks::mutator_lock_); diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc index b97581beb3..1ca3218d77 100644 --- a/compiler/optimizing/instruction_builder.cc +++ b/compiler/optimizing/instruction_builder.cc @@ -937,9 +937,7 @@ bool HInstructionBuilder::BuildNewInstance(dex::TypeIndex type_index, uint32_t d outer_dex_file, IsOutermostCompilingClass(type_index), dex_pc, - needs_access_check, - /* is_in_dex_cache */ false, - /* is_in_boot_image */ false); + needs_access_check); AppendInstruction(load_class); HInstruction* cls = load_class; @@ -1029,9 +1027,7 @@ HClinitCheck* HInstructionBuilder::ProcessClinitCheckForInvoke( outer_dex_file, is_outer_class, dex_pc, - /*needs_access_check*/ false, - /* is_in_dex_cache */ false, - /* is_in_boot_image */ false); + /*needs_access_check*/ false); AppendInstruction(load_class); clinit_check = new (arena_) HClinitCheck(load_class, dex_pc); AppendInstruction(clinit_check); @@ -1388,9 +1384,7 @@ bool HInstructionBuilder::BuildStaticFieldAccess(const Instruction& instruction, outer_dex_file, is_outer_class, dex_pc, - /*needs_access_check*/ false, - /* is_in_dex_cache */ false, - /* is_in_boot_image */ false); + /*needs_access_check*/ false); AppendInstruction(constant); HInstruction* cls = constant; @@ -1664,9 +1658,7 @@ void HInstructionBuilder::BuildTypeCheck(const Instruction& instruction, dex_file, IsOutermostCompilingClass(type_index), dex_pc, - !can_access, - /* is_in_dex_cache */ false, - /* is_in_boot_image */ false); + !can_access); AppendInstruction(cls); TypeCheckKind check_kind = ComputeTypeCheckKind(resolved_class); @@ -2656,9 +2648,7 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, *dex_file_, IsOutermostCompilingClass(type_index), dex_pc, - !can_access, - /* is_in_dex_cache */ false, - /* is_in_boot_image */ false)); + !can_access)); UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction()); break; } diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc index 641a5c92ea..85e84d8d2c 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.cc +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -187,7 +187,7 @@ class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL { assembler->MaybePoisonHeapReference(tmp); __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex)); __ Cmp(src_curr_addr, src_stop_addr); - __ B(ne, &loop); + __ B(ne, &loop, /* far_target */ false); __ B(GetExitLabel()); } @@ -851,7 +851,7 @@ static void GenUnsafePut(LocationSummary* locations, __ Ldrexd(temp_lo, temp_hi, MemOperand(temp_reg)); __ Strexd(temp_lo, value_lo, value_hi, 
MemOperand(temp_reg)); __ Cmp(temp_lo, 0); - __ B(ne, &loop_head); + __ B(ne, &loop_head, /* far_target */ false); } else { __ Strd(value_lo, value_hi, MemOperand(base, offset)); } @@ -1062,7 +1062,7 @@ static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARMVIXL* __ cmp(eq, tmp, 1); } - __ B(eq, &loop_head); + __ B(eq, &loop_head, /* far_target */ false); __ Dmb(vixl32::ISH); @@ -1238,23 +1238,23 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo(HInvoke* invoke) { __ Ldr(temp_reg, MemOperand(str, temp1)); __ Ldr(temp2, MemOperand(arg, temp1)); __ Cmp(temp_reg, temp2); - __ B(ne, &find_char_diff); + __ B(ne, &find_char_diff, /* far_target */ false); __ Add(temp1, temp1, char_size * 2); __ Ldr(temp_reg, MemOperand(str, temp1)); __ Ldr(temp2, MemOperand(arg, temp1)); __ Cmp(temp_reg, temp2); - __ B(ne, &find_char_diff_2nd_cmp); + __ B(ne, &find_char_diff_2nd_cmp, /* far_target */ false); __ Add(temp1, temp1, char_size * 2); // With string compression, we have compared 8 bytes, otherwise 4 chars. __ Subs(temp0, temp0, (mirror::kUseStringCompression ? 8 : 4)); - __ B(hi, &loop); + __ B(hi, &loop, /* far_target */ false); __ B(&end); __ Bind(&find_char_diff_2nd_cmp); if (mirror::kUseStringCompression) { __ Subs(temp0, temp0, 4); // 4 bytes previously compared. - __ B(ls, &end); // Was the second comparison fully beyond the end? + __ B(ls, &end, /* far_target */ false); // Was the second comparison fully beyond the end? } else { // Without string compression, we can start treating temp0 as signed // and rely on the signed comparison below. @@ -1282,7 +1282,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo(HInvoke* invoke) { // the remaining string data, so just return length diff (out). // The comparison is unsigned for string compression, otherwise signed. __ Cmp(temp0, Operand(temp1, vixl32::LSR, (mirror::kUseStringCompression ? 3 : 4))); - __ B((mirror::kUseStringCompression ? ls : le), &end); + __ B((mirror::kUseStringCompression ? ls : le), &end, /* far_target */ false); // Extract the characters and calculate the difference. if (mirror::kUseStringCompression) { @@ -1349,9 +1349,9 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo(HInvoke* invoke) { __ Ldrb(temp_reg, MemOperand(temp1, c_char_size, PostIndex)); __ Ldrh(temp3, MemOperand(temp2, char_size, PostIndex)); __ Cmp(temp_reg, temp3); - __ B(ne, &different_compression_diff); + __ B(ne, &different_compression_diff, /* far_target */ false); __ Subs(temp0, temp0, 2); - __ B(hi, &different_compression_loop); + __ B(hi, &different_compression_loop, /* far_target */ false); __ B(&end); // Calculate the difference. @@ -1427,7 +1427,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) { // Reference equality check, return true if same reference. __ Cmp(str, arg); - __ B(eq, &return_true); + __ B(eq, &return_true, /* far_target */ false); if (!optimizations.GetArgumentIsString()) { // Instanceof check for the argument by comparing class fields. @@ -1437,7 +1437,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) { __ Ldr(temp, MemOperand(str, class_offset)); __ Ldr(temp1, MemOperand(arg, class_offset)); __ Cmp(temp, temp1); - __ B(ne, &return_false); + __ B(ne, &return_false, /* far_target */ false); } // Load `count` fields of this and argument strings. @@ -1446,7 +1446,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) { // Check if `count` fields are equal, return false if they're not. 
// Also compares the compression style, if differs return false. __ Cmp(temp, temp1); - __ B(ne, &return_false); + __ B(ne, &return_false, /* far_target */ false); // Return true if both strings are empty. Even with string compression `count == 0` means empty. static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, "Expecting 0=compressed, 1=uncompressed"); @@ -1477,10 +1477,10 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) { __ Ldr(temp2, MemOperand(arg, temp1)); __ Add(temp1, temp1, Operand::From(sizeof(uint32_t))); __ Cmp(out, temp2); - __ B(ne, &return_false); + __ B(ne, &return_false, /* far_target */ false); // With string compression, we have compared 4 bytes, otherwise 2 chars. __ Subs(temp, temp, mirror::kUseStringCompression ? 4 : 2); - __ B(hi, &loop); + __ B(hi, &loop, /* far_target */ false); // Return true and exit the function. // If loop does not result in returning false, we return true. @@ -1800,7 +1800,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { } else { if (!optimizations.GetDestinationIsSource()) { __ Cmp(src, dest); - __ B(ne, &conditions_on_positions_validated); + __ B(ne, &conditions_on_positions_validated, /* far_target */ false); } __ Cmp(RegisterFrom(dest_pos), src_pos_constant); __ B(gt, intrinsic_slow_path->GetEntryLabel()); @@ -1808,7 +1808,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { } else { if (!optimizations.GetDestinationIsSource()) { __ Cmp(src, dest); - __ B(ne, &conditions_on_positions_validated); + __ B(ne, &conditions_on_positions_validated, /* far_target */ false); } if (dest_pos.IsConstant()) { int32_t dest_pos_constant = Int32ConstantFrom(dest_pos); @@ -1916,7 +1916,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { if (optimizations.GetDestinationIsTypedObjectArray()) { vixl32::Label do_copy; - __ B(eq, &do_copy); + __ B(eq, &do_copy, /* far_target */ false); // /* HeapReference<Class> */ temp1 = temp1->component_type_ codegen_->GenerateFieldLoadWithBakerReadBarrier( invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false); @@ -1976,7 +1976,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { if (optimizations.GetDestinationIsTypedObjectArray()) { vixl32::Label do_copy; - __ B(eq, &do_copy); + __ B(eq, &do_copy, /* far_target */ false); if (!did_unpoison) { assembler->MaybeUnpoisonHeapReference(temp1); } @@ -2069,7 +2069,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { // Don't enter copy loop if `length == 0`. __ Cmp(temp1, temp3); - __ B(eq, &done); + __ B(eq, &done, /* far_target */ false); // /* int32_t */ monitor = src->monitor_ __ Ldr(temp2, MemOperand(src, monitor_offset)); @@ -2122,7 +2122,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { } __ Cmp(temp1, temp3); - __ B(ne, &loop); + __ B(ne, &loop, /* far_target */ false); __ Bind(read_barrier_slow_path->GetExitLabel()); __ Bind(&done); @@ -2142,7 +2142,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { // poison/unpoison. 
vixl32::Label loop, done; __ Cmp(temp1, temp3); - __ B(eq, &done); + __ B(eq, &done, /* far_target */ false); __ Bind(&loop); { @@ -2154,7 +2154,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { } __ Cmp(temp1, temp3); - __ B(ne, &loop); + __ B(ne, &loop, /* far_target */ false); __ Bind(&done); } @@ -2560,7 +2560,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) __ Subs(num_chr, srcEnd, srcBegin); // Early out for valid zero-length retrievals. - __ B(eq, &done); + __ B(eq, &done, /* far_target */ false); // src range to copy. __ Add(src_ptr, srcObj, value_offset); @@ -2576,7 +2576,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) __ Ldr(temp, MemOperand(srcObj, count_offset)); __ Tst(temp, 1); temps.Release(temp); - __ B(eq, &compressed_string_preloop); + __ B(eq, &compressed_string_preloop, /* far_target */ false); } __ Add(src_ptr, src_ptr, Operand(srcBegin, vixl32::LSL, 1)); @@ -2586,7 +2586,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) temp = temps.Acquire(); // Save repairing the value of num_chr on the < 4 character path. __ Subs(temp, num_chr, 4); - __ B(lt, &remainder); + __ B(lt, &remainder, /* far_target */ false); // Keep the result of the earlier subs, we are going to fetch at least 4 characters. __ Mov(num_chr, temp); @@ -2601,10 +2601,10 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) __ Ldr(temp, MemOperand(src_ptr, char_size * 4, PostIndex)); __ Str(temp, MemOperand(dst_ptr, char_size * 4, PostIndex)); temps.Release(temp); - __ B(ge, &loop); + __ B(ge, &loop, /* far_target */ false); __ Adds(num_chr, num_chr, 4); - __ B(eq, &done); + __ B(eq, &done, /* far_target */ false); // Main loop for < 4 character case and remainder handling. Loads and stores one // 16-bit Java character at a time. @@ -2614,7 +2614,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) __ Subs(num_chr, num_chr, 1); __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex)); temps.Release(temp); - __ B(gt, &remainder); + __ B(gt, &remainder, /* far_target */ false); if (mirror::kUseStringCompression) { __ B(&done); @@ -2630,7 +2630,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex)); temps.Release(temp); __ Subs(num_chr, num_chr, 1); - __ B(gt, &compressed_string_loop); + __ B(gt, &compressed_string_loop, /* far_target */ false); } __ Bind(&done); diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index cabc0782ca..b9e284f6f8 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -1357,7 +1357,9 @@ std::ostream& operator<<(std::ostream& os, const HInstruction::InstructionKind& void HInstruction::MoveBefore(HInstruction* cursor) { DCHECK(!IsPhi()); DCHECK(!IsControlFlow()); - DCHECK(CanBeMoved()); + DCHECK(CanBeMoved() || + // HShouldDeoptimizeFlag can only be moved by CHAGuardOptimization. 
+ IsShouldDeoptimizeFlag()); DCHECK(!cursor->IsPhi()); next_->previous_ = previous_; diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 4a77bed44a..1f0c8e805e 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -330,6 +330,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { invoke_type_(invoke_type), in_ssa_form_(false), should_generate_constructor_barrier_(should_generate_constructor_barrier), + number_of_cha_guards_(0), instruction_set_(instruction_set), cached_null_constant_(nullptr), cached_int_constants_(std::less<int32_t>(), arena->Adapter(kArenaAllocConstantsMap)), @@ -551,9 +552,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { } bool HasShouldDeoptimizeFlag() const { - // TODO: if all CHA guards can be eliminated, there is no need for the flag - // even if cha_single_implementation_list_ is not empty. - return !cha_single_implementation_list_.empty(); + return number_of_cha_guards_ != 0; } bool HasTryCatch() const { return has_try_catch_; } @@ -572,6 +571,10 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { ReferenceTypeInfo GetInexactObjectRti() const { return inexact_object_rti_; } + uint32_t GetNumberOfCHAGuards() { return number_of_cha_guards_; } + void SetNumberOfCHAGuards(uint32_t num) { number_of_cha_guards_ = num; } + void IncrementNumberOfCHAGuards() { number_of_cha_guards_++; } + private: void RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const; void RemoveDeadBlocks(const ArenaBitVector& visited); @@ -667,6 +670,10 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { const bool should_generate_constructor_barrier_; + // Number of CHA guards in the graph. Used to short-circuit the + // CHA guard optimization pass when there is no CHA guard left. + uint32_t number_of_cha_guards_; + const InstructionSet instruction_set_; // Cached constants. @@ -2349,6 +2356,11 @@ class HBackwardInstructionIterator : public ValueObject { class HVariableInputSizeInstruction : public HInstruction { public: + using HInstruction::GetInputRecords; // Keep the const version visible. + ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE { + return ArrayRef<HUserRecord<HInstruction*>>(inputs_); + } + void AddInput(HInstruction* input); void InsertInputAt(size_t index, HInstruction* input); void RemoveInputAt(size_t index); @@ -2489,11 +2501,6 @@ class HPhi FINAL : public HVariableInputSizeInstruction { bool IsCatchPhi() const { return GetBlock()->IsCatchBlock(); } - using HInstruction::GetInputRecords; // Keep the const version visible. - ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE FINAL { - return ArrayRef<HUserRecord<HInstruction*>>(inputs_); - } - Primitive::Type GetType() const OVERRIDE { return GetPackedField<TypeField>(); } void SetType(Primitive::Type new_type) { // Make sure that only valid type changes occur. The following are allowed: @@ -2925,14 +2932,20 @@ class HDeoptimize FINAL : public HTemplateInstruction<1> { // if it's true, starts to do deoptimization. // It has a 4-byte slot on stack. // TODO: allocate a register for this flag. -class HShouldDeoptimizeFlag FINAL : public HExpression<0> { +class HShouldDeoptimizeFlag FINAL : public HVariableInputSizeInstruction { public: - // TODO: use SideEffects to aid eliminating some CHA guards. 
- explicit HShouldDeoptimizeFlag(uint32_t dex_pc) - : HExpression(Primitive::kPrimInt, SideEffects::None(), dex_pc) { + // CHA guards are only optimized in a separate pass and it has no side effects + // with regard to other passes. + HShouldDeoptimizeFlag(ArenaAllocator* arena, uint32_t dex_pc) + : HVariableInputSizeInstruction(SideEffects::None(), dex_pc, arena, 0, kArenaAllocCHA) { } - // We don't eliminate CHA guards yet. + Primitive::Type GetType() const OVERRIDE { return Primitive::kPrimInt; } + + // We do all CHA guard elimination/motion in a single pass, after which there is no + // further guard elimination/motion since a guard might have been used for justification + // of the elimination of another guard. Therefore, we pretend this guard cannot be moved + // to avoid other optimizations trying to move it. bool CanBeMoved() const OVERRIDE { return false; } DECLARE_INSTRUCTION(ShouldDeoptimizeFlag); @@ -3816,11 +3829,6 @@ class HInvoke : public HVariableInputSizeInstruction { public: bool NeedsEnvironment() const OVERRIDE; - using HInstruction::GetInputRecords; // Keep the const version visible. - ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE { - return ArrayRef<HUserRecord<HInstruction*>>(inputs_); - } - void SetArgumentAt(size_t index, HInstruction* argument) { SetRawInputAt(index, argument); } @@ -5537,9 +5545,7 @@ class HLoadClass FINAL : public HInstruction { const DexFile& dex_file, bool is_referrers_class, uint32_t dex_pc, - bool needs_access_check, - bool is_in_dex_cache, - bool is_in_boot_image) + bool needs_access_check) : HInstruction(SideEffectsForArchRuntimeCalls(), dex_pc), special_input_(HUserRecord<HInstruction*>(current_method)), type_index_(type_index), @@ -5552,8 +5558,8 @@ class HLoadClass FINAL : public HInstruction { SetPackedField<LoadKindField>( is_referrers_class ? 
LoadKind::kReferrersClass : LoadKind::kDexCacheViaMethod); SetPackedFlag<kFlagNeedsAccessCheck>(needs_access_check); - SetPackedFlag<kFlagIsInDexCache>(is_in_dex_cache); - SetPackedFlag<kFlagIsInBootImage>(is_in_boot_image); + SetPackedFlag<kFlagIsInDexCache>(false); + SetPackedFlag<kFlagIsInBootImage>(false); SetPackedFlag<kFlagGenerateClInitCheck>(false); } diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 0d0f62a55c..4bf5b080a7 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -54,6 +54,7 @@ #include "base/timing_logger.h" #include "bounds_check_elimination.h" #include "builder.h" +#include "cha_guard_optimization.h" #include "code_generator.h" #include "compiled_method.h" #include "compiler.h" @@ -517,6 +518,8 @@ static HOptimization* BuildOptimization( return new (arena) SideEffectsAnalysis(graph); } else if (opt_name == HLoopOptimization::kLoopOptimizationPassName) { return new (arena) HLoopOptimization(graph, most_recent_induction); + } else if (opt_name == CHAGuardOptimization::kCHAGuardOptimizationPassName) { + return new (arena) CHAGuardOptimization(graph); #ifdef ART_ENABLE_CODEGEN_arm } else if (opt_name == arm::DexCacheArrayFixups::kDexCacheArrayFixupsArmPassName) { return new (arena) arm::DexCacheArrayFixups(graph, codegen, stats); @@ -779,6 +782,7 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph, InstructionSimplifier* simplify4 = new (arena) InstructionSimplifier( graph, stats, "instruction_simplifier$before_codegen"); IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, stats); + CHAGuardOptimization* cha_guard = new (arena) CHAGuardOptimization(graph); HOptimization* optimizations1[] = { intrinsics, @@ -807,6 +811,7 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph, fold3, // evaluates code generated by dynamic bce simplify3, lse, + cha_guard, dce3, // The codegen has a few assumptions that only the instruction simplifier // can satisfy. For example, the code generator does not expect to see a diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc index 91efb80015..91826cf671 100644 --- a/compiler/optimizing/sharpening.cc +++ b/compiler/optimizing/sharpening.cc @@ -140,6 +140,25 @@ void HSharpening::ProcessInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { } void HSharpening::ProcessLoadClass(HLoadClass* load_class) { + ScopedObjectAccess soa(Thread::Current()); + StackHandleScope<1> hs(soa.Self()); + Runtime* runtime = Runtime::Current(); + ClassLinker* class_linker = runtime->GetClassLinker(); + const DexFile& dex_file = load_class->GetDexFile(); + dex::TypeIndex type_index = load_class->GetTypeIndex(); + Handle<mirror::DexCache> dex_cache = IsSameDexFile(dex_file, *compilation_unit_.GetDexFile()) + ? 
compilation_unit_.GetDexCache() + : hs.NewHandle(class_linker->FindDexCache(soa.Self(), dex_file)); + mirror::Class* cls = dex_cache->GetResolvedType(type_index); + SharpenClass(load_class, cls, handles_, codegen_, compiler_driver_); +} + +void HSharpening::SharpenClass(HLoadClass* load_class, + mirror::Class* klass, + VariableSizedHandleScope* handles, + CodeGenerator* codegen, + CompilerDriver* compiler_driver) { + ScopedAssertNoThreadSuspension sants("Sharpening class in compiler"); DCHECK(load_class->GetLoadKind() == HLoadClass::LoadKind::kDexCacheViaMethod || load_class->GetLoadKind() == HLoadClass::LoadKind::kReferrersClass) << load_class->GetLoadKind(); @@ -153,69 +172,60 @@ void HSharpening::ProcessLoadClass(HLoadClass* load_class) { bool is_in_boot_image = false; HLoadClass::LoadKind desired_load_kind = static_cast<HLoadClass::LoadKind>(-1); uint64_t address = 0u; // Class or dex cache element address. - { - ScopedObjectAccess soa(Thread::Current()); - StackHandleScope<1> hs(soa.Self()); - Runtime* runtime = Runtime::Current(); - ClassLinker* class_linker = runtime->GetClassLinker(); - Handle<mirror::DexCache> dex_cache = IsSameDexFile(dex_file, *compilation_unit_.GetDexFile()) - ? compilation_unit_.GetDexCache() - : hs.NewHandle(class_linker->FindDexCache(soa.Self(), dex_file)); - mirror::Class* klass = dex_cache->GetResolvedType(type_index); - if (codegen_->GetCompilerOptions().IsBootImage()) { - // Compiling boot image. Check if the class is a boot image class. - DCHECK(!runtime->UseJitCompilation()); - if (!compiler_driver_->GetSupportBootImageFixup()) { - // MIPS64 or compiler_driver_test. Do not sharpen. - desired_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod; - } else if ((klass != nullptr) && compiler_driver_->IsImageClass( - dex_file.StringDataByIdx(dex_file.GetTypeId(type_index).descriptor_idx_))) { - is_in_boot_image = true; - is_in_dex_cache = true; - desired_load_kind = codegen_->GetCompilerOptions().GetCompilePic() - ? HLoadClass::LoadKind::kBootImageLinkTimePcRelative - : HLoadClass::LoadKind::kBootImageLinkTimeAddress; - } else { - // Not a boot image class. We must go through the dex cache. - DCHECK(ContainsElement(compiler_driver_->GetDexFilesForOatFile(), &dex_file)); - desired_load_kind = HLoadClass::LoadKind::kDexCachePcRelative; - } + Runtime* runtime = Runtime::Current(); + if (codegen->GetCompilerOptions().IsBootImage()) { + // Compiling boot image. Check if the class is a boot image class. + DCHECK(!runtime->UseJitCompilation()); + if (!compiler_driver->GetSupportBootImageFixup()) { + // MIPS64 or compiler_driver_test. Do not sharpen. + desired_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod; + } else if ((klass != nullptr) && compiler_driver->IsImageClass( + dex_file.StringDataByIdx(dex_file.GetTypeId(type_index).descriptor_idx_))) { + is_in_boot_image = true; + is_in_dex_cache = true; + desired_load_kind = codegen->GetCompilerOptions().GetCompilePic() + ? HLoadClass::LoadKind::kBootImageLinkTimePcRelative + : HLoadClass::LoadKind::kBootImageLinkTimeAddress; } else { - is_in_boot_image = (klass != nullptr) && runtime->GetHeap()->ObjectIsInBootImageSpace(klass); - if (runtime->UseJitCompilation()) { - // TODO: Make sure we don't set the "compile PIC" flag for JIT as that's bogus. - // DCHECK(!codegen_->GetCompilerOptions().GetCompilePic()); - is_in_dex_cache = (klass != nullptr); - if (is_in_boot_image) { - // TODO: Use direct pointers for all non-moving spaces, not just boot image. 
Bug: 29530787 - desired_load_kind = HLoadClass::LoadKind::kBootImageAddress; - address = reinterpret_cast64<uint64_t>(klass); - } else if (is_in_dex_cache) { - desired_load_kind = HLoadClass::LoadKind::kJitTableAddress; - // We store in the address field the location of the stack reference maintained - // by the handle. We do this now so that the code generation does not need to figure - // out which class loader to use. - address = reinterpret_cast<uint64_t>(handles_->NewHandle(klass).GetReference()); - } else { - // Class not loaded yet. This happens when the dex code requesting - // this `HLoadClass` hasn't been executed in the interpreter. - // Fallback to the dex cache. - // TODO(ngeoffray): Generate HDeoptimize instead. - desired_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod; - } - } else if (is_in_boot_image && !codegen_->GetCompilerOptions().GetCompilePic()) { - // AOT app compilation. Check if the class is in the boot image. + // Not a boot image class. We must go through the dex cache. + DCHECK(ContainsElement(compiler_driver->GetDexFilesForOatFile(), &dex_file)); + desired_load_kind = HLoadClass::LoadKind::kDexCachePcRelative; + } + } else { + is_in_boot_image = (klass != nullptr) && runtime->GetHeap()->ObjectIsInBootImageSpace(klass); + if (runtime->UseJitCompilation()) { + // TODO: Make sure we don't set the "compile PIC" flag for JIT as that's bogus. + // DCHECK(!codegen_->GetCompilerOptions().GetCompilePic()); + is_in_dex_cache = (klass != nullptr); + if (is_in_boot_image) { + // TODO: Use direct pointers for all non-moving spaces, not just boot image. Bug: 29530787 desired_load_kind = HLoadClass::LoadKind::kBootImageAddress; address = reinterpret_cast64<uint64_t>(klass); + } else if (is_in_dex_cache) { + desired_load_kind = HLoadClass::LoadKind::kJitTableAddress; + // We store in the address field the location of the stack reference maintained + // by the handle. We do this now so that the code generation does not need to figure + // out which class loader to use. + address = reinterpret_cast<uint64_t>(handles->NewHandle(klass).GetReference()); } else { - // Not JIT and either the klass is not in boot image or we are compiling in PIC mode. - // Use PC-relative load from the dex cache if the dex file belongs - // to the oat file that we're currently compiling. - desired_load_kind = - ContainsElement(compiler_driver_->GetDexFilesForOatFile(), &load_class->GetDexFile()) - ? HLoadClass::LoadKind::kDexCachePcRelative - : HLoadClass::LoadKind::kDexCacheViaMethod; + // Class not loaded yet. This happens when the dex code requesting + // this `HLoadClass` hasn't been executed in the interpreter. + // Fallback to the dex cache. + // TODO(ngeoffray): Generate HDeoptimize instead. + desired_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod; } + } else if (is_in_boot_image && !codegen->GetCompilerOptions().GetCompilePic()) { + // AOT app compilation. Check if the class is in the boot image. + desired_load_kind = HLoadClass::LoadKind::kBootImageAddress; + address = reinterpret_cast64<uint64_t>(klass); + } else { + // Not JIT and either the klass is not in boot image or we are compiling in PIC mode. + // Use PC-relative load from the dex cache if the dex file belongs + // to the oat file that we're currently compiling. + desired_load_kind = + ContainsElement(compiler_driver->GetDexFilesForOatFile(), &load_class->GetDexFile()) + ? 
HLoadClass::LoadKind::kDexCachePcRelative + : HLoadClass::LoadKind::kDexCacheViaMethod; } } DCHECK_NE(desired_load_kind, static_cast<HLoadClass::LoadKind>(-1)); @@ -241,7 +251,7 @@ void HSharpening::ProcessLoadClass(HLoadClass* load_class) { load_class->MarkInDexCache(); } - HLoadClass::LoadKind load_kind = codegen_->GetSupportedLoadClassKind(desired_load_kind); + HLoadClass::LoadKind load_kind = codegen->GetSupportedLoadClassKind(desired_load_kind); switch (load_kind) { case HLoadClass::LoadKind::kBootImageLinkTimeAddress: case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: @@ -254,7 +264,7 @@ void HSharpening::ProcessLoadClass(HLoadClass* load_class) { load_class->SetLoadKindWithAddress(load_kind, address); break; case HLoadClass::LoadKind::kDexCachePcRelative: { - PointerSize pointer_size = InstructionSetPointerSize(codegen_->GetInstructionSet()); + PointerSize pointer_size = InstructionSetPointerSize(codegen->GetInstructionSet()); DexCacheArraysLayout layout(pointer_size, &dex_file); size_t element_index = layout.TypeOffset(type_index); load_class->SetLoadKindWithDexCacheReference(load_kind, dex_file, element_index); diff --git a/compiler/optimizing/sharpening.h b/compiler/optimizing/sharpening.h index 74189549fd..ae5ccb33ab 100644 --- a/compiler/optimizing/sharpening.h +++ b/compiler/optimizing/sharpening.h @@ -47,6 +47,14 @@ class HSharpening : public HOptimization { static constexpr const char* kSharpeningPassName = "sharpening"; + // Used internally but also by the inliner. + static void SharpenClass(HLoadClass* load_class, + mirror::Class* klass, + VariableSizedHandleScope* handles, + CodeGenerator* codegen, + CompilerDriver* compiler_driver) + REQUIRES_SHARED(Locks::mutator_lock_); + private: void ProcessInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke); void ProcessLoadClass(HLoadClass* load_class); diff --git a/compiler/utils/arm/assembler_arm_vixl.cc b/compiler/utils/arm/assembler_arm_vixl.cc index 453c90ab2e..e5eef37b7b 100644 --- a/compiler/utils/arm/assembler_arm_vixl.cc +++ b/compiler/utils/arm/assembler_arm_vixl.cc @@ -444,7 +444,7 @@ void ArmVIXLMacroAssembler::CompareAndBranchIfZero(vixl32::Register rn, return; } Cmp(rn, 0); - B(eq, label); + B(eq, label, is_far_target); } void ArmVIXLMacroAssembler::CompareAndBranchIfNonZero(vixl32::Register rn, @@ -455,16 +455,16 @@ void ArmVIXLMacroAssembler::CompareAndBranchIfNonZero(vixl32::Register rn, return; } Cmp(rn, 0); - B(ne, label); + B(ne, label, is_far_target); } void ArmVIXLMacroAssembler::B(vixl32::Label* label) { if (!label->IsBound()) { // Try to use 16-bit T2 encoding of B instruction. DCHECK(OutsideITBlock()); - ExactAssemblyScope ass(this, - kMaxInstructionSizeInBytes, - CodeBufferCheckScope::kMaximumSize); + ExactAssemblyScope guard(this, + k16BitT32InstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); b(al, Narrow, label); AddBranchLabel(label); return; @@ -472,7 +472,17 @@ void ArmVIXLMacroAssembler::B(vixl32::Label* label) { MacroAssembler::B(label); } -void ArmVIXLMacroAssembler::B(vixl32::Condition cond, vixl32::Label* label) { +void ArmVIXLMacroAssembler::B(vixl32::Condition cond, vixl32::Label* label, bool is_far_target) { + if (!label->IsBound() && !is_far_target) { + // Try to use 16-bit T2 encoding of B instruction. 
+ DCHECK(OutsideITBlock()); + ExactAssemblyScope guard(this, + k16BitT32InstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + b(cond, Narrow, label); + AddBranchLabel(label); + return; + } // To further reduce the Bcc encoding size and use 16-bit T1 encoding, // we can provide a hint to this function: i.e. far_target=false. // By default this function uses 'EncodingSizeType::Best' which generates 32-bit T3 encoding. diff --git a/compiler/utils/arm/assembler_arm_vixl.h b/compiler/utils/arm/assembler_arm_vixl.h index 5661249695..3cf6a2ea84 100644 --- a/compiler/utils/arm/assembler_arm_vixl.h +++ b/compiler/utils/arm/assembler_arm_vixl.h @@ -114,7 +114,7 @@ class ArmVIXLMacroAssembler FINAL : public vixl32::MacroAssembler { // TODO: Remove when MacroAssembler::Add(FlagsUpdate, Condition, Register, Register, Operand) // makes the right decision about 16-bit encodings. void Add(vixl32::Register rd, vixl32::Register rn, const vixl32::Operand& operand) { - if (rd.Is(rn)) { + if (rd.Is(rn) && operand.IsPlainRegister()) { MacroAssembler::Add(rd, rn, operand); } else { MacroAssembler::Add(vixl32::DontCare, rd, rn, operand); @@ -124,7 +124,10 @@ class ArmVIXLMacroAssembler FINAL : public vixl32::MacroAssembler { // These interfaces try to use 16-bit T2 encoding of B instruction. void B(vixl32::Label* label); - void B(vixl32::Condition cond, vixl32::Label* label); + // For B(label), we always try to use Narrow encoding, because 16-bit T2 encoding supports + // jumping within 2KB range. For B(cond, label), because the supported branch range is 256 + // bytes; we use the far_target hint to try to use 16-bit T1 encoding for short range jumps. + void B(vixl32::Condition cond, vixl32::Label* label, bool is_far_target = true); }; class ArmVIXLAssembler FINAL : public Assembler { diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc index 84280b9c98..04430b13f1 100644 --- a/compiler/utils/mips64/assembler_mips64.cc +++ b/compiler/utils/mips64/assembler_mips64.cc @@ -1829,10 +1829,10 @@ void Mips64Assembler::EmitBranch(Mips64Assembler::Branch* branch) { // Far label. case Branch::kFarLabel: - offset += (offset & 0x8000) << 1; // Account for sign extension in addiu. + offset += (offset & 0x8000) << 1; // Account for sign extension in daddiu. CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); Auipc(AT, High16Bits(offset)); - Addiu(lhs, AT, Low16Bits(offset)); + Daddiu(lhs, AT, Low16Bits(offset)); break; // Far literals. case Branch::kFarLiteral: diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc index f62822d73a..9d0d0fcc60 100644 --- a/compiler/utils/mips64/assembler_mips64_test.cc +++ b/compiler/utils/mips64/assembler_mips64_test.cc @@ -879,7 +879,7 @@ TEST_F(AssemblerMIPS64Test, LoadNearestFarLabelAddress) { std::string expected = "1:\n" "auipc $at, %hi(2f - 1b)\n" - "addiu $v0, $at, %lo(2f - 1b)\n" + + "daddiu $v0, $at, %lo(2f - 1b)\n" + RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + "2:\n"; DriverStr(expected, "LoadNearestFarLabelAddress"); @@ -1070,7 +1070,7 @@ TEST_F(AssemblerMIPS64Test, FarLongLiteralAlignmentNop) { "ld $v0, %lo(3f - 1b)($at)\n" "2:\n" "auipc $at, %hi(3f - 2b)\n" - "addiu $v1, $at, %lo(3f - 2b)\n" + + "daddiu $v1, $at, %lo(3f - 2b)\n" + RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") + "nop\n" "3:\n" |