Diffstat (limited to 'compiler/optimizing')
40 files changed, 2571 insertions, 1557 deletions
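The centerpiece of this change is a new CHA guard optimization pass (cha_guard_optimization.cc/.h below). A CHA guard is the three-instruction sequence HShouldDeoptimizeFlag, HNotEqual, HDeoptimize emitted for calls devirtualized via Class Hierarchy Analysis; the pass removes a guard whose receiver is a parameter value, removes a guard that is covered by a dominating guard, and hoists loop-invariant guards into the loop pre-header. The following is only an illustrative sketch of the dominating-guard test, using hypothetical stand-in types (Block, DominatedByExistingGuard) rather than ART's real HBasicBlock/HInstruction API:

// Minimal, self-contained sketch (not ART code): a guard is redundant if some
// block on the dominator chain between its block and the receiver's defining
// block already holds a CHA guard, because the receiver pre-exists that guard
// and passing it re-validates all CHA devirtualization assumptions.
#include <cassert>

struct Block {
  Block* dominator = nullptr;   // immediate dominator in the CFG
  bool has_cha_guard = false;   // mirrors block_has_cha_guard_ in the real pass
};

bool DominatedByExistingGuard(Block* guard_block, Block* receiver_def_block) {
  // Walk the dominator chain, stopping at the receiver's defining block
  // (the real pass then also scans backward inside that block).
  for (Block* b = guard_block; b != receiver_def_block; b = b->dominator) {
    if (b->has_cha_guard) {
      return true;  // an earlier guard already covers this receiver
    }
  }
  return false;
}

int main() {
  Block entry, mid, use;
  mid.dominator = &entry;
  use.dominator = &mid;
  assert(!DominatedByExistingGuard(&use, &entry));  // no guard kept yet
  mid.has_cha_guard = true;                         // a guard was kept in `mid`
  assert(DominatedByExistingGuard(&use, &entry));   // guard in `use` is removable
  return 0;
}

The real pass additionally has to advance the visitor's instruction iterator past the three removed (or moved) instructions, and it recounts the surviving guards via SetNumberOfCHAGuards()/IncrementNumberOfCHAGuards().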
diff --git a/compiler/optimizing/cha_guard_optimization.cc b/compiler/optimizing/cha_guard_optimization.cc
new file mode 100644
index 0000000000..fe423012ca
--- /dev/null
+++ b/compiler/optimizing/cha_guard_optimization.cc
@@ -0,0 +1,253 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "cha_guard_optimization.h"
+
+namespace art {
+
+// Note we can only do CHA guard elimination/motion in a single pass, since
+// if a guard is not removed, another guard might be removed due to
+// the existence of the first guard. The first guard should not be further
+// removed in another pass. For example, due to further optimizations,
+// a receiver of a guard might turn out to be a parameter value, or defined at
+// a different site, which makes the guard removable as a result. However
+// it's not safe to remove the guard in another pass since another guard might
+// have been removed due to the existence of this guard.
+//
+// As a consequence, we decided not to rely on other passes to remove them
+// (such as GVN or instruction simplifier).
+
+class CHAGuardVisitor : HGraphVisitor {
+ public:
+  explicit CHAGuardVisitor(HGraph* graph)
+      : HGraphVisitor(graph),
+        block_has_cha_guard_(GetGraph()->GetBlocks().size(),
+                             0,
+                             graph->GetArena()->Adapter(kArenaAllocCHA)) {
+    number_of_guards_to_visit_ = GetGraph()->GetNumberOfCHAGuards();
+    DCHECK_NE(number_of_guards_to_visit_, 0u);
+    // Will recount number of guards during guard optimization.
+    GetGraph()->SetNumberOfCHAGuards(0);
+  }
+
+  void VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) OVERRIDE;
+
+  void VisitBasicBlock(HBasicBlock* block) OVERRIDE;
+
+ private:
+  void RemoveGuard(HShouldDeoptimizeFlag* flag);
+  // Return true if `flag` is removed.
+  bool OptimizeForParameter(HShouldDeoptimizeFlag* flag, HInstruction* receiver);
+  // Return true if `flag` is removed.
+  bool OptimizeWithDominatingGuard(HShouldDeoptimizeFlag* flag, HInstruction* receiver);
+  // Return true if `flag` is hoisted.
+  bool HoistGuard(HShouldDeoptimizeFlag* flag, HInstruction* receiver);
+
+  // Record if each block has any CHA guard. It's updated during the
+  // reverse post order visit. Use int instead of bool since ArenaVector
+  // does not support bool.
+  ArenaVector<int> block_has_cha_guard_;
+
+  // The iterator that's being used for this visitor. Need it to manually
+  // advance the iterator due to removing/moving more than one instruction.
+  HInstructionIterator* instruction_iterator_;
+
+  // Used to short-circuit the pass when there is no more guards left to visit.
+  uint32_t number_of_guards_to_visit_;
+
+  DISALLOW_COPY_AND_ASSIGN(CHAGuardVisitor);
+};
+
+void CHAGuardVisitor::VisitBasicBlock(HBasicBlock* block) {
+  if (number_of_guards_to_visit_ == 0) {
+    return;
+  }
+  // Skip phis, just iterate through instructions.
+  HInstructionIterator it(block->GetInstructions());
+  instruction_iterator_ = &it;
+  for (; !it.Done(); it.Advance()) {
+    DCHECK(it.Current()->IsInBlock());
+    it.Current()->Accept(this);
+  }
+}
+
+void CHAGuardVisitor::RemoveGuard(HShouldDeoptimizeFlag* flag) {
+  HBasicBlock* block = flag->GetBlock();
+  HInstruction* compare = flag->GetNext();
+  DCHECK(compare->IsNotEqual());
+  HInstruction* deopt = compare->GetNext();
+  DCHECK(deopt->IsDeoptimize());
+
+  // Advance instruction iterator first before we remove the guard.
+  // We need to do it twice since we remove three instructions and the
+  // visitor is responsible for advancing it once.
+  instruction_iterator_->Advance();
+  instruction_iterator_->Advance();
+  block->RemoveInstruction(deopt);
+  block->RemoveInstruction(compare);
+  block->RemoveInstruction(flag);
+}
+
+bool CHAGuardVisitor::OptimizeForParameter(HShouldDeoptimizeFlag* flag,
+                                           HInstruction* receiver) {
+  // If some compiled code is invalidated by CHA due to class loading, the
+  // compiled code will not be entered anymore. So the very fact that the
+  // compiled code is invoked guarantees that a parameter receiver conforms
+  // to all the CHA devirtualization assumptions made by the compiled code,
+  // since all parameter receivers pre-exist any (potential) invalidation of
+  // the compiled code.
+  //
+  // TODO: allow more cases such as a phi whose inputs are all parameters.
+  if (receiver->IsParameterValue()) {
+    RemoveGuard(flag);
+    return true;
+  }
+  return false;
+}
+
+bool CHAGuardVisitor::OptimizeWithDominatingGuard(HShouldDeoptimizeFlag* flag,
+                                                  HInstruction* receiver) {
+  // If there is another guard that dominates the current guard, and
+  // that guard is dominated by receiver's definition, then the current
+  // guard can be eliminated, since receiver must pre-exist that other
+  // guard, and passing that guard guarantees that receiver conforms to
+  // all the CHA devirtualization assumptions.
+  HBasicBlock* dominator = flag->GetBlock();
+  HBasicBlock* receiver_def_block = receiver->GetBlock();
+
+  // Complexity of the following algorithm:
+  // We potentially need to traverse the full dominator chain to receiver_def_block,
+  // plus a (partial) linear search within one block for each guard.
+  // So the worst case for each guard is bounded by the size of the
+  // biggest block plus the depth of the dominating tree.
+
+  while (dominator != receiver_def_block) {
+    if (block_has_cha_guard_[dominator->GetBlockId()] == 1) {
+      RemoveGuard(flag);
+      return true;
+    }
+    dominator = dominator->GetDominator();
+  }
+
+  // At this point dominator is the block where receiver is defined.
+  // We do a linear search within dominator to see if there is a guard after
+  // receiver's definition.
+  HInstruction* instruction;
+  if (dominator == flag->GetBlock()) {
+    // Flag and receiver are defined in the same block. Search backward from
+    // the current guard.
+    instruction = flag->GetPrevious();
+  } else {
+    // Search backward from the last instruction of that dominator.
+    instruction = dominator->GetLastInstruction();
+  }
+  while (instruction != receiver) {
+    if (instruction == nullptr) {
+      // receiver must be defined in this block, we didn't find it
+      // in the instruction list, so it must be a Phi.
+      DCHECK(receiver->IsPhi());
+      break;
+    }
+    if (instruction->IsShouldDeoptimizeFlag()) {
+      RemoveGuard(flag);
+      return true;
+    }
+    instruction = instruction->GetPrevious();
+  }
+  return false;
+}
+
+bool CHAGuardVisitor::HoistGuard(HShouldDeoptimizeFlag* flag,
+                                 HInstruction* receiver) {
+  // If receiver is loop invariant, we can hoist the guard out of the
+  // loop since passing a guard before entering the loop guarantees that
+  // receiver conforms to all the CHA devirtualization assumptions.
+  // We only hoist guards out of the inner loop since that offers most of the
+  // benefit and it might help remove other guards in the inner loop.
+  HBasicBlock* block = flag->GetBlock();
+  HLoopInformation* loop_info = block->GetLoopInformation();
+  if (loop_info != nullptr &&
+      !loop_info->IsIrreducible() &&
+      loop_info->IsDefinedOutOfTheLoop(receiver)) {
+    HInstruction* compare = flag->GetNext();
+    DCHECK(compare->IsNotEqual());
+    HInstruction* deopt = compare->GetNext();
+    DCHECK(deopt->IsDeoptimize());
+
+    // Advance instruction iterator first before we move the guard.
+    // We need to do it twice since we move three instructions and the
+    // visitor is responsible for advancing it once.
+    instruction_iterator_->Advance();
+    instruction_iterator_->Advance();
+
+    HBasicBlock* pre_header = loop_info->GetPreHeader();
+    flag->MoveBefore(pre_header->GetLastInstruction());
+    compare->MoveBefore(pre_header->GetLastInstruction());
+
+    block->RemoveInstruction(deopt);
+    HInstruction* suspend = loop_info->GetSuspendCheck();
+    // Need a new deoptimize instruction that copies the environment
+    // of the suspend instruction for the loop.
+    HDeoptimize* deoptimize =
+        new (GetGraph()->GetArena()) HDeoptimize(compare, suspend->GetDexPc());
+    pre_header->InsertInstructionBefore(deoptimize, pre_header->GetLastInstruction());
+    deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment(
+        suspend->GetEnvironment(), loop_info->GetHeader());
+    block_has_cha_guard_[pre_header->GetBlockId()] = 1;
+    GetGraph()->IncrementNumberOfCHAGuards();
+    return true;
+  }
+  return false;
+}
+
+void CHAGuardVisitor::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
+  number_of_guards_to_visit_--;
+  HInstruction* receiver = flag->InputAt(0);
+  // Don't need the receiver anymore.
+  flag->RemoveInputAt(0);
+  if (receiver->IsNullCheck()) {
+    receiver = receiver->InputAt(0);
+  }
+
+  if (OptimizeForParameter(flag, receiver)) {
+    DCHECK(!flag->IsInBlock());
+    return;
+  }
+  if (OptimizeWithDominatingGuard(flag, receiver)) {
+    DCHECK(!flag->IsInBlock());
+    return;
+  }
+  if (HoistGuard(flag, receiver)) {
+    DCHECK(flag->IsInBlock());
+    return;
+  }
+
+  // Need to keep the CHA guard in place.
+  block_has_cha_guard_[flag->GetBlock()->GetBlockId()] = 1;
+  GetGraph()->IncrementNumberOfCHAGuards();
+}
+
+void CHAGuardOptimization::Run() {
+  if (graph_->GetNumberOfCHAGuards() == 0) {
+    return;
+  }
+  CHAGuardVisitor visitor(graph_);
+  for (HBasicBlock* block : graph_->GetReversePostOrder()) {
+    visitor.VisitBasicBlock(block);
+  }
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/cha_guard_optimization.h b/compiler/optimizing/cha_guard_optimization.h
new file mode 100644
index 0000000000..ba0cdb81fd
--- /dev/null
+++ b/compiler/optimizing/cha_guard_optimization.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_CHA_GUARD_OPTIMIZATION_H_ +#define ART_COMPILER_OPTIMIZING_CHA_GUARD_OPTIMIZATION_H_ + +#include "optimization.h" + +namespace art { + +/** + * Optimize CHA guards by removing/moving them. + */ +class CHAGuardOptimization : public HOptimization { + public: + explicit CHAGuardOptimization(HGraph* graph) + : HOptimization(graph, kCHAGuardOptimizationPassName) {} + + void Run() OVERRIDE; + + static constexpr const char* kCHAGuardOptimizationPassName = "cha_guard_optimization"; + + private: + DISALLOW_COPY_AND_ASSIGN(CHAGuardOptimization); +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_CHA_GUARD_OPTIMIZATION_H_ diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 402eeee65f..f00648f570 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -1378,28 +1378,21 @@ uint32_t CodeGenerator::GetReferenceDisableFlagOffset() const { void CodeGenerator::EmitJitRoots(uint8_t* code, Handle<mirror::ObjectArray<mirror::Object>> roots, - const uint8_t* roots_data, - Handle<mirror::DexCache> outer_dex_cache) { + const uint8_t* roots_data) { DCHECK_EQ(static_cast<size_t>(roots->GetLength()), GetNumberOfJitRoots()); - StackHandleScope<1> hs(Thread::Current()); - MutableHandle<mirror::DexCache> h_dex_cache(hs.NewHandle<mirror::DexCache>(nullptr)); ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); size_t index = 0; for (auto& entry : jit_string_roots_) { - const DexFile& entry_dex_file = *entry.first.dex_file; - // Avoid the expensive FindDexCache call by checking if the string is - // in the compiled method's dex file. - h_dex_cache.Assign(IsSameDexFile(*outer_dex_cache->GetDexFile(), entry_dex_file) - ? outer_dex_cache.Get() - : class_linker->FindDexCache(hs.Self(), entry_dex_file)); - mirror::String* string = class_linker->LookupString( - entry_dex_file, entry.first.string_index, h_dex_cache); - DCHECK(string != nullptr) << "JIT roots require strings to have been loaded"; + // Update the `roots` with the string, and replace the address temporarily + // stored to the index in the table. + uint64_t address = entry.second; + roots->Set(index, reinterpret_cast<StackReference<mirror::String>*>(address)->AsMirrorPtr()); + DCHECK(roots->Get(index) != nullptr); + entry.second = index; // Ensure the string is strongly interned. This is a requirement on how the JIT // handles strings. b/32995596 - class_linker->GetInternTable()->InternStrong(string); - roots->Set(index, string); - entry.second = index; + class_linker->GetInternTable()->InternStrong( + reinterpret_cast<mirror::String*>(roots->Get(index))); ++index; } for (auto& entry : jit_class_roots_) { @@ -1407,6 +1400,7 @@ void CodeGenerator::EmitJitRoots(uint8_t* code, // stored to the index in the table. 
uint64_t address = entry.second; roots->Set(index, reinterpret_cast<StackReference<mirror::Class>*>(address)->AsMirrorPtr()); + DCHECK(roots->Get(index) != nullptr); entry.second = index; ++index; } diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 2e2c3c00af..6366b9838f 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -351,8 +351,7 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { // Also emits literal patches. void EmitJitRoots(uint8_t* code, Handle<mirror::ObjectArray<mirror::Object>> roots, - const uint8_t* roots_data, - Handle<mirror::DexCache> outer_dex_cache) + const uint8_t* roots_data) REQUIRES_SHARED(Locks::mutator_lock_); bool IsLeafMethod() const { @@ -713,9 +712,9 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { const ArenaVector<HBasicBlock*>* block_order_; // Maps a StringReference (dex_file, string_index) to the index in the literal table. - // Entries are intially added with a 0 index, and `EmitJitRoots` will compute all the - // indices. - ArenaSafeMap<StringReference, uint32_t, StringReferenceValueComparator> jit_string_roots_; + // Entries are intially added with a pointer in the handle zone, and `EmitJitRoots` + // will compute all the indices. + ArenaSafeMap<StringReference, uint64_t, StringReferenceValueComparator> jit_string_roots_; // Maps a ClassReference (dex_file, type_index) to the index in the literal table. // Entries are intially added with a pointer in the handle zone, and `EmitJitRoots` diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 8104613d3f..3009103ac7 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -1201,11 +1201,6 @@ CodeGeneratorARM::CodeGeneratorARM(HGraph* graph, isa_features_(isa_features), uint32_literals_(std::less<uint32_t>(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - method_patches_(MethodReferenceComparator(), - graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - call_patches_(MethodReferenceComparator(), - graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), boot_image_string_patches_(StringReferenceValueComparator(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), @@ -5942,7 +5937,9 @@ void LocationsBuilderARM::VisitLoadString(HLoadString* load) { } } -void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) { +// NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not +// move. +void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS { LocationSummary* locations = load->GetLocations(); Location out_loc = locations->Out(); Register out = out_loc.AsRegister<Register>(); @@ -5967,8 +5964,9 @@ void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) { return; // No dex cache slow path. } case HLoadString::LoadKind::kBootImageAddress: { - DCHECK_NE(load->GetAddress(), 0u); - uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress()); + uint32_t address = dchecked_integral_cast<uint32_t>( + reinterpret_cast<uintptr_t>(load->GetString().Get())); + DCHECK_NE(address, 0u); __ LoadLiteral(out, codegen_->DeduplicateBootImageAddressLiteral(address)); return; // No dex cache slow path. 
} @@ -5992,7 +5990,8 @@ void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) { } case HLoadString::LoadKind::kJitTableAddress: { __ LoadLiteral(out, codegen_->DeduplicateJitStringLiteral(load->GetDexFile(), - load->GetStringIndex())); + load->GetStringIndex(), + load->GetString())); // /* GcRoot<mirror::String> */ out = *out GenerateGcRootFieldLoad(load, out_loc, out, /* offset */ 0, kCompilerReadBarrierOption); return; @@ -7139,7 +7138,7 @@ void CodeGeneratorARM::GenerateReadBarrierForRootSlow(HInstruction* instruction, HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM::GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - HInvokeStaticOrDirect* invoke) { + HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) { HInvokeStaticOrDirect::DispatchInfo dispatch_info = desired_dispatch_info; // We disable pc-relative load when there is an irreducible loop, as the optimization // is incompatible with it. @@ -7151,24 +7150,6 @@ HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM::GetSupportedInvokeStaticOr dispatch_info.method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod; } - if (dispatch_info.code_ptr_location == HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative) { - const DexFile& outer_dex_file = GetGraph()->GetDexFile(); - if (&outer_dex_file != invoke->GetTargetMethod().dex_file) { - // Calls across dex files are more likely to exceed the available BL range, - // so use absolute patch with fixup if available and kCallArtMethod otherwise. - HInvokeStaticOrDirect::CodePtrLocation code_ptr_location = - (desired_dispatch_info.method_load_kind == - HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup) - ? HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup - : HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod; - return HInvokeStaticOrDirect::DispatchInfo { - dispatch_info.method_load_kind, - code_ptr_location, - dispatch_info.method_load_data, - 0u - }; - } - } return dispatch_info; } @@ -7199,20 +7180,6 @@ Register CodeGeneratorARM::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOr } void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) { - // For better instruction scheduling we load the direct code pointer before the method pointer. - switch (invoke->GetCodePtrLocation()) { - case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup: - // LR = code address from literal pool with link-time patch. - __ LoadLiteral(LR, DeduplicateMethodCodeLiteral(invoke->GetTargetMethod())); - break; - case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect: - // LR = invoke->GetDirectCodePtr(); - __ LoadImmediate(LR, invoke->GetDirectCodePtr()); - break; - default: - break; - } - Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp. 
switch (invoke->GetMethodLoadKind()) { case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: { @@ -7228,10 +7195,6 @@ void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ LoadImmediate(temp.AsRegister<Register>(), invoke->GetMethodAddress()); break; - case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup: - __ LoadLiteral(temp.AsRegister<Register>(), - DeduplicateMethodAddressLiteral(invoke->GetTargetMethod())); - break; case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: { HArmDexCacheArraysBase* base = invoke->InputAt(invoke->GetSpecialInputIndex())->AsArmDexCacheArraysBase(); @@ -7270,19 +7233,6 @@ void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf: __ bl(GetFrameEntryLabel()); break; - case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: - relative_call_patches_.emplace_back(*invoke->GetTargetMethod().dex_file, - invoke->GetTargetMethod().dex_method_index); - __ BindTrackedLabel(&relative_call_patches_.back().label); - // Arbitrarily branch to the BL itself, override at link time. - __ bl(&relative_call_patches_.back().label); - break; - case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup: - case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect: - // LR prepared above for better instruction scheduling. - // LR() - __ blx(LR); - break; case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod: // LR = callee_method->entry_point_from_quick_compiled_code_ __ LoadFromOffset( @@ -7371,8 +7321,10 @@ Literal* CodeGeneratorARM::DeduplicateBootImageAddressLiteral(uint32_t address) } Literal* CodeGeneratorARM::DeduplicateJitStringLiteral(const DexFile& dex_file, - dex::StringIndex string_index) { - jit_string_roots_.Overwrite(StringReference(&dex_file, string_index), /* placeholder */ 0u); + dex::StringIndex string_index, + Handle<mirror::String> handle) { + jit_string_roots_.Overwrite(StringReference(&dex_file, string_index), + reinterpret_cast64<uint64_t>(handle.GetReference())); return jit_string_patches_.GetOrCreate( StringReference(&dex_file, string_index), [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); }); @@ -7410,9 +7362,6 @@ inline void CodeGeneratorARM::EmitPcRelativeLinkerPatches( void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = - method_patches_.size() + - call_patches_.size() + - relative_call_patches_.size() + /* MOVW+MOVT for each entry */ 2u * pc_relative_dex_cache_patches_.size() + boot_image_string_patches_.size() + /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() + @@ -7420,29 +7369,6 @@ void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche /* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size() + boot_image_address_patches_.size(); linker_patches->reserve(size); - for (const auto& entry : method_patches_) { - const MethodReference& target_method = entry.first; - Literal* literal = entry.second; - DCHECK(literal->GetLabel()->IsBound()); - uint32_t literal_offset = literal->GetLabel()->Position(); - linker_patches->push_back(LinkerPatch::MethodPatch(literal_offset, - target_method.dex_file, - target_method.dex_method_index)); - } - for (const auto& entry : call_patches_) { - const MethodReference& target_method = entry.first; - Literal* literal = entry.second; - 
DCHECK(literal->GetLabel()->IsBound()); - uint32_t literal_offset = literal->GetLabel()->Position(); - linker_patches->push_back(LinkerPatch::CodePatch(literal_offset, - target_method.dex_file, - target_method.dex_method_index)); - } - for (const PatchInfo<Label>& info : relative_call_patches_) { - uint32_t literal_offset = info.label.Position(); - linker_patches->push_back( - LinkerPatch::RelativeCodePatch(literal_offset, &info.dex_file, info.index)); - } EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_, linker_patches); for (const auto& entry : boot_image_string_patches_) { @@ -7494,14 +7420,6 @@ Literal* CodeGeneratorARM::DeduplicateMethodLiteral(MethodReference target_metho [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); }); } -Literal* CodeGeneratorARM::DeduplicateMethodAddressLiteral(MethodReference target_method) { - return DeduplicateMethodLiteral(target_method, &method_patches_); -} - -Literal* CodeGeneratorARM::DeduplicateMethodCodeLiteral(MethodReference target_method) { - return DeduplicateMethodLiteral(target_method, &call_patches_); -} - void LocationsBuilderARM::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr, LocationSummary::kNoCall); diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 605169deed..d5968e0764 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -489,7 +489,9 @@ class CodeGeneratorARM : public CodeGenerator { dex::StringIndex string_index); Literal* DeduplicateBootImageTypeLiteral(const DexFile& dex_file, dex::TypeIndex type_index); Literal* DeduplicateBootImageAddressLiteral(uint32_t address); - Literal* DeduplicateJitStringLiteral(const DexFile& dex_file, dex::StringIndex string_index); + Literal* DeduplicateJitStringLiteral(const DexFile& dex_file, + dex::StringIndex string_index, + Handle<mirror::String> handle); Literal* DeduplicateJitClassLiteral(const DexFile& dex_file, dex::TypeIndex type_index, uint64_t address); @@ -607,8 +609,6 @@ class CodeGeneratorARM : public CodeGenerator { Literal* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map); Literal* DeduplicateMethodLiteral(MethodReference target_method, MethodToLiteralMap* map); - Literal* DeduplicateMethodAddressLiteral(MethodReference target_method); - Literal* DeduplicateMethodCodeLiteral(MethodReference target_method); PcRelativePatchInfo* NewPcRelativePatch(const DexFile& dex_file, uint32_t offset_or_index, ArenaDeque<PcRelativePatchInfo>* patches); @@ -627,12 +627,6 @@ class CodeGeneratorARM : public CodeGenerator { // Deduplication map for 32-bit literals, used for non-patchable boot image addresses. Uint32ToLiteralMap uint32_literals_; - // Method patch info, map MethodReference to a literal for method address and method code. - MethodToLiteralMap method_patches_; - MethodToLiteralMap call_patches_; - // Relative call patch info. - // Using ArenaDeque<> which retains element addresses on push/emplace_back(). - ArenaDeque<PatchInfo<Label>> relative_call_patches_; // PC-relative patch info for each HArmDexCacheArraysBase. ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_; // Deduplication map for boot string literals for kBootImageLinkTimeAddress. 
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 5cff303e2e..4b6a9bed61 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -1147,11 +1147,6 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), uint64_literals_(std::less<uint64_t>(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - method_patches_(MethodReferenceComparator(), - graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - call_patches_(MethodReferenceComparator(), - graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), boot_image_string_patches_(StringReferenceValueComparator(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), @@ -1538,8 +1533,17 @@ void CodeGeneratorARM64::MoveLocation(Location destination, DCHECK(source.IsStackSlot() || source.IsDoubleStackSlot()); DCHECK(source.IsDoubleStackSlot() == destination.IsDoubleStackSlot()); UseScratchRegisterScope temps(GetVIXLAssembler()); - // There is generally less pressure on FP registers. - FPRegister temp = destination.IsDoubleStackSlot() ? temps.AcquireD() : temps.AcquireS(); + // Use any scratch register (a core or a floating-point one) + // from VIXL scratch register pools as a temporary. + // + // We used to only use the FP scratch register pool, but in some + // rare cases the only register from this pool (D31) would + // already be used (e.g. within a ParallelMove instruction, when + // a move is blocked by a another move requiring a scratch FP + // register, which would reserve D31). To prevent this issue, we + // ask for a scratch register of any type (core or FP). + CPURegister temp = + temps.AcquireCPURegisterOfSize(destination.IsDoubleStackSlot() ? kXRegSize : kWRegSize); __ Ldr(temp, StackOperandFrom(source)); __ Str(temp, StackOperandFrom(destination)); } @@ -3971,23 +3975,6 @@ HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM64::GetSupportedInvokeStatic } void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) { - // For better instruction scheduling we load the direct code pointer before the method pointer. - bool direct_code_loaded = false; - switch (invoke->GetCodePtrLocation()) { - case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup: - // LR = code address from literal pool with link-time patch. - __ Ldr(lr, DeduplicateMethodCodeLiteral(invoke->GetTargetMethod())); - direct_code_loaded = true; - break; - case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect: - // LR = invoke->GetDirectCodePtr(); - __ Ldr(lr, DeduplicateUint64Literal(invoke->GetDirectCodePtr())); - direct_code_loaded = true; - break; - default: - break; - } - // Make sure that ArtMethod* is passed in kArtMethodRegister as per the calling convention. Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp. switch (invoke->GetMethodLoadKind()) { @@ -4005,11 +3992,6 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok // Load method address from literal pool. __ Ldr(XRegisterFrom(temp), DeduplicateUint64Literal(invoke->GetMethodAddress())); break; - case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup: - // Load method address from literal pool with a link-time patch. 
- __ Ldr(XRegisterFrom(temp), - DeduplicateMethodAddressLiteral(invoke->GetTargetMethod())); - break; case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: { // Add ADRP with its PC-relative DexCache access patch. const DexFile& dex_file = invoke->GetDexFile(); @@ -4051,23 +4033,6 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf: __ Bl(&frame_entry_label_); break; - case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: { - relative_call_patches_.emplace_back(*invoke->GetTargetMethod().dex_file, - invoke->GetTargetMethod().dex_method_index); - vixl::aarch64::Label* label = &relative_call_patches_.back().label; - SingleEmissionCheckScope guard(GetVIXLAssembler()); - __ Bind(label); - // Branch and link to itself. This will be overriden at link time. - __ bl(static_cast<int64_t>(0)); - break; - } - case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup: - case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect: - // LR prepared above for better instruction scheduling. - DCHECK(direct_code_loaded); - // lr() - __ Blr(lr); - break; case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod: // LR = callee_method->entry_point_from_quick_compiled_code_; __ Ldr(lr, MemOperand( @@ -4172,8 +4137,9 @@ vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageAddres } vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitStringLiteral( - const DexFile& dex_file, dex::StringIndex string_index) { - jit_string_roots_.Overwrite(StringReference(&dex_file, string_index), /* placeholder */ 0u); + const DexFile& dex_file, dex::StringIndex string_index, Handle<mirror::String> handle) { + jit_string_roots_.Overwrite(StringReference(&dex_file, string_index), + reinterpret_cast64<uint64_t>(handle.GetReference())); return jit_string_patches_.GetOrCreate( StringReference(&dex_file, string_index), [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); }); @@ -4229,9 +4195,6 @@ inline void CodeGeneratorARM64::EmitPcRelativeLinkerPatches( void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = - method_patches_.size() + - call_patches_.size() + - relative_call_patches_.size() + pc_relative_dex_cache_patches_.size() + boot_image_string_patches_.size() + pc_relative_string_patches_.size() + @@ -4239,24 +4202,6 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patc pc_relative_type_patches_.size() + boot_image_address_patches_.size(); linker_patches->reserve(size); - for (const auto& entry : method_patches_) { - const MethodReference& target_method = entry.first; - vixl::aarch64::Literal<uint64_t>* literal = entry.second; - linker_patches->push_back(LinkerPatch::MethodPatch(literal->GetOffset(), - target_method.dex_file, - target_method.dex_method_index)); - } - for (const auto& entry : call_patches_) { - const MethodReference& target_method = entry.first; - vixl::aarch64::Literal<uint64_t>* literal = entry.second; - linker_patches->push_back(LinkerPatch::CodePatch(literal->GetOffset(), - target_method.dex_file, - target_method.dex_method_index)); - } - for (const PatchInfo<vixl::aarch64::Label>& info : relative_call_patches_) { - linker_patches->push_back( - LinkerPatch::RelativeCodePatch(info.label.GetLocation(), &info.dex_file, info.index)); - } for (const PcRelativePatchInfo& info : pc_relative_dex_cache_patches_) { 
linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(info.label.GetLocation(), &info.target_dex_file, @@ -4314,17 +4259,6 @@ vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateMethodLiteral( [this]() { return __ CreateLiteralDestroyedWithPool<uint64_t>(/* placeholder */ 0u); }); } -vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateMethodAddressLiteral( - MethodReference target_method) { - return DeduplicateMethodLiteral(target_method, &method_patches_); -} - -vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateMethodCodeLiteral( - MethodReference target_method) { - return DeduplicateMethodLiteral(target_method, &call_patches_); -} - - void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { // Explicit clinit checks triggered by static invokes must have been pruned by // art::PrepareForRegisterAllocation. @@ -4594,7 +4528,9 @@ void LocationsBuilderARM64::VisitLoadString(HLoadString* load) { } } -void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) { +// NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not +// move. +void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS { Register out = OutputRegister(load); Location out_loc = load->GetLocations()->Out(); @@ -4617,8 +4553,10 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) { return; // No dex cache slow path. } case HLoadString::LoadKind::kBootImageAddress: { - DCHECK(load->GetAddress() != 0u && IsUint<32>(load->GetAddress())); - __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(load->GetAddress())); + uint32_t address = dchecked_integral_cast<uint32_t>( + reinterpret_cast<uintptr_t>(load->GetString().Get())); + DCHECK_NE(address, 0u); + __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address)); return; // No dex cache slow path. 
} case HLoadString::LoadKind::kBssEntry: { @@ -4649,7 +4587,8 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) { } case HLoadString::LoadKind::kJitTableAddress: { __ Ldr(out, codegen_->DeduplicateJitStringLiteral(load->GetDexFile(), - load->GetStringIndex())); + load->GetStringIndex(), + load->GetString())); GenerateGcRootFieldLoad(load, out_loc, out.X(), diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 85b6f9faf5..d6a5f9d1fa 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -567,7 +567,8 @@ class CodeGeneratorARM64 : public CodeGenerator { dex::TypeIndex type_index); vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageAddressLiteral(uint64_t address); vixl::aarch64::Literal<uint32_t>* DeduplicateJitStringLiteral(const DexFile& dex_file, - dex::StringIndex string_index); + dex::StringIndex string_index, + Handle<mirror::String> handle); vixl::aarch64::Literal<uint32_t>* DeduplicateJitClassLiteral(const DexFile& dex_file, dex::TypeIndex string_index, uint64_t address); @@ -693,8 +694,6 @@ class CodeGeneratorARM64 : public CodeGenerator { vixl::aarch64::Literal<uint64_t>* DeduplicateUint64Literal(uint64_t value); vixl::aarch64::Literal<uint64_t>* DeduplicateMethodLiteral(MethodReference target_method, MethodToLiteralMap* map); - vixl::aarch64::Literal<uint64_t>* DeduplicateMethodAddressLiteral(MethodReference target_method); - vixl::aarch64::Literal<uint64_t>* DeduplicateMethodCodeLiteral(MethodReference target_method); // The PcRelativePatchInfo is used for PC-relative addressing of dex cache arrays // and boot image strings/types. The only difference is the interpretation of the @@ -737,12 +736,6 @@ class CodeGeneratorARM64 : public CodeGenerator { Uint32ToLiteralMap uint32_literals_; // Deduplication map for 64-bit literals, used for non-patchable method address or method code. Uint64ToLiteralMap uint64_literals_; - // Method patch info, map MethodReference to a literal for method address and method code. - MethodToLiteralMap method_patches_; - MethodToLiteralMap call_patches_; - // Relative call patch info. - // Using ArenaDeque<> which retains element addresses on push/emplace_back(). - ArenaDeque<PatchInfo<vixl::aarch64::Label>> relative_call_patches_; // PC-relative DexCache access info. ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_; // Deduplication map for boot string literals for kBootImageLinkTimeAddress. diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index 1c5aec01c6..b1f6d599ab 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -805,7 +805,7 @@ class ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL : public SlowPathCodeARMVIXL // as-is. vixl32::Label done; __ Cmp(temp1_, ref_reg); - __ B(eq, &done); + __ B(eq, &done, /* far_target */ false); // Update the the holder's field atomically. This may fail if // mutator updates before us, but it's OK. 
This is achieved @@ -857,11 +857,11 @@ class ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL : public SlowPathCodeARMVIXL __ clrex(ne); } - __ B(ne, &exit_loop); + __ B(ne, &exit_loop, /* far_target */ false); __ Strex(tmp, value, MemOperand(tmp_ptr)); __ Cmp(tmp, 1); - __ B(eq, &loop_head); + __ B(eq, &loop_head, /* far_target */ false); __ Bind(&exit_loop); @@ -1243,10 +1243,21 @@ CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph, move_resolver_(graph->GetArena(), this), assembler_(graph->GetArena()), isa_features_(isa_features), - relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + uint32_literals_(std::less<uint32_t>(), + graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + boot_image_string_patches_(StringReferenceValueComparator(), + graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { + boot_image_type_patches_(TypeReferenceValueComparator(), + graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + boot_image_address_patches_(std::less<uint32_t>(), + graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + jit_string_patches_(StringReferenceValueComparator(), + graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + jit_class_patches_(TypeReferenceValueComparator(), + graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { // Always save the LR register to mimic Quick. AddAllocatedRegister(Location::RegisterLocation(LR)); // Give d14 and d15 as scratch registers to VIXL. @@ -3626,7 +3637,7 @@ void InstructionCodeGeneratorARMVIXL::HandleLongRotate(HRor* ror) { __ And(shift_right, RegisterFrom(rhs), 0x1F); __ Lsrs(shift_left, RegisterFrom(rhs), 6); __ Rsb(LeaveFlags, shift_left, shift_right, Operand::From(kArmBitsPerWord)); - __ B(cc, &shift_by_32_plus_shift_right); + __ B(cc, &shift_by_32_plus_shift_right, /* far_target */ false); // out_reg_hi = (reg_hi << shift_left) | (reg_lo >> shift_right). // out_reg_lo = (reg_lo << shift_left) | (reg_hi >> shift_right). @@ -4098,8 +4109,8 @@ void InstructionCodeGeneratorARMVIXL::VisitCompare(HCompare* compare) { } case Primitive::kPrimLong: { __ Cmp(HighRegisterFrom(left), HighRegisterFrom(right)); // Signed compare. - __ B(lt, &less); - __ B(gt, &greater); + __ B(lt, &less, /* far_target */ false); + __ B(gt, &greater, /* far_target */ false); // Emit move to `out` before the last `Cmp`, as `Mov` might affect the status flags. __ Mov(out, 0); __ Cmp(LowRegisterFrom(left), LowRegisterFrom(right)); // Unsigned compare. @@ -4120,8 +4131,8 @@ void InstructionCodeGeneratorARMVIXL::VisitCompare(HCompare* compare) { UNREACHABLE(); } - __ B(eq, &done); - __ B(less_cond, &less); + __ B(eq, &done, /* far_target */ false); + __ B(less_cond, &less, /* far_target */ false); __ Bind(&greater); __ Mov(out, 1); @@ -4415,7 +4426,7 @@ void LocationsBuilderARMVIXL::HandleFieldGet(HInstruction* instruction, locations->AddTemp(Location::RequiresRegister()); } else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { // We need a temporary register for the read barrier marking slow - // path in CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier. + // path in CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier. 
locations->AddTemp(Location::RequiresRegister()); } } @@ -4877,7 +4888,7 @@ void LocationsBuilderARMVIXL::VisitArrayGet(HArrayGet* instruction) { object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap); } // We need a temporary register for the read barrier marking slow - // path in CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier. + // path in CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier. // Also need for String compression feature. if ((object_array_get_with_read_barrier && kUseBakerReadBarrier) || (mirror::kUseStringCompression && instruction->IsStringCharAt())) { @@ -4918,7 +4929,7 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { __ Lsrs(length, length, 1u); // LSRS has a 16-bit encoding, TST (immediate) does not. static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, "Expecting 0=compressed, 1=uncompressed"); - __ B(cs, &uncompressed_load); + __ B(cs, &uncompressed_load, /* far_target */ false); GetAssembler()->LoadFromOffset(kLoadUnsignedByte, RegisterFrom(out_loc), obj, @@ -4957,7 +4968,7 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { __ Lsrs(length, length, 1u); // LSRS has a 16-bit encoding, TST (immediate) does not. static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, "Expecting 0=compressed, 1=uncompressed"); - __ B(cs, &uncompressed_load); + __ B(cs, &uncompressed_load, /* far_target */ false); __ Ldrb(RegisterFrom(out_loc), MemOperand(temp, RegisterFrom(index), vixl32::LSL, 0)); __ B(&done); __ Bind(&uncompressed_load); @@ -5256,7 +5267,7 @@ void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) { if (instruction->StaticTypeOfArrayIsObjectArray()) { vixl32::Label do_put; - __ B(eq, &do_put); + __ B(eq, &do_put, /* far_target */ false); // If heap poisoning is enabled, the `temp1` reference has // not been unpoisoned yet; unpoison it now. GetAssembler()->MaybeUnpoisonHeapReference(temp1); @@ -5670,13 +5681,13 @@ void ParallelMoveResolverARMVIXL::Exchange(vixl32::Register reg, int mem) { void ParallelMoveResolverARMVIXL::Exchange(int mem1, int mem2) { // TODO(VIXL32): Double check the performance of this implementation. UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler()); - vixl32::Register temp = temps.Acquire(); - vixl32::SRegister temp_s = temps.AcquireS(); + vixl32::SRegister temp_1 = temps.AcquireS(); + vixl32::SRegister temp_2 = temps.AcquireS(); - __ Ldr(temp, MemOperand(sp, mem1)); - __ Vldr(temp_s, MemOperand(sp, mem2)); - __ Str(temp, MemOperand(sp, mem2)); - __ Vstr(temp_s, MemOperand(sp, mem1)); + __ Vldr(temp_1, MemOperand(sp, mem1)); + __ Vldr(temp_2, MemOperand(sp, mem2)); + __ Vstr(temp_1, MemOperand(sp, mem2)); + __ Vstr(temp_2, MemOperand(sp, mem1)); } void ParallelMoveResolverARMVIXL::EmitSwap(size_t index) { @@ -5772,17 +5783,15 @@ HLoadClass::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadClassKind( case HLoadClass::LoadKind::kReferrersClass: break; case HLoadClass::LoadKind::kBootImageLinkTimeAddress: - // TODO(VIXL): Enable it back when literal pools are fixed in VIXL. - return HLoadClass::LoadKind::kDexCacheViaMethod; + DCHECK(!GetCompilerOptions().GetCompilePic()); + break; case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: DCHECK(GetCompilerOptions().GetCompilePic()); break; case HLoadClass::LoadKind::kBootImageAddress: - // TODO(VIXL): Enable it back when literal pools are fixed in VIXL. 
- return HLoadClass::LoadKind::kDexCacheViaMethod; + break; case HLoadClass::LoadKind::kJitTableAddress: - // TODO(VIXL): Enable it back when literal pools are fixed in VIXL. - return HLoadClass::LoadKind::kDexCacheViaMethod; + break; case HLoadClass::LoadKind::kDexCachePcRelative: DCHECK(!Runtime::Current()->UseJitCompilation()); // We disable pc-relative load when there is an irreducible loop, as the optimization @@ -5858,7 +5867,9 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) { break; } case HLoadClass::LoadKind::kBootImageLinkTimeAddress: { - TODO_VIXL32(FATAL); + DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); + __ Ldr(out, codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(), + cls->GetTypeIndex())); break; } case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: { @@ -5869,11 +5880,18 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) { break; } case HLoadClass::LoadKind::kBootImageAddress: { - TODO_VIXL32(FATAL); + DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); + DCHECK_NE(cls->GetAddress(), 0u); + uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress()); + __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address)); break; } case HLoadClass::LoadKind::kJitTableAddress: { - TODO_VIXL32(FATAL); + __ Ldr(out, codegen_->DeduplicateJitClassLiteral(cls->GetDexFile(), + cls->GetTypeIndex(), + cls->GetAddress())); + // /* GcRoot<mirror::Class> */ out = *out + GenerateGcRootFieldLoad(cls, out_loc, out, /* offset */ 0, kCompilerReadBarrierOption); break; } case HLoadClass::LoadKind::kDexCachePcRelative: { @@ -5958,21 +5976,19 @@ HLoadString::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) { switch (desired_string_load_kind) { case HLoadString::LoadKind::kBootImageLinkTimeAddress: - // TODO(VIXL): Implement missing optimization. - return HLoadString::LoadKind::kDexCacheViaMethod; + DCHECK(!GetCompilerOptions().GetCompilePic()); + break; case HLoadString::LoadKind::kBootImageLinkTimePcRelative: DCHECK(GetCompilerOptions().GetCompilePic()); break; case HLoadString::LoadKind::kBootImageAddress: - // TODO(VIXL): Implement missing optimization. - return HLoadString::LoadKind::kDexCacheViaMethod; + break; case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; case HLoadString::LoadKind::kJitTableAddress: DCHECK(Runtime::Current()->UseJitCompilation()); - // TODO(VIXL): Implement missing optimization. - return HLoadString::LoadKind::kDexCacheViaMethod; + break; case HLoadString::LoadKind::kDexCacheViaMethod: break; } @@ -6006,7 +6022,9 @@ void LocationsBuilderARMVIXL::VisitLoadString(HLoadString* load) { } } -void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) { +// NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not +// move. +void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS { LocationSummary* locations = load->GetLocations(); Location out_loc = locations->Out(); vixl32::Register out = OutputRegister(load); @@ -6014,8 +6032,9 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) { switch (load_kind) { case HLoadString::LoadKind::kBootImageLinkTimeAddress: { - TODO_VIXL32(FATAL); - break; + __ Ldr(out, codegen_->DeduplicateBootImageStringLiteral(load->GetDexFile(), + load->GetStringIndex())); + return; // No dex cache slow path. 
} case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { DCHECK(codegen_->GetCompilerOptions().IsBootImage()); @@ -6025,8 +6044,11 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) { return; // No dex cache slow path. } case HLoadString::LoadKind::kBootImageAddress: { - TODO_VIXL32(FATAL); - break; + uint32_t address = dchecked_integral_cast<uint32_t>( + reinterpret_cast<uintptr_t>(load->GetString().Get())); + DCHECK_NE(address, 0u); + __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address)); + return; // No dex cache slow path. } case HLoadString::LoadKind::kBssEntry: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); @@ -6043,8 +6065,12 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) { return; } case HLoadString::LoadKind::kJitTableAddress: { - TODO_VIXL32(FATAL); - break; + __ Ldr(out, codegen_->DeduplicateJitStringLiteral(load->GetDexFile(), + load->GetStringIndex(), + load->GetString())); + // /* GcRoot<mirror::String> */ out = *out + GenerateGcRootFieldLoad(load, out_loc, out, /* offset */ 0, kCompilerReadBarrierOption); + return; } default: break; @@ -6186,7 +6212,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) kCompilerReadBarrierOption); __ Cmp(out, cls); // Classes must be equal for the instanceof to succeed. - __ B(ne, &zero); + __ B(ne, &zero, /* far_target */ false); __ Mov(out, 1); __ B(&done); break; @@ -6213,7 +6239,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) // If `out` is null, we use it for the result, and jump to `done`. __ CompareAndBranchIfZero(out, &done, /* far_target */ false); __ Cmp(out, cls); - __ B(ne, &loop); + __ B(ne, &loop, /* far_target */ false); __ Mov(out, 1); if (zero.IsReferenced()) { __ B(&done); @@ -6233,7 +6259,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) vixl32::Label loop, success; __ Bind(&loop); __ Cmp(out, cls); - __ B(eq, &success); + __ B(eq, &success, /* far_target */ false); // /* HeapReference<Class> */ out = out->super_class_ GenerateReferenceLoadOneRegister(instruction, out_loc, @@ -6262,7 +6288,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) // Do an exact check. vixl32::Label exact_check; __ Cmp(out, cls); - __ B(eq, &exact_check); + __ B(eq, &exact_check, /* far_target */ false); // Otherwise, we need to check that the object's class is a non-primitive array. // /* HeapReference<Class> */ out = out->component_type_ GenerateReferenceLoadOneRegister(instruction, @@ -6464,7 +6490,7 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) { // Otherwise, compare the classes. __ Cmp(temp, cls); - __ B(ne, &loop); + __ B(ne, &loop, /* far_target */ false); break; } @@ -6481,7 +6507,7 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) { vixl32::Label loop; __ Bind(&loop); __ Cmp(temp, cls); - __ B(eq, &done); + __ B(eq, &done, /* far_target */ false); // /* HeapReference<Class> */ temp = temp->super_class_ GenerateReferenceLoadOneRegister(instruction, @@ -6509,7 +6535,7 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) { // Do an exact check. __ Cmp(temp, cls); - __ B(eq, &done); + __ B(eq, &done, /* far_target */ false); // Otherwise, we need to check that the object's class is a non-primitive array. 
// /* HeapReference<Class> */ temp = temp->component_type_ @@ -6573,7 +6599,7 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) { __ Sub(RegisterFrom(maybe_temp2_loc), RegisterFrom(maybe_temp2_loc), 2); // Compare the classes and continue the loop if they do not match. __ Cmp(cls, RegisterFrom(maybe_temp3_loc)); - __ B(ne, &start_loop); + __ B(ne, &start_loop, /* far_target */ false); break; } } @@ -7206,20 +7232,7 @@ void CodeGeneratorARMVIXL::GenerateReadBarrierForRootSlow(HInstruction* instruct // otherwise return a fall-back info that should be used instead. HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARMVIXL::GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - HInvokeStaticOrDirect* invoke) { - // TODO(VIXL): Implement optimized code paths. - if (desired_dispatch_info.method_load_kind == - HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup || - desired_dispatch_info.code_ptr_location == - HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup) { - return { - HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod, - HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod, - 0u, - 0u - }; - } - + HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) { HInvokeStaticOrDirect::DispatchInfo dispatch_info = desired_dispatch_info; // We disable pc-relative load when there is an irreducible loop, as the optimization // is incompatible with it. @@ -7231,24 +7244,6 @@ HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARMVIXL::GetSupportedInvokeStat dispatch_info.method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod; } - if (dispatch_info.code_ptr_location == HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative) { - const DexFile& outer_dex_file = GetGraph()->GetDexFile(); - if (&outer_dex_file != invoke->GetTargetMethod().dex_file) { - // Calls across dex files are more likely to exceed the available BL range, - // so use absolute patch with fixup if available and kCallArtMethod otherwise. - HInvokeStaticOrDirect::CodePtrLocation code_ptr_location = - (desired_dispatch_info.method_load_kind == - HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup) - ? HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup - : HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod; - return HInvokeStaticOrDirect::DispatchInfo { - dispatch_info.method_load_kind, - code_ptr_location, - dispatch_info.method_load_data, - 0u - }; - } - } return dispatch_info; } @@ -7280,20 +7275,6 @@ vixl32::Register CodeGeneratorARMVIXL::GetInvokeStaticOrDirectExtraParameter( void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall( HInvokeStaticOrDirect* invoke, Location temp) { - // For better instruction scheduling we load the direct code pointer before the method pointer. - switch (invoke->GetCodePtrLocation()) { - case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup: - // LR = code address from literal pool with link-time patch. - TODO_VIXL32(FATAL); - break; - case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect: - // LR = invoke->GetDirectCodePtr(); - __ Mov(lr, Operand::From(invoke->GetDirectCodePtr())); - break; - default: - break; - } - Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp. 
switch (invoke->GetMethodLoadKind()) { case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: { @@ -7309,9 +7290,6 @@ void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall( case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ Mov(RegisterFrom(temp), Operand::From(invoke->GetMethodAddress())); break; - case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup: - TODO_VIXL32(FATAL); - break; case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: { HArmDexCacheArraysBase* base = invoke->InputAt(invoke->GetSpecialInputIndex())->AsArmDexCacheArraysBase(); @@ -7351,30 +7329,6 @@ void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall( case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf: __ Bl(GetFrameEntryLabel()); break; - case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: - relative_call_patches_.emplace_back(*invoke->GetTargetMethod().dex_file, - invoke->GetTargetMethod().dex_method_index); - { - ExactAssemblyScope aas(GetVIXLAssembler(), - vixl32::kMaxInstructionSizeInBytes, - CodeBufferCheckScope::kMaximumSize); - __ bind(&relative_call_patches_.back().label); - // Arbitrarily branch to the BL itself, override at link time. - __ bl(&relative_call_patches_.back().label); - } - break; - case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup: - case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect: - // LR prepared above for better instruction scheduling. - // LR() - { - // blx in T32 has only 16bit encoding that's why a stricter check for the scope is used. - ExactAssemblyScope aas(GetVIXLAssembler(), - vixl32::k16BitT32InstructionSizeInBytes, - CodeBufferCheckScope::kExactSize); - __ blx(lr); - } - break; case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod: // LR = callee_method->entry_point_from_quick_compiled_code_ GetAssembler()->LoadFromOffset( @@ -7464,6 +7418,60 @@ CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativePa return &patches->back(); } +VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateBootImageStringLiteral( + const DexFile& dex_file, + dex::StringIndex string_index) { + return boot_image_string_patches_.GetOrCreate( + StringReference(&dex_file, string_index), + [this]() { + return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); + }); +} + +VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateBootImageTypeLiteral( + const DexFile& dex_file, + dex::TypeIndex type_index) { + return boot_image_type_patches_.GetOrCreate( + TypeReference(&dex_file, type_index), + [this]() { + return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); + }); +} + +VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateBootImageAddressLiteral(uint32_t address) { + bool needs_patch = GetCompilerOptions().GetIncludePatchInformation(); + Uint32ToLiteralMap* map = needs_patch ? 
&boot_image_address_patches_ : &uint32_literals_; + return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), map); +} + +VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateDexCacheAddressLiteral(uint32_t address) { + return DeduplicateUint32Literal(address, &uint32_literals_); +} + +VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateJitStringLiteral( + const DexFile& dex_file, + dex::StringIndex string_index, + Handle<mirror::String> handle) { + jit_string_roots_.Overwrite(StringReference(&dex_file, string_index), + reinterpret_cast64<uint64_t>(handle.GetReference())); + return jit_string_patches_.GetOrCreate( + StringReference(&dex_file, string_index), + [this]() { + return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); + }); +} + +VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateJitClassLiteral(const DexFile& dex_file, + dex::TypeIndex type_index, + uint64_t address) { + jit_class_roots_.Overwrite(TypeReference(&dex_file, type_index), address); + return jit_class_patches_.GetOrCreate( + TypeReference(&dex_file, type_index), + [this]() { + return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); + }); +} + template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> inline void CodeGeneratorARMVIXL::EmitPcRelativeLinkerPatches( const ArenaDeque<PcRelativePatchInfo>& infos, @@ -7487,18 +7495,24 @@ inline void CodeGeneratorARMVIXL::EmitPcRelativeLinkerPatches( void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = - relative_call_patches_.size() + /* MOVW+MOVT for each entry */ 2u * pc_relative_dex_cache_patches_.size() + + boot_image_string_patches_.size() + /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() + - /* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size(); + boot_image_type_patches_.size() + + /* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size() + + boot_image_address_patches_.size(); linker_patches->reserve(size); - for (const PatchInfo<vixl32::Label>& info : relative_call_patches_) { - uint32_t literal_offset = info.label.GetLocation(); - linker_patches->push_back( - LinkerPatch::RelativeCodePatch(literal_offset, &info.dex_file, info.index)); - } EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_, linker_patches); + for (const auto& entry : boot_image_string_patches_) { + const StringReference& target_string = entry.first; + VIXLUInt32Literal* literal = entry.second; + DCHECK(literal->IsBound()); + uint32_t literal_offset = literal->GetLocation(); + linker_patches->push_back(LinkerPatch::StringPatch(literal_offset, + target_string.dex_file, + target_string.string_index.index_)); + } if (!GetCompilerOptions().IsBootImage()) { EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, linker_patches); @@ -7506,8 +7520,44 @@ void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pa EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_, linker_patches); } + for (const auto& entry : boot_image_type_patches_) { + const TypeReference& target_type = entry.first; + VIXLUInt32Literal* literal = entry.second; + DCHECK(literal->IsBound()); + uint32_t literal_offset = literal->GetLocation(); + linker_patches->push_back(LinkerPatch::TypePatch(literal_offset, + target_type.dex_file, + target_type.type_index.index_)); + } 
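The Deduplicate*Literal helpers above all share one idiom: a map keyed by string, type, or address whose GetOrCreate only allocates a fresh placeholder literal on the first request, so every later use of the same key is patched through a single pool entry. A self-contained sketch of that get-or-create pattern, with std::map and a fake literal type standing in for ArenaSafeMap and the VIXL literal:

    #include <map>

    struct FakeLiteral { unsigned placeholder = 0u; };  // stand-in for vixl32::Literal<uint32_t>

    template <typename Key, typename Create>
    FakeLiteral* GetOrCreate(std::map<Key, FakeLiteral*>* map, const Key& key, Create create) {
      auto it = map->find(key);
      if (it == map->end()) {
        // First time this key is seen: allocate one placeholder literal and remember it.
        it = map->emplace(key, create()).first;
      }
      return it->second;  // Later requests for `key` reuse the same literal.
    }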
EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_, linker_patches); + for (const auto& entry : boot_image_address_patches_) { + DCHECK(GetCompilerOptions().GetIncludePatchInformation()); + VIXLUInt32Literal* literal = entry.second; + DCHECK(literal->IsBound()); + uint32_t literal_offset = literal->GetLocation(); + linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset)); + } +} + +VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateUint32Literal( + uint32_t value, + Uint32ToLiteralMap* map) { + return map->GetOrCreate( + value, + [this, value]() { + return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ value); + }); +} + +VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateMethodLiteral( + MethodReference target_method, + MethodToLiteralMap* map) { + return map->GetOrCreate( + target_method, + [this]() { + return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); + }); } void LocationsBuilderARMVIXL::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) { @@ -7703,6 +7753,31 @@ void InstructionCodeGeneratorARMVIXL::VisitClassTableGet(HClassTableGet* instruc } } +static void PatchJitRootUse(uint8_t* code, + const uint8_t* roots_data, + VIXLUInt32Literal* literal, + uint64_t index_in_table) { + DCHECK(literal->IsBound()); + uint32_t literal_offset = literal->GetLocation(); + uintptr_t address = + reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>); + uint8_t* data = code + literal_offset; + reinterpret_cast<uint32_t*>(data)[0] = dchecked_integral_cast<uint32_t>(address); +} + +void CodeGeneratorARMVIXL::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) { + for (const auto& entry : jit_string_patches_) { + const auto& it = jit_string_roots_.find(entry.first); + DCHECK(it != jit_string_roots_.end()); + PatchJitRootUse(code, roots_data, entry.second, it->second); + } + for (const auto& entry : jit_class_patches_) { + const auto& it = jit_class_roots_.find(entry.first); + DCHECK(it != jit_class_roots_.end()); + PatchJitRootUse(code, roots_data, entry.second, it->second); + } +} + void CodeGeneratorARMVIXL::EmitMovwMovtPlaceholder( CodeGeneratorARMVIXL::PcRelativePatchInfo* labels, vixl32::Register out) { diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h index 93ea601ed8..200a463c75 100644 --- a/compiler/optimizing/code_generator_arm_vixl.h +++ b/compiler/optimizing/code_generator_arm_vixl.h @@ -107,20 +107,20 @@ static const size_t kRuntimeParameterFpuRegistersLengthVIXL = arraysize(kRuntimeParameterFpuRegistersVIXL); class LoadClassSlowPathARMVIXL; - class CodeGeneratorARMVIXL; +using VIXLInt32Literal = vixl::aarch32::Literal<int32_t>; +using VIXLUInt32Literal = vixl::aarch32::Literal<uint32_t>; + class JumpTableARMVIXL : public DeletableArenaObject<kArenaAllocSwitchTable> { public: - typedef vixl::aarch32::Literal<int32_t> IntLiteral; - explicit JumpTableARMVIXL(HPackedSwitch* switch_instr) : switch_instr_(switch_instr), table_start_(), bb_addresses_(switch_instr->GetArena()->Adapter(kArenaAllocCodeGenerator)) { uint32_t num_entries = switch_instr_->GetNumEntries(); for (uint32_t i = 0; i < num_entries; i++) { - IntLiteral *lit = new IntLiteral(0, vixl32::RawLiteral::kManuallyPlaced); + VIXLInt32Literal *lit = new VIXLInt32Literal(0, vixl32::RawLiteral::kManuallyPlaced); bb_addresses_.emplace_back(lit); } } @@ -133,7 +133,7 @@ class JumpTableARMVIXL : public 
DeletableArenaObject<kArenaAllocSwitchTable> { private: HPackedSwitch* const switch_instr_; vixl::aarch32::Label table_start_; - ArenaVector<std::unique_ptr<IntLiteral>> bb_addresses_; + ArenaVector<std::unique_ptr<VIXLInt32Literal>> bb_addresses_; DISALLOW_COPY_AND_ASSIGN(JumpTableARMVIXL); }; @@ -566,8 +566,23 @@ class CodeGeneratorARMVIXL : public CodeGenerator { PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, dex::TypeIndex type_index); PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset); + VIXLUInt32Literal* DeduplicateBootImageStringLiteral(const DexFile& dex_file, + dex::StringIndex string_index); + VIXLUInt32Literal* DeduplicateBootImageTypeLiteral(const DexFile& dex_file, + dex::TypeIndex type_index); + VIXLUInt32Literal* DeduplicateBootImageAddressLiteral(uint32_t address); + VIXLUInt32Literal* DeduplicateDexCacheAddressLiteral(uint32_t address); + VIXLUInt32Literal* DeduplicateJitStringLiteral(const DexFile& dex_file, + dex::StringIndex string_index, + Handle<mirror::String> handle); + VIXLUInt32Literal* DeduplicateJitClassLiteral(const DexFile& dex_file, + dex::TypeIndex type_index, + uint64_t address); + void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE; + void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE; + // Fast path implementation of ReadBarrier::Barrier for a heap // reference field load when Baker's read barriers are used. void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, @@ -673,10 +688,19 @@ class CodeGeneratorARMVIXL : public CodeGenerator { vixl::aarch32::Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, vixl::aarch32::Register temp); - using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, vixl::aarch32::Literal<uint32_t>*>; + using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, VIXLUInt32Literal*>; using MethodToLiteralMap = - ArenaSafeMap<MethodReference, vixl::aarch32::Literal<uint32_t>*, MethodReferenceComparator>; - + ArenaSafeMap<MethodReference, VIXLUInt32Literal*, MethodReferenceComparator>; + using StringToLiteralMap = ArenaSafeMap<StringReference, + VIXLUInt32Literal*, + StringReferenceValueComparator>; + using TypeToLiteralMap = ArenaSafeMap<TypeReference, + VIXLUInt32Literal*, + TypeReferenceValueComparator>; + + VIXLUInt32Literal* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map); + VIXLUInt32Literal* DeduplicateMethodLiteral(MethodReference target_method, + MethodToLiteralMap* map); PcRelativePatchInfo* NewPcRelativePatch(const DexFile& dex_file, uint32_t offset_or_index, ArenaDeque<PcRelativePatchInfo>* patches); @@ -697,15 +721,25 @@ class CodeGeneratorARMVIXL : public CodeGenerator { ArmVIXLAssembler assembler_; const ArmInstructionSetFeatures& isa_features_; - // Relative call patch info. - // Using ArenaDeque<> which retains element addresses on push/emplace_back(). - ArenaDeque<PatchInfo<vixl::aarch32::Label>> relative_call_patches_; + // Deduplication map for 32-bit literals, used for non-patchable boot image addresses. + Uint32ToLiteralMap uint32_literals_; // PC-relative patch info for each HArmDexCacheArraysBase. ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_; + // Deduplication map for boot string literals for kBootImageLinkTimeAddress. + StringToLiteralMap boot_image_string_patches_; // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC). 
ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; + // Deduplication map for boot type literals for kBootImageLinkTimeAddress. + TypeToLiteralMap boot_image_type_patches_; // PC-relative type patch info. ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_; + // Deduplication map for patchable boot image addresses. + Uint32ToLiteralMap boot_image_address_patches_; + + // Patches for string literals in JIT compiled code. + StringToLiteralMap jit_string_patches_; + // Patches for class literals in JIT compiled code. + TypeToLiteralMap jit_class_patches_; DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARMVIXL); }; diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 456c5c6a92..9af03e8153 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -458,10 +458,6 @@ CodeGeneratorMIPS::CodeGeneratorMIPS(HGraph* graph, isa_features_(isa_features), uint32_literals_(std::less<uint32_t>(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - method_patches_(MethodReferenceComparator(), - graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - call_patches_(MethodReferenceComparator(), - graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), boot_image_string_patches_(StringReferenceValueComparator(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), @@ -1008,8 +1004,6 @@ inline void CodeGeneratorMIPS::EmitPcRelativeLinkerPatches( void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = - method_patches_.size() + - call_patches_.size() + pc_relative_dex_cache_patches_.size() + pc_relative_string_patches_.size() + pc_relative_type_patches_.size() + @@ -1017,24 +1011,6 @@ void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patch boot_image_type_patches_.size() + boot_image_address_patches_.size(); linker_patches->reserve(size); - for (const auto& entry : method_patches_) { - const MethodReference& target_method = entry.first; - Literal* literal = entry.second; - DCHECK(literal->GetLabel()->IsBound()); - uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel()); - linker_patches->push_back(LinkerPatch::MethodPatch(literal_offset, - target_method.dex_file, - target_method.dex_method_index)); - } - for (const auto& entry : call_patches_) { - const MethodReference& target_method = entry.first; - Literal* literal = entry.second; - DCHECK(literal->GetLabel()->IsBound()); - uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel()); - linker_patches->push_back(LinkerPatch::CodePatch(literal_offset, - target_method.dex_file, - target_method.dex_method_index)); - } EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_, linker_patches); if (!GetCompilerOptions().IsBootImage()) { @@ -1107,14 +1083,6 @@ Literal* CodeGeneratorMIPS::DeduplicateMethodLiteral(MethodReference target_meth [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); }); } -Literal* CodeGeneratorMIPS::DeduplicateMethodAddressLiteral(MethodReference target_method) { - return DeduplicateMethodLiteral(target_method, &method_patches_); -} - -Literal* CodeGeneratorMIPS::DeduplicateMethodCodeLiteral(MethodReference target_method) { - return DeduplicateMethodLiteral(target_method, &call_patches_); -} - Literal* CodeGeneratorMIPS::DeduplicateBootImageStringLiteral(const DexFile& dex_file, 
dex::StringIndex string_index) { return boot_image_string_patches_.GetOrCreate( @@ -1165,11 +1133,15 @@ void CodeGeneratorMIPS::EmitPcRelativeAddressPlaceholder( __ SetReorder(reordering); } -void CodeGeneratorMIPS::MarkGCCard(Register object, Register value) { +void CodeGeneratorMIPS::MarkGCCard(Register object, + Register value, + bool value_can_be_null) { MipsLabel done; Register card = AT; Register temp = TMP; - __ Beqz(value, &done); + if (value_can_be_null) { + __ Beqz(value, &done); + } __ LoadFromOffset(kLoadWord, card, TR, @@ -1177,7 +1149,9 @@ void CodeGeneratorMIPS::MarkGCCard(Register object, Register value) { __ Srl(temp, object, gc::accounting::CardTable::kCardShift); __ Addu(temp, card, temp); __ Sb(card, temp, 0); - __ Bind(&done); + if (value_can_be_null) { + __ Bind(&done); + } } void CodeGeneratorMIPS::SetupBlockedRegisters() const { @@ -2096,7 +2070,7 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) { __ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker); if (needs_write_barrier) { DCHECK_EQ(value_type, Primitive::kPrimNot); - codegen_->MarkGCCard(obj, value); + codegen_->MarkGCCard(obj, value, instruction->GetValueCanBeNull()); } } } else { @@ -4900,7 +4874,8 @@ void LocationsBuilderMIPS::HandleFieldSet(HInstruction* instruction, const Field void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, - uint32_t dex_pc) { + uint32_t dex_pc, + bool value_can_be_null) { Primitive::Type type = field_info.GetFieldType(); LocationSummary* locations = instruction->GetLocations(); Register obj = locations->InAt(0).AsRegister<Register>(); @@ -4995,7 +4970,7 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction, // TODO: memory barriers? if (CodeGenerator::StoreNeedsWriteBarrier(type, instruction->InputAt(1))) { Register src = value_location.AsRegister<Register>(); - codegen_->MarkGCCard(obj, src); + codegen_->MarkGCCard(obj, src, value_can_be_null); } if (is_volatile) { @@ -5016,7 +4991,10 @@ void LocationsBuilderMIPS::VisitInstanceFieldSet(HInstanceFieldSet* instruction) } void InstructionCodeGeneratorMIPS::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { - HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetDexPc()); + HandleFieldSet(instruction, + instruction->GetFieldInfo(), + instruction->GetDexPc(), + instruction->GetValueCanBeNull()); } void InstructionCodeGeneratorMIPS::GenerateGcRootFieldLoad( @@ -5157,22 +5135,7 @@ void LocationsBuilderMIPS::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invo // art::PrepareForRegisterAllocation. DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); - HInvokeStaticOrDirect::MethodLoadKind method_load_kind = invoke->GetMethodLoadKind(); - HInvokeStaticOrDirect::CodePtrLocation code_ptr_location = invoke->GetCodePtrLocation(); - bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); - - // kDirectAddressWithFixup and kCallDirectWithFixup need no extra input on R6 because - // R6 has PC-relative addressing. - bool has_extra_input = !isR6 && - ((method_load_kind == HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup) || - (code_ptr_location == HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup)); - - if (invoke->HasPcRelativeDexCache()) { - // kDexCachePcRelative is mutually exclusive with - // kDirectAddressWithFixup/kCallDirectWithFixup. 
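The MarkGCCard signature change earlier in this hunk lets the MIPS code generator skip the null guard (the Beqz/Bind pair) when the compiler has proven the stored reference non-null. What the emitted fast path does, expressed as plain C++ (a sketch with an assumed card shift, not the ART runtime code):

    #include <cstdint>

    void MarkGCCardSketch(uint8_t* biased_card_table,  // what the emitted code loads via TR
                          uintptr_t object,
                          const void* value,
                          bool value_can_be_null) {
      if (value_can_be_null && value == nullptr) {
        return;  // Beqz(value, &done): nothing was stored, no card to dirty.
      }
      constexpr unsigned kCardShift = 10;  // assumed here; stands in for CardTable::kCardShift
      uint8_t* card = biased_card_table + (object >> kCardShift);
      // ART's trick: the "dirty" byte value equals the low byte of the biased
      // card table address, which is what Sb(card, temp, 0) stores.
      *card = static_cast<uint8_t>(reinterpret_cast<uintptr_t>(biased_card_table));
    }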
- CHECK(!has_extra_input); - has_extra_input = true; - } + bool has_extra_input = invoke->HasPcRelativeDexCache(); IntrinsicLocationsBuilderMIPS intrinsic(codegen_); if (intrinsic.TryDispatch(invoke)) { @@ -5312,9 +5275,7 @@ HInvokeStaticOrDirect::DispatchInfo CodeGeneratorMIPS::GetSupportedInvokeStaticO // is incompatible with it. bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops(); bool fallback_load = true; - bool fallback_call = true; switch (dispatch_info.method_load_kind) { - case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup: case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: fallback_load = has_irreducible_loops; break; @@ -5322,25 +5283,10 @@ HInvokeStaticOrDirect::DispatchInfo CodeGeneratorMIPS::GetSupportedInvokeStaticO fallback_load = false; break; } - switch (dispatch_info.code_ptr_location) { - case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup: - fallback_call = has_irreducible_loops; - break; - case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: - // TODO: Implement this type. - break; - default: - fallback_call = false; - break; - } if (fallback_load) { dispatch_info.method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod; dispatch_info.method_load_data = 0; } - if (fallback_call) { - dispatch_info.code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod; - dispatch_info.direct_code_ptr = 0; - } return dispatch_info; } @@ -5349,31 +5295,10 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp. HInvokeStaticOrDirect::MethodLoadKind method_load_kind = invoke->GetMethodLoadKind(); HInvokeStaticOrDirect::CodePtrLocation code_ptr_location = invoke->GetCodePtrLocation(); - bool isR6 = isa_features_.IsR6(); - // kDirectAddressWithFixup and kCallDirectWithFixup have no extra input on R6 because - // R6 has PC-relative addressing. - bool has_extra_input = invoke->HasPcRelativeDexCache() || - (!isR6 && - ((method_load_kind == HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup) || - (code_ptr_location == HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup))); - Register base_reg = has_extra_input + Register base_reg = invoke->HasPcRelativeDexCache() ? GetInvokeStaticOrDirectExtraParameter(invoke, temp.AsRegister<Register>()) : ZERO; - // For better instruction scheduling we load the direct code pointer before the method pointer. - switch (code_ptr_location) { - case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect: - // T9 = invoke->GetDirectCodePtr(); - __ LoadConst32(T9, invoke->GetDirectCodePtr()); - break; - case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup: - // T9 = code address from literal pool with link-time patch. 
- __ LoadLiteral(T9, base_reg, DeduplicateMethodCodeLiteral(invoke->GetTargetMethod())); - break; - default: - break; - } - switch (method_load_kind) { case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: { // temp = thread->string_init_entrypoint @@ -5391,11 +5316,6 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ LoadConst32(temp.AsRegister<Register>(), invoke->GetMethodAddress()); break; - case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup: - __ LoadLiteral(temp.AsRegister<Register>(), - base_reg, - DeduplicateMethodAddressLiteral(invoke->GetTargetMethod())); - break; case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: { HMipsDexCacheArraysBase* base = invoke->InputAt(invoke->GetSpecialInputIndex())->AsMipsDexCacheArraysBase(); @@ -5438,18 +5358,6 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf: __ Bal(&frame_entry_label_); break; - case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect: - case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup: - // T9 prepared above for better instruction scheduling. - // T9() - __ Jalr(T9); - __ NopIfNoReordering(); - break; - case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: - // TODO: Implement this type. - // Currently filtered out by GetSupportedInvokeStaticOrDirectDispatch(). - LOG(FATAL) << "Unsupported"; - UNREACHABLE(); case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod: // T9 = callee_method->entry_point_from_quick_compiled_code_; __ LoadFromOffset(kLoadWord, @@ -5689,11 +5597,7 @@ void InstructionCodeGeneratorMIPS::VisitClearException(HClearException* clear AT } void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) { - LocationSummary::CallKind call_kind = (load->NeedsEnvironment() || kEmitCompilerReadBarrier) - ? ((load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) - ? LocationSummary::kCallOnMainOnly - : LocationSummary::kCallOnSlowPath) - : LocationSummary::kNoCall; + LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); HLoadString::LoadKind load_kind = load->GetLoadKind(); switch (load_kind) { @@ -5721,7 +5625,9 @@ void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) { } } -void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) { +// NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not +// move. +void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS { HLoadString::LoadKind load_kind = load->GetLoadKind(); LocationSummary* locations = load->GetLocations(); Location out_loc = locations->Out(); @@ -5743,14 +5649,12 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) { switch (load_kind) { case HLoadString::LoadKind::kBootImageLinkTimeAddress: - DCHECK(!kEmitCompilerReadBarrier); __ LoadLiteral(out, base_or_current_method_reg, codegen_->DeduplicateBootImageStringLiteral(load->GetDexFile(), load->GetStringIndex())); return; // No dex cache slow path. 
case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { - DCHECK(!kEmitCompilerReadBarrier); DCHECK(codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorMIPS::PcRelativePatchInfo* info = codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex().index_); @@ -5758,9 +5662,9 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) { return; // No dex cache slow path. } case HLoadString::LoadKind::kBootImageAddress: { - DCHECK(!kEmitCompilerReadBarrier); - DCHECK_NE(load->GetAddress(), 0u); - uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress()); + uint32_t address = dchecked_integral_cast<uint32_t>( + reinterpret_cast<uintptr_t>(load->GetString().Get())); + DCHECK_NE(address, 0u); __ LoadLiteral(out, base_or_current_method_reg, codegen_->DeduplicateBootImageAddressLiteral(address)); @@ -6284,7 +6188,10 @@ void LocationsBuilderMIPS::VisitStaticFieldSet(HStaticFieldSet* instruction) { } void InstructionCodeGeneratorMIPS::VisitStaticFieldSet(HStaticFieldSet* instruction) { - HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetDexPc()); + HandleFieldSet(instruction, + instruction->GetFieldInfo(), + instruction->GetDexPc(), + instruction->GetValueCanBeNull()); } void LocationsBuilderMIPS::VisitUnresolvedInstanceFieldGet( diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h index f03f29c5d4..7b0812cb7b 100644 --- a/compiler/optimizing/code_generator_mips.h +++ b/compiler/optimizing/code_generator_mips.h @@ -236,7 +236,10 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator { void HandleBinaryOp(HBinaryOperation* operation); void HandleCondition(HCondition* instruction); void HandleShift(HBinaryOperation* operation); - void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, uint32_t dex_pc); + void HandleFieldSet(HInstruction* instruction, + const FieldInfo& field_info, + uint32_t dex_pc, + bool value_can_be_null); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info, uint32_t dex_pc); // Generate a GC root reference load: // @@ -350,7 +353,7 @@ class CodeGeneratorMIPS : public CodeGenerator { // Emit linker patches. void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE; - void MarkGCCard(Register object, Register value); + void MarkGCCard(Register object, Register value, bool value_can_be_null); // Register allocation. @@ -474,8 +477,6 @@ class CodeGeneratorMIPS : public CodeGenerator { Literal* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map); Literal* DeduplicateMethodLiteral(MethodReference target_method, MethodToLiteralMap* map); - Literal* DeduplicateMethodAddressLiteral(MethodReference target_method); - Literal* DeduplicateMethodCodeLiteral(MethodReference target_method); PcRelativePatchInfo* NewPcRelativePatch(const DexFile& dex_file, uint32_t offset_or_index, ArenaDeque<PcRelativePatchInfo>* patches); @@ -495,9 +496,6 @@ class CodeGeneratorMIPS : public CodeGenerator { // Deduplication map for 32-bit literals, used for non-patchable boot image addresses. Uint32ToLiteralMap uint32_literals_; - // Method patch info, map MethodReference to a literal for method address and method code. - MethodToLiteralMap method_patches_; - MethodToLiteralMap call_patches_; // PC-relative patch info for each HMipsDexCacheArraysBase. ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_; // Deduplication map for boot string literals for kBootImageLinkTimeAddress. 
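The kBootImageAddress case above now takes the address straight from the String handle and narrows it with dchecked_integral_cast; the cast is only valid because boot-image objects are expected to sit below 4 GiB. A tiny sketch of that checked narrowing (the helper name is made up for illustration):

    #include <cassert>
    #include <cstdint>

    inline uint32_t CheckedNarrowToU32(const void* boot_image_object) {
      uintptr_t address = reinterpret_cast<uintptr_t>(boot_image_object);
      assert(address <= UINT32_MAX);  // boot-image objects assumed mapped in the low 4 GiB
      assert(address != 0u);          // mirrors the DCHECK_NE(address, 0u) above
      return static_cast<uint32_t>(address);
    }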
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 44d3759978..046d59cee7 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -91,8 +91,6 @@ Location InvokeDexCallingConventionVisitorMIPS64::GetNextLocation(Primitive::Typ // Space on the stack is reserved for all arguments. stack_index_ += Primitive::Is64BitType(type) ? 2 : 1; - // TODO: review - // TODO: shouldn't we use a whole machine word per argument on the stack? // Implicit 4-byte method pointer (and such) will cause misalignment. @@ -235,6 +233,7 @@ class LoadStringSlowPathMIPS64 : public SlowPathCodeMIPS64 { SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; + HLoadString* load = instruction_->AsLoadString(); const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex().index_; __ LoadConst32(calling_convention.GetRegisterAt(0), string_index); mips64_codegen->InvokeRuntime(kQuickResolveString, @@ -248,6 +247,17 @@ class LoadStringSlowPathMIPS64 : public SlowPathCodeMIPS64 { type); RestoreLiveRegisters(codegen, locations); + + // Store the resolved String to the BSS entry. + // TODO: Change art_quick_resolve_string to kSaveEverything and use a temporary for the + // .bss entry address in the fast path, so that we can avoid another calculation here. + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + DCHECK_NE(out, AT); + CodeGeneratorMIPS64::PcRelativePatchInfo* info = + mips64_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index); + mips64_codegen->EmitPcRelativeAddressPlaceholderHigh(info, AT); + __ Sw(out, AT, /* placeholder */ 0x5678); + __ Bc(GetExitLabel()); } @@ -401,14 +411,19 @@ CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph, move_resolver_(graph->GetArena(), this), assembler_(graph->GetArena()), isa_features_(isa_features), + uint32_literals_(std::less<uint32_t>(), + graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), uint64_literals_(std::less<uint64_t>(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - method_patches_(MethodReferenceComparator(), - graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - call_patches_(MethodReferenceComparator(), - graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { + boot_image_string_patches_(StringReferenceValueComparator(), + graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + boot_image_type_patches_(TypeReferenceValueComparator(), + graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + boot_image_address_patches_(std::less<uint32_t>(), + graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { // Save RA (containing the return address) to mimic Quick. 
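The new tail of LoadStringSlowPathMIPS64 above stores the runtime-resolved String back into its .bss entry (recomputed via the auipc/Sw placeholder pair), so the next execution of the same HLoadString finds a non-null entry and never re-enters the slow path. The shape of that caching scheme, as a sketch rather than the emitted MIPS code:

    // Sketch: the .bss slot acts as a one-entry cache filled by the slow path.
    using ResolveFn = void* (*)(unsigned string_index);

    void* LoadStringWithBssCache(void** bss_slot, unsigned string_index, ResolveFn resolve) {
      void* str = *bss_slot;              // fast path: Lwu(out, AT, placeholder)
      if (str == nullptr) {               // Beqzc(out, slow path entry)
        str = resolve(string_index);      // runtime call (kQuickResolveString)
        *bss_slot = str;                  // new: Sw(out, AT, placeholder) in the slow path
      }
      return str;
    }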
AddAllocatedRegister(Location::RegisterLocation(RA)); } @@ -904,57 +919,78 @@ inline void CodeGeneratorMIPS64::EmitPcRelativeLinkerPatches( void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = - method_patches_.size() + - call_patches_.size() + pc_relative_dex_cache_patches_.size() + - relative_call_patches_.size(); + pc_relative_string_patches_.size() + + pc_relative_type_patches_.size() + + boot_image_string_patches_.size() + + boot_image_type_patches_.size() + + boot_image_address_patches_.size(); linker_patches->reserve(size); - for (const auto& entry : method_patches_) { - const MethodReference& target_method = entry.first; + EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_, + linker_patches); + if (!GetCompilerOptions().IsBootImage()) { + EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, + linker_patches); + } else { + EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_, + linker_patches); + } + EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_, + linker_patches); + for (const auto& entry : boot_image_string_patches_) { + const StringReference& target_string = entry.first; Literal* literal = entry.second; DCHECK(literal->GetLabel()->IsBound()); uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel()); - linker_patches->push_back(LinkerPatch::MethodPatch(literal_offset, - target_method.dex_file, - target_method.dex_method_index)); + linker_patches->push_back(LinkerPatch::StringPatch(literal_offset, + target_string.dex_file, + target_string.string_index.index_)); } - for (const auto& entry : call_patches_) { - const MethodReference& target_method = entry.first; + for (const auto& entry : boot_image_type_patches_) { + const TypeReference& target_type = entry.first; Literal* literal = entry.second; DCHECK(literal->GetLabel()->IsBound()); uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel()); - linker_patches->push_back(LinkerPatch::CodePatch(literal_offset, - target_method.dex_file, - target_method.dex_method_index)); + linker_patches->push_back(LinkerPatch::TypePatch(literal_offset, + target_type.dex_file, + target_type.type_index.index_)); } - EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_, - linker_patches); - for (const PcRelativePatchInfo& info : relative_call_patches_) { - const DexFile& dex_file = info.target_dex_file; - uint32_t method_index = info.offset_or_index; - DCHECK(info.pc_rel_label.IsBound()); - uint32_t pc_rel_offset = __ GetLabelLocation(&info.pc_rel_label); - linker_patches->push_back( - LinkerPatch::RelativeCodePatch(pc_rel_offset, &dex_file, method_index)); + for (const auto& entry : boot_image_address_patches_) { + DCHECK(GetCompilerOptions().GetIncludePatchInformation()); + Literal* literal = entry.second; + DCHECK(literal->GetLabel()->IsBound()); + uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel()); + linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset)); } } +CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeStringPatch( + const DexFile& dex_file, uint32_t string_index) { + return NewPcRelativePatch(dex_file, string_index, &pc_relative_string_patches_); +} + +CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeTypePatch( + const DexFile& dex_file, dex::TypeIndex type_index) { + 
return NewPcRelativePatch(dex_file, type_index.index_, &pc_relative_type_patches_); +} + CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeDexCacheArrayPatch( const DexFile& dex_file, uint32_t element_offset) { return NewPcRelativePatch(dex_file, element_offset, &pc_relative_dex_cache_patches_); } -CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeCallPatch( - const DexFile& dex_file, uint32_t method_index) { - return NewPcRelativePatch(dex_file, method_index, &relative_call_patches_); -} - CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativePatch( const DexFile& dex_file, uint32_t offset_or_index, ArenaDeque<PcRelativePatchInfo>* patches) { patches->emplace_back(dex_file, offset_or_index); return &patches->back(); } +Literal* CodeGeneratorMIPS64::DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map) { + return map->GetOrCreate( + value, + [this, value]() { return __ NewLiteral<uint32_t>(value); }); +} + Literal* CodeGeneratorMIPS64::DeduplicateUint64Literal(uint64_t value) { return uint64_literals_.GetOrCreate( value, @@ -968,12 +1004,24 @@ Literal* CodeGeneratorMIPS64::DeduplicateMethodLiteral(MethodReference target_me [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); }); } -Literal* CodeGeneratorMIPS64::DeduplicateMethodAddressLiteral(MethodReference target_method) { - return DeduplicateMethodLiteral(target_method, &method_patches_); +Literal* CodeGeneratorMIPS64::DeduplicateBootImageStringLiteral(const DexFile& dex_file, + dex::StringIndex string_index) { + return boot_image_string_patches_.GetOrCreate( + StringReference(&dex_file, string_index), + [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); }); } -Literal* CodeGeneratorMIPS64::DeduplicateMethodCodeLiteral(MethodReference target_method) { - return DeduplicateMethodLiteral(target_method, &call_patches_); +Literal* CodeGeneratorMIPS64::DeduplicateBootImageTypeLiteral(const DexFile& dex_file, + dex::TypeIndex type_index) { + return boot_image_type_patches_.GetOrCreate( + TypeReference(&dex_file, type_index), + [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); }); +} + +Literal* CodeGeneratorMIPS64::DeduplicateBootImageAddressLiteral(uint64_t address) { + bool needs_patch = GetCompilerOptions().GetIncludePatchInformation(); + Uint32ToLiteralMap* map = needs_patch ? &boot_image_address_patches_ : &uint32_literals_; + return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), map); } void CodeGeneratorMIPS64::EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info, @@ -982,7 +1030,7 @@ void CodeGeneratorMIPS64::EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchIn // Add the high half of a 32-bit offset to PC. __ Auipc(out, /* placeholder */ 0x1234); // The immediately following instruction will add the sign-extended low half of the 32-bit - // offset to `out` (e.g. ld, jialc, addiu). + // offset to `out` (e.g. ld, jialc, daddiu). } void CodeGeneratorMIPS64::SetupBlockedRegisters() const { @@ -1008,8 +1056,6 @@ void CodeGeneratorMIPS64::SetupBlockedRegisters() const { // Reserve T9 for function calls blocked_core_registers_[T9] = true; - // TODO: review; anything else? - if (GetGraph()->IsDebuggable()) { // Stubs do not save callee-save floating point registers. If the graph // is debuggable, we need to deal with these registers differently. 
For @@ -1883,9 +1929,6 @@ void InstructionCodeGeneratorMIPS64::HandleCondition(HCondition* instruction) { Primitive::Type type = instruction->InputAt(0)->GetType(); LocationSummary* locations = instruction->GetLocations(); - GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); - Mips64Label true_label; - switch (type) { default: // Integer case. @@ -1894,29 +1937,11 @@ void InstructionCodeGeneratorMIPS64::HandleCondition(HCondition* instruction) { case Primitive::kPrimLong: GenerateIntLongCompare(instruction->GetCondition(), /* is64bit */ true, locations); return; - case Primitive::kPrimFloat: case Primitive::kPrimDouble: - // TODO: don't use branches. - GenerateFpCompareAndBranch(instruction->GetCondition(), - instruction->IsGtBias(), - type, - locations, - &true_label); - break; + GenerateFpCompare(instruction->GetCondition(), instruction->IsGtBias(), type, locations); + return; } - - // Convert the branches into the result. - Mips64Label done; - - // False case: result = 0. - __ LoadConst32(dst, 0); - __ Bc(&done); - - // True case: result = 1. - __ Bind(&true_label); - __ LoadConst32(dst, 1); - __ Bind(&done); } void InstructionCodeGeneratorMIPS64::DivRemOneOrMinusOne(HBinaryOperation* instruction) { @@ -2331,19 +2356,40 @@ void InstructionCodeGeneratorMIPS64::GenerateIntLongCompare(IfCondition cond, switch (cond) { case kCondEQ: case kCondNE: - if (use_imm && IsUint<16>(rhs_imm)) { - __ Xori(dst, lhs, rhs_imm); - } else { - if (use_imm) { - rhs_reg = TMP; - __ LoadConst64(rhs_reg, rhs_imm); + if (use_imm && IsInt<16>(-rhs_imm)) { + if (rhs_imm == 0) { + if (cond == kCondEQ) { + __ Sltiu(dst, lhs, 1); + } else { + __ Sltu(dst, ZERO, lhs); + } + } else { + if (is64bit) { + __ Daddiu(dst, lhs, -rhs_imm); + } else { + __ Addiu(dst, lhs, -rhs_imm); + } + if (cond == kCondEQ) { + __ Sltiu(dst, dst, 1); + } else { + __ Sltu(dst, ZERO, dst); + } } - __ Xor(dst, lhs, rhs_reg); - } - if (cond == kCondEQ) { - __ Sltiu(dst, dst, 1); } else { - __ Sltu(dst, ZERO, dst); + if (use_imm && IsUint<16>(rhs_imm)) { + __ Xori(dst, lhs, rhs_imm); + } else { + if (use_imm) { + rhs_reg = TMP; + __ LoadConst64(rhs_reg, rhs_imm); + } + __ Xor(dst, lhs, rhs_reg); + } + if (cond == kCondEQ) { + __ Sltiu(dst, dst, 1); + } else { + __ Sltu(dst, ZERO, dst); + } } break; @@ -2530,6 +2576,121 @@ void InstructionCodeGeneratorMIPS64::GenerateIntLongCompareAndBranch(IfCondition } } +void InstructionCodeGeneratorMIPS64::GenerateFpCompare(IfCondition cond, + bool gt_bias, + Primitive::Type type, + LocationSummary* locations) { + GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); + FpuRegister lhs = locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister rhs = locations->InAt(1).AsFpuRegister<FpuRegister>(); + if (type == Primitive::kPrimFloat) { + switch (cond) { + case kCondEQ: + __ CmpEqS(FTMP, lhs, rhs); + __ Mfc1(dst, FTMP); + __ Andi(dst, dst, 1); + break; + case kCondNE: + __ CmpEqS(FTMP, lhs, rhs); + __ Mfc1(dst, FTMP); + __ Addiu(dst, dst, 1); + break; + case kCondLT: + if (gt_bias) { + __ CmpLtS(FTMP, lhs, rhs); + } else { + __ CmpUltS(FTMP, lhs, rhs); + } + __ Mfc1(dst, FTMP); + __ Andi(dst, dst, 1); + break; + case kCondLE: + if (gt_bias) { + __ CmpLeS(FTMP, lhs, rhs); + } else { + __ CmpUleS(FTMP, lhs, rhs); + } + __ Mfc1(dst, FTMP); + __ Andi(dst, dst, 1); + break; + case kCondGT: + if (gt_bias) { + __ CmpUltS(FTMP, rhs, lhs); + } else { + __ CmpLtS(FTMP, rhs, lhs); + } + __ Mfc1(dst, FTMP); + __ Andi(dst, dst, 1); + break; + case kCondGE: + if (gt_bias) { + __ CmpUleS(FTMP, rhs, 
lhs); + } else { + __ CmpLeS(FTMP, rhs, lhs); + } + __ Mfc1(dst, FTMP); + __ Andi(dst, dst, 1); + break; + default: + LOG(FATAL) << "Unexpected non-floating-point condition " << cond; + UNREACHABLE(); + } + } else { + DCHECK_EQ(type, Primitive::kPrimDouble); + switch (cond) { + case kCondEQ: + __ CmpEqD(FTMP, lhs, rhs); + __ Mfc1(dst, FTMP); + __ Andi(dst, dst, 1); + break; + case kCondNE: + __ CmpEqD(FTMP, lhs, rhs); + __ Mfc1(dst, FTMP); + __ Addiu(dst, dst, 1); + break; + case kCondLT: + if (gt_bias) { + __ CmpLtD(FTMP, lhs, rhs); + } else { + __ CmpUltD(FTMP, lhs, rhs); + } + __ Mfc1(dst, FTMP); + __ Andi(dst, dst, 1); + break; + case kCondLE: + if (gt_bias) { + __ CmpLeD(FTMP, lhs, rhs); + } else { + __ CmpUleD(FTMP, lhs, rhs); + } + __ Mfc1(dst, FTMP); + __ Andi(dst, dst, 1); + break; + case kCondGT: + if (gt_bias) { + __ CmpUltD(FTMP, rhs, lhs); + } else { + __ CmpLtD(FTMP, rhs, lhs); + } + __ Mfc1(dst, FTMP); + __ Andi(dst, dst, 1); + break; + case kCondGE: + if (gt_bias) { + __ CmpUleD(FTMP, rhs, lhs); + } else { + __ CmpLeD(FTMP, rhs, lhs); + } + __ Mfc1(dst, FTMP); + __ Andi(dst, dst, 1); + break; + default: + LOG(FATAL) << "Unexpected non-floating-point condition " << cond; + UNREACHABLE(); + } + } +} + void InstructionCodeGeneratorMIPS64::GenerateFpCompareAndBranch(IfCondition cond, bool gt_bias, Primitive::Type type, @@ -2929,6 +3090,31 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceFieldSet(HInstanceFieldSet* in HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); } +void InstructionCodeGeneratorMIPS64::GenerateGcRootFieldLoad( + HInstruction* instruction ATTRIBUTE_UNUSED, + Location root, + GpuRegister obj, + uint32_t offset) { + // When handling HLoadClass::LoadKind::kDexCachePcRelative, the caller calls + // EmitPcRelativeAddressPlaceholderHigh() and then GenerateGcRootFieldLoad(). + // The relative patcher expects the two methods to emit the following patchable + // sequence of instructions in this case: + // auipc reg1, 0x1234 // 0x1234 is a placeholder for offset_high. + // lwu reg2, 0x5678(reg1) // 0x5678 is a placeholder for offset_low. + // TODO: Adjust GenerateGcRootFieldLoad() and its caller when this method is + // extended (e.g. for read barriers) so as not to break the relative patcher. + GpuRegister root_reg = root.AsRegister<GpuRegister>(); + if (kEmitCompilerReadBarrier) { + UNIMPLEMENTED(FATAL) << "for read barrier"; + } else { + // Plain GC root load with no read barrier. + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + __ LoadFromOffset(kLoadUnsignedWord, root_reg, obj, offset); + // Note that GC roots are not affected by heap poisoning, thus we + // do not have to unpoison `root_reg` here. + } +} + void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary::CallKind call_kind = instruction->IsExactCheck() ? LocationSummary::kNoCall : LocationSummary::kCallOnSlowPath; @@ -3080,16 +3266,69 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorMIPS64* codeg } HLoadString::LoadKind CodeGeneratorMIPS64::GetSupportedLoadStringKind( - HLoadString::LoadKind desired_string_load_kind ATTRIBUTE_UNUSED) { - // TODO: Implement other kinds. 
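The GenerateFpCompare helper introduced above produces the boolean without branching: MIPS R6 CMP.cond.fmt writes an all-ones or all-zero mask into FTMP, Mfc1 moves it to the GPR, and then Andi keeps bit 0 for most conditions while Addiu dst, dst, 1 inverts it for kCondNE. A small check of that bit trick (a sketch in plain C++):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t kCmpTrue = 0xFFFFFFFFu;   // CMP.EQ.S result when the operands compare equal
      const uint32_t kCmpFalse = 0u;           // ... and when they do not

      // kCondEQ and friends: Andi dst, dst, 1.
      assert((kCmpTrue & 1u) == 1u);
      assert((kCmpFalse & 1u) == 0u);

      // kCondNE: Addiu dst, dst, 1 turns -1 into 0 and 0 into 1 (mod 2^32).
      assert(kCmpTrue + 1u == 0u);
      assert(kCmpFalse + 1u == 1u);
      return 0;
    }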
- return HLoadString::LoadKind::kDexCacheViaMethod; + HLoadString::LoadKind desired_string_load_kind) { + if (kEmitCompilerReadBarrier) { + UNIMPLEMENTED(FATAL) << "for read barrier"; + } + bool fallback_load = false; + switch (desired_string_load_kind) { + case HLoadString::LoadKind::kBootImageLinkTimeAddress: + DCHECK(!GetCompilerOptions().GetCompilePic()); + break; + case HLoadString::LoadKind::kBootImageLinkTimePcRelative: + DCHECK(GetCompilerOptions().GetCompilePic()); + break; + case HLoadString::LoadKind::kBootImageAddress: + break; + case HLoadString::LoadKind::kBssEntry: + DCHECK(!Runtime::Current()->UseJitCompilation()); + break; + case HLoadString::LoadKind::kDexCacheViaMethod: + break; + case HLoadString::LoadKind::kJitTableAddress: + DCHECK(Runtime::Current()->UseJitCompilation()); + // TODO: implement. + fallback_load = true; + break; + } + if (fallback_load) { + desired_string_load_kind = HLoadString::LoadKind::kDexCacheViaMethod; + } + return desired_string_load_kind; } HLoadClass::LoadKind CodeGeneratorMIPS64::GetSupportedLoadClassKind( HLoadClass::LoadKind desired_class_load_kind) { - DCHECK_NE(desired_class_load_kind, HLoadClass::LoadKind::kReferrersClass); - // TODO: Implement other kinds. - return HLoadClass::LoadKind::kDexCacheViaMethod; + if (kEmitCompilerReadBarrier) { + UNIMPLEMENTED(FATAL) << "for read barrier"; + } + bool fallback_load = false; + switch (desired_class_load_kind) { + case HLoadClass::LoadKind::kReferrersClass: + break; + case HLoadClass::LoadKind::kBootImageLinkTimeAddress: + DCHECK(!GetCompilerOptions().GetCompilePic()); + break; + case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: + DCHECK(GetCompilerOptions().GetCompilePic()); + break; + case HLoadClass::LoadKind::kBootImageAddress: + break; + case HLoadClass::LoadKind::kJitTableAddress: + DCHECK(Runtime::Current()->UseJitCompilation()); + // TODO: implement. + fallback_load = true; + break; + case HLoadClass::LoadKind::kDexCachePcRelative: + DCHECK(!Runtime::Current()->UseJitCompilation()); + break; + case HLoadClass::LoadKind::kDexCacheViaMethod: + break; + } + if (fallback_load) { + desired_class_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod; + } + return desired_class_load_kind; } HInvokeStaticOrDirect::DispatchInfo CodeGeneratorMIPS64::GetSupportedInvokeStaticOrDirectDispatch( @@ -3105,22 +3344,6 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo HInvokeStaticOrDirect::MethodLoadKind method_load_kind = invoke->GetMethodLoadKind(); HInvokeStaticOrDirect::CodePtrLocation code_ptr_location = invoke->GetCodePtrLocation(); - // For better instruction scheduling we load the direct code pointer before the method pointer. - switch (code_ptr_location) { - case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect: - // T9 = invoke->GetDirectCodePtr(); - __ LoadLiteral(T9, kLoadDoubleword, DeduplicateUint64Literal(invoke->GetDirectCodePtr())); - break; - case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup: - // T9 = code address from literal pool with link-time patch. 
- __ LoadLiteral(T9, - kLoadUnsignedWord, - DeduplicateMethodCodeLiteral(invoke->GetTargetMethod())); - break; - default: - break; - } - switch (method_load_kind) { case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: { // temp = thread->string_init_entrypoint @@ -3140,11 +3363,6 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo kLoadDoubleword, DeduplicateUint64Literal(invoke->GetMethodAddress())); break; - case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup: - __ LoadLiteral(temp.AsRegister<GpuRegister>(), - kLoadUnsignedWord, - DeduplicateMethodAddressLiteral(invoke->GetTargetMethod())); - break; case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: { uint32_t offset = invoke->GetDexCacheArrayOffset(); CodeGeneratorMIPS64::PcRelativePatchInfo* info = @@ -3187,21 +3405,6 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf: __ Balc(&frame_entry_label_); break; - case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect: - case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup: - // T9 prepared above for better instruction scheduling. - // T9() - __ Jalr(T9); - __ Nop(); - break; - case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: { - CodeGeneratorMIPS64::PcRelativePatchInfo* info = - NewPcRelativeCallPatch(*invoke->GetTargetMethod().dex_file, - invoke->GetTargetMethod().dex_method_index); - EmitPcRelativeAddressPlaceholderHigh(info, AT); - __ Jialc(AT, /* placeholder */ 0x5678); - break; - } case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod: // T9 = callee_method->entry_point_from_quick_compiled_code_; __ LoadFromOffset(kLoadDoubleword, @@ -3271,11 +3474,26 @@ void InstructionCodeGeneratorMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke) } void LocationsBuilderMIPS64::VisitLoadClass(HLoadClass* cls) { - InvokeRuntimeCallingConvention calling_convention; - CodeGenerator::CreateLoadClassLocationSummary( - cls, - Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - calling_convention.GetReturnLocation(cls->GetType())); + if (cls->NeedsAccessCheck()) { + InvokeRuntimeCallingConvention calling_convention; + CodeGenerator::CreateLoadClassLocationSummary( + cls, + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + calling_convention.GetReturnLocation(Primitive::kPrimNot), + /* code_generator_supports_read_barrier */ false); + return; + } + + LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier) + ? 
LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall; + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind); + HLoadClass::LoadKind load_kind = cls->GetLoadKind(); + if (load_kind == HLoadClass::LoadKind::kReferrersClass || + load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) { + locations->SetInAt(0, Location::RequiresRegister()); + } + locations->SetOut(Location::RequiresRegister()); } void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) { @@ -3287,35 +3505,90 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) { return; } - GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - GpuRegister current_method = locations->InAt(0).AsRegister<GpuRegister>(); - if (cls->IsReferrersClass()) { - DCHECK(!cls->CanCallRuntime()); - DCHECK(!cls->MustGenerateClinitCheck()); - __ LoadFromOffset(kLoadUnsignedWord, out, current_method, - ArtMethod::DeclaringClassOffset().Int32Value()); - } else { - __ LoadFromOffset(kLoadDoubleword, out, current_method, - ArtMethod::DexCacheResolvedTypesOffset(kMips64PointerSize).Int32Value()); - __ LoadFromOffset( - kLoadUnsignedWord, out, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex().index_)); - // TODO: We will need a read barrier here. - if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) { - DCHECK(cls->CanCallRuntime()); - SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS64( - cls, - cls, - cls->GetDexPc(), - cls->MustGenerateClinitCheck()); - codegen_->AddSlowPath(slow_path); - if (!cls->IsInDexCache()) { - __ Beqzc(out, slow_path->GetEntryLabel()); - } - if (cls->MustGenerateClinitCheck()) { - GenerateClassInitializationCheck(slow_path, out); - } else { - __ Bind(slow_path->GetExitLabel()); - } + HLoadClass::LoadKind load_kind = cls->GetLoadKind(); + Location out_loc = locations->Out(); + GpuRegister out = out_loc.AsRegister<GpuRegister>(); + GpuRegister current_method_reg = ZERO; + if (load_kind == HLoadClass::LoadKind::kReferrersClass || + load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) { + current_method_reg = locations->InAt(0).AsRegister<GpuRegister>(); + } + + bool generate_null_check = false; + switch (load_kind) { + case HLoadClass::LoadKind::kReferrersClass: + DCHECK(!cls->CanCallRuntime()); + DCHECK(!cls->MustGenerateClinitCheck()); + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + GenerateGcRootFieldLoad(cls, + out_loc, + current_method_reg, + ArtMethod::DeclaringClassOffset().Int32Value()); + break; + case HLoadClass::LoadKind::kBootImageLinkTimeAddress: + DCHECK(!kEmitCompilerReadBarrier); + __ LoadLiteral(out, + kLoadUnsignedWord, + codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(), + cls->GetTypeIndex())); + break; + case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: { + DCHECK(!kEmitCompilerReadBarrier); + CodeGeneratorMIPS64::PcRelativePatchInfo* info = + codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); + codegen_->EmitPcRelativeAddressPlaceholderHigh(info, AT); + __ Daddiu(out, AT, /* placeholder */ 0x5678); + break; + } + case HLoadClass::LoadKind::kBootImageAddress: { + DCHECK(!kEmitCompilerReadBarrier); + DCHECK_NE(cls->GetAddress(), 0u); + uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress()); + __ LoadLiteral(out, + kLoadUnsignedWord, + codegen_->DeduplicateBootImageAddressLiteral(address)); + break; + } + case HLoadClass::LoadKind::kJitTableAddress: { + LOG(FATAL) << "Unimplemented"; + break; + } + case 
HLoadClass::LoadKind::kDexCachePcRelative: { + uint32_t element_offset = cls->GetDexCacheElementOffset(); + CodeGeneratorMIPS64::PcRelativePatchInfo* info = + codegen_->NewPcRelativeDexCacheArrayPatch(cls->GetDexFile(), element_offset); + codegen_->EmitPcRelativeAddressPlaceholderHigh(info, AT); + // /* GcRoot<mirror::Class> */ out = *address /* PC-relative */ + GenerateGcRootFieldLoad(cls, out_loc, AT, /* placeholder */ 0x5678); + generate_null_check = !cls->IsInDexCache(); + break; + } + case HLoadClass::LoadKind::kDexCacheViaMethod: { + // /* GcRoot<mirror::Class>[] */ out = + // current_method.ptr_sized_fields_->dex_cache_resolved_types_ + __ LoadFromOffset(kLoadDoubleword, + out, + current_method_reg, + ArtMethod::DexCacheResolvedTypesOffset(kMips64PointerSize).Int32Value()); + // /* GcRoot<mirror::Class> */ out = out[type_index] + size_t offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex().index_); + GenerateGcRootFieldLoad(cls, out_loc, out, offset); + generate_null_check = !cls->IsInDexCache(); + } + } + + if (generate_null_check || cls->MustGenerateClinitCheck()) { + DCHECK(cls->CanCallRuntime()); + SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS64( + cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); + codegen_->AddSlowPath(slow_path); + if (generate_null_check) { + __ Beqzc(out, slow_path->GetEntryLabel()); + } + if (cls->MustGenerateClinitCheck()) { + GenerateClassInitializationCheck(slow_path, out); + } else { + __ Bind(slow_path->GetExitLabel()); } } } @@ -3344,20 +3617,71 @@ void InstructionCodeGeneratorMIPS64::VisitClearException(HClearException* clear } void LocationsBuilderMIPS64::VisitLoadString(HLoadString* load) { - LocationSummary::CallKind call_kind = load->NeedsEnvironment() - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall; + HLoadString::LoadKind load_kind = load->GetLoadKind(); + LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister()); + if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) { + InvokeRuntimeCallingConvention calling_convention; + locations->SetOut(calling_convention.GetReturnLocation(load->GetType())); + } else { + locations->SetOut(Location::RequiresRegister()); + } } -void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) { +// NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not +// move. +void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS { + HLoadString::LoadKind load_kind = load->GetLoadKind(); + LocationSummary* locations = load->GetLocations(); + Location out_loc = locations->Out(); + GpuRegister out = out_loc.AsRegister<GpuRegister>(); + + switch (load_kind) { + case HLoadString::LoadKind::kBootImageLinkTimeAddress: + __ LoadLiteral(out, + kLoadUnsignedWord, + codegen_->DeduplicateBootImageStringLiteral(load->GetDexFile(), + load->GetStringIndex())); + return; // No dex cache slow path. 
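The rewritten MIPS64 VisitLoadClass above funnels both reasons for leaving the fast path (a type that may not be resolved yet, and a class that may still need its static initializer) into a single LoadClassSlowPathMIPS64. Condensed into plain C++, the decision looks roughly like this (a sketch of the control flow, not the emitted code):

    // generate_null_check is set by the dex-cache load kinds in the switch above.
    void FinishLoadClassSketch(bool generate_null_check,
                               bool must_generate_clinit_check,
                               const void* resolved_class,
                               bool class_initialized,
                               void (*enter_slow_path)()) {
      if (!generate_null_check && !must_generate_clinit_check) {
        return;  // No slow path is even created.
      }
      if (generate_null_check && resolved_class == nullptr) {
        enter_slow_path();                // Beqzc(out, slow_path->GetEntryLabel())
      } else if (must_generate_clinit_check && !class_initialized) {
        enter_slow_path();                // GenerateClassInitializationCheck(slow_path, out)
      }
    }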
+ case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { + DCHECK(codegen_->GetCompilerOptions().IsBootImage()); + CodeGeneratorMIPS64::PcRelativePatchInfo* info = + codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex().index_); + codegen_->EmitPcRelativeAddressPlaceholderHigh(info, AT); + __ Daddiu(out, AT, /* placeholder */ 0x5678); + return; // No dex cache slow path. + } + case HLoadString::LoadKind::kBootImageAddress: { + uint32_t address = dchecked_integral_cast<uint32_t>( + reinterpret_cast<uintptr_t>(load->GetString().Get())); + DCHECK_NE(address, 0u); + __ LoadLiteral(out, + kLoadUnsignedWord, + codegen_->DeduplicateBootImageAddressLiteral(address)); + return; // No dex cache slow path. + } + case HLoadString::LoadKind::kBssEntry: { + DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + CodeGeneratorMIPS64::PcRelativePatchInfo* info = + codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex().index_); + codegen_->EmitPcRelativeAddressPlaceholderHigh(info, AT); + __ Lwu(out, AT, /* placeholder */ 0x5678); + SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS64(load); + codegen_->AddSlowPath(slow_path); + __ Beqzc(out, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + return; + } + default: + break; + } + // TODO: Re-add the compiler code to do string dex cache lookup again. - SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS64(load); - codegen_->AddSlowPath(slow_path); - __ Bc(slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); + DCHECK(load_kind == HLoadString::LoadKind::kDexCacheViaMethod); + InvokeRuntimeCallingConvention calling_convention; + __ LoadConst32(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_); + codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc()); + CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); } void LocationsBuilderMIPS64::VisitLongConstant(HLongConstant* constant) { @@ -3954,9 +4278,12 @@ void InstructionCodeGeneratorMIPS64::VisitTypeConversion(HTypeConversion* conver break; case Primitive::kPrimInt: case Primitive::kPrimLong: - // Sign-extend 32-bit int into bits 32 through 63 for - // int-to-long and long-to-int conversions - __ Sll(dst, src, 0); + // Sign-extend 32-bit int into bits 32 through 63 for int-to-long and long-to-int + // conversions, except when the input and output registers are the same and we are not + // converting longs to shorter types. In these cases, do nothing. + if ((input_type == Primitive::kPrimLong) || (dst != src)) { + __ Sll(dst, src, 0); + } break; default: @@ -4193,27 +4520,20 @@ void LocationsBuilderMIPS64::VisitPackedSwitch(HPackedSwitch* switch_instr) { locations->SetInAt(0, Location::RequiresRegister()); } -void InstructionCodeGeneratorMIPS64::VisitPackedSwitch(HPackedSwitch* switch_instr) { - int32_t lower_bound = switch_instr->GetStartValue(); - int32_t num_entries = switch_instr->GetNumEntries(); - LocationSummary* locations = switch_instr->GetLocations(); - GpuRegister value_reg = locations->InAt(0).AsRegister<GpuRegister>(); - HBasicBlock* default_block = switch_instr->GetDefaultBlock(); - +void InstructionCodeGeneratorMIPS64::GenPackedSwitchWithCompares(GpuRegister value_reg, + int32_t lower_bound, + uint32_t num_entries, + HBasicBlock* switch_block, + HBasicBlock* default_block) { // Create a set of compare/jumps. 
GpuRegister temp_reg = TMP; - if (IsInt<16>(-lower_bound)) { - __ Addiu(temp_reg, value_reg, -lower_bound); - } else { - __ LoadConst32(AT, -lower_bound); - __ Addu(temp_reg, value_reg, AT); - } + __ Addiu32(temp_reg, value_reg, -lower_bound); // Jump to default if index is negative // Note: We don't check the case that index is positive while value < lower_bound, because in // this case, index >= num_entries must be true. So that we can save one branch instruction. __ Bltzc(temp_reg, codegen_->GetLabelOf(default_block)); - const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors(); + const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors(); // Jump to successors[0] if value == lower_bound. __ Beqzc(temp_reg, codegen_->GetLabelOf(successors[0])); int32_t last_index = 0; @@ -4231,11 +4551,66 @@ void InstructionCodeGeneratorMIPS64::VisitPackedSwitch(HPackedSwitch* switch_ins } // And the default for any other value. - if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) { + if (!codegen_->GoesToNextBlock(switch_block, default_block)) { __ Bc(codegen_->GetLabelOf(default_block)); } } +void InstructionCodeGeneratorMIPS64::GenTableBasedPackedSwitch(GpuRegister value_reg, + int32_t lower_bound, + uint32_t num_entries, + HBasicBlock* switch_block, + HBasicBlock* default_block) { + // Create a jump table. + std::vector<Mips64Label*> labels(num_entries); + const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors(); + for (uint32_t i = 0; i < num_entries; i++) { + labels[i] = codegen_->GetLabelOf(successors[i]); + } + JumpTable* table = __ CreateJumpTable(std::move(labels)); + + // Is the value in range? + __ Addiu32(TMP, value_reg, -lower_bound); + __ LoadConst32(AT, num_entries); + __ Bgeuc(TMP, AT, codegen_->GetLabelOf(default_block)); + + // We are in the range of the table. + // Load the target address from the jump table, indexing by the value. + __ LoadLabelAddress(AT, table->GetLabel()); + __ Sll(TMP, TMP, 2); + __ Daddu(TMP, TMP, AT); + __ Lw(TMP, TMP, 0); + // Compute the absolute target address by adding the table start address + // (the table contains offsets to targets relative to its start). + __ Daddu(TMP, TMP, AT); + // And jump. 
+ __ Jr(TMP); + __ Nop(); +} + +void InstructionCodeGeneratorMIPS64::VisitPackedSwitch(HPackedSwitch* switch_instr) { + int32_t lower_bound = switch_instr->GetStartValue(); + uint32_t num_entries = switch_instr->GetNumEntries(); + LocationSummary* locations = switch_instr->GetLocations(); + GpuRegister value_reg = locations->InAt(0).AsRegister<GpuRegister>(); + HBasicBlock* switch_block = switch_instr->GetBlock(); + HBasicBlock* default_block = switch_instr->GetDefaultBlock(); + + if (num_entries > kPackedSwitchJumpTableThreshold) { + GenTableBasedPackedSwitch(value_reg, + lower_bound, + num_entries, + switch_block, + default_block); + } else { + GenPackedSwitchWithCompares(value_reg, + lower_bound, + num_entries, + switch_block, + default_block); + } +} + void LocationsBuilderMIPS64::VisitClassTableGet(HClassTableGet*) { UNIMPLEMENTED(FATAL) << "ClassTableGet is unimplemented on mips64"; } diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h index 067c1f940f..8ac919f47e 100644 --- a/compiler/optimizing/code_generator_mips64.h +++ b/compiler/optimizing/code_generator_mips64.h @@ -22,6 +22,7 @@ #include "nodes.h" #include "parallel_move_resolver.h" #include "utils/mips64/assembler_mips64.h" +#include "utils/type_reference.h" namespace art { namespace mips64 { @@ -216,6 +217,14 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator { Mips64Assembler* GetAssembler() const { return assembler_; } + // Compare-and-jump packed switch generates approx. 3 + 2.5 * N 32-bit + // instructions for N cases. + // Table-based packed switch generates approx. 11 32-bit instructions + // and N 32-bit data words for N cases. + // At N = 6 they come out as 18 and 17 32-bit words respectively. + // We switch to the table-based method starting with 7 cases. + static constexpr uint32_t kPackedSwitchJumpTableThreshold = 6; + private: void GenerateClassInitializationCheck(SlowPathCodeMIPS64* slow_path, GpuRegister class_reg); void GenerateMemoryBarrier(MemBarrierKind kind); @@ -227,6 +236,15 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator { const FieldInfo& field_info, bool value_can_be_null); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + // Generate a GC root reference load: + // + // root <- *(obj + offset) + // + // while honoring read barriers (if any). 
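Per the threshold comment added to the header above (roughly 3 + 2.5*N words for the compare chain versus 11 + N words for a table, i.e. 18 versus 17 at N = 6), VisitPackedSwitch picks the table form once it is clearly smaller. The table path reduces to the following dispatch shape; this is a hedged C++ model of the control flow only, not the emitted MIPS64 sequence:

#include <cstdint>

// Model of the table-based packed switch: `table` holds byte offsets of the
// case targets relative to the start of the table itself.
void* PackedSwitchTarget(int32_t value, int32_t lower_bound, uint32_t num_entries,
                         const int32_t* table, void* default_target) {
  uint32_t index =
      static_cast<uint32_t>(static_cast<int64_t>(value) - lower_bound);  // Addiu32 TMP, value_reg, -lower_bound
  if (index >= num_entries) {                                            // Bgeuc TMP, AT, default_block
    return default_target;
  }
  const uint8_t* base = reinterpret_cast<const uint8_t*>(table);         // LoadLabelAddress AT, table
  return const_cast<uint8_t*>(base + table[index]);                      // Lw + Daddu, then Jr
}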
+ void GenerateGcRootFieldLoad(HInstruction* instruction, + Location root, + GpuRegister obj, + uint32_t offset); void GenerateTestAndBranch(HInstruction* instruction, size_t condition_input_index, Mips64Label* true_target, @@ -240,12 +258,26 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator { bool is64bit, LocationSummary* locations, Mips64Label* label); + void GenerateFpCompare(IfCondition cond, + bool gt_bias, + Primitive::Type type, + LocationSummary* locations); void GenerateFpCompareAndBranch(IfCondition cond, bool gt_bias, Primitive::Type type, LocationSummary* locations, Mips64Label* label); void HandleGoto(HInstruction* got, HBasicBlock* successor); + void GenPackedSwitchWithCompares(GpuRegister value_reg, + int32_t lower_bound, + uint32_t num_entries, + HBasicBlock* switch_block, + HBasicBlock* default_block); + void GenTableBasedPackedSwitch(GpuRegister value_reg, + int32_t lower_bound, + uint32_t num_entries, + HBasicBlock* switch_block, + HBasicBlock* default_block); Mips64Assembler* const assembler_; CodeGeneratorMIPS64* const codegen_; @@ -310,6 +342,10 @@ class CodeGeneratorMIPS64 : public CodeGenerator { block_labels_ = CommonInitializeLabels<Mips64Label>(); } + // We prefer aligned loads and stores (less code), so spill and restore registers in slow paths + // at aligned locations. + uint32_t GetPreferredSlotsAlignment() const OVERRIDE { return kMips64DoublewordSize; } + void Finalize(CodeAllocator* allocator) OVERRIDE; // Code generation helpers. @@ -375,20 +411,33 @@ class CodeGeneratorMIPS64 : public CodeGenerator { Mips64Label pc_rel_label; }; + PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, uint32_t string_index); + PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, dex::TypeIndex type_index); PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset); PcRelativePatchInfo* NewPcRelativeCallPatch(const DexFile& dex_file, uint32_t method_index); + Literal* DeduplicateBootImageStringLiteral(const DexFile& dex_file, + dex::StringIndex string_index); + Literal* DeduplicateBootImageTypeLiteral(const DexFile& dex_file, dex::TypeIndex type_index); + Literal* DeduplicateBootImageAddressLiteral(uint64_t address); void EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info, GpuRegister out); private: + using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, Literal*>; using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, Literal*>; using MethodToLiteralMap = ArenaSafeMap<MethodReference, Literal*, MethodReferenceComparator>; + using BootStringToLiteralMap = ArenaSafeMap<StringReference, + Literal*, + StringReferenceValueComparator>; + using BootTypeToLiteralMap = ArenaSafeMap<TypeReference, + Literal*, + TypeReferenceValueComparator>; + + Literal* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map); Literal* DeduplicateUint64Literal(uint64_t value); Literal* DeduplicateMethodLiteral(MethodReference target_method, MethodToLiteralMap* map); - Literal* DeduplicateMethodAddressLiteral(MethodReference target_method); - Literal* DeduplicateMethodCodeLiteral(MethodReference target_method); PcRelativePatchInfo* NewPcRelativePatch(const DexFile& dex_file, uint32_t offset_or_index, @@ -407,15 +456,23 @@ class CodeGeneratorMIPS64 : public CodeGenerator { Mips64Assembler assembler_; const Mips64InstructionSetFeatures& isa_features_; + // Deduplication map for 32-bit literals, used for non-patchable boot image addresses. 
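The Deduplicate*Literal declarations and *ToLiteralMap typedefs above implement simple memoization: each distinct constant is emitted into the literal pool once, and later requests reuse the same Literal. A simplified sketch of the idea using std::map rather than ArenaSafeMap (EmitNewLiteral is an assumed stand-in for the assembler hook, not an ART API):

#include <cstdint>
#include <map>
#include <utility>

struct Literal;                        // stands in for the assembler's literal type
Literal* EmitNewLiteral(uint32_t v);   // assumed helper: appends a constant-pool entry

Literal* DeduplicateLiteral(uint32_t value, std::map<uint32_t, Literal*>* map) {
  auto it = map->lower_bound(value);
  if (it != map->end() && it->first == value) {
    return it->second;                 // reuse the literal emitted earlier for this value
  }
  Literal* literal = EmitNewLiteral(value);
  map->insert(it, std::make_pair(value, literal));  // hinted insert keeps the lookup O(log n)
  return literal;
}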
+ Uint32ToLiteralMap uint32_literals_; // Deduplication map for 64-bit literals, used for non-patchable method address or method code // address. Uint64ToLiteralMap uint64_literals_; - // Method patch info, map MethodReference to a literal for method address and method code. - MethodToLiteralMap method_patches_; - MethodToLiteralMap call_patches_; // PC-relative patch info. ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_; - ArenaDeque<PcRelativePatchInfo> relative_call_patches_; + // Deduplication map for boot string literals for kBootImageLinkTimeAddress. + BootStringToLiteralMap boot_image_string_patches_; + // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC). + ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; + // Deduplication map for boot type literals for kBootImageLinkTimeAddress. + BootTypeToLiteralMap boot_image_type_patches_; + // PC-relative type patch info. + ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_; + // Deduplication map for patchable boot image addresses. + Uint32ToLiteralMap boot_image_address_patches_; DISALLOW_COPY_AND_ASSIGN(CodeGeneratorMIPS64); }; diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 8612a67c8b..f13b60aebf 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -1006,8 +1006,6 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, move_resolver_(graph->GetArena(), this), assembler_(graph->GetArena()), isa_features_(isa_features), - method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), simple_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), @@ -4454,20 +4452,7 @@ HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86::GetSupportedInvokeStaticOr HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative)) { dispatch_info.method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod; } - switch (dispatch_info.code_ptr_location) { - case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup: - case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect: - // For direct code, we actually prefer to call via the code pointer from ArtMethod*. - // (Though the direct CALL ptr16:32 is available for consideration). - return HInvokeStaticOrDirect::DispatchInfo { - dispatch_info.method_load_kind, - HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod, - dispatch_info.method_load_data, - 0u - }; - default: - return dispatch_info; - } + return dispatch_info; } Register CodeGeneratorX86::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, @@ -4514,12 +4499,6 @@ Location CodeGeneratorX86::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticO case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ movl(temp.AsRegister<Register>(), Immediate(invoke->GetMethodAddress())); break; - case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup: - __ movl(temp.AsRegister<Register>(), Immediate(/* placeholder */ 0)); - method_patches_.emplace_back(*invoke->GetTargetMethod().dex_file, - invoke->GetTargetMethod().dex_method_index); - __ Bind(&method_patches_.back().label); // Bind the label at the end of the "movl" insn. 
- break; case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: { Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke, temp.AsRegister<Register>()); @@ -4561,19 +4540,6 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf: __ call(GetFrameEntryLabel()); break; - case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: { - relative_call_patches_.emplace_back(*invoke->GetTargetMethod().dex_file, - invoke->GetTargetMethod().dex_method_index); - Label* label = &relative_call_patches_.back().label; - __ call(label); // Bind to the patch label, override at link time. - __ Bind(label); // Bind the label at the end of the "call" insn. - break; - } - case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup: - case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect: - // Filtered out by GetSupportedInvokeStaticOrDirectDispatch(). - LOG(FATAL) << "Unsupported"; - UNREACHABLE(); case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod: // (callee_method + offset_of_quick_compiled_code)() __ call(Address(callee_method.AsRegister<Register>(), @@ -4664,22 +4630,11 @@ inline void CodeGeneratorX86::EmitPcRelativeLinkerPatches( void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = - method_patches_.size() + - relative_call_patches_.size() + pc_relative_dex_cache_patches_.size() + simple_patches_.size() + string_patches_.size() + type_patches_.size(); linker_patches->reserve(size); - for (const PatchInfo<Label>& info : method_patches_) { - uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; - linker_patches->push_back(LinkerPatch::MethodPatch(literal_offset, &info.dex_file, info.index)); - } - for (const PatchInfo<Label>& info : relative_call_patches_) { - uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; - linker_patches->push_back( - LinkerPatch::RelativeCodePatch(literal_offset, &info.dex_file, info.index)); - } EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_, linker_patches); for (const Label& label : simple_patches_) { @@ -6277,15 +6232,19 @@ void LocationsBuilderX86::VisitLoadString(HLoadString* load) { } Label* CodeGeneratorX86::NewJitRootStringPatch(const DexFile& dex_file, - dex::StringIndex dex_index) { - jit_string_roots_.Overwrite(StringReference(&dex_file, dex_index), /* placeholder */ 0u); + dex::StringIndex dex_index, + Handle<mirror::String> handle) { + jit_string_roots_.Overwrite( + StringReference(&dex_file, dex_index), reinterpret_cast64<uint64_t>(handle.GetReference())); // Add a patch entry and return the label. jit_string_patches_.emplace_back(dex_file, dex_index.index_); PatchInfo<Label>* info = &jit_string_patches_.back(); return &info->label; } -void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) { +// NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not +// move. +void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS { LocationSummary* locations = load->GetLocations(); Location out_loc = locations->Out(); Register out = out_loc.AsRegister<Register>(); @@ -6303,8 +6262,9 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) { return; // No dex cache slow path. 
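NewJitRootStringPatch now records the value returned by handle.GetReference() (rather than the previous 0 placeholder) in the uint64_t slot of jit_string_roots_, so the JIT root table can later be filled in from it. A rough stand-in for the widening that reinterpret_cast64 performs, shown only to make the intent explicit (illustration, not the ART helper itself):

#include <cstdint>

template <typename T>
inline uint64_t PointerToUint64(T* ptr) {
  // Widen a pointer to 64 bits without assuming the host word size.
  return static_cast<uint64_t>(reinterpret_cast<uintptr_t>(ptr));
}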
} case HLoadString::LoadKind::kBootImageAddress: { - DCHECK_NE(load->GetAddress(), 0u); - uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress()); + uint32_t address = dchecked_integral_cast<uint32_t>( + reinterpret_cast<uintptr_t>(load->GetString().Get())); + DCHECK_NE(address, 0u); __ movl(out, Immediate(address)); codegen_->RecordSimplePatch(); return; // No dex cache slow path. @@ -6325,7 +6285,7 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) { case HLoadString::LoadKind::kJitTableAddress: { Address address = Address::Absolute(CodeGeneratorX86::kDummy32BitOffset); Label* fixup_label = codegen_->NewJitRootStringPatch( - load->GetDexFile(), load->GetStringIndex()); + load->GetDexFile(), load->GetStringIndex(), load->GetString()); // /* GcRoot<mirror::String> */ out = *address GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption); return; diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index c44da97a90..dd1628c867 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -415,7 +415,9 @@ class CodeGeneratorX86 : public CodeGenerator { void RecordTypePatch(HLoadClass* load_class); Label* NewStringBssEntryPatch(HLoadString* load_string); Label* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset); - Label* NewJitRootStringPatch(const DexFile& dex_file, dex::StringIndex dex_index); + Label* NewJitRootStringPatch(const DexFile& dex_file, + dex::StringIndex dex_index, + Handle<mirror::String> handle); Label* NewJitRootClassPatch(const DexFile& dex_file, dex::TypeIndex dex_index, uint64_t address); void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE; @@ -613,9 +615,6 @@ class CodeGeneratorX86 : public CodeGenerator { X86Assembler assembler_; const X86InstructionSetFeatures& isa_features_; - // Method patch info. Using ArenaDeque<> which retains element addresses on push/emplace_back(). - ArenaDeque<PatchInfo<Label>> method_patches_; - ArenaDeque<PatchInfo<Label>> relative_call_patches_; // PC-relative DexCache access info. ArenaDeque<PatchInfo<Label>> pc_relative_dex_cache_patches_; // Patch locations for patchoat where the linker doesn't do any other work. diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 7dfc736d9c..89f4ae04d7 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -960,19 +960,7 @@ inline Condition X86_64FPCondition(IfCondition cond) { HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) { - switch (desired_dispatch_info.code_ptr_location) { - case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup: - case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect: - // For direct code, we actually prefer to call via the code pointer from ArtMethod*. 
- return HInvokeStaticOrDirect::DispatchInfo { - desired_dispatch_info.method_load_kind, - HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod, - desired_dispatch_info.method_load_data, - 0u - }; - default: - return desired_dispatch_info; - } + return desired_dispatch_info; } Location CodeGeneratorX86_64::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, @@ -993,12 +981,6 @@ Location CodeGeneratorX86_64::GenerateCalleeMethodStaticOrDirectCall(HInvokeStat case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ movq(temp.AsRegister<CpuRegister>(), Immediate(invoke->GetMethodAddress())); break; - case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup: - __ movl(temp.AsRegister<CpuRegister>(), Immediate(0)); // Placeholder. - method_patches_.emplace_back(*invoke->GetTargetMethod().dex_file, - invoke->GetTargetMethod().dex_method_index); - __ Bind(&method_patches_.back().label); // Bind the label at the end of the "movl" insn. - break; case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: { __ movq(temp.AsRegister<CpuRegister>(), Address::Absolute(kDummy32BitOffset, /* no_rip */ false)); @@ -1042,19 +1024,6 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf: __ call(&frame_entry_label_); break; - case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: { - relative_call_patches_.emplace_back(*invoke->GetTargetMethod().dex_file, - invoke->GetTargetMethod().dex_method_index); - Label* label = &relative_call_patches_.back().label; - __ call(label); // Bind to the patch label, override at link time. - __ Bind(label); // Bind the label at the end of the "call" insn. - break; - } - case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup: - case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect: - // Filtered out by GetSupportedInvokeStaticOrDirectDispatch(). 
- LOG(FATAL) << "Unsupported"; - UNREACHABLE(); case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod: // (callee_method + offset_of_quick_compiled_code)() __ call(Address(callee_method.AsRegister<CpuRegister>(), @@ -1146,22 +1115,11 @@ inline void CodeGeneratorX86_64::EmitPcRelativeLinkerPatches( void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = - method_patches_.size() + - relative_call_patches_.size() + pc_relative_dex_cache_patches_.size() + simple_patches_.size() + string_patches_.size() + type_patches_.size(); linker_patches->reserve(size); - for (const PatchInfo<Label>& info : method_patches_) { - uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; - linker_patches->push_back(LinkerPatch::MethodPatch(literal_offset, &info.dex_file, info.index)); - } - for (const PatchInfo<Label>& info : relative_call_patches_) { - uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; - linker_patches->push_back( - LinkerPatch::RelativeCodePatch(literal_offset, &info.dex_file, info.index)); - } EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_, linker_patches); for (const Label& label : simple_patches_) { @@ -1253,8 +1211,6 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, assembler_(graph->GetArena()), isa_features_(isa_features), constant_area_start_(0), - method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), simple_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), @@ -5675,15 +5631,19 @@ void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) { } Label* CodeGeneratorX86_64::NewJitRootStringPatch(const DexFile& dex_file, - dex::StringIndex dex_index) { - jit_string_roots_.Overwrite(StringReference(&dex_file, dex_index), /* placeholder */ 0u); + dex::StringIndex dex_index, + Handle<mirror::String> handle) { + jit_string_roots_.Overwrite( + StringReference(&dex_file, dex_index), reinterpret_cast64<uint64_t>(handle.GetReference())); // Add a patch entry and return the label. jit_string_patches_.emplace_back(dex_file, dex_index.index_); PatchInfo<Label>* info = &jit_string_patches_.back(); return &info->label; } -void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) { +// NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not +// move. +void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS { LocationSummary* locations = load->GetLocations(); Location out_loc = locations->Out(); CpuRegister out = out_loc.AsRegister<CpuRegister>(); @@ -5695,8 +5655,9 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) { return; // No dex cache slow path. } case HLoadString::LoadKind::kBootImageAddress: { - DCHECK_NE(load->GetAddress(), 0u); - uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress()); + uint32_t address = dchecked_integral_cast<uint32_t>( + reinterpret_cast<uintptr_t>(load->GetString().Get())); + DCHECK_NE(address, 0u); __ movl(out, Immediate(address)); // Zero-extended. codegen_->RecordSimplePatch(); return; // No dex cache slow path. 
@@ -5717,8 +5678,8 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) { case HLoadString::LoadKind::kJitTableAddress: { Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ true); - Label* fixup_label = - codegen_->NewJitRootStringPatch(load->GetDexFile(), load->GetStringIndex()); + Label* fixup_label = codegen_->NewJitRootStringPatch( + load->GetDexFile(), load->GetStringIndex(), load->GetString()); // /* GcRoot<mirror::String> */ out = *address GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption); return; diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 391a23b7ce..32d006c5f3 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -412,7 +412,9 @@ class CodeGeneratorX86_64 : public CodeGenerator { void RecordTypePatch(HLoadClass* load_class); Label* NewStringBssEntryPatch(HLoadString* load_string); Label* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset); - Label* NewJitRootStringPatch(const DexFile& dex_file, dex::StringIndex dex_index); + Label* NewJitRootStringPatch(const DexFile& dex_file, + dex::StringIndex dex_index, + Handle<mirror::String> handle); Label* NewJitRootClassPatch(const DexFile& dex_file, dex::TypeIndex dex_index, uint64_t address); void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE; @@ -596,9 +598,6 @@ class CodeGeneratorX86_64 : public CodeGenerator { // Used for fixups to the constant area. int constant_area_start_; - // Method patch info. Using ArenaDeque<> which retains element addresses on push/emplace_back(). - ArenaDeque<PatchInfo<Label>> method_patches_; - ArenaDeque<PatchInfo<Label>> relative_call_patches_; // PC-relative DexCache access info. ArenaDeque<PatchInfo<Label>> pc_relative_dex_cache_patches_; // Patch locations for patchoat where the linker doesn't do any other work. diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc index ac83bd9b0c..e3f3df0ff5 100644 --- a/compiler/optimizing/codegen_test.cc +++ b/compiler/optimizing/codegen_test.cc @@ -15,6 +15,7 @@ */ #include <functional> +#include <memory> #include "arch/instruction_set.h" #include "arch/arm/instruction_set_features_arm.h" @@ -299,8 +300,8 @@ static void RunCode(CodegenTargetConfig target_config, bool has_result, Expected expected) { CompilerOptions compiler_options; - CodeGenerator* codegen = target_config.CreateCodeGenerator(graph, compiler_options); - RunCode(codegen, graph, hook_before_codegen, has_result, expected); + std::unique_ptr<CodeGenerator> codegen(target_config.CreateCodeGenerator(graph, compiler_options)); + RunCode(codegen.get(), graph, hook_before_codegen, has_result, expected); } #ifdef ART_ENABLE_CODEGEN_arm @@ -1041,6 +1042,31 @@ TEST_F(CodegenTest, ComparisonsLong) { } } +#ifdef ART_ENABLE_CODEGEN_arm +TEST_F(CodegenTest, ARMVIXLParallelMoveResolver) { + std::unique_ptr<const ArmInstructionSetFeatures> features( + ArmInstructionSetFeatures::FromCppDefines()); + ArenaPool pool; + ArenaAllocator allocator(&pool); + HGraph* graph = CreateGraph(&allocator); + arm::CodeGeneratorARMVIXL codegen(graph, *features.get(), CompilerOptions()); + + codegen.Initialize(); + + // This will result in calling EmitSwap -> void ParallelMoveResolverARMVIXL::Exchange(int mem1, + // int mem2) which was faulty (before the fix). 
So previously GPR and FP scratch registers were + // used as temps; however GPR scratch register is required for big stack offsets which don't fit + // LDR encoding. So the following code is a regression test for that situation. + HParallelMove* move = new (graph->GetArena()) HParallelMove(graph->GetArena()); + move->AddMove(Location::StackSlot(0), Location::StackSlot(8192), Primitive::kPrimInt, nullptr); + move->AddMove(Location::StackSlot(8192), Location::StackSlot(0), Primitive::kPrimInt, nullptr); + codegen.GetMoveResolver()->EmitNativeCode(move); + + InternalCodeAllocator code_allocator; + codegen.Finalize(&code_allocator); +} +#endif + #ifdef ART_ENABLE_CODEGEN_mips TEST_F(CodegenTest, MipsClobberRA) { std::unique_ptr<const MipsInstructionSetFeatures> features_mips( diff --git a/compiler/optimizing/gvn_test.cc b/compiler/optimizing/gvn_test.cc index 437d35ccb7..f8d37bd714 100644 --- a/compiler/optimizing/gvn_test.cc +++ b/compiler/optimizing/gvn_test.cc @@ -28,7 +28,6 @@ class GVNTest : public CommonCompilerTest {}; TEST_F(GVNTest, LocalFieldElimination) { ArenaPool pool; ArenaAllocator allocator(&pool); - ScopedNullHandle<mirror::DexCache> dex_cache; HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); @@ -45,53 +44,53 @@ TEST_F(GVNTest, LocalFieldElimination) { entry->AddSuccessor(block); block->AddInstruction(new (&allocator) HInstanceFieldGet(parameter, + nullptr, Primitive::kPrimNot, MemberOffset(42), false, kUnknownFieldIndex, kUnknownClassDefIndex, graph->GetDexFile(), - dex_cache, 0)); block->AddInstruction(new (&allocator) HInstanceFieldGet(parameter, + nullptr, Primitive::kPrimNot, MemberOffset(42), false, kUnknownFieldIndex, kUnknownClassDefIndex, graph->GetDexFile(), - dex_cache, 0)); HInstruction* to_remove = block->GetLastInstruction(); block->AddInstruction(new (&allocator) HInstanceFieldGet(parameter, + nullptr, Primitive::kPrimNot, MemberOffset(43), false, kUnknownFieldIndex, kUnknownClassDefIndex, graph->GetDexFile(), - dex_cache, 0)); HInstruction* different_offset = block->GetLastInstruction(); // Kill the value. 
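For context on the 8192-byte offsets in the regression test above: the 12-bit immediate of the A32/T32 LDR/STR encodings reaches only 4095 bytes, so stack slots that far down the frame need their address materialized in a general-purpose scratch register first; Exchange() therefore must not have already consumed the GPR scratch for the data being swapped. A minimal sketch of that encoding limit (the 4095 bound is assumed from the ARM encoding, for illustration only):

#include <cstdint>

// True when a stack-slot offset fits the 12-bit unsigned immediate of LDR/STR;
// offsets such as 8192 do not, and need a scratch register to form the address.
inline bool FitsLdrStrImmediateOffset(int32_t offset) {
  return offset >= 0 && offset <= 4095;
}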
block->AddInstruction(new (&allocator) HInstanceFieldSet(parameter, parameter, + nullptr, Primitive::kPrimNot, MemberOffset(42), false, kUnknownFieldIndex, kUnknownClassDefIndex, graph->GetDexFile(), - dex_cache, 0)); block->AddInstruction(new (&allocator) HInstanceFieldGet(parameter, + nullptr, Primitive::kPrimNot, MemberOffset(42), false, kUnknownFieldIndex, kUnknownClassDefIndex, graph->GetDexFile(), - dex_cache, 0)); HInstruction* use_after_kill = block->GetLastInstruction(); block->AddInstruction(new (&allocator) HExit()); @@ -113,7 +112,6 @@ TEST_F(GVNTest, LocalFieldElimination) { TEST_F(GVNTest, GlobalFieldElimination) { ArenaPool pool; ArenaAllocator allocator(&pool); - ScopedNullHandle<mirror::DexCache> dex_cache; HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); @@ -129,13 +127,13 @@ TEST_F(GVNTest, GlobalFieldElimination) { graph->AddBlock(block); entry->AddSuccessor(block); block->AddInstruction(new (&allocator) HInstanceFieldGet(parameter, + nullptr, Primitive::kPrimBoolean, MemberOffset(42), false, kUnknownFieldIndex, kUnknownClassDefIndex, graph->GetDexFile(), - dex_cache, 0)); block->AddInstruction(new (&allocator) HIf(block->GetLastInstruction())); @@ -152,33 +150,33 @@ TEST_F(GVNTest, GlobalFieldElimination) { else_->AddSuccessor(join); then->AddInstruction(new (&allocator) HInstanceFieldGet(parameter, + nullptr, Primitive::kPrimBoolean, MemberOffset(42), false, kUnknownFieldIndex, kUnknownClassDefIndex, graph->GetDexFile(), - dex_cache, 0)); then->AddInstruction(new (&allocator) HGoto()); else_->AddInstruction(new (&allocator) HInstanceFieldGet(parameter, + nullptr, Primitive::kPrimBoolean, MemberOffset(42), false, kUnknownFieldIndex, kUnknownClassDefIndex, graph->GetDexFile(), - dex_cache, 0)); else_->AddInstruction(new (&allocator) HGoto()); join->AddInstruction(new (&allocator) HInstanceFieldGet(parameter, + nullptr, Primitive::kPrimBoolean, MemberOffset(42), false, kUnknownFieldIndex, kUnknownClassDefIndex, graph->GetDexFile(), - dex_cache, 0)); join->AddInstruction(new (&allocator) HExit()); @@ -196,7 +194,6 @@ TEST_F(GVNTest, GlobalFieldElimination) { TEST_F(GVNTest, LoopFieldElimination) { ArenaPool pool; ArenaAllocator allocator(&pool); - ScopedNullHandle<mirror::DexCache> dex_cache; HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); @@ -213,13 +210,13 @@ TEST_F(GVNTest, LoopFieldElimination) { graph->AddBlock(block); entry->AddSuccessor(block); block->AddInstruction(new (&allocator) HInstanceFieldGet(parameter, + nullptr, Primitive::kPrimBoolean, MemberOffset(42), false, kUnknownFieldIndex, kUnknownClassDefIndex, graph->GetDexFile(), - dex_cache, 0)); block->AddInstruction(new (&allocator) HGoto()); @@ -236,13 +233,13 @@ TEST_F(GVNTest, LoopFieldElimination) { loop_body->AddSuccessor(loop_header); loop_header->AddInstruction(new (&allocator) HInstanceFieldGet(parameter, + nullptr, Primitive::kPrimBoolean, MemberOffset(42), false, kUnknownFieldIndex, kUnknownClassDefIndex, graph->GetDexFile(), - dex_cache, 0)); HInstruction* field_get_in_loop_header = loop_header->GetLastInstruction(); loop_header->AddInstruction(new (&allocator) HIf(block->GetLastInstruction())); @@ -251,35 +248,35 @@ TEST_F(GVNTest, LoopFieldElimination) { // and the body to be GVN'ed. 
loop_body->AddInstruction(new (&allocator) HInstanceFieldSet(parameter, parameter, + nullptr, Primitive::kPrimBoolean, MemberOffset(42), false, kUnknownFieldIndex, kUnknownClassDefIndex, graph->GetDexFile(), - dex_cache, 0)); HInstruction* field_set = loop_body->GetLastInstruction(); loop_body->AddInstruction(new (&allocator) HInstanceFieldGet(parameter, + nullptr, Primitive::kPrimBoolean, MemberOffset(42), false, kUnknownFieldIndex, kUnknownClassDefIndex, graph->GetDexFile(), - dex_cache, 0)); HInstruction* field_get_in_loop_body = loop_body->GetLastInstruction(); loop_body->AddInstruction(new (&allocator) HGoto()); exit->AddInstruction(new (&allocator) HInstanceFieldGet(parameter, + nullptr, Primitive::kPrimBoolean, MemberOffset(42), false, kUnknownFieldIndex, kUnknownClassDefIndex, graph->GetDexFile(), - dex_cache, 0)); HInstruction* field_get_in_exit = exit->GetLastInstruction(); exit->AddInstruction(new (&allocator) HExit()); @@ -319,7 +316,6 @@ TEST_F(GVNTest, LoopFieldElimination) { TEST_F(GVNTest, LoopSideEffects) { ArenaPool pool; ArenaAllocator allocator(&pool); - ScopedNullHandle<mirror::DexCache> dex_cache; static const SideEffects kCanTriggerGC = SideEffects::CanTriggerGC(); @@ -376,13 +372,13 @@ TEST_F(GVNTest, LoopSideEffects) { // Make one block with a side effect. entry->AddInstruction(new (&allocator) HInstanceFieldSet(parameter, parameter, + nullptr, Primitive::kPrimNot, MemberOffset(42), false, kUnknownFieldIndex, kUnknownClassDefIndex, graph->GetDexFile(), - dex_cache, 0)); SideEffectsAnalysis side_effects(graph); @@ -401,13 +397,13 @@ TEST_F(GVNTest, LoopSideEffects) { outer_loop_body->InsertInstructionBefore( new (&allocator) HInstanceFieldSet(parameter, parameter, + nullptr, Primitive::kPrimNot, MemberOffset(42), false, kUnknownFieldIndex, kUnknownClassDefIndex, graph->GetDexFile(), - dex_cache, 0), outer_loop_body->GetLastInstruction()); @@ -427,13 +423,13 @@ TEST_F(GVNTest, LoopSideEffects) { inner_loop_body->InsertInstructionBefore( new (&allocator) HInstanceFieldSet(parameter, parameter, + nullptr, Primitive::kPrimNot, MemberOffset(42), false, kUnknownFieldIndex, kUnknownClassDefIndex, graph->GetDexFile(), - dex_cache, 0), inner_loop_body->GetLastInstruction()); diff --git a/compiler/optimizing/induction_var_analysis.cc b/compiler/optimizing/induction_var_analysis.cc index b21bc09cbd..88473f02e5 100644 --- a/compiler/optimizing/induction_var_analysis.cc +++ b/compiler/optimizing/induction_var_analysis.cc @@ -73,10 +73,18 @@ static bool IsNarrowingIntegralConversion(Primitive::Type from, Primitive::Type } /** - * Returns narrowest data type. + * Returns result of implicit widening type conversion done in HIR. */ -static Primitive::Type Narrowest(Primitive::Type type1, Primitive::Type type2) { - return Primitive::ComponentSize(type1) <= Primitive::ComponentSize(type2) ? 
type1 : type2; +static Primitive::Type ImplicitConversion(Primitive::Type type) { + switch (type) { + case Primitive::kPrimShort: + case Primitive::kPrimChar: + case Primitive::kPrimByte: + case Primitive::kPrimBoolean: + return Primitive::kPrimInt; + default: + return type; + } } // @@ -232,9 +240,9 @@ void HInductionVarAnalysis::ClassifyTrivial(HLoopInformation* loop, HInstruction } else if (instruction->IsSelect()) { info = TransferPhi(loop, instruction, /*input_index*/ 0, /*adjust_input_size*/ 1); } else if (instruction->IsTypeConversion()) { - info = TransferCnv(LookupInfo(loop, instruction->InputAt(0)), - instruction->AsTypeConversion()->GetInputType(), - instruction->AsTypeConversion()->GetResultType()); + info = TransferConversion(LookupInfo(loop, instruction->InputAt(0)), + instruction->AsTypeConversion()->GetInputType(), + instruction->AsTypeConversion()->GetResultType()); } else if (instruction->IsBoundsCheck()) { info = LookupInfo(loop, instruction->InputAt(0)); // Pass-through. } @@ -267,8 +275,12 @@ void HInductionVarAnalysis::ClassifyNonTrivial(HLoopInformation* loop) { return; } - // Store interesting cycle. - AssignCycle(phi->AsPhi()); + // Store interesting cycle in each loop phi. + for (size_t i = 0; i < size; i++) { + if (scc_[i]->IsLoopHeaderPhi()) { + AssignCycle(scc_[i]->AsPhi()); + } + } // Singleton is wrap-around induction if all internal links have the same meaning. if (size == 1) { @@ -326,7 +338,7 @@ void HInductionVarAnalysis::ClassifyNonTrivial(HLoopInformation* loop) { } else if (instruction->IsSelect()) { update = SolvePhi(instruction, /*input_index*/ 0, /*adjust_input_size*/ 1); // acts like Phi } else if (instruction->IsTypeConversion()) { - update = SolveCnv(instruction->AsTypeConversion()); + update = SolveConversion(loop, phi, instruction->AsTypeConversion()); } if (update == nullptr) { return; @@ -416,18 +428,20 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::TransferAddSub(Indu // wrap-around, or periodic can be combined with an invariant to yield a similar result. // Two linear or two polynomial inputs can be combined too. Other combinations fail. if (a != nullptr && b != nullptr) { - type_ = Narrowest(type_, Narrowest(a->type, b->type)); - if (a->induction_class == kInvariant && b->induction_class == kInvariant) { - return CreateInvariantOp(op, a, b); + if (IsNarrowingLinear(a) || IsNarrowingLinear(b)) { + return nullptr; // no transfer + } else if (a->induction_class == kInvariant && b->induction_class == kInvariant) { + return CreateInvariantOp(op, a, b); // direct invariant } else if ((a->induction_class == kLinear && b->induction_class == kLinear) || (a->induction_class == kPolynomial && b->induction_class == kPolynomial)) { - return CreateInduction(a->induction_class, - a->operation, - TransferAddSub(a->op_a, b->op_a, op), - TransferAddSub(a->op_b, b->op_b, op), - /*fetch*/ nullptr, - type_); + // Rule induc(a, b) + induc(a', b') -> induc(a + a', b + b'). + InductionInfo* new_a = TransferAddSub(a->op_a, b->op_a, op); + InductionInfo* new_b = TransferAddSub(a->op_b, b->op_b, op); + if (new_a != nullptr && new_b != nullptr) { + return CreateInduction(a->induction_class, a->operation, new_a, new_b, a->fetch, type_); + } } else if (a->induction_class == kInvariant) { + // Rule a + induc(a', b') -> induc(a', a + b') or induc(a + a', a + b'). 
InductionInfo* new_a = b->op_a; InductionInfo* new_b = TransferAddSub(a, b->op_b, op); if (b->induction_class == kWrapAround || b->induction_class == kPeriodic) { @@ -435,14 +449,19 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::TransferAddSub(Indu } else if (op == kSub) { // Negation required. new_a = TransferNeg(new_a); } - return CreateInduction(b->induction_class, b->operation, new_a, new_b, b->fetch, type_); + if (new_a != nullptr && new_b != nullptr) { + return CreateInduction(b->induction_class, b->operation, new_a, new_b, b->fetch, type_); + } } else if (b->induction_class == kInvariant) { + // Rule induc(a, b) + b' -> induc(a, b + b') or induc(a + b', b + b'). InductionInfo* new_a = a->op_a; InductionInfo* new_b = TransferAddSub(a->op_b, b, op); if (a->induction_class == kWrapAround || a->induction_class == kPeriodic) { new_a = TransferAddSub(new_a, b, op); } - return CreateInduction(a->induction_class, a->operation, new_a, new_b, a->fetch, type_); + if (new_a != nullptr && new_b != nullptr) { + return CreateInduction(a->induction_class, a->operation, new_a, new_b, a->fetch, type_); + } } } return nullptr; @@ -452,16 +471,17 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::TransferNeg(Inducti // Transfer over a unary negation: an invariant, linear, polynomial, geometric (mul), // wrap-around, or periodic input yields a similar but negated induction as result. if (a != nullptr) { - type_ = Narrowest(type_, a->type); - if (a->induction_class == kInvariant) { - return CreateInvariantOp(kNeg, nullptr, a); + if (IsNarrowingLinear(a)) { + return nullptr; // no transfer + } else if (a->induction_class == kInvariant) { + return CreateInvariantOp(kNeg, nullptr, a); // direct invariant } else if (a->induction_class != kGeometric || a->operation == kMul) { - return CreateInduction(a->induction_class, - a->operation, - TransferNeg(a->op_a), - TransferNeg(a->op_b), - a->fetch, - type_); + // Rule - induc(a, b) -> induc(-a, -b). + InductionInfo* new_a = TransferNeg(a->op_a); + InductionInfo* new_b = TransferNeg(a->op_b); + if (new_a != nullptr && new_b != nullptr) { + return CreateInduction(a->induction_class, a->operation, new_a, new_b, a->fetch, type_); + } } } return nullptr; @@ -473,41 +493,42 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::TransferMul(Inducti // wrap-around, or periodic can be multiplied with an invariant to yield a similar // but multiplied result. Two non-invariant inputs cannot be multiplied, however. if (a != nullptr && b != nullptr) { - type_ = Narrowest(type_, Narrowest(a->type, b->type)); - if (a->induction_class == kInvariant && b->induction_class == kInvariant) { - return CreateInvariantOp(kMul, a, b); + if (IsNarrowingLinear(a) || IsNarrowingLinear(b)) { + return nullptr; // no transfer + } else if (a->induction_class == kInvariant && b->induction_class == kInvariant) { + return CreateInvariantOp(kMul, a, b); // direct invariant } else if (a->induction_class == kInvariant && (b->induction_class != kGeometric || b->operation == kMul)) { - return CreateInduction(b->induction_class, - b->operation, - TransferMul(a, b->op_a), - TransferMul(a, b->op_b), - b->fetch, - type_); + // Rule a * induc(a', b') -> induc(a * a', b * b'). 
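The rewritten transfer rules above are the standard linear-form identities, now applied only when both recursive transfers succeed (new_a and new_b non-null). A quick numeric spot-check of the add and mul rules (illustration, not part of the pass):

#include <cassert>
#include <cstdint>

int main() {
  const int64_t a = 3, b = 5, a2 = 2, b2 = -1, k = 4;
  for (int64_t i = 0; i < 10; ++i) {
    const int64_t x = a * i + b;    // induc(a, b)
    const int64_t y = a2 * i + b2;  // induc(a', b')
    assert(x + y == (a + a2) * i + (b + b2));  // induc(a, b) + induc(a', b') -> induc(a + a', b + b')
    assert(k * x == (k * a) * i + (k * b));    // a' * induc(a, b) -> induc(a' * a, a' * b)
  }
  return 0;
}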
+ InductionInfo* new_a = TransferMul(a, b->op_a); + InductionInfo* new_b = TransferMul(a, b->op_b); + if (new_a != nullptr && new_b != nullptr) { + return CreateInduction(b->induction_class, b->operation, new_a, new_b, b->fetch, type_); + } } else if (b->induction_class == kInvariant && (a->induction_class != kGeometric || a->operation == kMul)) { - return CreateInduction(a->induction_class, - a->operation, - TransferMul(a->op_a, b), - TransferMul(a->op_b, b), - a->fetch, - type_); + // Rule induc(a, b) * b' -> induc(a * b', b * b'). + InductionInfo* new_a = TransferMul(a->op_a, b); + InductionInfo* new_b = TransferMul(a->op_b, b); + if (new_a != nullptr && new_b != nullptr) { + return CreateInduction(a->induction_class, a->operation, new_a, new_b, a->fetch, type_); + } } } return nullptr; } -HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::TransferCnv(InductionInfo* a, - Primitive::Type from, - Primitive::Type to) { +HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::TransferConversion( + InductionInfo* a, + Primitive::Type from, + Primitive::Type to) { if (a != nullptr) { - // Allow narrowing conversion on linear induction in certain cases. - if (IsNarrowingIntegralConversion(from, to)) { - if (a->induction_class == kLinear) { - if (a->type == to || (a->type == from && IsNarrowingIntegralConversion(from, to))) { - return CreateInduction(kLinear, kNop, a->op_a, a->op_b, /*fetch*/ nullptr, to); - } - } + // Allow narrowing conversion on linear induction in certain cases: + // induction is already at narrow type, or can be made narrower. + if (IsNarrowingIntegralConversion(from, to) && + a->induction_class == kLinear && + (a->type == to || IsNarrowingIntegralConversion(a->type, to))) { + return CreateInduction(kLinear, kNop, a->op_a, a->op_b, a->fetch, to); } } return nullptr; @@ -585,17 +606,15 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::SolveAddSub(HLoopIn return CreateInvariantOp(op, a, b); } } - } else if (b->induction_class == kLinear) { + } else if (b->induction_class == kLinear && b->type == type_) { // Solve within a tight cycle that adds a term that is already classified as a linear // induction for a polynomial induction k = k + i (represented as sum over linear terms). if (x == entry_phi && entry_phi->InputCount() == 2 && instruction == entry_phi->InputAt(1)) { InductionInfo* initial = LookupInfo(loop, entry_phi->InputAt(0)); - return CreateInduction(kPolynomial, - kNop, - op == kAdd ? b : TransferNeg(b), - initial, - /*fetch*/ nullptr, - type_); + InductionInfo* new_a = op == kAdd ? b : TransferNeg(b); + if (new_a != nullptr) { + return CreateInduction(kPolynomial, kNop, new_a, initial, /*fetch*/ nullptr, type_); + } } } } @@ -700,16 +719,29 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::SolveTest(HLoopInfo return nullptr; } -HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::SolveCnv(HTypeConversion* conversion) { +HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::SolveConversion( + HLoopInformation* loop, + HInstruction* entry_phi, + HTypeConversion* conversion) { Primitive::Type from = conversion->GetInputType(); Primitive::Type to = conversion->GetResultType(); - // A narrowing conversion is allowed within the cycle of a linear induction, provided that the - // narrowest encountered type is recorded with the induction to account for the precision loss. 
- if (IsNarrowingIntegralConversion(from, to)) { - auto it = cycle_.find(conversion->GetInput()); - if (it != cycle_.end() && it->second->induction_class == kInvariant) { - type_ = Narrowest(type_, to); - return it->second; + // A narrowing conversion is allowed as *last* operation of the cycle of a linear induction + // with an initial value that fits the type, provided that the narrowest encountered type is + // recorded with the induction to account for the precision loss. The narrower induction does + // *not* transfer to any wider operations, however, since these may yield out-of-type values + if (entry_phi->InputCount() == 2 && conversion == entry_phi->InputAt(1)) { + int64_t min = Primitive::MinValueOfIntegralType(to); + int64_t max = Primitive::MaxValueOfIntegralType(to); + int64_t value = 0; + InductionInfo* initial = LookupInfo(loop, entry_phi->InputAt(0)); + if (IsNarrowingIntegralConversion(from, to) && + IsAtLeast(initial, &value) && value >= min && + IsAtMost(initial, &value) && value <= max) { + auto it = cycle_.find(conversion->GetInput()); + if (it != cycle_.end() && it->second->induction_class == kInvariant) { + type_ = to; + return it->second; + } } } return nullptr; @@ -729,7 +761,7 @@ void HInductionVarAnalysis::VisitControl(HLoopInformation* loop) { HCondition* condition = if_expr->AsCondition(); InductionInfo* a = LookupInfo(loop, condition->InputAt(0)); InductionInfo* b = LookupInfo(loop, condition->InputAt(1)); - Primitive::Type type = condition->InputAt(0)->GetType(); + Primitive::Type type = ImplicitConversion(condition->InputAt(0)->GetType()); // Determine if the loop control uses a known sequence on an if-exit (X outside) or on // an if-iterate (X inside), expressed as if-iterate when passed into VisitCondition(). if (a == nullptr || b == nullptr) { @@ -901,8 +933,8 @@ bool HInductionVarAnalysis::IsFinite(InductionInfo* upper_expr, int64_t stride_value, Primitive::Type type, IfCondition cmp) { - const int64_t min = Primitive::MinValueOfIntegralType(type); - const int64_t max = Primitive::MaxValueOfIntegralType(type); + int64_t min = Primitive::MinValueOfIntegralType(type); + int64_t max = Primitive::MaxValueOfIntegralType(type); // Some rules under which it is certain at compile-time that the loop is finite. int64_t value; switch (cmp) { @@ -938,8 +970,6 @@ bool HInductionVarAnalysis::FitsNarrowerControl(InductionInfo* lower_expr, min++; } // Do both bounds fit the range? - // Note: The `value` is initialized to please valgrind - the compiler can reorder - // the return value check with the `value` check, b/27651442 . 
int64_t value = 0; return IsAtLeast(lower_expr, &value) && value >= min && IsAtMost(lower_expr, &value) && value <= max && @@ -1046,7 +1076,8 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::CreateSimplifiedInv return CreateSimplifiedInvariant(kSub, b->op_b, b->op_a); } } - return new (graph_->GetArena()) InductionInfo(kInvariant, op, a, b, nullptr, b->type); + return new (graph_->GetArena()) InductionInfo( + kInvariant, op, a, b, nullptr, ImplicitConversion(b->type)); } HInstruction* HInductionVarAnalysis::GetShiftConstant(HLoopInformation* loop, @@ -1108,6 +1139,16 @@ bool HInductionVarAnalysis::IsAtLeast(InductionInfo* info, int64_t* value) { return InductionVarRange(this).IsConstant(info, InductionVarRange::kAtLeast, value); } +bool HInductionVarAnalysis::IsNarrowingLinear(InductionInfo* info) { + return info != nullptr && + info->induction_class == kLinear && + (info->type == Primitive::kPrimByte || + info->type == Primitive::kPrimShort || + info->type == Primitive::kPrimChar || + (info->type == Primitive::kPrimInt && (info->op_a->type == Primitive::kPrimLong || + info->op_b->type == Primitive::kPrimLong))); +} + bool HInductionVarAnalysis::InductionEqual(InductionInfo* info1, InductionInfo* info2) { // Test structural equality only, without accounting for simplifications. diff --git a/compiler/optimizing/induction_var_analysis.h b/compiler/optimizing/induction_var_analysis.h index 293aa70525..39b39cdf55 100644 --- a/compiler/optimizing/induction_var_analysis.h +++ b/compiler/optimizing/induction_var_analysis.h @@ -167,7 +167,7 @@ class HInductionVarAnalysis : public HOptimization { InductionInfo* TransferAddSub(InductionInfo* a, InductionInfo* b, InductionOp op); InductionInfo* TransferNeg(InductionInfo* a); InductionInfo* TransferMul(InductionInfo* a, InductionInfo* b); - InductionInfo* TransferCnv(InductionInfo* a, Primitive::Type from, Primitive::Type to); + InductionInfo* TransferConversion(InductionInfo* a, Primitive::Type from, Primitive::Type to); // Solvers. InductionInfo* SolvePhi(HInstruction* phi, size_t input_index, size_t adjust_input_size); @@ -191,7 +191,9 @@ class HInductionVarAnalysis : public HOptimization { HInstruction* entry_phi, HInstruction* instruction, int64_t oppositive_value); - InductionInfo* SolveCnv(HTypeConversion* conversion); + InductionInfo* SolveConversion(HLoopInformation* loop, + HInstruction* entry_phi, + HTypeConversion* conversion); // Trip count information. void VisitControl(HLoopInformation* loop); @@ -235,6 +237,7 @@ class HInductionVarAnalysis : public HOptimization { bool IsAtLeast(InductionInfo* info, /*out*/ int64_t* value); // Helpers. + static bool IsNarrowingLinear(InductionInfo* info); static bool InductionEqual(InductionInfo* info1, InductionInfo* info2); static std::string FetchToString(HInstruction* fetch); static std::string InductionToString(InductionInfo* info); diff --git a/compiler/optimizing/induction_var_analysis_test.cc b/compiler/optimizing/induction_var_analysis_test.cc index f52a1aad5a..82ee93d5c2 100644 --- a/compiler/optimizing/induction_var_analysis_test.cc +++ b/compiler/optimizing/induction_var_analysis_test.cc @@ -174,6 +174,12 @@ class InductionVarAnalysisTest : public CommonCompilerTest { iva_->LookupInfo(loop_body_[0]->GetLoopInformation(), instruction2)); } + // Returns true for narrowing linear induction. 
+ bool IsNarrowingLinear(HInstruction* instruction) { + return HInductionVarAnalysis::IsNarrowingLinear( + iva_->LookupInfo(loop_body_[0]->GetLoopInformation(), instruction)); + } + // Performs InductionVarAnalysis (after proper set up). void PerformInductionVarAnalysis() { graph_->BuildDominatorTree(); @@ -1066,16 +1072,20 @@ TEST_F(InductionVarAnalysisTest, ByteInductionIntLoopControl) { // } BuildLoopNest(1); HInstruction* conv = InsertInstruction( - new (&allocator_) HTypeConversion(Primitive::kPrimByte, basic_[0], -1), 0); + new (&allocator_) HTypeConversion(Primitive::kPrimByte, basic_[0], kNoDexPc), 0); HInstruction* store1 = InsertArrayStore(conv, 0); HInstruction* store2 = InsertArrayStore(basic_[0], 0); PerformInductionVarAnalysis(); - // Regular int induction (i) is "transferred" over conversion into byte induction (k). + // Regular int induction (i) is transferred over conversion into byte induction (k). EXPECT_STREQ("((1) * i + (0)):PrimByte", GetInductionInfo(store1->InputAt(1), 0).c_str()); EXPECT_STREQ("((1) * i + (0)):PrimInt", GetInductionInfo(store2->InputAt(1), 0).c_str()); EXPECT_STREQ("((1) * i + (1)):PrimInt", GetInductionInfo(increment_[0], 0).c_str()); + // Narrowing detected. + EXPECT_TRUE(IsNarrowingLinear(store1->InputAt(1))); + EXPECT_FALSE(IsNarrowingLinear(store2->InputAt(1))); + // Type matters! EXPECT_FALSE(HaveSameInduction(store1->InputAt(1), store2->InputAt(1))); @@ -1093,7 +1103,7 @@ TEST_F(InductionVarAnalysisTest, ByteInductionDerivedIntLoopControl) { // } BuildLoopNest(1); HInstruction* conv = InsertInstruction( - new (&allocator_) HTypeConversion(Primitive::kPrimByte, basic_[0], -1), 0); + new (&allocator_) HTypeConversion(Primitive::kPrimByte, basic_[0], kNoDexPc), 0); HInstruction* store1 = InsertArrayStore(conv, 0); HInstruction* add = InsertInstruction( new (&allocator_) HAdd(Primitive::kPrimInt, conv, constant1_), 0); @@ -1101,11 +1111,86 @@ TEST_F(InductionVarAnalysisTest, ByteInductionDerivedIntLoopControl) { PerformInductionVarAnalysis(); - // Byte induction (k) is "transferred" over conversion into addition (k + 1). - // This means only values within byte range can be trusted (even though - // addition can jump out of the range of course). + // Byte induction (k) is detected, but it does not transfer over the addition, + // since this may yield out-of-type values. EXPECT_STREQ("((1) * i + (0)):PrimByte", GetInductionInfo(store1->InputAt(1), 0).c_str()); - EXPECT_STREQ("((1) * i + (1)):PrimByte", GetInductionInfo(store2->InputAt(1), 0).c_str()); + EXPECT_STREQ("", GetInductionInfo(store2->InputAt(1), 0).c_str()); + + // Narrowing detected. + EXPECT_TRUE(IsNarrowingLinear(store1->InputAt(1))); + EXPECT_FALSE(IsNarrowingLinear(store2->InputAt(1))); // works for null +} + +TEST_F(InductionVarAnalysisTest, ByteInduction) { + // Setup: + // k = -128; + // for (int i = 0; i < 100; i++) { + // k = k + 1; + // k = (byte) k; + // } + BuildLoopNest(1); + HPhi* k_header = InsertLoopPhi(0, 0); + k_header->AddInput(graph_->GetIntConstant(-128)); + + HInstruction* add = InsertInstruction( + new (&allocator_) HAdd(Primitive::kPrimInt, k_header, constant1_), 0); + HInstruction* conv = InsertInstruction( + new (&allocator_) HTypeConversion(Primitive::kPrimByte, add, kNoDexPc), 0); + k_header->AddInput(conv); + PerformInductionVarAnalysis(); + + // Byte induction (k) is detected, but it does not transfer over the addition, + // since this may yield out-of-type values. 
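Concretely, the reason the byte induction must not transfer over the addition: at the top of the byte range the induction value itself is still exact, but the sum falls outside the type, so describing the addition as ((1) * i + (1)):PrimByte would be wrong. A small stand-alone illustration (not test code from the patch):

#include <cstdint>

void OutOfTypeExample() {
  int32_t i = 127;
  int8_t k = static_cast<int8_t>(i);  // k == 127: the narrowed induction value is in range
  int32_t sum = k + 1;                // 128: outside the byte range, so no byte induction for the add
  (void)sum;
}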
+ EXPECT_STREQ("((1) * i + (-128)):PrimByte", GetInductionInfo(k_header, 0).c_str()); + EXPECT_STREQ("", GetInductionInfo(add, 0).c_str()); + + // Narrowing detected. + EXPECT_TRUE(IsNarrowingLinear(k_header)); + EXPECT_FALSE(IsNarrowingLinear(add)); // works for null +} + +TEST_F(InductionVarAnalysisTest, NoByteInduction1) { + // Setup: + // k = -129; / does not fit! + // for (int i = 0; i < 100; i++) { + // k = k + 1; + // k = (byte) k; + // } + BuildLoopNest(1); + HPhi* k_header = InsertLoopPhi(0, 0); + k_header->AddInput(graph_->GetIntConstant(-129)); + + HInstruction* add = InsertInstruction( + new (&allocator_) HAdd(Primitive::kPrimInt, k_header, constant1_), 0); + HInstruction* conv = InsertInstruction( + new (&allocator_) HTypeConversion(Primitive::kPrimByte, add, kNoDexPc), 0); + k_header->AddInput(conv); + PerformInductionVarAnalysis(); + + EXPECT_STREQ("", GetInductionInfo(k_header, 0).c_str()); + EXPECT_STREQ("", GetInductionInfo(add, 0).c_str()); +} + +TEST_F(InductionVarAnalysisTest, NoByteInduction2) { + // Setup: + // k = 0; + // for (int i = 0; i < 100; i++) { + // k = (byte) k; // conversion not done last! + // k = k + 1; + // } + BuildLoopNest(1); + HPhi* k_header = InsertLoopPhi(0, 0); + k_header->AddInput(constant0_); + + HInstruction* conv = InsertInstruction( + new (&allocator_) HTypeConversion(Primitive::kPrimByte, k_header, kNoDexPc), 0); + HInstruction* add = InsertInstruction( + new (&allocator_) HAdd(Primitive::kPrimInt, conv, constant1_), 0); + k_header->AddInput(add); + PerformInductionVarAnalysis(); + + EXPECT_STREQ("", GetInductionInfo(k_header, 0).c_str()); + EXPECT_STREQ("", GetInductionInfo(add, 0).c_str()); } TEST_F(InductionVarAnalysisTest, ByteLoopControl1) { @@ -1116,12 +1201,20 @@ TEST_F(InductionVarAnalysisTest, ByteLoopControl1) { basic_[0]->ReplaceInput(graph_->GetIntConstant(-128), 0); HInstruction* ifs = loop_header_[0]->GetLastInstruction()->GetPrevious(); ifs->ReplaceInput(graph_->GetIntConstant(127), 1); - HInstruction* conv = new(&allocator_) HTypeConversion(Primitive::kPrimByte, increment_[0], -1); + HInstruction* conv = + new (&allocator_) HTypeConversion(Primitive::kPrimByte, increment_[0], kNoDexPc); loop_body_[0]->InsertInstructionBefore(conv, increment_[0]->GetNext()); basic_[0]->ReplaceInput(conv, 1); PerformInductionVarAnalysis(); - EXPECT_STREQ("((1) * i + ((-128) + (1))):PrimByte", GetInductionInfo(increment_[0], 0).c_str()); + // Recorded at the phi, but not transferred to increment. + EXPECT_STREQ("((1) * i + (-128)):PrimByte", GetInductionInfo(basic_[0], 0).c_str()); + EXPECT_STREQ("", GetInductionInfo(increment_[0], 0).c_str()); + + // Narrowing detected. + EXPECT_TRUE(IsNarrowingLinear(basic_[0])); + EXPECT_FALSE(IsNarrowingLinear(increment_[0])); // works for null + // Trip-count. 
EXPECT_STREQ("(((127) - (-128)) (TC-loop) ((-128) < (127)))", GetTripCount(0).c_str()); } @@ -1134,12 +1227,20 @@ TEST_F(InductionVarAnalysisTest, ByteLoopControl2) { basic_[0]->ReplaceInput(graph_->GetIntConstant(-128), 0); HInstruction* ifs = loop_header_[0]->GetLastInstruction()->GetPrevious(); ifs->ReplaceInput(graph_->GetIntConstant(128), 1); - HInstruction* conv = new(&allocator_) HTypeConversion(Primitive::kPrimByte, increment_[0], -1); + HInstruction* conv = + new (&allocator_) HTypeConversion(Primitive::kPrimByte, increment_[0], kNoDexPc); loop_body_[0]->InsertInstructionBefore(conv, increment_[0]->GetNext()); basic_[0]->ReplaceInput(conv, 1); PerformInductionVarAnalysis(); - EXPECT_STREQ("((1) * i + ((-128) + (1))):PrimByte", GetInductionInfo(increment_[0], 0).c_str()); + // Recorded at the phi, but not transferred to increment. + EXPECT_STREQ("((1) * i + (-128)):PrimByte", GetInductionInfo(basic_[0], 0).c_str()); + EXPECT_STREQ("", GetInductionInfo(increment_[0], 0).c_str()); + + // Narrowing detected. + EXPECT_TRUE(IsNarrowingLinear(basic_[0])); + EXPECT_FALSE(IsNarrowingLinear(increment_[0])); // works for null + // Trip-count undefined. EXPECT_STREQ("", GetTripCount(0).c_str()); } @@ -1152,13 +1253,20 @@ TEST_F(InductionVarAnalysisTest, ShortLoopControl1) { basic_[0]->ReplaceInput(graph_->GetIntConstant(-32768), 0); HInstruction* ifs = loop_header_[0]->GetLastInstruction()->GetPrevious(); ifs->ReplaceInput(graph_->GetIntConstant(32767), 1); - HInstruction* conv = new(&allocator_) HTypeConversion(Primitive::kPrimShort, increment_[0], -1); + HInstruction* conv = + new (&allocator_) HTypeConversion(Primitive::kPrimShort, increment_[0], kNoDexPc); loop_body_[0]->InsertInstructionBefore(conv, increment_[0]->GetNext()); basic_[0]->ReplaceInput(conv, 1); PerformInductionVarAnalysis(); - EXPECT_STREQ("((1) * i + ((-32768) + (1))):PrimShort", - GetInductionInfo(increment_[0], 0).c_str()); + // Recorded at the phi, but not transferred to increment. + EXPECT_STREQ("((1) * i + (-32768)):PrimShort", GetInductionInfo(basic_[0], 0).c_str()); + EXPECT_STREQ("", GetInductionInfo(increment_[0], 0).c_str()); + + // Narrowing detected. + EXPECT_TRUE(IsNarrowingLinear(basic_[0])); + EXPECT_FALSE(IsNarrowingLinear(increment_[0])); // works for null + // Trip-count. EXPECT_STREQ("(((32767) - (-32768)) (TC-loop) ((-32768) < (32767)))", GetTripCount(0).c_str()); } @@ -1171,13 +1279,20 @@ TEST_F(InductionVarAnalysisTest, ShortLoopControl2) { basic_[0]->ReplaceInput(graph_->GetIntConstant(-32768), 0); HInstruction* ifs = loop_header_[0]->GetLastInstruction()->GetPrevious(); ifs->ReplaceInput(graph_->GetIntConstant(32768), 1); - HInstruction* conv = new(&allocator_) HTypeConversion(Primitive::kPrimShort, increment_[0], -1); + HInstruction* conv = + new (&allocator_) HTypeConversion(Primitive::kPrimShort, increment_[0], kNoDexPc); loop_body_[0]->InsertInstructionBefore(conv, increment_[0]->GetNext()); basic_[0]->ReplaceInput(conv, 1); PerformInductionVarAnalysis(); - EXPECT_STREQ("((1) * i + ((-32768) + (1))):PrimShort", - GetInductionInfo(increment_[0], 0).c_str()); + // Recorded at the phi, but not transferred to increment. + EXPECT_STREQ("((1) * i + (-32768)):PrimShort", GetInductionInfo(basic_[0], 0).c_str()); + EXPECT_STREQ("", GetInductionInfo(increment_[0], 0).c_str()); + + // Narrowing detected. + EXPECT_TRUE(IsNarrowingLinear(basic_[0])); + EXPECT_FALSE(IsNarrowingLinear(increment_[0])); // works for null + // Trip-count undefined. 
EXPECT_STREQ("", GetTripCount(0).c_str()); } @@ -1189,12 +1304,20 @@ TEST_F(InductionVarAnalysisTest, CharLoopControl1) { BuildLoopNest(1); HInstruction* ifs = loop_header_[0]->GetLastInstruction()->GetPrevious(); ifs->ReplaceInput(graph_->GetIntConstant(65535), 1); - HInstruction* conv = new(&allocator_) HTypeConversion(Primitive::kPrimChar, increment_[0], -1); + HInstruction* conv = + new (&allocator_) HTypeConversion(Primitive::kPrimChar, increment_[0], kNoDexPc); loop_body_[0]->InsertInstructionBefore(conv, increment_[0]->GetNext()); basic_[0]->ReplaceInput(conv, 1); PerformInductionVarAnalysis(); - EXPECT_STREQ("((1) * i + (1)):PrimChar", GetInductionInfo(increment_[0], 0).c_str()); + // Recorded at the phi, but not transferred to increment. + EXPECT_STREQ("((1) * i + (0)):PrimChar", GetInductionInfo(basic_[0], 0).c_str()); + EXPECT_STREQ("", GetInductionInfo(increment_[0], 0).c_str()); + + // Narrowing detected. + EXPECT_TRUE(IsNarrowingLinear(basic_[0])); + EXPECT_FALSE(IsNarrowingLinear(increment_[0])); // works for null + // Trip-count. EXPECT_STREQ("((65535) (TC-loop) ((0) < (65535)))", GetTripCount(0).c_str()); } @@ -1206,12 +1329,20 @@ TEST_F(InductionVarAnalysisTest, CharLoopControl2) { BuildLoopNest(1); HInstruction* ifs = loop_header_[0]->GetLastInstruction()->GetPrevious(); ifs->ReplaceInput(graph_->GetIntConstant(65536), 1); - HInstruction* conv = new(&allocator_) HTypeConversion(Primitive::kPrimChar, increment_[0], -1); + HInstruction* conv = + new (&allocator_) HTypeConversion(Primitive::kPrimChar, increment_[0], kNoDexPc); loop_body_[0]->InsertInstructionBefore(conv, increment_[0]->GetNext()); basic_[0]->ReplaceInput(conv, 1); PerformInductionVarAnalysis(); - EXPECT_STREQ("((1) * i + (1)):PrimChar", GetInductionInfo(increment_[0], 0).c_str()); + // Recorded at the phi, but not transferred to increment. + EXPECT_STREQ("((1) * i + (0)):PrimChar", GetInductionInfo(basic_[0], 0).c_str()); + EXPECT_STREQ("", GetInductionInfo(increment_[0], 0).c_str()); + + // Narrowing detected. + EXPECT_TRUE(IsNarrowingLinear(basic_[0])); + EXPECT_FALSE(IsNarrowingLinear(increment_[0])); // works for null + // Trip-count undefined. EXPECT_STREQ("", GetTripCount(0).c_str()); } diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc index 7bcc3845e7..d5c4c2fa69 100644 --- a/compiler/optimizing/induction_var_range.cc +++ b/compiler/optimizing/induction_var_range.cc @@ -169,8 +169,8 @@ static InductionVarRange::Value CorrectForType(InductionVarRange::Value v, Primi case Primitive::kPrimByte: { // Constants within range only. // TODO: maybe some room for improvement, like allowing widening conversions - const int32_t min = Primitive::MinValueOfIntegralType(type); - const int32_t max = Primitive::MaxValueOfIntegralType(type); + int32_t min = Primitive::MinValueOfIntegralType(type); + int32_t max = Primitive::MaxValueOfIntegralType(type); return (IsConstantValue(v) && min <= v.b_constant && v.b_constant <= max) ? v : InductionVarRange::Value(); @@ -551,7 +551,7 @@ InductionVarRange::Value InductionVarRange::GetPolynomial(HInductionVarAnalysis: int64_t b = 0; if (IsConstant(info->op_a->op_a, kExact, &a) && CanLongValueFitIntoInt(a) && a >= 0 && IsConstant(info->op_a->op_b, kExact, &b) && CanLongValueFitIntoInt(b) && b >= 0) { - // Evaluate bounds on sum_i=0^m-1(a * i + b) + c with a,b >= 0 for known + // Evaluate bounds on sum_i=0^m-1(a * i + b) + c with a,b >= 0 for // maximum index value m as a * (m * (m-1)) / 2 + b * m + c. 
Value c = GetVal(info->op_b, trip, in_body, is_min); if (is_min) { @@ -629,6 +629,7 @@ InductionVarRange::Value InductionVarRange::GetFetch(HInstruction* instruction, } } else if (instruction->IsTypeConversion()) { // Since analysis is 32-bit (or narrower), chase beyond widening along the path. + // For example, this discovers the length in: for (long i = 0; i < a.length; i++); if (instruction->AsTypeConversion()->GetInputType() == Primitive::kPrimInt && instruction->AsTypeConversion()->GetResultType() == Primitive::kPrimLong) { return GetFetch(instruction->InputAt(0), trip, in_body, is_min); @@ -843,7 +844,7 @@ InductionVarRange::Value InductionVarRange::DivRangeAndConstant( InductionVarRange::Value InductionVarRange::AddValue(Value v1, Value v2) const { if (v1.is_known && v2.is_known && IsSafeAdd(v1.b_constant, v2.b_constant)) { - const int32_t b = v1.b_constant + v2.b_constant; + int32_t b = v1.b_constant + v2.b_constant; if (v1.a_constant == 0) { return Value(v2.instruction, v2.a_constant, b); } else if (v2.a_constant == 0) { @@ -857,7 +858,7 @@ InductionVarRange::Value InductionVarRange::AddValue(Value v1, Value v2) const { InductionVarRange::Value InductionVarRange::SubValue(Value v1, Value v2) const { if (v1.is_known && v2.is_known && IsSafeSub(v1.b_constant, v2.b_constant)) { - const int32_t b = v1.b_constant - v2.b_constant; + int32_t b = v1.b_constant - v2.b_constant; if (v1.a_constant == 0 && IsSafeSub(0, v2.a_constant)) { return Value(v2.instruction, -v2.a_constant, b); } else if (v2.a_constant == 0) { @@ -988,13 +989,16 @@ bool InductionVarRange::GenerateLastValuePolynomial(HInductionVarAnalysis::Induc IsConstant(trip->op_a, kExact, &m) && m >= 1) { // Evaluate bounds on sum_i=0^m-1(a * i + b) + c for known // maximum index value m as a * (m * (m-1)) / 2 + b * m + c. - // TODO: generalize - HInstruction* c_instr = nullptr; - if (GenerateCode(info->op_b, nullptr, graph, block, graph ? &c_instr : nullptr, false, false)) { + HInstruction* c = nullptr; + if (GenerateCode(info->op_b, nullptr, graph, block, graph ? &c : nullptr, false, false)) { if (graph != nullptr) { + Primitive::Type type = info->type; int64_t sum = a * ((m * (m - 1)) / 2) + b * m; - *result = Insert(block, new (graph->GetArena()) HAdd(info->type, - graph->GetIntConstant(sum), c_instr)); + if (type != Primitive::kPrimLong) { + sum = static_cast<int32_t>(sum); // okay to truncate + } + *result = + Insert(block, new (graph->GetArena()) HAdd(type, graph->GetConstant(type, sum), c)); } return true; } @@ -1011,35 +1015,33 @@ bool InductionVarRange::GenerateLastValueGeometric(HInductionVarAnalysis::Induct DCHECK_EQ(info->induction_class, HInductionVarAnalysis::kGeometric); // Detect known base and trip count (always taken). int64_t f = 0; - int64_t t = 0; - if (IsIntAndGet(info->fetch, &f) && f >= 1 && IsConstant(trip->op_a, kExact, &t) && t >= 1) { + int64_t m = 0; + if (IsIntAndGet(info->fetch, &f) && f >= 1 && IsConstant(trip->op_a, kExact, &m) && m >= 1) { HInstruction* opa = nullptr; HInstruction* opb = nullptr; if (GenerateCode(info->op_a, nullptr, graph, block, &opa, false, false) && GenerateCode(info->op_b, nullptr, graph, block, &opb, false, false)) { - // Compute f ^ t. - int64_t fpowt = IntPow(f, t); + // Compute f ^ m for known maximum index value m. + int64_t fpow = IntPow(f, m); if (graph != nullptr) { - DCHECK(info->type == Primitive::kPrimInt); // due to codegen, generalize? 
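For the geometric induction handled here, the value after the known maximum index m is a * f^m + b under repeated multiplication and a / f^m + b under repeated division, with f^m computed once via IntPow above. A small standalone check of those identities (illustrative only, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  const int64_t a = 5, b = 3, f = 2, m = 6;
  int64_t pow = 1;
  for (int64_t i = 0; i < m; ++i) pow *= f;   // f ^ m
  int64_t mul = a, div = a;
  for (int64_t i = 0; i < m; ++i) {
    mul *= f;                                 // x = x * f each iteration
    div /= f;                                 // x = x / f each iteration
  }
  assert(mul + b == a * pow + b);             // last value: a * f^m + b
  assert(div + b == a / pow + b);             // last value: a / f^m + b
  return 0;
}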
- if (fpowt == 0) { + DCHECK(info->operation == HInductionVarAnalysis::kMul || + info->operation == HInductionVarAnalysis::kDiv); + Primitive::Type type = info->type; + if (fpow == 0) { // Special case: repeated mul/div always yields zero. - *result = graph->GetIntConstant(0); - } else if (info->operation == HInductionVarAnalysis::kMul) { - // Last value multiplication: a * f ^ t + b. - HInstruction* mul = Insert(block, - new (graph->GetArena()) HMul(info->type, - opa, - graph->GetIntConstant(fpowt))); - *result = Insert(block, new (graph->GetArena()) HAdd(info->type, mul, opb)); + *result = graph->GetConstant(type, 0); } else { - // Last value multiplication: a * f ^ -t + b. - DCHECK_EQ(info->operation, HInductionVarAnalysis::kDiv); - HInstruction* div = Insert(block, - new (graph->GetArena()) HDiv(info->type, - opa, - graph->GetIntConstant(fpowt), - kNoDexPc)); - *result = Insert(block, new (graph->GetArena()) HAdd(info->type, div, opb)); + // Last value: a * f ^ m + b or a * f ^ -m + b. + if (type != Primitive::kPrimLong) { + fpow = static_cast<int32_t>(fpow); // okay to truncate + } + HInstruction* e = nullptr; + if (info->operation == HInductionVarAnalysis::kMul) { + e = new (graph->GetArena()) HMul(type, opa, graph->GetConstant(type, fpow)); + } else { + e = new (graph->GetArena()) HDiv(type, opa, graph->GetConstant(type, fpow), kNoDexPc); + } + *result = Insert(block, new (graph->GetArena()) HAdd(type, Insert(block, e), opb)); } } return true; @@ -1060,12 +1062,11 @@ bool InductionVarRange::GenerateLastValueWrapAround(HInductionVarAnalysis::Induc for (; info->induction_class == HInductionVarAnalysis::kWrapAround; info = info->op_b, ++depth) {} // Handle wrap(x, wrap(.., y)) if trip count reaches an invariant at end. - // TODO: generalize - int64_t t = 0; + // TODO: generalize, but be careful to adjust the terminal. + int64_t m = 0; if (info->induction_class == HInductionVarAnalysis::kInvariant && - IsConstant(trip->op_a, kExact, &t) && t >= depth && - GenerateCode(info, nullptr, graph, block, result, false, false)) { - return true; + IsConstant(trip->op_a, kExact, &m) && m >= depth) { + return GenerateCode(info, nullptr, graph, block, result, false, false); } return false; } @@ -1079,43 +1080,49 @@ bool InductionVarRange::GenerateLastValuePeriodic(HInductionVarAnalysis::Inducti DCHECK(info != nullptr); DCHECK_EQ(info->induction_class, HInductionVarAnalysis::kPeriodic); // Count period. - int32_t period = 1; + int64_t period = 1; for (HInductionVarAnalysis::InductionInfo* p = info; p->induction_class == HInductionVarAnalysis::kPeriodic; p = p->op_b, ++period) {} - // Handle periodic(x, y) case for restricted types. - // TODO: generalize - if (period != 2 || - trip->op_a->type != Primitive::kPrimInt || - (info->type != Primitive::kPrimInt && info->type != Primitive::kPrimBoolean)) { - return false; + // Handle any periodic(x, periodic(.., y)) for known maximum index value m. + int64_t m = 0; + if (IsConstant(trip->op_a, kExact, &m) && m >= 1) { + int64_t li = m % period; + for (int64_t i = 0; i < li; info = info->op_b, i++) {} + if (info->induction_class == HInductionVarAnalysis::kPeriodic) { + info = info->op_a; + } + return GenerateCode(info, nullptr, graph, block, result, false, false); } - HInstruction* x_instr = nullptr; - HInstruction* y_instr = nullptr; - HInstruction* trip_expr = nullptr; - if (GenerateCode(info->op_a, nullptr, graph, block, graph ? &x_instr : nullptr, false, false) && - GenerateCode(info->op_b, nullptr, graph, block, graph ? 
&y_instr : nullptr, false, false) && - GenerateCode(trip->op_a, nullptr, graph, block, graph ? &trip_expr : nullptr, false, false)) { - // During actual code generation (graph != nullptr), - // generate is_even ? x : y select instruction. + // Handle periodic(x, y) using even/odd-select on trip count. Enter trip count expression + // directly to obtain the maximum index value t even if taken test is needed. + HInstruction* x = nullptr; + HInstruction* y = nullptr; + HInstruction* t = nullptr; + if (period == 2 && + GenerateCode(info->op_a, nullptr, graph, block, graph ? &x : nullptr, false, false) && + GenerateCode(info->op_b, nullptr, graph, block, graph ? &y : nullptr, false, false) && + GenerateCode(trip->op_a, nullptr, graph, block, graph ? &t : nullptr, false, false)) { + // During actual code generation (graph != nullptr), generate is_even ? x : y. if (graph != nullptr) { - HInstruction* is_even = Insert(block, new (graph->GetArena()) HEqual( - Insert(block, new (graph->GetArena()) HAnd( - Primitive::kPrimInt, trip_expr, graph->GetIntConstant(1))), - graph->GetIntConstant(0), kNoDexPc)); - *result = Insert(block, new (graph->GetArena()) HSelect(is_even, x_instr, y_instr, kNoDexPc)); + Primitive::Type type = trip->type; + HInstruction* msk = + Insert(block, new (graph->GetArena()) HAnd(type, t, graph->GetConstant(type, 1))); + HInstruction* is_even = + Insert(block, new (graph->GetArena()) HEqual(msk, graph->GetConstant(type, 0), kNoDexPc)); + *result = Insert(block, new (graph->GetArena()) HSelect(is_even, x, y, kNoDexPc)); } // Guard select with taken test if needed. if (*needs_taken_test) { - HInstruction* taken_test = nullptr; - if (!GenerateCode( - trip->op_b, nullptr, graph, block, graph ? &taken_test : nullptr, false, false)) { + HInstruction* is_taken = nullptr; + if (GenerateCode(trip->op_b, nullptr, graph, block, graph ? &is_taken : nullptr, false, false)) { + if (graph != nullptr) { + *result = Insert(block, new (graph->GetArena()) HSelect(is_taken, *result, x, kNoDexPc)); + } + *needs_taken_test = false; // taken care of + } else { return false; - } else if (graph != nullptr) { - *result = Insert(block, - new (graph->GetArena()) HSelect(taken_test, *result, x_instr, kNoDexPc)); } - *needs_taken_test = false; // taken care of } return true; } @@ -1134,13 +1141,8 @@ bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info, if (graph != nullptr && result == nullptr) { return true; } - // Verify type safety. - // TODO: generalize - Primitive::Type type = Primitive::kPrimInt; - if (info->type != Primitive::kPrimInt && info->type != Primitive::kPrimBoolean) { - return false; - } // Handle current operation. 
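The periodic handling above picks the last value by stepping m % period elements into the chain, and for a period of two reduces to the is_even ? x : y select guarded by the taken test. A standalone sketch of why that element is the right one (hypothetical values, not ART code):

#include <cassert>

int main() {
  const int seq[3] = {7, 11, 13};            // periodic(7, periodic(11, 13))
  for (int m = 1; m <= 12; ++m) {
    int k = seq[0];
    for (int i = 0; i < m; ++i) {
      k = seq[(i + 1) % 3];                  // advance one period element per iteration
    }
    assert(k == seq[m % 3]);                 // last value: element at index m % period
  }
  const int x = 7, y = 11;                   // periodic(x, y), period == 2
  for (int m = 1; m <= 12; ++m) {
    int k = x;
    for (int i = 0; i < m; ++i) k = (k == x) ? y : x;
    assert(k == (((m & 1) == 0) ? x : y));   // the is_even select
  }
  return 0;
}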
+ Primitive::Type type = info->type; HInstruction* opa = nullptr; HInstruction* opb = nullptr; switch (info->induction_class) { @@ -1214,15 +1216,15 @@ bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info, case HInductionVarAnalysis::kTripCountInBodyUnsafe: if (is_min) { if (graph != nullptr) { - *result = graph->GetIntConstant(0); + *result = graph->GetConstant(type, 0); } return true; } else if (in_body) { if (GenerateCode(info->op_a, trip, graph, block, &opb, in_body, is_min)) { if (graph != nullptr) { - *result = Insert(block, - new (graph->GetArena()) - HSub(type, opb, graph->GetIntConstant(1))); + *result = + Insert(block, + new (graph->GetArena()) HSub(type, opb, graph->GetConstant(type, 1))); } return true; } @@ -1236,26 +1238,31 @@ bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info, // Linear induction a * i + b, for normalized 0 <= i < TC. For ranges, this should // be restricted to a unit stride to avoid arithmetic wrap-around situations that // are harder to guard against. For a last value, requesting min/max based on any - // stride yields right value. - int64_t stride_value = 0; - if (IsConstant(info->op_a, kExact, &stride_value)) { - const bool is_min_a = stride_value >= 0 ? is_min : !is_min; - if (GenerateCode(trip, trip, graph, block, &opa, in_body, is_min_a) && - GenerateCode(info->op_b, trip, graph, block, &opb, in_body, is_min)) { - if (graph != nullptr) { - HInstruction* oper; - if (stride_value == 1) { - oper = new (graph->GetArena()) HAdd(type, opa, opb); - } else if (stride_value == -1) { - oper = new (graph->GetArena()) HSub(type, opb, opa); - } else { - HInstruction* mul = new (graph->GetArena()) HMul( - type, graph->GetIntConstant(stride_value), opa); - oper = new (graph->GetArena()) HAdd(type, Insert(block, mul), opb); + // known stride yields right value. Always avoid any narrowing linear induction or + // any type mismatch between the linear induction and the trip count expression. + // TODO: careful runtime type conversions could generalize this latter restriction. + if (!HInductionVarAnalysis::IsNarrowingLinear(info) && trip->type == type) { + int64_t stride_value = 0; + if (IsConstant(info->op_a, kExact, &stride_value) && + CanLongValueFitIntoInt(stride_value)) { + const bool is_min_a = stride_value >= 0 ? 
is_min : !is_min; + if (GenerateCode(trip, trip, graph, block, &opa, in_body, is_min_a) && + GenerateCode(info->op_b, trip, graph, block, &opb, in_body, is_min)) { + if (graph != nullptr) { + HInstruction* oper; + if (stride_value == 1) { + oper = new (graph->GetArena()) HAdd(type, opa, opb); + } else if (stride_value == -1) { + oper = new (graph->GetArena()) HSub(type, opb, opa); + } else { + HInstruction* mul = + new (graph->GetArena()) HMul(type, graph->GetConstant(type, stride_value), opa); + oper = new (graph->GetArena()) HAdd(type, Insert(block, mul), opb); + } + *result = Insert(block, oper); } - *result = Insert(block, oper); + return true; } - return true; } } break; @@ -1270,7 +1277,7 @@ bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info, Value extreme = GetVal(info, trip, in_body, is_min); if (IsConstantValue(extreme)) { if (graph != nullptr) { - *result = graph->GetIntConstant(extreme.b_constant); + *result = graph->GetConstant(type, extreme.b_constant); } return true; } diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index fe4662abb1..e5d05e9e6d 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -344,6 +344,7 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) { if (actual_method != nullptr) { bool result = TryInlineAndReplace(invoke_instruction, actual_method, + ReferenceTypeInfo::CreateInvalid(), /* do_rtp */ true, cha_devirtualize); if (result && !invoke_instruction->IsInvokeStaticOrDirect()) { @@ -428,13 +429,13 @@ HInstanceFieldGet* HInliner::BuildGetReceiverClass(ClassLinker* class_linker, DCHECK_EQ(std::string(field->GetName()), "shadow$_klass_"); HInstanceFieldGet* result = new (graph_->GetArena()) HInstanceFieldGet( receiver, + field, Primitive::kPrimNot, field->GetOffset(), field->IsVolatile(), field->GetDexFieldIndex(), field->GetDeclaringClass()->GetDexClassDefIndex(), *field->GetDexFile(), - handles_->NewHandle(field->GetDexCache()), dex_pc); // The class of a field is effectively final, and does not have any memory dependencies. result->SetSideEffects(SideEffects::None()); @@ -471,22 +472,21 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction, HInstruction* receiver = invoke_instruction->InputAt(0); HInstruction* cursor = invoke_instruction->GetPrevious(); HBasicBlock* bb_cursor = invoke_instruction->GetBlock(); - + Handle<mirror::Class> handle = handles_->NewHandle(GetMonomorphicType(classes)); if (!TryInlineAndReplace(invoke_instruction, resolved_method, + ReferenceTypeInfo::Create(handle, /* is_exact */ true), /* do_rtp */ false, /* cha_devirtualize */ false)) { return false; } // We successfully inlined, now add a guard. 
- bool is_referrer = - (GetMonomorphicType(classes) == outermost_graph_->GetArtMethod()->GetDeclaringClass()); AddTypeGuard(receiver, cursor, bb_cursor, class_index, - is_referrer, + GetMonomorphicType(classes), invoke_instruction, /* with_deoptimization */ true); @@ -506,52 +506,62 @@ void HInliner::AddCHAGuard(HInstruction* invoke_instruction, uint32_t dex_pc, HInstruction* cursor, HBasicBlock* bb_cursor) { - HInstruction* deopt_flag = new (graph_->GetArena()) HShouldDeoptimizeFlag(dex_pc); - HInstruction* should_deopt = new (graph_->GetArena()) HNotEqual( + HShouldDeoptimizeFlag* deopt_flag = new (graph_->GetArena()) + HShouldDeoptimizeFlag(graph_->GetArena(), dex_pc); + HInstruction* compare = new (graph_->GetArena()) HNotEqual( deopt_flag, graph_->GetIntConstant(0, dex_pc)); - HInstruction* deopt = new (graph_->GetArena()) HDeoptimize(should_deopt, dex_pc); + HInstruction* deopt = new (graph_->GetArena()) HDeoptimize(compare, dex_pc); if (cursor != nullptr) { bb_cursor->InsertInstructionAfter(deopt_flag, cursor); } else { bb_cursor->InsertInstructionBefore(deopt_flag, bb_cursor->GetFirstInstruction()); } - bb_cursor->InsertInstructionAfter(should_deopt, deopt_flag); - bb_cursor->InsertInstructionAfter(deopt, should_deopt); + bb_cursor->InsertInstructionAfter(compare, deopt_flag); + bb_cursor->InsertInstructionAfter(deopt, compare); + + // Add receiver as input to aid CHA guard optimization later. + deopt_flag->AddInput(invoke_instruction->InputAt(0)); + DCHECK_EQ(deopt_flag->InputCount(), 1u); deopt->CopyEnvironmentFrom(invoke_instruction->GetEnvironment()); + outermost_graph_->IncrementNumberOfCHAGuards(); } HInstruction* HInliner::AddTypeGuard(HInstruction* receiver, HInstruction* cursor, HBasicBlock* bb_cursor, dex::TypeIndex class_index, - bool is_referrer, + mirror::Class* klass, HInstruction* invoke_instruction, bool with_deoptimization) { + ScopedAssertNoThreadSuspension sants("Adding compiler type guard"); + ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker(); HInstanceFieldGet* receiver_class = BuildGetReceiverClass( class_linker, receiver, invoke_instruction->GetDexPc()); + if (cursor != nullptr) { + bb_cursor->InsertInstructionAfter(receiver_class, cursor); + } else { + bb_cursor->InsertInstructionBefore(receiver_class, bb_cursor->GetFirstInstruction()); + } const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile(); + bool is_referrer = (klass == outermost_graph_->GetArtMethod()->GetDeclaringClass()); // Note that we will just compare the classes, so we don't need Java semantics access checks. - // Also, the caller of `AddTypeGuard` must have guaranteed that the class is in the dex cache. + // Note that the type index and the dex file are relative to the method this type guard is + // inlined into. HLoadClass* load_class = new (graph_->GetArena()) HLoadClass(graph_->GetCurrentMethod(), class_index, caller_dex_file, is_referrer, invoke_instruction->GetDexPc(), - /* needs_access_check */ false, - /* is_in_dex_cache */ true, - /* is_in_boot_image */ false); + /* needs_access_check */ false); + bb_cursor->InsertInstructionAfter(load_class, receiver_class); + // Sharpen after adding the instruction, as the sharpening may remove inputs. + HSharpening::SharpenClass(load_class, klass, handles_, codegen_, compiler_driver_); - HNotEqual* compare = new (graph_->GetArena()) HNotEqual(load_class, receiver_class); // TODO: Extend reference type propagation to understand the guard. 
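AddCHAGuard above materializes the guard as an HShouldDeoptimizeFlag (now carrying the receiver as an input so the later CHA guard optimization can reason about it), an HNotEqual against zero, and an HDeoptimize. In rough, non-ART terms (hypothetical names; the real flag lives in the compiled method's frame and is set by the runtime when class loading invalidates the CHA assumption):

#include <atomic>

std::atomic<int> should_deoptimize{0};   // stand-in for the per-frame deopt flag
int InlinedFastPath() { return 42; }     // stand-in for the CHA-devirtualized, inlined body
int Deoptimize() { return -1; }          // stand-in for HDeoptimize

int GuardedCall() {
  // HShouldDeoptimizeFlag + HNotEqual(flag, 0) + HDeoptimize:
  if (should_deoptimize.load(std::memory_order_relaxed) != 0) {
    return Deoptimize();
  }
  return InlinedFastPath();
}

int main() { return GuardedCall() == 42 ? 0 : 1; }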
- if (cursor != nullptr) { - bb_cursor->InsertInstructionAfter(receiver_class, cursor); - } else { - bb_cursor->InsertInstructionBefore(receiver_class, bb_cursor->GetFirstInstruction()); - } - bb_cursor->InsertInstructionAfter(load_class, receiver_class); + HNotEqual* compare = new (graph_->GetArena()) HNotEqual(load_class, receiver_class); bb_cursor->InsertInstructionAfter(compare, load_class); if (with_deoptimization) { HDeoptimize* deoptimize = new (graph_->GetArena()) HDeoptimize( @@ -583,13 +593,13 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction, break; } ArtMethod* method = nullptr; + + Handle<mirror::Class> handle = handles_->NewHandle(classes->Get(i)); if (invoke_instruction->IsInvokeInterface()) { - method = classes->Get(i)->FindVirtualMethodForInterface( - resolved_method, pointer_size); + method = handle->FindVirtualMethodForInterface(resolved_method, pointer_size); } else { DCHECK(invoke_instruction->IsInvokeVirtual()); - method = classes->Get(i)->FindVirtualMethodForVirtual( - resolved_method, pointer_size); + method = handle->FindVirtualMethodForVirtual(resolved_method, pointer_size); } HInstruction* receiver = invoke_instruction->InputAt(0); @@ -597,14 +607,19 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction, HBasicBlock* bb_cursor = invoke_instruction->GetBlock(); dex::TypeIndex class_index = FindClassIndexIn( - classes->Get(i), caller_dex_file, caller_compilation_unit_.GetDexCache()); + handle.Get(), caller_dex_file, caller_compilation_unit_.GetDexCache()); HInstruction* return_replacement = nullptr; if (!class_index.IsValid() || - !TryBuildAndInline(invoke_instruction, method, &return_replacement)) { + !TryBuildAndInline(invoke_instruction, + method, + ReferenceTypeInfo::Create(handle, /* is_exact */ true), + &return_replacement)) { all_targets_inlined = false; } else { one_target_inlined = true; - bool is_referrer = (classes->Get(i) == outermost_graph_->GetArtMethod()->GetDeclaringClass()); + + VLOG(compiler) << "Polymorphic call to " << ArtMethod::PrettyMethod(resolved_method) + << " has inlined " << ArtMethod::PrettyMethod(method); // If we have inlined all targets before, and this receiver is the last seen, // we deoptimize instead of keeping the original invoke instruction. @@ -616,8 +631,13 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction, // We do not support HDeoptimize in OSR methods. deoptimize = false; } - HInstruction* compare = AddTypeGuard( - receiver, cursor, bb_cursor, class_index, is_referrer, invoke_instruction, deoptimize); + HInstruction* compare = AddTypeGuard(receiver, + cursor, + bb_cursor, + class_index, + handle.Get(), + invoke_instruction, + deoptimize); if (deoptimize) { if (return_replacement != nullptr) { invoke_instruction->ReplaceWith(return_replacement); @@ -638,6 +658,7 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction, << " of its targets could be inlined"; return false; } + MaybeRecordStat(kInlinedPolymorphicCall); // Run type propagation to get the guards typed. 
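The monomorphic and polymorphic paths above wrap each inlined body in a receiver-class check that deoptimizes (or falls back to the original call) on mismatch. A loose standalone C++ analogy of such a guarded, devirtualized call site (hypothetical Base/Impl types, not ART's HInstruction graph):

#include <typeinfo>

struct Base { virtual int f() const { return 0; } virtual ~Base() {} };
struct Impl : Base { int f() const override { return 42; } };

int CallF(const Base* obj) {
  if (typeid(*obj) == typeid(Impl)) {    // akin to the HNotEqual guard on the receiver's class
    return 42;                           // "inlined" body of Impl::f
  }
  return obj->f();                       // slow path, akin to HDeoptimize or the original invoke
}

int main() {
  Impl impl;
  Base base;
  return (CallF(&impl) == 42 && CallF(&base) == 0) ? 0 : 1;
}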
@@ -780,7 +801,10 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget( HBasicBlock* bb_cursor = invoke_instruction->GetBlock(); HInstruction* return_replacement = nullptr; - if (!TryBuildAndInline(invoke_instruction, actual_method, &return_replacement)) { + if (!TryBuildAndInline(invoke_instruction, + actual_method, + ReferenceTypeInfo::CreateInvalid(), + &return_replacement)) { return false; } @@ -845,13 +869,14 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget( bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction, ArtMethod* method, + ReferenceTypeInfo receiver_type, bool do_rtp, bool cha_devirtualize) { HInstruction* return_replacement = nullptr; uint32_t dex_pc = invoke_instruction->GetDexPc(); HInstruction* cursor = invoke_instruction->GetPrevious(); HBasicBlock* bb_cursor = invoke_instruction->GetBlock(); - if (!TryBuildAndInline(invoke_instruction, method, &return_replacement)) { + if (!TryBuildAndInline(invoke_instruction, method, receiver_type, &return_replacement)) { if (invoke_instruction->IsInvokeInterface()) { // Turn an invoke-interface into an invoke-virtual. An invoke-virtual is always // better than an invoke-interface because: @@ -909,6 +934,7 @@ bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction, bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, ArtMethod* method, + ReferenceTypeInfo receiver_type, HInstruction** return_replacement) { if (method->IsProxyMethod()) { VLOG(compiler) << "Method " << method->PrettyMethod() @@ -985,7 +1011,8 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, return false; } - if (!TryBuildAndInlineHelper(invoke_instruction, method, same_dex_file, return_replacement)) { + if (!TryBuildAndInlineHelper( + invoke_instruction, method, receiver_type, same_dex_file, return_replacement)) { return false; } @@ -1138,13 +1165,13 @@ HInstanceFieldGet* HInliner::CreateInstanceFieldGet(Handle<mirror::DexCache> dex DCHECK(resolved_field != nullptr); HInstanceFieldGet* iget = new (graph_->GetArena()) HInstanceFieldGet( obj, + resolved_field, resolved_field->GetTypeAsPrimitiveType(), resolved_field->GetOffset(), resolved_field->IsVolatile(), field_index, resolved_field->GetDeclaringClass()->GetDexClassDefIndex(), *dex_cache->GetDexFile(), - dex_cache, // Read barrier generates a runtime call in slow path and we need a valid // dex pc for the associated stack map. 0 is bogus but valid. Bug: 26854537. /* dex_pc */ 0); @@ -1167,13 +1194,13 @@ HInstanceFieldSet* HInliner::CreateInstanceFieldSet(Handle<mirror::DexCache> dex HInstanceFieldSet* iput = new (graph_->GetArena()) HInstanceFieldSet( obj, value, + resolved_field, resolved_field->GetTypeAsPrimitiveType(), resolved_field->GetOffset(), resolved_field->IsVolatile(), field_index, resolved_field->GetDeclaringClass()->GetDexClassDefIndex(), *dex_cache->GetDexFile(), - dex_cache, // Read barrier generates a runtime call in slow path and we need a valid // dex pc for the associated stack map. 0 is bogus but valid. Bug: 26854537. 
/* dex_pc */ 0); @@ -1182,8 +1209,10 @@ HInstanceFieldSet* HInliner::CreateInstanceFieldSet(Handle<mirror::DexCache> dex bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, ArtMethod* resolved_method, + ReferenceTypeInfo receiver_type, bool same_dex_file, HInstruction** return_replacement) { + DCHECK(!(resolved_method->IsStatic() && receiver_type.IsValid())); ScopedObjectAccess soa(Thread::Current()); const DexFile::CodeItem* code_item = resolved_method->GetCodeItem(); const DexFile& callee_dex_file = *resolved_method->GetDexFile(); @@ -1274,12 +1303,13 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, } size_t parameter_index = 0; + bool run_rtp = false; for (HInstructionIterator instructions(callee_graph->GetEntryBlock()->GetInstructions()); !instructions.Done(); instructions.Advance()) { HInstruction* current = instructions.Current(); if (current->IsParameterValue()) { - HInstruction* argument = invoke_instruction->InputAt(parameter_index++); + HInstruction* argument = invoke_instruction->InputAt(parameter_index); if (argument->IsNullConstant()) { current->ReplaceWith(callee_graph->GetNullConstant()); } else if (argument->IsIntConstant()) { @@ -1293,15 +1323,21 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, current->ReplaceWith( callee_graph->GetDoubleConstant(argument->AsDoubleConstant()->GetValue())); } else if (argument->GetType() == Primitive::kPrimNot) { - current->SetReferenceTypeInfo(argument->GetReferenceTypeInfo()); + if (!resolved_method->IsStatic() && parameter_index == 0 && receiver_type.IsValid()) { + run_rtp = true; + current->SetReferenceTypeInfo(receiver_type); + } else { + current->SetReferenceTypeInfo(argument->GetReferenceTypeInfo()); + } current->AsParameterValue()->SetCanBeNull(argument->CanBeNull()); } + ++parameter_index; } } // We have replaced formal arguments with actual arguments. If actual types // are more specific than the declared ones, run RTP again on the inner graph. - if (ArgumentTypesMoreSpecific(invoke_instruction, resolved_method)) { + if (run_rtp || ArgumentTypesMoreSpecific(invoke_instruction, resolved_method)) { ReferenceTypePropagation(callee_graph, dex_compilation_unit.GetDexCache(), handles_, @@ -1490,7 +1526,7 @@ static bool IsReferenceTypeRefinement(ReferenceTypeInfo declared_rti, ReferenceTypeInfo actual_rti = actual_obj->GetReferenceTypeInfo(); return (actual_rti.IsExact() && !declared_rti.IsExact()) || - declared_rti.IsStrictSupertypeOf(actual_rti); + declared_rti.IsStrictSupertypeOf(actual_rti); } ReferenceTypeInfo HInliner::GetClassRTI(mirror::Class* klass) { @@ -1547,6 +1583,13 @@ bool HInliner::ReturnTypeMoreSpecific(HInvoke* invoke_instruction, /* declared_can_be_null */ true, return_replacement)) { return true; + } else if (return_replacement->IsInstanceFieldGet()) { + HInstanceFieldGet* field_get = return_replacement->AsInstanceFieldGet(); + ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); + if (field_get->GetFieldInfo().GetField() == + class_linker->GetClassRoot(ClassLinker::kJavaLangObject)->GetInstanceField(0)) { + return true; + } } } else if (return_replacement->IsInstanceOf()) { // Inlining InstanceOf into an If may put a tighter bound on reference types. diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h index ffebd97cb8..4c0b990f26 100644 --- a/compiler/optimizing/inliner.h +++ b/compiler/optimizing/inliner.h @@ -66,17 +66,20 @@ class HInliner : public HOptimization { // a CHA guard needs to be added for the inlining. 
bool TryInlineAndReplace(HInvoke* invoke_instruction, ArtMethod* resolved_method, + ReferenceTypeInfo receiver_type, bool do_rtp, bool cha_devirtualize) REQUIRES_SHARED(Locks::mutator_lock_); bool TryBuildAndInline(HInvoke* invoke_instruction, ArtMethod* resolved_method, + ReferenceTypeInfo receiver_type, HInstruction** return_replacement) REQUIRES_SHARED(Locks::mutator_lock_); bool TryBuildAndInlineHelper(HInvoke* invoke_instruction, ArtMethod* resolved_method, + ReferenceTypeInfo receiver_type, bool same_dex_file, HInstruction** return_replacement); @@ -167,7 +170,7 @@ class HInliner : public HOptimization { HInstruction* cursor, HBasicBlock* bb_cursor, dex::TypeIndex class_index, - bool is_referrer, + mirror::Class* klass, HInstruction* invoke_instruction, bool with_deoptimization) REQUIRES_SHARED(Locks::mutator_lock_); diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc index b97581beb3..768b1d80a1 100644 --- a/compiler/optimizing/instruction_builder.cc +++ b/compiler/optimizing/instruction_builder.cc @@ -816,8 +816,7 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction, HInvokeStaticOrDirect::DispatchInfo dispatch_info = { HInvokeStaticOrDirect::MethodLoadKind::kStringInit, HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod, - dchecked_integral_cast<uint64_t>(string_init_entry_point), - 0U + dchecked_integral_cast<uint64_t>(string_init_entry_point) }; MethodReference target_method(dex_file_, method_idx); HInvoke* invoke = new (arena_) HInvokeStaticOrDirect( @@ -862,8 +861,7 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction, HInvokeStaticOrDirect::DispatchInfo dispatch_info = { HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod, HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod, - 0u, - 0U + 0u }; MethodReference target_method(resolved_method->GetDexFile(), resolved_method->GetDexMethodIndex()); @@ -937,9 +935,7 @@ bool HInstructionBuilder::BuildNewInstance(dex::TypeIndex type_index, uint32_t d outer_dex_file, IsOutermostCompilingClass(type_index), dex_pc, - needs_access_check, - /* is_in_dex_cache */ false, - /* is_in_boot_image */ false); + needs_access_check); AppendInstruction(load_class); HInstruction* cls = load_class; @@ -1029,9 +1025,7 @@ HClinitCheck* HInstructionBuilder::ProcessClinitCheckForInvoke( outer_dex_file, is_outer_class, dex_pc, - /*needs_access_check*/ false, - /* is_in_dex_cache */ false, - /* is_in_boot_image */ false); + /*needs_access_check*/ false); AppendInstruction(load_class); clinit_check = new (arena_) HClinitCheck(load_class, dex_pc); AppendInstruction(clinit_check); @@ -1241,13 +1235,13 @@ bool HInstructionBuilder::BuildInstanceFieldAccess(const Instruction& instructio uint16_t class_def_index = resolved_field->GetDeclaringClass()->GetDexClassDefIndex(); field_set = new (arena_) HInstanceFieldSet(object, value, + resolved_field, field_type, resolved_field->GetOffset(), resolved_field->IsVolatile(), field_index, class_def_index, *dex_file_, - dex_compilation_unit_->GetDexCache(), dex_pc); } AppendInstruction(field_set); @@ -1262,13 +1256,13 @@ bool HInstructionBuilder::BuildInstanceFieldAccess(const Instruction& instructio } else { uint16_t class_def_index = resolved_field->GetDeclaringClass()->GetDexClassDefIndex(); field_get = new (arena_) HInstanceFieldGet(object, + resolved_field, field_type, resolved_field->GetOffset(), resolved_field->IsVolatile(), field_index, class_def_index, *dex_file_, - dex_compilation_unit_->GetDexCache(), dex_pc); } 
AppendInstruction(field_get); @@ -1317,9 +1311,9 @@ bool HInstructionBuilder::IsOutermostCompilingClass(dex::TypeIndex type_index) c } void HInstructionBuilder::BuildUnresolvedStaticFieldAccess(const Instruction& instruction, - uint32_t dex_pc, - bool is_put, - Primitive::Type field_type) { + uint32_t dex_pc, + bool is_put, + Primitive::Type field_type) { uint32_t source_or_dest_reg = instruction.VRegA_21c(); uint16_t field_index = instruction.VRegB_21c(); @@ -1388,9 +1382,7 @@ bool HInstructionBuilder::BuildStaticFieldAccess(const Instruction& instruction, outer_dex_file, is_outer_class, dex_pc, - /*needs_access_check*/ false, - /* is_in_dex_cache */ false, - /* is_in_boot_image */ false); + /*needs_access_check*/ false); AppendInstruction(constant); HInstruction* cls = constant; @@ -1408,23 +1400,23 @@ bool HInstructionBuilder::BuildStaticFieldAccess(const Instruction& instruction, DCHECK_EQ(HPhi::ToPhiType(value->GetType()), HPhi::ToPhiType(field_type)); AppendInstruction(new (arena_) HStaticFieldSet(cls, value, + resolved_field, field_type, resolved_field->GetOffset(), resolved_field->IsVolatile(), field_index, class_def_index, *dex_file_, - dex_cache_, dex_pc)); } else { AppendInstruction(new (arena_) HStaticFieldGet(cls, + resolved_field, field_type, resolved_field->GetOffset(), resolved_field->IsVolatile(), field_index, class_def_index, *dex_file_, - dex_cache_, dex_pc)); UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction()); } @@ -1664,9 +1656,7 @@ void HInstructionBuilder::BuildTypeCheck(const Instruction& instruction, dex_file, IsOutermostCompilingClass(type_index), dex_pc, - !can_access, - /* is_in_dex_cache */ false, - /* is_in_boot_image */ false); + !can_access); AppendInstruction(cls); TypeCheckKind check_kind = ComputeTypeCheckKind(resolved_class); @@ -2656,9 +2646,7 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, *dex_file_, IsOutermostCompilingClass(type_index), dex_pc, - !can_access, - /* is_in_dex_cache */ false, - /* is_in_boot_image */ false)); + !can_access)); UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction()); break; } diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index 439e3b66db..911bfb9cc6 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -1118,7 +1118,66 @@ void InstructionSimplifierVisitor::VisitAboveOrEqual(HAboveOrEqual* condition) { VisitCondition(condition); } +// Recognize the following pattern: +// obj.getClass() ==/!= Foo.class +// And replace it with a constant value if the type of `obj` is statically known. +static bool RecognizeAndSimplifyClassCheck(HCondition* condition) { + HInstruction* input_one = condition->InputAt(0); + HInstruction* input_two = condition->InputAt(1); + HLoadClass* load_class = input_one->IsLoadClass() + ? input_one->AsLoadClass() + : input_two->AsLoadClass(); + if (load_class == nullptr) { + return false; + } + + ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI(); + if (!class_rti.IsValid()) { + // Unresolved class. + return false; + } + + HInstanceFieldGet* field_get = (load_class == input_one) + ? 
input_two->AsInstanceFieldGet() + : input_one->AsInstanceFieldGet(); + if (field_get == nullptr) { + return false; + } + + HInstruction* receiver = field_get->InputAt(0); + ReferenceTypeInfo receiver_type = receiver->GetReferenceTypeInfo(); + if (!receiver_type.IsExact()) { + return false; + } + + { + ScopedObjectAccess soa(Thread::Current()); + ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); + ArtField* field = class_linker->GetClassRoot(ClassLinker::kJavaLangObject)->GetInstanceField(0); + DCHECK_EQ(std::string(field->GetName()), "shadow$_klass_"); + if (field_get->GetFieldInfo().GetField() != field) { + return false; + } + + // We can replace the compare. + int value = 0; + if (receiver_type.IsEqual(class_rti)) { + value = condition->IsEqual() ? 1 : 0; + } else { + value = condition->IsNotEqual() ? 1 : 0; + } + condition->ReplaceWith(condition->GetBlock()->GetGraph()->GetIntConstant(value)); + return true; + } +} + void InstructionSimplifierVisitor::VisitCondition(HCondition* condition) { + if (condition->IsEqual() || condition->IsNotEqual()) { + if (RecognizeAndSimplifyClassCheck(condition)) { + return; + } + } + // Reverse condition if left is constant. Our code generators prefer constant // on the right hand side. if (condition->GetLeft()->IsConstant() && !condition->GetRight()->IsConstant()) { @@ -1843,11 +1902,11 @@ void InstructionSimplifierVisitor::SimplifyStringCharAt(HInvoke* invoke) { // so create the HArrayLength, HBoundsCheck and HArrayGet. HArrayLength* length = new (arena) HArrayLength(str, dex_pc, /* is_string_length */ true); invoke->GetBlock()->InsertInstructionBefore(length, invoke); - HBoundsCheck* bounds_check = - new (arena) HBoundsCheck(index, length, dex_pc, invoke->GetDexMethodIndex()); + HBoundsCheck* bounds_check = new (arena) HBoundsCheck( + index, length, dex_pc, invoke->GetDexMethodIndex()); invoke->GetBlock()->InsertInstructionBefore(bounds_check, invoke); - HArrayGet* array_get = - new (arena) HArrayGet(str, index, Primitive::kPrimChar, dex_pc, /* is_string_char_at */ true); + HArrayGet* array_get = new (arena) HArrayGet( + str, bounds_check, Primitive::kPrimChar, dex_pc, /* is_string_char_at */ true); invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, array_get); bounds_check->CopyEnvironmentFrom(invoke->GetEnvironment()); GetGraph()->SetHasBoundsChecks(true); diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc index 641a5c92ea..85e84d8d2c 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.cc +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -187,7 +187,7 @@ class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL { assembler->MaybePoisonHeapReference(tmp); __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex)); __ Cmp(src_curr_addr, src_stop_addr); - __ B(ne, &loop); + __ B(ne, &loop, /* far_target */ false); __ B(GetExitLabel()); } @@ -851,7 +851,7 @@ static void GenUnsafePut(LocationSummary* locations, __ Ldrexd(temp_lo, temp_hi, MemOperand(temp_reg)); __ Strexd(temp_lo, value_lo, value_hi, MemOperand(temp_reg)); __ Cmp(temp_lo, 0); - __ B(ne, &loop_head); + __ B(ne, &loop_head, /* far_target */ false); } else { __ Strd(value_lo, value_hi, MemOperand(base, offset)); } @@ -1062,7 +1062,7 @@ static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARMVIXL* __ cmp(eq, tmp, 1); } - __ B(eq, &loop_head); + __ B(eq, &loop_head, /* far_target */ false); __ Dmb(vixl32::ISH); @@ -1238,23 +1238,23 @@ void 
IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo(HInvoke* invoke) { __ Ldr(temp_reg, MemOperand(str, temp1)); __ Ldr(temp2, MemOperand(arg, temp1)); __ Cmp(temp_reg, temp2); - __ B(ne, &find_char_diff); + __ B(ne, &find_char_diff, /* far_target */ false); __ Add(temp1, temp1, char_size * 2); __ Ldr(temp_reg, MemOperand(str, temp1)); __ Ldr(temp2, MemOperand(arg, temp1)); __ Cmp(temp_reg, temp2); - __ B(ne, &find_char_diff_2nd_cmp); + __ B(ne, &find_char_diff_2nd_cmp, /* far_target */ false); __ Add(temp1, temp1, char_size * 2); // With string compression, we have compared 8 bytes, otherwise 4 chars. __ Subs(temp0, temp0, (mirror::kUseStringCompression ? 8 : 4)); - __ B(hi, &loop); + __ B(hi, &loop, /* far_target */ false); __ B(&end); __ Bind(&find_char_diff_2nd_cmp); if (mirror::kUseStringCompression) { __ Subs(temp0, temp0, 4); // 4 bytes previously compared. - __ B(ls, &end); // Was the second comparison fully beyond the end? + __ B(ls, &end, /* far_target */ false); // Was the second comparison fully beyond the end? } else { // Without string compression, we can start treating temp0 as signed // and rely on the signed comparison below. @@ -1282,7 +1282,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo(HInvoke* invoke) { // the remaining string data, so just return length diff (out). // The comparison is unsigned for string compression, otherwise signed. __ Cmp(temp0, Operand(temp1, vixl32::LSR, (mirror::kUseStringCompression ? 3 : 4))); - __ B((mirror::kUseStringCompression ? ls : le), &end); + __ B((mirror::kUseStringCompression ? ls : le), &end, /* far_target */ false); // Extract the characters and calculate the difference. if (mirror::kUseStringCompression) { @@ -1349,9 +1349,9 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo(HInvoke* invoke) { __ Ldrb(temp_reg, MemOperand(temp1, c_char_size, PostIndex)); __ Ldrh(temp3, MemOperand(temp2, char_size, PostIndex)); __ Cmp(temp_reg, temp3); - __ B(ne, &different_compression_diff); + __ B(ne, &different_compression_diff, /* far_target */ false); __ Subs(temp0, temp0, 2); - __ B(hi, &different_compression_loop); + __ B(hi, &different_compression_loop, /* far_target */ false); __ B(&end); // Calculate the difference. @@ -1427,7 +1427,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) { // Reference equality check, return true if same reference. __ Cmp(str, arg); - __ B(eq, &return_true); + __ B(eq, &return_true, /* far_target */ false); if (!optimizations.GetArgumentIsString()) { // Instanceof check for the argument by comparing class fields. @@ -1437,7 +1437,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) { __ Ldr(temp, MemOperand(str, class_offset)); __ Ldr(temp1, MemOperand(arg, class_offset)); __ Cmp(temp, temp1); - __ B(ne, &return_false); + __ B(ne, &return_false, /* far_target */ false); } // Load `count` fields of this and argument strings. @@ -1446,7 +1446,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) { // Check if `count` fields are equal, return false if they're not. // Also compares the compression style, if differs return false. __ Cmp(temp, temp1); - __ B(ne, &return_false); + __ B(ne, &return_false, /* far_target */ false); // Return true if both strings are empty. Even with string compression `count == 0` means empty. 
static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, "Expecting 0=compressed, 1=uncompressed"); @@ -1477,10 +1477,10 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) { __ Ldr(temp2, MemOperand(arg, temp1)); __ Add(temp1, temp1, Operand::From(sizeof(uint32_t))); __ Cmp(out, temp2); - __ B(ne, &return_false); + __ B(ne, &return_false, /* far_target */ false); // With string compression, we have compared 4 bytes, otherwise 2 chars. __ Subs(temp, temp, mirror::kUseStringCompression ? 4 : 2); - __ B(hi, &loop); + __ B(hi, &loop, /* far_target */ false); // Return true and exit the function. // If loop does not result in returning false, we return true. @@ -1800,7 +1800,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { } else { if (!optimizations.GetDestinationIsSource()) { __ Cmp(src, dest); - __ B(ne, &conditions_on_positions_validated); + __ B(ne, &conditions_on_positions_validated, /* far_target */ false); } __ Cmp(RegisterFrom(dest_pos), src_pos_constant); __ B(gt, intrinsic_slow_path->GetEntryLabel()); @@ -1808,7 +1808,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { } else { if (!optimizations.GetDestinationIsSource()) { __ Cmp(src, dest); - __ B(ne, &conditions_on_positions_validated); + __ B(ne, &conditions_on_positions_validated, /* far_target */ false); } if (dest_pos.IsConstant()) { int32_t dest_pos_constant = Int32ConstantFrom(dest_pos); @@ -1916,7 +1916,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { if (optimizations.GetDestinationIsTypedObjectArray()) { vixl32::Label do_copy; - __ B(eq, &do_copy); + __ B(eq, &do_copy, /* far_target */ false); // /* HeapReference<Class> */ temp1 = temp1->component_type_ codegen_->GenerateFieldLoadWithBakerReadBarrier( invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false); @@ -1976,7 +1976,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { if (optimizations.GetDestinationIsTypedObjectArray()) { vixl32::Label do_copy; - __ B(eq, &do_copy); + __ B(eq, &do_copy, /* far_target */ false); if (!did_unpoison) { assembler->MaybeUnpoisonHeapReference(temp1); } @@ -2069,7 +2069,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { // Don't enter copy loop if `length == 0`. __ Cmp(temp1, temp3); - __ B(eq, &done); + __ B(eq, &done, /* far_target */ false); // /* int32_t */ monitor = src->monitor_ __ Ldr(temp2, MemOperand(src, monitor_offset)); @@ -2122,7 +2122,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { } __ Cmp(temp1, temp3); - __ B(ne, &loop); + __ B(ne, &loop, /* far_target */ false); __ Bind(read_barrier_slow_path->GetExitLabel()); __ Bind(&done); @@ -2142,7 +2142,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { // poison/unpoison. vixl32::Label loop, done; __ Cmp(temp1, temp3); - __ B(eq, &done); + __ B(eq, &done, /* far_target */ false); __ Bind(&loop); { @@ -2154,7 +2154,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { } __ Cmp(temp1, temp3); - __ B(ne, &loop); + __ B(ne, &loop, /* far_target */ false); __ Bind(&done); } @@ -2560,7 +2560,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) __ Subs(num_chr, srcEnd, srcBegin); // Early out for valid zero-length retrievals. - __ B(eq, &done); + __ B(eq, &done, /* far_target */ false); // src range to copy. 
__ Add(src_ptr, srcObj, value_offset); @@ -2576,7 +2576,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) __ Ldr(temp, MemOperand(srcObj, count_offset)); __ Tst(temp, 1); temps.Release(temp); - __ B(eq, &compressed_string_preloop); + __ B(eq, &compressed_string_preloop, /* far_target */ false); } __ Add(src_ptr, src_ptr, Operand(srcBegin, vixl32::LSL, 1)); @@ -2586,7 +2586,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) temp = temps.Acquire(); // Save repairing the value of num_chr on the < 4 character path. __ Subs(temp, num_chr, 4); - __ B(lt, &remainder); + __ B(lt, &remainder, /* far_target */ false); // Keep the result of the earlier subs, we are going to fetch at least 4 characters. __ Mov(num_chr, temp); @@ -2601,10 +2601,10 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) __ Ldr(temp, MemOperand(src_ptr, char_size * 4, PostIndex)); __ Str(temp, MemOperand(dst_ptr, char_size * 4, PostIndex)); temps.Release(temp); - __ B(ge, &loop); + __ B(ge, &loop, /* far_target */ false); __ Adds(num_chr, num_chr, 4); - __ B(eq, &done); + __ B(eq, &done, /* far_target */ false); // Main loop for < 4 character case and remainder handling. Loads and stores one // 16-bit Java character at a time. @@ -2614,7 +2614,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) __ Subs(num_chr, num_chr, 1); __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex)); temps.Release(temp); - __ B(gt, &remainder); + __ B(gt, &remainder, /* far_target */ false); if (mirror::kUseStringCompression) { __ B(&done); @@ -2630,7 +2630,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex)); temps.Release(temp); __ Subs(num_chr, num_chr, 1); - __ B(gt, &compressed_string_loop); + __ B(gt, &compressed_string_loop, /* far_target */ false); } __ Bind(&done); diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc index 9b5d7a02dd..cda3185a45 100644 --- a/compiler/optimizing/intrinsics_mips.cc +++ b/compiler/optimizing/intrinsics_mips.cc @@ -744,14 +744,54 @@ void IntrinsicCodeGeneratorMIPS::VisitLongBitCount(HInvoke* invoke) { GenBitCount(invoke->GetLocations(), Primitive::kPrimLong, IsR6(), GetAssembler()); } -static void MathAbsFP(LocationSummary* locations, bool is64bit, MipsAssembler* assembler) { +static void MathAbsFP(LocationSummary* locations, + bool is64bit, + bool isR2OrNewer, + bool isR6, + MipsAssembler* assembler) { FRegister in = locations->InAt(0).AsFpuRegister<FRegister>(); FRegister out = locations->Out().AsFpuRegister<FRegister>(); - if (is64bit) { - __ AbsD(out, in); + // As a "quality of implementation", rather than pure "spec compliance", it is required that + // Math.abs() clears the sign bit (but changes nothing else) for all numbers, including NaN. + // + // The ABS.fmt instructions (abs.s and abs.d) do exactly that when NAN2008=1 (R6). For this case, + // both regular floating point numbers and NAN values are treated alike, only the sign bit is + // affected by this instruction. + // But when NAN2008=0 (R2 and before), the ABS.fmt instructions can't be used. For this case, any + // NaN operand signals invalid operation. This means that other bits (not just sign bit) might be + // changed when doing abs(NaN). Because of that, we clear sign bit in a different way. 
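What "clear sign bit in a different way" amounts to, in portable terms: force only bit 31 (bit 63 for doubles) to zero and leave every other bit, including a NaN payload, untouched. A standalone C++ sketch of the same idea (not the MIPS code itself):

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

float AbsBits(float in) {
  uint32_t bits;
  std::memcpy(&bits, &in, sizeof(bits));
  bits &= 0x7fffffffu;                   // same effect as Ins(TMP, ZERO, 31, 1) or the Sll/Srl pair
  std::memcpy(&in, &bits, sizeof(in));
  return in;
}

int main() {
  assert(AbsBits(-2.5f) == 2.5f);
  assert(std::isnan(AbsBits(std::nanf(""))));   // NaN stays NaN, only the sign bit is cleared
  return 0;
}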
+ if (isR6) { + if (is64bit) { + __ AbsD(out, in); + } else { + __ AbsS(out, in); + } } else { - __ AbsS(out, in); + if (is64bit) { + if (in != out) { + __ MovD(out, in); + } + __ MoveFromFpuHigh(TMP, in); + // ins instruction is not available for R1. + if (isR2OrNewer) { + __ Ins(TMP, ZERO, 31, 1); + } else { + __ Sll(TMP, TMP, 1); + __ Srl(TMP, TMP, 1); + } + __ MoveToFpuHigh(TMP, out); + } else { + __ Mfc1(TMP, in); + // ins instruction is not available for R1. + if (isR2OrNewer) { + __ Ins(TMP, ZERO, 31, 1); + } else { + __ Sll(TMP, TMP, 1); + __ Srl(TMP, TMP, 1); + } + __ Mtc1(TMP, out); + } } } @@ -761,7 +801,7 @@ void IntrinsicLocationsBuilderMIPS::VisitMathAbsDouble(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS::VisitMathAbsDouble(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); + MathAbsFP(invoke->GetLocations(), /* is64bit */ true, IsR2OrNewer(), IsR6(), GetAssembler()); } // float java.lang.Math.abs(float) @@ -770,7 +810,7 @@ void IntrinsicLocationsBuilderMIPS::VisitMathAbsFloat(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS::VisitMathAbsFloat(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); + MathAbsFP(invoke->GetLocations(), /* is64bit */ false, IsR2OrNewer(), IsR6(), GetAssembler()); } static void GenAbsInteger(LocationSummary* locations, bool is64bit, MipsAssembler* assembler) { @@ -1648,7 +1688,8 @@ static void GenUnsafePut(LocationSummary* locations, } if (type == Primitive::kPrimNot) { - codegen->MarkGCCard(base, locations->InAt(3).AsRegister<Register>()); + bool value_can_be_null = true; // TODO: Worth finding out this information? + codegen->MarkGCCard(base, locations->InAt(3).AsRegister<Register>(), value_can_be_null); } } @@ -1806,7 +1847,8 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat if (type == Primitive::kPrimNot) { // Mark card for object assuming new value is stored. - codegen->MarkGCCard(base, value); + bool value_can_be_null = true; // TODO: Worth finding out this information? + codegen->MarkGCCard(base, value, value_can_be_null); } // do { @@ -2464,6 +2506,94 @@ void IntrinsicCodeGeneratorMIPS::VisitMathRoundFloat(HInvoke* invoke) { __ Bind(&done); } +// void java.lang.String.getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin) +void IntrinsicLocationsBuilderMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCallOnMainOnly, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(2, Location::RequiresRegister()); + locations->SetInAt(3, Location::RequiresRegister()); + locations->SetInAt(4, Location::RequiresRegister()); + + // We will call memcpy() to do the actual work. Allocate the temporary + // registers to use the correct input registers, and output register. + // memcpy() uses the normal MIPS calling convention. 
+ InvokeRuntimeCallingConvention calling_convention; + + locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2))); + + Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimInt); + locations->AddTemp(Location::RegisterLocation(outLocation.AsRegister<Register>())); +} + +void IntrinsicCodeGeneratorMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke) { + MipsAssembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + // Check assumption that sizeof(Char) is 2 (used in scaling below). + const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar); + DCHECK_EQ(char_size, 2u); + const size_t char_shift = Primitive::ComponentSizeShift(Primitive::kPrimChar); + + Register srcObj = locations->InAt(0).AsRegister<Register>(); + Register srcBegin = locations->InAt(1).AsRegister<Register>(); + Register srcEnd = locations->InAt(2).AsRegister<Register>(); + Register dstObj = locations->InAt(3).AsRegister<Register>(); + Register dstBegin = locations->InAt(4).AsRegister<Register>(); + + Register dstPtr = locations->GetTemp(0).AsRegister<Register>(); + DCHECK_EQ(dstPtr, A0); + Register srcPtr = locations->GetTemp(1).AsRegister<Register>(); + DCHECK_EQ(srcPtr, A1); + Register numChrs = locations->GetTemp(2).AsRegister<Register>(); + DCHECK_EQ(numChrs, A2); + + Register dstReturn = locations->GetTemp(3).AsRegister<Register>(); + DCHECK_EQ(dstReturn, V0); + + MipsLabel done; + + // Location of data in char array buffer. + const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value(); + + // Get offset of value field within a string object. + const int32_t value_offset = mirror::String::ValueOffset().Int32Value(); + + __ Beq(srcEnd, srcBegin, &done); // No characters to move. + + // Calculate number of characters to be copied. + __ Subu(numChrs, srcEnd, srcBegin); + + // Calculate destination address. + __ Addiu(dstPtr, dstObj, data_offset); + if (IsR6()) { + __ Lsa(dstPtr, dstBegin, dstPtr, char_shift); + } else { + __ Sll(AT, dstBegin, char_shift); + __ Addu(dstPtr, dstPtr, AT); + } + + // Calculate source address. + __ Addiu(srcPtr, srcObj, value_offset); + if (IsR6()) { + __ Lsa(srcPtr, srcBegin, srcPtr, char_shift); + } else { + __ Sll(AT, srcBegin, char_shift); + __ Addu(srcPtr, srcPtr, AT); + } + + // Calculate number of bytes to copy from number of characters. + __ Sll(numChrs, numChrs, char_shift); + + codegen_->InvokeRuntime(kQuickMemcpy, invoke, invoke->GetDexPc(), nullptr); + + __ Bind(&done); +} + // Unimplemented intrinsics. 
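The new VisitStringGetCharsNoCheck above boils down to the following pointer arithmetic (a simplified sketch with illustrative parameter names; the real code keeps the destination, source and byte count pinned in A0-A2, as the DCHECK_EQs show, and then calls kQuickMemcpy):

#include <cstdint>
#include <cstring>

void GetCharsViaMemcpy(uint8_t* dst_array, uint32_t data_offset, int32_t dst_begin,
                       const uint8_t* src_string, int32_t value_offset,
                       int32_t src_begin, int32_t src_end) {
  if (src_end == src_begin) {
    return;  // Corresponds to the Beq to &done: no characters to move.
  }
  size_t num_chars = static_cast<size_t>(src_end - src_begin);
  uint16_t* dst = reinterpret_cast<uint16_t*>(dst_array + data_offset) + dst_begin;
  const uint16_t* src = reinterpret_cast<const uint16_t*>(src_string + value_offset) + src_begin;
  std::memcpy(dst, src, num_chars * sizeof(uint16_t));  // The runtime memcpy does the copy.
}

The char_shift scaling of dst_begin and src_begin is a single Lsa on R6 and an Sll followed by an Addu on older revisions.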
UNIMPLEMENTED_INTRINSIC(MIPS, MathCeil) @@ -2473,7 +2603,6 @@ UNIMPLEMENTED_INTRINSIC(MIPS, MathRoundDouble) UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeCASLong) UNIMPLEMENTED_INTRINSIC(MIPS, ReferenceGetReferent) -UNIMPLEMENTED_INTRINSIC(MIPS, StringGetCharsNoCheck) UNIMPLEMENTED_INTRINSIC(MIPS, SystemArrayCopyChar) UNIMPLEMENTED_INTRINSIC(MIPS, SystemArrayCopy) diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index 5a998861eb..3022e975e8 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -1846,6 +1846,84 @@ void IntrinsicCodeGeneratorMIPS64::VisitDoubleIsInfinite(HInvoke* invoke) { GenIsInfinite(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); } +// void java.lang.String.getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin) +void IntrinsicLocationsBuilderMIPS64::VisitStringGetCharsNoCheck(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCallOnMainOnly, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(2, Location::RequiresRegister()); + locations->SetInAt(3, Location::RequiresRegister()); + locations->SetInAt(4, Location::RequiresRegister()); + + // We will call memcpy() to do the actual work. Allocate the temporary + // registers to use the correct input registers, and output register. + // memcpy() uses the normal MIPS calling conventions. + InvokeRuntimeCallingConvention calling_convention; + + locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2))); + + Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimLong); + locations->AddTemp(Location::RegisterLocation(outLocation.AsRegister<GpuRegister>())); +} + +void IntrinsicCodeGeneratorMIPS64::VisitStringGetCharsNoCheck(HInvoke* invoke) { + Mips64Assembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + // Check assumption that sizeof(Char) is 2 (used in scaling below). + const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar); + DCHECK_EQ(char_size, 2u); + const size_t char_shift = Primitive::ComponentSizeShift(Primitive::kPrimChar); + + GpuRegister srcObj = locations->InAt(0).AsRegister<GpuRegister>(); + GpuRegister srcBegin = locations->InAt(1).AsRegister<GpuRegister>(); + GpuRegister srcEnd = locations->InAt(2).AsRegister<GpuRegister>(); + GpuRegister dstObj = locations->InAt(3).AsRegister<GpuRegister>(); + GpuRegister dstBegin = locations->InAt(4).AsRegister<GpuRegister>(); + + GpuRegister dstPtr = locations->GetTemp(0).AsRegister<GpuRegister>(); + DCHECK_EQ(dstPtr, A0); + GpuRegister srcPtr = locations->GetTemp(1).AsRegister<GpuRegister>(); + DCHECK_EQ(srcPtr, A1); + GpuRegister numChrs = locations->GetTemp(2).AsRegister<GpuRegister>(); + DCHECK_EQ(numChrs, A2); + + GpuRegister dstReturn = locations->GetTemp(3).AsRegister<GpuRegister>(); + DCHECK_EQ(dstReturn, V0); + + Mips64Label done; + + // Location of data in char array buffer. + const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value(); + + // Get offset of value field within a string object. 
+ const int32_t value_offset = mirror::String::ValueOffset().Int32Value(); + + __ Beqc(srcEnd, srcBegin, &done); // No characters to move. + + // Calculate number of characters to be copied. + __ Dsubu(numChrs, srcEnd, srcBegin); + + // Calculate destination address. + __ Daddiu(dstPtr, dstObj, data_offset); + __ Dlsa(dstPtr, dstBegin, dstPtr, char_shift); + + // Calculate source address. + __ Daddiu(srcPtr, srcObj, value_offset); + __ Dlsa(srcPtr, srcBegin, srcPtr, char_shift); + + // Calculate number of bytes to copy from number of characters. + __ Dsll(numChrs, numChrs, char_shift); + + codegen_->InvokeRuntime(kQuickMemcpy, invoke, invoke->GetDexPc(), nullptr); + + __ Bind(&done); +} + static void GenHighestOneBit(LocationSummary* locations, Primitive::Type type, Mips64Assembler* assembler) { @@ -1925,7 +2003,6 @@ void IntrinsicCodeGeneratorMIPS64::VisitLongLowestOneBit(HInvoke* invoke) { } UNIMPLEMENTED_INTRINSIC(MIPS64, ReferenceGetReferent) -UNIMPLEMENTED_INTRINSIC(MIPS64, StringGetCharsNoCheck) UNIMPLEMENTED_INTRINSIC(MIPS64, SystemArrayCopyChar) UNIMPLEMENTED_INTRINSIC(MIPS64, SystemArrayCopy) diff --git a/compiler/optimizing/licm_test.cc b/compiler/optimizing/licm_test.cc index 8c34dc6a86..5bcfa4c98b 100644 --- a/compiler/optimizing/licm_test.cc +++ b/compiler/optimizing/licm_test.cc @@ -111,20 +111,19 @@ TEST_F(LICMTest, FieldHoisting) { BuildLoop(); // Populate the loop with instructions: set/get field with different types. - ScopedNullHandle<mirror::DexCache> dex_cache; HInstruction* get_field = new (&allocator_) HInstanceFieldGet(parameter_, + nullptr, Primitive::kPrimLong, MemberOffset(10), false, kUnknownFieldIndex, kUnknownClassDefIndex, graph_->GetDexFile(), - dex_cache, 0); loop_body_->InsertInstructionBefore(get_field, loop_body_->GetLastInstruction()); HInstruction* set_field = new (&allocator_) HInstanceFieldSet( - parameter_, int_constant_, Primitive::kPrimInt, MemberOffset(20), - false, kUnknownFieldIndex, kUnknownClassDefIndex, graph_->GetDexFile(), dex_cache, 0); + parameter_, int_constant_, nullptr, Primitive::kPrimInt, MemberOffset(20), + false, kUnknownFieldIndex, kUnknownClassDefIndex, graph_->GetDexFile(), 0); loop_body_->InsertInstructionBefore(set_field, loop_body_->GetLastInstruction()); EXPECT_EQ(get_field->GetBlock(), loop_body_); @@ -140,24 +139,24 @@ TEST_F(LICMTest, NoFieldHoisting) { // Populate the loop with instructions: set/get field with same types. 
ScopedNullHandle<mirror::DexCache> dex_cache; HInstruction* get_field = new (&allocator_) HInstanceFieldGet(parameter_, + nullptr, Primitive::kPrimLong, MemberOffset(10), false, kUnknownFieldIndex, kUnknownClassDefIndex, graph_->GetDexFile(), - dex_cache, 0); loop_body_->InsertInstructionBefore(get_field, loop_body_->GetLastInstruction()); HInstruction* set_field = new (&allocator_) HInstanceFieldSet(parameter_, get_field, + nullptr, Primitive::kPrimLong, MemberOffset(10), false, kUnknownFieldIndex, kUnknownClassDefIndex, graph_->GetDexFile(), - dex_cache, 0); loop_body_->InsertInstructionBefore(set_field, loop_body_->GetLastInstruction()); diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index cabc0782ca..d45fa11534 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -1357,7 +1357,9 @@ std::ostream& operator<<(std::ostream& os, const HInstruction::InstructionKind& void HInstruction::MoveBefore(HInstruction* cursor) { DCHECK(!IsPhi()); DCHECK(!IsControlFlow()); - DCHECK(CanBeMoved()); + DCHECK(CanBeMoved() || + // HShouldDeoptimizeFlag can only be moved by CHAGuardOptimization. + IsShouldDeoptimizeFlag()); DCHECK(!cursor->IsPhi()); next_->previous_ = previous_; @@ -2404,8 +2406,6 @@ std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::MethodLoadKind return os << "recursive"; case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: return os << "direct"; - case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup: - return os << "direct_fixup"; case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: return os << "dex_cache_pc_relative"; case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: @@ -2498,6 +2498,17 @@ std::ostream& operator<<(std::ostream& os, HLoadClass::LoadKind rhs) { } } +// Helper for InstructionDataEquals to fetch the mirror String out +// from a kJitTableAddress LoadString kind. +// NO_THREAD_SAFETY_ANALYSIS because even though we're accessing +// mirrors, they are stored in a variable size handle scope which is always +// visited during a pause. Also, the only caller of this helper +// only uses the mirror for pointer comparison. +static inline mirror::String* AsMirrorInternal(Handle<mirror::String> handle) + NO_THREAD_SAFETY_ANALYSIS { + return handle.Get(); +} + bool HLoadString::InstructionDataEquals(const HInstruction* other) const { const HLoadString* other_load_string = other->AsLoadString(); // TODO: To allow GVN for HLoadString from different dex files, we should compare the strings @@ -2506,16 +2517,16 @@ bool HLoadString::InstructionDataEquals(const HInstruction* other) const { GetPackedFields() != other_load_string->GetPackedFields()) { return false; } - LoadKind load_kind = GetLoadKind(); - if (HasAddress(load_kind)) { - return GetAddress() == other_load_string->GetAddress(); - } else { - DCHECK(HasStringReference(load_kind)) << load_kind; - return IsSameDexFile(GetDexFile(), other_load_string->GetDexFile()); + switch (GetLoadKind()) { + case LoadKind::kBootImageAddress: + case LoadKind::kJitTableAddress: + return AsMirrorInternal(GetString()) == AsMirrorInternal(other_load_string->GetString()); + default: + return IsSameDexFile(GetDexFile(), other_load_string->GetDexFile()); } } -void HLoadString::SetLoadKindInternal(LoadKind load_kind) { +void HLoadString::SetLoadKind(LoadKind load_kind) { // Once sharpened, the load kind should not be changed again. 
DCHECK_EQ(GetLoadKind(), LoadKind::kDexCacheViaMethod); SetPackedField<LoadKindField>(load_kind); diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 4a77bed44a..7d6f6164ec 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -171,6 +171,7 @@ class HInstructionList : public ValueObject { friend class HGraph; friend class HInstruction; friend class HInstructionIterator; + friend class HInstructionIteratorHandleChanges; friend class HBackwardInstructionIterator; DISALLOW_COPY_AND_ASSIGN(HInstructionList); @@ -330,6 +331,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { invoke_type_(invoke_type), in_ssa_form_(false), should_generate_constructor_barrier_(should_generate_constructor_barrier), + number_of_cha_guards_(0), instruction_set_(instruction_set), cached_null_constant_(nullptr), cached_int_constants_(std::less<int32_t>(), arena->Adapter(kArenaAllocConstantsMap)), @@ -551,9 +553,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { } bool HasShouldDeoptimizeFlag() const { - // TODO: if all CHA guards can be eliminated, there is no need for the flag - // even if cha_single_implementation_list_ is not empty. - return !cha_single_implementation_list_.empty(); + return number_of_cha_guards_ != 0; } bool HasTryCatch() const { return has_try_catch_; } @@ -572,6 +572,10 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { ReferenceTypeInfo GetInexactObjectRti() const { return inexact_object_rti_; } + uint32_t GetNumberOfCHAGuards() { return number_of_cha_guards_; } + void SetNumberOfCHAGuards(uint32_t num) { number_of_cha_guards_ = num; } + void IncrementNumberOfCHAGuards() { number_of_cha_guards_++; } + private: void RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const; void RemoveDeadBlocks(const ArenaBitVector& visited); @@ -667,6 +671,10 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { const bool should_generate_constructor_barrier_; + // Number of CHA guards in the graph. Used to short-circuit the + // CHA guard optimization pass when there is no CHA guard left. + uint32_t number_of_cha_guards_; + const InstructionSet instruction_set_; // Cached constants. @@ -2305,6 +2313,9 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { }; std::ostream& operator<<(std::ostream& os, const HInstruction::InstructionKind& rhs); +// Iterates over the instructions, while preserving the next instruction +// in case the current instruction gets removed from the list by the user +// of this iterator. class HInstructionIterator : public ValueObject { public: explicit HInstructionIterator(const HInstructionList& instructions) @@ -2326,6 +2337,28 @@ class HInstructionIterator : public ValueObject { DISALLOW_COPY_AND_ASSIGN(HInstructionIterator); }; +// Iterates over the instructions without saving the next instruction, +// therefore handling changes in the graph potentially made by the user +// of this iterator. 
+class HInstructionIteratorHandleChanges : public ValueObject { + public: + explicit HInstructionIteratorHandleChanges(const HInstructionList& instructions) + : instruction_(instructions.first_instruction_) { + } + + bool Done() const { return instruction_ == nullptr; } + HInstruction* Current() const { return instruction_; } + void Advance() { + instruction_ = instruction_->GetNext(); + } + + private: + HInstruction* instruction_; + + DISALLOW_COPY_AND_ASSIGN(HInstructionIteratorHandleChanges); +}; + + class HBackwardInstructionIterator : public ValueObject { public: explicit HBackwardInstructionIterator(const HInstructionList& instructions) @@ -2349,6 +2382,11 @@ class HBackwardInstructionIterator : public ValueObject { class HVariableInputSizeInstruction : public HInstruction { public: + using HInstruction::GetInputRecords; // Keep the const version visible. + ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE { + return ArrayRef<HUserRecord<HInstruction*>>(inputs_); + } + void AddInput(HInstruction* input); void InsertInputAt(size_t index, HInstruction* input); void RemoveInputAt(size_t index); @@ -2489,11 +2527,6 @@ class HPhi FINAL : public HVariableInputSizeInstruction { bool IsCatchPhi() const { return GetBlock()->IsCatchBlock(); } - using HInstruction::GetInputRecords; // Keep the const version visible. - ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE FINAL { - return ArrayRef<HUserRecord<HInstruction*>>(inputs_); - } - Primitive::Type GetType() const OVERRIDE { return GetPackedField<TypeField>(); } void SetType(Primitive::Type new_type) { // Make sure that only valid type changes occur. The following are allowed: @@ -2925,14 +2958,20 @@ class HDeoptimize FINAL : public HTemplateInstruction<1> { // if it's true, starts to do deoptimization. // It has a 4-byte slot on stack. // TODO: allocate a register for this flag. -class HShouldDeoptimizeFlag FINAL : public HExpression<0> { +class HShouldDeoptimizeFlag FINAL : public HVariableInputSizeInstruction { public: - // TODO: use SideEffects to aid eliminating some CHA guards. - explicit HShouldDeoptimizeFlag(uint32_t dex_pc) - : HExpression(Primitive::kPrimInt, SideEffects::None(), dex_pc) { + // CHA guards are only optimized in a separate pass and it has no side effects + // with regard to other passes. + HShouldDeoptimizeFlag(ArenaAllocator* arena, uint32_t dex_pc) + : HVariableInputSizeInstruction(SideEffects::None(), dex_pc, arena, 0, kArenaAllocCHA) { } - // We don't eliminate CHA guards yet. + Primitive::Type GetType() const OVERRIDE { return Primitive::kPrimInt; } + + // We do all CHA guard elimination/motion in a single pass, after which there is no + // further guard elimination/motion since a guard might have been used for justification + // of the elimination of another guard. Therefore, we pretend this guard cannot be moved + // to avoid other optimizations trying to move it. bool CanBeMoved() const OVERRIDE { return false; } DECLARE_INSTRUCTION(ShouldDeoptimizeFlag); @@ -3816,11 +3855,6 @@ class HInvoke : public HVariableInputSizeInstruction { public: bool NeedsEnvironment() const OVERRIDE; - using HInstruction::GetInputRecords; // Keep the const version visible. 
- ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE { - return ArrayRef<HUserRecord<HInstruction*>>(inputs_); - } - void SetArgumentAt(size_t index, HInstruction* argument) { SetRawInputAt(index, argument); } @@ -3974,12 +4008,6 @@ class HInvokeStaticOrDirect FINAL : public HInvoke { // Used for app->boot calls with non-relocatable image and for JIT-compiled calls. kDirectAddress, - // Use ArtMethod* at an address that will be known at link time, embed the direct - // address in the code. If the image is relocatable, emit .patch_oat entry. - // Used for app->boot calls with relocatable image and boot->boot calls, whether - // the image relocatable or not. - kDirectAddressWithFixup, - // Load from resolved methods array in the dex cache using a PC-relative load. // Used when we need to use the dex cache, for example for invoke-static that // may cause class initialization (the entry may point to a resolution method), @@ -3998,20 +4026,6 @@ class HInvokeStaticOrDirect FINAL : public HInvoke { // Recursive call, use local PC-relative call instruction. kCallSelf, - // Use PC-relative call instruction patched at link time. - // Used for calls within an oat file, boot->boot or app->app. - kCallPCRelative, - - // Call to a known target address, embed the direct address in code. - // Used for app->boot call with non-relocatable image and for JIT-compiled calls. - kCallDirect, - - // Call to a target address that will be known at link time, embed the direct - // address in code. If the image is relocatable, emit .patch_oat entry. - // Used for app->boot calls with relocatable image and boot->boot calls, whether - // the image relocatable or not. - kCallDirectWithFixup, - // Use code pointer from the ArtMethod*. // Used when we don't know the target code. This is also the last-resort-kind used when // other kinds are unimplemented or impractical (i.e. slow) on a particular architecture. @@ -4027,7 +4041,6 @@ class HInvokeStaticOrDirect FINAL : public HInvoke { // - the method address for kDirectAddress // - the dex cache arrays offset for kDexCachePcRel. uint64_t method_load_data; - uint64_t direct_code_ptr; }; HInvokeStaticOrDirect(ArenaAllocator* arena, @@ -4137,7 +4150,6 @@ class HInvokeStaticOrDirect FINAL : public HInvoke { return false; } } - bool HasDirectCodePtr() const { return GetCodePtrLocation() == CodePtrLocation::kCallDirect; } QuickEntrypointEnum GetStringInitEntryPoint() const { DCHECK(IsStringInit()); @@ -4154,11 +4166,6 @@ class HInvokeStaticOrDirect FINAL : public HInvoke { return dispatch_info_.method_load_data; } - uint64_t GetDirectCodePtr() const { - DCHECK(HasDirectCodePtr()); - return dispatch_info_.direct_code_ptr; - } - ClinitCheckRequirement GetClinitCheckRequirement() const { return GetPackedField<ClinitCheckRequirementField>(); } @@ -5075,60 +5082,62 @@ class HNullCheck FINAL : public HExpression<1> { DISALLOW_COPY_AND_ASSIGN(HNullCheck); }; +// Embeds an ArtField and all the information required by the compiler. We cache +// that information to avoid requiring the mutator lock every time we need it. 
class FieldInfo : public ValueObject { public: - FieldInfo(MemberOffset field_offset, + FieldInfo(ArtField* field, + MemberOffset field_offset, Primitive::Type field_type, bool is_volatile, uint32_t index, uint16_t declaring_class_def_index, - const DexFile& dex_file, - Handle<mirror::DexCache> dex_cache) - : field_offset_(field_offset), + const DexFile& dex_file) + : field_(field), + field_offset_(field_offset), field_type_(field_type), is_volatile_(is_volatile), index_(index), declaring_class_def_index_(declaring_class_def_index), - dex_file_(dex_file), - dex_cache_(dex_cache) {} + dex_file_(dex_file) {} + ArtField* GetField() const { return field_; } MemberOffset GetFieldOffset() const { return field_offset_; } Primitive::Type GetFieldType() const { return field_type_; } uint32_t GetFieldIndex() const { return index_; } uint16_t GetDeclaringClassDefIndex() const { return declaring_class_def_index_;} const DexFile& GetDexFile() const { return dex_file_; } bool IsVolatile() const { return is_volatile_; } - Handle<mirror::DexCache> GetDexCache() const { return dex_cache_; } private: + ArtField* const field_; const MemberOffset field_offset_; const Primitive::Type field_type_; const bool is_volatile_; const uint32_t index_; const uint16_t declaring_class_def_index_; const DexFile& dex_file_; - const Handle<mirror::DexCache> dex_cache_; }; class HInstanceFieldGet FINAL : public HExpression<1> { public: HInstanceFieldGet(HInstruction* value, + ArtField* field, Primitive::Type field_type, MemberOffset field_offset, bool is_volatile, uint32_t field_idx, uint16_t declaring_class_def_index, const DexFile& dex_file, - Handle<mirror::DexCache> dex_cache, uint32_t dex_pc) : HExpression(field_type, SideEffects::FieldReadOfType(field_type, is_volatile), dex_pc), - field_info_(field_offset, + field_info_(field, + field_offset, field_type, is_volatile, field_idx, declaring_class_def_index, - dex_file, - dex_cache) { + dex_file) { SetRawInputAt(0, value); } @@ -5164,22 +5173,22 @@ class HInstanceFieldSet FINAL : public HTemplateInstruction<2> { public: HInstanceFieldSet(HInstruction* object, HInstruction* value, + ArtField* field, Primitive::Type field_type, MemberOffset field_offset, bool is_volatile, uint32_t field_idx, uint16_t declaring_class_def_index, const DexFile& dex_file, - Handle<mirror::DexCache> dex_cache, uint32_t dex_pc) : HTemplateInstruction(SideEffects::FieldWriteOfType(field_type, is_volatile), dex_pc), - field_info_(field_offset, + field_info_(field, + field_offset, field_type, is_volatile, field_idx, declaring_class_def_index, - dex_file, - dex_cache) { + dex_file) { SetPackedFlag<kFlagValueCanBeNull>(true); SetRawInputAt(0, object); SetRawInputAt(1, value); @@ -5537,9 +5546,7 @@ class HLoadClass FINAL : public HInstruction { const DexFile& dex_file, bool is_referrers_class, uint32_t dex_pc, - bool needs_access_check, - bool is_in_dex_cache, - bool is_in_boot_image) + bool needs_access_check) : HInstruction(SideEffectsForArchRuntimeCalls(), dex_pc), special_input_(HUserRecord<HInstruction*>(current_method)), type_index_(type_index), @@ -5552,8 +5559,8 @@ class HLoadClass FINAL : public HInstruction { SetPackedField<LoadKindField>( is_referrers_class ? 
LoadKind::kReferrersClass : LoadKind::kDexCacheViaMethod); SetPackedFlag<kFlagNeedsAccessCheck>(needs_access_check); - SetPackedFlag<kFlagIsInDexCache>(is_in_dex_cache); - SetPackedFlag<kFlagIsInBootImage>(is_in_boot_image); + SetPackedFlag<kFlagIsInDexCache>(false); + SetPackedFlag<kFlagIsInBootImage>(false); SetPackedFlag<kFlagGenerateClInitCheck>(false); } @@ -5782,41 +5789,31 @@ class HLoadString FINAL : public HInstruction { uint32_t dex_pc) : HInstruction(SideEffectsForArchRuntimeCalls(), dex_pc), special_input_(HUserRecord<HInstruction*>(current_method)), - string_index_(string_index) { - SetPackedFlag<kFlagIsInDexCache>(false); + string_index_(string_index), + dex_file_(dex_file) { SetPackedField<LoadKindField>(LoadKind::kDexCacheViaMethod); - load_data_.dex_file_ = &dex_file; } - void SetLoadKindWithAddress(LoadKind load_kind, uint64_t address) { - DCHECK(HasAddress(load_kind)); - load_data_.address = address; - SetLoadKindInternal(load_kind); - } - - void SetLoadKindWithStringReference(LoadKind load_kind, - const DexFile& dex_file, - dex::StringIndex string_index) { - DCHECK(HasStringReference(load_kind)); - load_data_.dex_file_ = &dex_file; - string_index_ = string_index; - SetLoadKindInternal(load_kind); - } + void SetLoadKind(LoadKind load_kind); LoadKind GetLoadKind() const { return GetPackedField<LoadKindField>(); } - const DexFile& GetDexFile() const; + const DexFile& GetDexFile() const { + return dex_file_; + } dex::StringIndex GetStringIndex() const { - DCHECK(HasStringReference(GetLoadKind()) || /* For slow paths. */ !IsInDexCache()); return string_index_; } - uint64_t GetAddress() const { - DCHECK(HasAddress(GetLoadKind())); - return load_data_.address; + Handle<mirror::String> GetString() const { + return string_; + } + + void SetString(Handle<mirror::String> str) { + string_ = str; } bool CanBeMoved() const OVERRIDE { return true; } @@ -5835,7 +5832,7 @@ class HLoadString FINAL : public HInstruction { load_kind == LoadKind::kJitTableAddress) { return false; } - return !IsInDexCache(); + return true; } bool NeedsDexCacheOfDeclaringClass() const OVERRIDE { @@ -5849,15 +5846,6 @@ class HLoadString FINAL : public HInstruction { return SideEffects::CanTriggerGC(); } - bool IsInDexCache() const { return GetPackedFlag<kFlagIsInDexCache>(); } - - void MarkInDexCache() { - SetPackedFlag<kFlagIsInDexCache>(true); - DCHECK(!NeedsEnvironment()); - RemoveEnvironment(); - SetSideEffects(SideEffects::None()); - } - void AddSpecialInput(HInstruction* special_input); using HInstruction::GetInputRecords; // Keep the const version visible. 
@@ -5873,26 +5861,13 @@ class HLoadString FINAL : public HInstruction { DECLARE_INSTRUCTION(LoadString); private: - static constexpr size_t kFlagIsInDexCache = kNumberOfGenericPackedBits; - static constexpr size_t kFieldLoadKind = kFlagIsInDexCache + 1; + static constexpr size_t kFieldLoadKind = kNumberOfGenericPackedBits; static constexpr size_t kFieldLoadKindSize = MinimumBitsToStore(static_cast<size_t>(LoadKind::kLast)); static constexpr size_t kNumberOfLoadStringPackedBits = kFieldLoadKind + kFieldLoadKindSize; static_assert(kNumberOfLoadStringPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); using LoadKindField = BitField<LoadKind, kFieldLoadKind, kFieldLoadKindSize>; - static bool HasStringReference(LoadKind load_kind) { - return load_kind == LoadKind::kBootImageLinkTimeAddress || - load_kind == LoadKind::kBootImageLinkTimePcRelative || - load_kind == LoadKind::kBssEntry || - load_kind == LoadKind::kDexCacheViaMethod || - load_kind == LoadKind::kJitTableAddress; - } - - static bool HasAddress(LoadKind load_kind) { - return load_kind == LoadKind::kBootImageAddress; - } - void SetLoadKindInternal(LoadKind load_kind); // The special input is the HCurrentMethod for kDexCacheViaMethod. @@ -5900,26 +5875,16 @@ class HLoadString FINAL : public HInstruction { // for PC-relative loads, i.e. kDexCachePcRelative or kBootImageLinkTimePcRelative. HUserRecord<HInstruction*> special_input_; - // String index serves also as the hash code and it's also needed for slow-paths, - // so it must not be overwritten with other load data. dex::StringIndex string_index_; + const DexFile& dex_file_; - union { - const DexFile* dex_file_; // For string reference. - uint64_t address; // Up to 64-bit, needed for kDexCacheAddress on 64-bit targets. - } load_data_; + Handle<mirror::String> string_; DISALLOW_COPY_AND_ASSIGN(HLoadString); }; std::ostream& operator<<(std::ostream& os, HLoadString::LoadKind rhs); // Note: defined outside class to see operator<<(., HLoadString::LoadKind). -inline const DexFile& HLoadString::GetDexFile() const { - DCHECK(HasStringReference(GetLoadKind())) << GetLoadKind(); - return *load_data_.dex_file_; -} - -// Note: defined outside class to see operator<<(., HLoadString::LoadKind). inline void HLoadString::AddSpecialInput(HInstruction* special_input) { // The special input is used for PC-relative loads on some architectures, // including literal pool loads, which are PC-relative too. 
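The new HLoadString surface (SetLoadKind, SetString, the string_ handle) is meant to be driven by a later pass; its HLoadString call sites are not part of this excerpt, but a plausible JIT sharpening sequence, using only the members introduced above, would look like this hypothetical helper:

// Hypothetical sketch, not in the patch; assumes the caller holds the mutator lock.
void SharpenJitLoadString(HLoadString* load_string,
                          mirror::String* resolved,
                          VariableSizedHandleScope* handles) {
  // Pin the mirror in the handle scope so it stays visible to the GC, then
  // record it and switch the load kind exactly once (SetLoadKind DCHECKs that).
  load_string->SetString(handles->NewHandle(resolved));
  load_string->SetLoadKind(HLoadString::LoadKind::kJitTableAddress);
}

With the string recorded this way, InstructionDataEquals() can compare kJitTableAddress and kBootImageAddress loads by the mirror pointer itself, as the nodes.cc change above does.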
@@ -5970,22 +5935,22 @@ class HClinitCheck FINAL : public HExpression<1> { class HStaticFieldGet FINAL : public HExpression<1> { public: HStaticFieldGet(HInstruction* cls, + ArtField* field, Primitive::Type field_type, MemberOffset field_offset, bool is_volatile, uint32_t field_idx, uint16_t declaring_class_def_index, const DexFile& dex_file, - Handle<mirror::DexCache> dex_cache, uint32_t dex_pc) : HExpression(field_type, SideEffects::FieldReadOfType(field_type, is_volatile), dex_pc), - field_info_(field_offset, + field_info_(field, + field_offset, field_type, is_volatile, field_idx, declaring_class_def_index, - dex_file, - dex_cache) { + dex_file) { SetRawInputAt(0, cls); } @@ -6018,22 +5983,22 @@ class HStaticFieldSet FINAL : public HTemplateInstruction<2> { public: HStaticFieldSet(HInstruction* cls, HInstruction* value, + ArtField* field, Primitive::Type field_type, MemberOffset field_offset, bool is_volatile, uint32_t field_idx, uint16_t declaring_class_def_index, const DexFile& dex_file, - Handle<mirror::DexCache> dex_cache, uint32_t dex_pc) : HTemplateInstruction(SideEffects::FieldWriteOfType(field_type, is_volatile), dex_pc), - field_info_(field_offset, + field_info_(field, + field_offset, field_type, is_volatile, field_idx, declaring_class_def_index, - dex_file, - dex_cache) { + dex_file) { SetPackedFlag<kFlagValueCanBeNull>(true); SetRawInputAt(0, cls); SetRawInputAt(1, value); diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 0d0f62a55c..297500b12f 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -54,6 +54,7 @@ #include "base/timing_logger.h" #include "bounds_check_elimination.h" #include "builder.h" +#include "cha_guard_optimization.h" #include "code_generator.h" #include "compiled_method.h" #include "compiler.h" @@ -517,6 +518,8 @@ static HOptimization* BuildOptimization( return new (arena) SideEffectsAnalysis(graph); } else if (opt_name == HLoopOptimization::kLoopOptimizationPassName) { return new (arena) HLoopOptimization(graph, most_recent_induction); + } else if (opt_name == CHAGuardOptimization::kCHAGuardOptimizationPassName) { + return new (arena) CHAGuardOptimization(graph); #ifdef ART_ENABLE_CODEGEN_arm } else if (opt_name == arm::DexCacheArrayFixups::kDexCacheArrayFixupsArmPassName) { return new (arena) arm::DexCacheArrayFixups(graph, codegen, stats); @@ -779,6 +782,7 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph, InstructionSimplifier* simplify4 = new (arena) InstructionSimplifier( graph, stats, "instruction_simplifier$before_codegen"); IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, stats); + CHAGuardOptimization* cha_guard = new (arena) CHAGuardOptimization(graph); HOptimization* optimizations1[] = { intrinsics, @@ -807,6 +811,7 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph, fold3, // evaluates code generated by dynamic bce simplify3, lse, + cha_guard, dce3, // The codegen has a few assumptions that only the instruction simplifier // can satisfy. 
For example, the code generator does not expect to see a @@ -1200,7 +1205,7 @@ bool OptimizingCompiler::JitCompile(Thread* self, } MaybeRecordStat(MethodCompilationStat::kCompiled); codegen->BuildStackMaps(MemoryRegion(stack_map_data, stack_map_size), *code_item); - codegen->EmitJitRoots(code_allocator.GetData(), roots, roots_data, dex_cache); + codegen->EmitJitRoots(code_allocator.GetData(), roots, roots_data); const void* code = code_cache->CommitCode( self, diff --git a/compiler/optimizing/pc_relative_fixups_mips.cc b/compiler/optimizing/pc_relative_fixups_mips.cc index 82feb95a2f..e321b9e3aa 100644 --- a/compiler/optimizing/pc_relative_fixups_mips.cc +++ b/compiler/optimizing/pc_relative_fixups_mips.cc @@ -45,10 +45,6 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { } private: - void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE { - HandleInvoke(invoke); - } - void InitializePCRelativeBasePointer() { // Ensure we only initialize the pointer once. if (base_ != nullptr) { @@ -112,38 +108,6 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { block->ReplaceAndRemoveInstructionWith(switch_insn, mips_switch); } - void HandleInvoke(HInvoke* invoke) { - // If this is an invoke-static/-direct with PC-relative dex cache array - // addressing, we need the PC-relative address base. - HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect(); - if (invoke_static_or_direct != nullptr) { - HInvokeStaticOrDirect::MethodLoadKind method_load_kind = - invoke_static_or_direct->GetMethodLoadKind(); - HInvokeStaticOrDirect::CodePtrLocation code_ptr_location = - invoke_static_or_direct->GetCodePtrLocation(); - - bool has_extra_input = - (method_load_kind == HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup) || - (code_ptr_location == HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup); - - // We can't add a pointer to the constant area if we already have a current - // method pointer. This may arise when sharpening doesn't remove the current - // method pointer from the invoke. - if (invoke_static_or_direct->HasCurrentMethodInput()) { - DCHECK(!invoke_static_or_direct->HasPcRelativeDexCache()); - CHECK(!has_extra_input); - return; - } - - if (has_extra_input && - !IsCallFreeIntrinsic<IntrinsicLocationsBuilderMIPS>(invoke, codegen_)) { - InitializePCRelativeBasePointer(); - // Add the extra parameter base_. - invoke_static_or_direct->AddSpecialInput(base_); - } - } - } - CodeGeneratorMIPS* codegen_; // The generated HMipsComputeBaseMethodAddress in the entry block needed as an diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc index c191c6651f..f8a4469712 100644 --- a/compiler/optimizing/reference_type_propagation.cc +++ b/compiler/optimizing/reference_type_propagation.cc @@ -76,6 +76,7 @@ class ReferenceTypePropagation::RTPVisitor : public HGraphDelegateVisitor { worklist_(worklist), is_first_run_(is_first_run) {} + void VisitDeoptimize(HDeoptimize* deopt) OVERRIDE; void VisitNewInstance(HNewInstance* new_instance) OVERRIDE; void VisitLoadClass(HLoadClass* load_class) OVERRIDE; void VisitClinitCheck(HClinitCheck* clinit_check) OVERRIDE; @@ -151,38 +152,6 @@ void ReferenceTypePropagation::Visit(HInstruction* instruction) { instruction->Accept(&visitor); } -void ReferenceTypePropagation::Run() { - worklist_.reserve(kDefaultWorklistSize); - - // To properly propagate type info we need to visit in the dominator-based order. 
- // Reverse post order guarantees a node's dominators are visited first. - // We take advantage of this order in `VisitBasicBlock`. - for (HBasicBlock* block : graph_->GetReversePostOrder()) { - VisitBasicBlock(block); - } - - ProcessWorklist(); - ValidateTypes(); -} - -void ReferenceTypePropagation::VisitBasicBlock(HBasicBlock* block) { - RTPVisitor visitor(graph_, hint_dex_cache_, &handle_cache_, &worklist_, is_first_run_); - // Handle Phis first as there might be instructions in the same block who depend on them. - for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { - VisitPhi(it.Current()->AsPhi()); - } - - // Handle instructions. - for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { - HInstruction* instr = it.Current(); - instr->Accept(&visitor); - } - - // Add extra nodes to bound types. - BoundTypeForIfNotNull(block); - BoundTypeForIfInstanceOf(block); -} - // Check if we should create a bound type for the given object at the specified // position. Because of inlining and the fact we run RTP more than once and we // might have a HBoundType already. If we do, we should not create a new one. @@ -225,6 +194,153 @@ static bool ShouldCreateBoundType(HInstruction* position, return false; } +// Helper method to bound the type of `receiver` for all instructions dominated +// by `start_block`, or `start_instruction` if `start_block` is null. The new +// bound type will have its upper bound be `class_rti`. +static void BoundTypeIn(HInstruction* receiver, + HBasicBlock* start_block, + HInstruction* start_instruction, + const ReferenceTypeInfo& class_rti) { + // We only need to bound the type if we have uses in the relevant block. + // So start with null and create the HBoundType lazily, only if it's needed. + HBoundType* bound_type = nullptr; + DCHECK(!receiver->IsLoadClass()) << "We should not replace HLoadClass instructions"; + const HUseList<HInstruction*>& uses = receiver->GetUses(); + for (auto it = uses.begin(), end = uses.end(); it != end; /* ++it below */) { + HInstruction* user = it->GetUser(); + size_t index = it->GetIndex(); + // Increment `it` now because `*it` may disappear thanks to user->ReplaceInput(). + ++it; + bool dominates = (start_instruction != nullptr) + ? start_instruction->StrictlyDominates(user) + : start_block->Dominates(user->GetBlock()); + if (!dominates) { + continue; + } + if (bound_type == nullptr) { + ScopedObjectAccess soa(Thread::Current()); + HInstruction* insert_point = (start_instruction != nullptr) + ? start_instruction->GetNext() + : start_block->GetFirstInstruction(); + if (ShouldCreateBoundType( + insert_point, receiver, class_rti, start_instruction, start_block)) { + bound_type = new (receiver->GetBlock()->GetGraph()->GetArena()) HBoundType(receiver); + bound_type->SetUpperBound(class_rti, /* bound_can_be_null */ false); + start_block->InsertInstructionBefore(bound_type, insert_point); + // To comply with the RTP algorithm, don't type the bound type just yet, it will + // be handled in RTPVisitor::VisitBoundType. + } else { + // We already have a bound type on the position we would need to insert + // the new one. The existing bound type should dominate all the users + // (dchecked) so there's no need to continue. + break; + } + } + user->ReplaceInput(bound_type, index); + } + // If the receiver is a null check, also bound the type of the actual + // receiver. 
+ if (receiver->IsNullCheck()) { + BoundTypeIn(receiver->InputAt(0), start_block, start_instruction, class_rti); + } +} + +// Recognize the patterns: +// if (obj.shadow$_klass_ == Foo.class) ... +// deoptimize if (obj.shadow$_klass_ == Foo.class) +static void BoundTypeForClassCheck(HInstruction* check) { + if (!check->IsIf() && !check->IsDeoptimize()) { + return; + } + HInstruction* compare = check->InputAt(0); + if (!compare->IsEqual() && !compare->IsNotEqual()) { + return; + } + HInstruction* input_one = compare->InputAt(0); + HInstruction* input_two = compare->InputAt(1); + HLoadClass* load_class = input_one->IsLoadClass() + ? input_one->AsLoadClass() + : input_two->AsLoadClass(); + if (load_class == nullptr) { + return; + } + + ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI(); + if (!class_rti.IsValid()) { + // We have loaded an unresolved class. Don't bother bounding the type. + return; + } + + HInstanceFieldGet* field_get = (load_class == input_one) + ? input_two->AsInstanceFieldGet() + : input_one->AsInstanceFieldGet(); + if (field_get == nullptr) { + return; + } + HInstruction* receiver = field_get->InputAt(0); + ReferenceTypeInfo receiver_type = receiver->GetReferenceTypeInfo(); + if (receiver_type.IsExact()) { + // If we already know the receiver type, don't bother updating its users. + return; + } + + { + ScopedObjectAccess soa(Thread::Current()); + ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); + ArtField* field = class_linker->GetClassRoot(ClassLinker::kJavaLangObject)->GetInstanceField(0); + DCHECK_EQ(std::string(field->GetName()), "shadow$_klass_"); + if (field_get->GetFieldInfo().GetField() != field) { + return; + } + } + + if (check->IsIf()) { + HBasicBlock* trueBlock = check->IsEqual() + ? check->AsIf()->IfTrueSuccessor() + : check->AsIf()->IfFalseSuccessor(); + BoundTypeIn(receiver, trueBlock, /* start_instruction */ nullptr, class_rti); + } else { + DCHECK(check->IsDeoptimize()); + if (check->IsEqual()) { + BoundTypeIn(receiver, check->GetBlock(), check, class_rti); + } + } +} + +void ReferenceTypePropagation::Run() { + worklist_.reserve(kDefaultWorklistSize); + + // To properly propagate type info we need to visit in the dominator-based order. + // Reverse post order guarantees a node's dominators are visited first. + // We take advantage of this order in `VisitBasicBlock`. + for (HBasicBlock* block : graph_->GetReversePostOrder()) { + VisitBasicBlock(block); + } + + ProcessWorklist(); + ValidateTypes(); +} + +void ReferenceTypePropagation::VisitBasicBlock(HBasicBlock* block) { + RTPVisitor visitor(graph_, hint_dex_cache_, &handle_cache_, &worklist_, is_first_run_); + // Handle Phis first as there might be instructions in the same block who depend on them. + for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { + VisitPhi(it.Current()->AsPhi()); + } + + // Handle instructions. Since RTP may add HBoundType instructions just after the + // last visited instruction, use `HInstructionIteratorHandleChanges` iterator. + for (HInstructionIteratorHandleChanges it(block->GetInstructions()); !it.Done(); it.Advance()) { + HInstruction* instr = it.Current(); + instr->Accept(&visitor); + } + + // Add extra nodes to bound types. 
+ BoundTypeForIfNotNull(block); + BoundTypeForIfInstanceOf(block); + BoundTypeForClassCheck(block->GetLastInstruction()); +} + void ReferenceTypePropagation::BoundTypeForIfNotNull(HBasicBlock* block) { HIf* ifInstruction = block->GetLastInstruction()->AsIf(); if (ifInstruction == nullptr) { @@ -254,40 +370,14 @@ void ReferenceTypePropagation::BoundTypeForIfNotNull(HBasicBlock* block) { // We only need to bound the type if we have uses in the relevant block. // So start with null and create the HBoundType lazily, only if it's needed. - HBoundType* bound_type = nullptr; HBasicBlock* notNullBlock = ifInput->IsNotEqual() ? ifInstruction->IfTrueSuccessor() : ifInstruction->IfFalseSuccessor(); - const HUseList<HInstruction*>& uses = obj->GetUses(); - for (auto it = uses.begin(), end = uses.end(); it != end; /* ++it below */) { - HInstruction* user = it->GetUser(); - size_t index = it->GetIndex(); - // Increment `it` now because `*it` may disappear thanks to user->ReplaceInput(). - ++it; - if (notNullBlock->Dominates(user->GetBlock())) { - if (bound_type == nullptr) { - ScopedObjectAccess soa(Thread::Current()); - HInstruction* insert_point = notNullBlock->GetFirstInstruction(); - ReferenceTypeInfo object_rti = ReferenceTypeInfo::Create( - handle_cache_.GetObjectClassHandle(), /* is_exact */ true); - if (ShouldCreateBoundType(insert_point, obj, object_rti, nullptr, notNullBlock)) { - bound_type = new (graph_->GetArena()) HBoundType(obj); - bound_type->SetUpperBound(object_rti, /* bound_can_be_null */ false); - if (obj->GetReferenceTypeInfo().IsValid()) { - bound_type->SetReferenceTypeInfo(obj->GetReferenceTypeInfo()); - } - notNullBlock->InsertInstructionBefore(bound_type, insert_point); - } else { - // We already have a bound type on the position we would need to insert - // the new one. The existing bound type should dominate all the users - // (dchecked) so there's no need to continue. - break; - } - } - user->ReplaceInput(bound_type, index); - } - } + ReferenceTypeInfo object_rti = ReferenceTypeInfo::Create( + handle_cache_.GetObjectClassHandle(), /* is_exact */ false); + + BoundTypeIn(obj, notNullBlock, /* start_instruction */ nullptr, object_rti); } // Returns true if one of the patterns below has been recognized. If so, the @@ -378,15 +468,10 @@ void ReferenceTypePropagation::BoundTypeForIfInstanceOf(HBasicBlock* block) { HLoadClass* load_class = instanceOf->InputAt(1)->AsLoadClass(); ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI(); - { - if (!class_rti.IsValid()) { - // He have loaded an unresolved class. Don't bother bounding the type. - return; - } + if (!class_rti.IsValid()) { + // He have loaded an unresolved class. Don't bother bounding the type. + return; } - // We only need to bound the type if we have uses in the relevant block. - // So start with null and create the HBoundType lazily, only if it's needed. - HBoundType* bound_type = nullptr; HInstruction* obj = instanceOf->InputAt(0); if (obj->GetReferenceTypeInfo().IsExact() && !obj->IsPhi()) { @@ -398,31 +483,14 @@ void ReferenceTypePropagation::BoundTypeForIfInstanceOf(HBasicBlock* block) { // input. return; } - DCHECK(!obj->IsLoadClass()) << "We should not replace HLoadClass instructions"; - const HUseList<HInstruction*>& uses = obj->GetUses(); - for (auto it = uses.begin(), end = uses.end(); it != end; /* ++it below */) { - HInstruction* user = it->GetUser(); - size_t index = it->GetIndex(); - // Increment `it` now because `*it` may disappear thanks to user->ReplaceInput(). 
- ++it; - if (instanceOfTrueBlock->Dominates(user->GetBlock())) { - if (bound_type == nullptr) { - ScopedObjectAccess soa(Thread::Current()); - HInstruction* insert_point = instanceOfTrueBlock->GetFirstInstruction(); - if (ShouldCreateBoundType(insert_point, obj, class_rti, nullptr, instanceOfTrueBlock)) { - bound_type = new (graph_->GetArena()) HBoundType(obj); - bound_type->SetUpperBound(class_rti, /* InstanceOf fails for null. */ false); - instanceOfTrueBlock->InsertInstructionBefore(bound_type, insert_point); - } else { - // We already have a bound type on the position we would need to insert - // the new one. The existing bound type should dominate all the users - // (dchecked) so there's no need to continue. - break; - } - } - user->ReplaceInput(bound_type, index); + + { + ScopedObjectAccess soa(Thread::Current()); + if (!class_rti.GetTypeHandle()->CannotBeAssignedFromOtherTypes()) { + class_rti = ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), /* is_exact */ false); } } + BoundTypeIn(obj, instanceOfTrueBlock, /* start_instruction */ nullptr, class_rti); } void ReferenceTypePropagation::RTPVisitor::SetClassAsTypeInfo(HInstruction* instr, @@ -462,6 +530,10 @@ void ReferenceTypePropagation::RTPVisitor::SetClassAsTypeInfo(HInstruction* inst } } +void ReferenceTypePropagation::RTPVisitor::VisitDeoptimize(HDeoptimize* instr) { + BoundTypeForClassCheck(instr); +} + void ReferenceTypePropagation::RTPVisitor::UpdateReferenceTypeInfo(HInstruction* instr, dex::TypeIndex type_idx, const DexFile& dex_file, @@ -513,16 +585,9 @@ void ReferenceTypePropagation::RTPVisitor::UpdateFieldAccessTypeInfo(HInstructio ScopedObjectAccess soa(Thread::Current()); ObjPtr<mirror::Class> klass; - // The field index is unknown only during tests. - if (info.GetFieldIndex() != kUnknownFieldIndex) { - ClassLinker* cl = Runtime::Current()->GetClassLinker(); - ArtField* field = cl->GetResolvedField(info.GetFieldIndex(), - MakeObjPtr(info.GetDexCache().Get())); - // TODO: There are certain cases where we can't resolve the field. - // b/21914925 is open to keep track of a repro case for this issue. - if (field != nullptr) { - klass = field->GetType<false>(); - } + // The field is unknown only during tests. + if (info.GetField() != nullptr) { + klass = info.GetField()->GetType<false>(); } SetClassAsTypeInfo(instr, klass, /* is_exact */ false); @@ -605,15 +670,17 @@ void ReferenceTypePropagation::RTPVisitor::VisitBoundType(HBoundType* instr) { // Narrow the type as much as possible. HInstruction* obj = instr->InputAt(0); ReferenceTypeInfo obj_rti = obj->GetReferenceTypeInfo(); - if (class_rti.GetTypeHandle()->CannotBeAssignedFromOtherTypes()) { - instr->SetReferenceTypeInfo( - ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), /* is_exact */ true)); + if (class_rti.IsExact()) { + instr->SetReferenceTypeInfo(class_rti); } else if (obj_rti.IsValid()) { if (class_rti.IsSupertypeOf(obj_rti)) { // Object type is more specific. instr->SetReferenceTypeInfo(obj_rti); } else { - // Upper bound is more specific. + // Upper bound is more specific, or unrelated to the object's type. + // Note that the object might then be exact, and we know the code dominated by this + // bound type is dead. To not confuse potential other optimizations, we mark + // the bound as non-exact. 
instr->SetReferenceTypeInfo( ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), /* is_exact */ false)); } @@ -644,8 +711,11 @@ void ReferenceTypePropagation::RTPVisitor::VisitCheckCast(HCheckCast* check_cast if (class_rti.IsValid()) { DCHECK(is_first_run_); + ScopedObjectAccess soa(Thread::Current()); // This is the first run of RTP and class is resolved. - bound_type->SetUpperBound(class_rti, /* CheckCast succeeds for nulls. */ true); + bool is_exact = class_rti.GetTypeHandle()->CannotBeAssignedFromOtherTypes(); + bound_type->SetUpperBound(ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), is_exact), + /* CheckCast succeeds for nulls. */ true); } else { // This is the first run of RTP and class is unresolved. Remove the binding. // The instruction itself is removed in VisitBoundType so as to not @@ -795,21 +865,25 @@ void ReferenceTypePropagation::RTPVisitor::VisitArrayGet(HArrayGet* instr) { } void ReferenceTypePropagation::UpdateBoundType(HBoundType* instr) { - ReferenceTypeInfo new_rti = instr->InputAt(0)->GetReferenceTypeInfo(); - if (!new_rti.IsValid()) { + ReferenceTypeInfo input_rti = instr->InputAt(0)->GetReferenceTypeInfo(); + if (!input_rti.IsValid()) { return; // No new info yet. } - // Make sure that we don't go over the bounded type. ReferenceTypeInfo upper_bound_rti = instr->GetUpperBound(); - if (!upper_bound_rti.IsSupertypeOf(new_rti)) { - // Note that the input might be exact, in which case we know the branch leading - // to the bound type is dead. We play it safe by not marking the bound type as - // exact. - bool is_exact = upper_bound_rti.GetTypeHandle()->CannotBeAssignedFromOtherTypes(); - new_rti = ReferenceTypeInfo::Create(upper_bound_rti.GetTypeHandle(), is_exact); - } - instr->SetReferenceTypeInfo(new_rti); + if (upper_bound_rti.IsExact()) { + instr->SetReferenceTypeInfo(upper_bound_rti); + } else if (upper_bound_rti.IsSupertypeOf(input_rti)) { + // input is more specific. + instr->SetReferenceTypeInfo(input_rti); + } else { + // upper_bound is more specific or unrelated. + // Note that the object might then be exact, and we know the code dominated by this + // bound type is dead. To not confuse potential other optimizations, we mark + // the bound as non-exact. + instr->SetReferenceTypeInfo( + ReferenceTypeInfo::Create(upper_bound_rti.GetTypeHandle(), /* is_exact */ false)); + } } // NullConstant inputs are ignored during merging as they do not provide any useful information. 
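Distilled, the rewritten UpdateBoundType() above applies this merge policy (a sketch written as a free function over valid ReferenceTypeInfo values, ignoring lock annotations; the real code sets the result on the HBoundType instead of returning it):

ReferenceTypeInfo MergeWithUpperBound(ReferenceTypeInfo input_rti,
                                      ReferenceTypeInfo upper_bound_rti) {
  if (upper_bound_rti.IsExact()) {
    return upper_bound_rti;  // The bound is already as precise as it can get.
  }
  if (upper_bound_rti.IsSupertypeOf(input_rti)) {
    return input_rti;        // The input is more specific; keep it.
  }
  // Unrelated types: keep the bound's class but drop exactness, since the code
  // dominated by the bound may in fact be dead.
  return ReferenceTypeInfo::Create(upper_bound_rti.GetTypeHandle(), /* is_exact */ false);
}

The same shape appears in VisitBoundType() above, which prefers the bound when it is exact and otherwise narrows to the object's type only when the bound is a supertype of it.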
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc index 559f40923b..2227872f76 100644 --- a/compiler/optimizing/register_allocator_test.cc +++ b/compiler/optimizing/register_allocator_test.cc @@ -492,7 +492,6 @@ static HGraph* BuildIfElseWithPhi(ArenaAllocator* allocator, HInstruction** input2) { HGraph* graph = CreateGraph(allocator); HBasicBlock* entry = new (allocator) HBasicBlock(graph); - ScopedNullHandle<mirror::DexCache> dex_cache; graph->AddBlock(entry); graph->SetEntryBlock(entry); HInstruction* parameter = new (allocator) HParameterValue( @@ -504,13 +503,13 @@ static HGraph* BuildIfElseWithPhi(ArenaAllocator* allocator, entry->AddSuccessor(block); HInstruction* test = new (allocator) HInstanceFieldGet(parameter, + nullptr, Primitive::kPrimBoolean, MemberOffset(22), false, kUnknownFieldIndex, kUnknownClassDefIndex, graph->GetDexFile(), - dex_cache, 0); block->AddInstruction(test); block->AddInstruction(new (allocator) HIf(test)); @@ -531,22 +530,22 @@ static HGraph* BuildIfElseWithPhi(ArenaAllocator* allocator, *phi = new (allocator) HPhi(allocator, 0, 0, Primitive::kPrimInt); join->AddPhi(*phi); *input1 = new (allocator) HInstanceFieldGet(parameter, + nullptr, Primitive::kPrimInt, MemberOffset(42), false, kUnknownFieldIndex, kUnknownClassDefIndex, graph->GetDexFile(), - dex_cache, 0); *input2 = new (allocator) HInstanceFieldGet(parameter, + nullptr, Primitive::kPrimInt, MemberOffset(42), false, kUnknownFieldIndex, kUnknownClassDefIndex, graph->GetDexFile(), - dex_cache, 0); then->AddInstruction(*input1); else_->AddInstruction(*input2); @@ -654,7 +653,6 @@ static HGraph* BuildFieldReturn(ArenaAllocator* allocator, HInstruction** field, HInstruction** ret) { HGraph* graph = CreateGraph(allocator); - ScopedNullHandle<mirror::DexCache> dex_cache; HBasicBlock* entry = new (allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); @@ -667,13 +665,13 @@ static HGraph* BuildFieldReturn(ArenaAllocator* allocator, entry->AddSuccessor(block); *field = new (allocator) HInstanceFieldGet(parameter, + nullptr, Primitive::kPrimInt, MemberOffset(42), false, kUnknownFieldIndex, kUnknownClassDefIndex, graph->GetDexFile(), - dex_cache, 0); block->AddInstruction(*field); *ret = new (allocator) HReturn(*field); diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc index 91efb80015..dc8ee23ba4 100644 --- a/compiler/optimizing/sharpening.cc +++ b/compiler/optimizing/sharpening.cc @@ -54,6 +54,24 @@ void HSharpening::Run() { } } +static bool IsInBootImage(ArtMethod* method) { + const std::vector<gc::space::ImageSpace*>& image_spaces = + Runtime::Current()->GetHeap()->GetBootImageSpaces(); + for (gc::space::ImageSpace* image_space : image_spaces) { + const auto& method_section = image_space->GetImageHeader().GetMethodsSection(); + if (method_section.Contains(reinterpret_cast<uint8_t*>(method) - image_space->Begin())) { + return true; + } + } + return false; +} + +static bool AOTCanEmbedMethod(ArtMethod* method, const CompilerOptions& options) { + // Including patch information means the AOT code will be patched, which we don't + // support in the compiler, and is anyways moving away b/33192586. + return IsInBootImage(method) && !options.GetCompilePic() && !options.GetIncludePatchInformation(); +} + void HSharpening::ProcessInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { if (invoke->IsStringInit()) { // Not using the dex cache arrays. But we could still try to use a better dispatch... 
@@ -61,68 +79,42 @@ void HSharpening::ProcessInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { return; } - HGraph* outer_graph = codegen_->GetGraph(); - ArtMethod* compiling_method = graph_->GetArtMethod(); + ArtMethod* callee = invoke->GetResolvedMethod(); + DCHECK(callee != nullptr); HInvokeStaticOrDirect::MethodLoadKind method_load_kind; HInvokeStaticOrDirect::CodePtrLocation code_ptr_location; uint64_t method_load_data = 0u; - uint64_t direct_code_ptr = 0u; - if (invoke->GetResolvedMethod() == outer_graph->GetArtMethod()) { - DCHECK(outer_graph->GetArtMethod() != nullptr); + // Note: we never call an ArtMethod through a known code pointer, as + // we do not want to keep on invoking it if it gets deoptimized. This + // applies to both AOT and JIT. + // This also avoids having to find out if the code pointer of an ArtMethod + // is the resolution trampoline (for ensuring the class is initialized), or + // the interpreter entrypoint. Such code pointers we do not want to call + // directly. + // Only in the case of a recursive call can we call directly, as we know the + // class is initialized already or being initialized, and the call will not + // be invoked once the method is deoptimized. + + if (callee == codegen_->GetGraph()->GetArtMethod()) { + // Recursive call. method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kRecursive; code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallSelf; + } else if (Runtime::Current()->UseJitCompilation() || + AOTCanEmbedMethod(callee, codegen_->GetCompilerOptions())) { + // JIT or on-device AOT compilation referencing a boot image method. + // Use the method address directly. + method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress; + method_load_data = reinterpret_cast<uintptr_t>(callee); + code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod; } else { - uintptr_t direct_code, direct_method; - { - ScopedObjectAccess soa(Thread::Current()); - compiler_driver_->GetCodeAndMethodForDirectCall( - (compiling_method == nullptr) ? nullptr : compiling_method->GetDeclaringClass(), - invoke->GetResolvedMethod(), - &direct_code, - &direct_method); - } - if (direct_method != 0u) { // Should we use a direct pointer to the method? - // Note: For JIT, kDirectAddressWithFixup doesn't make sense at all and while - // kDirectAddress would be fine for image methods, we don't support it at the moment. - DCHECK(!Runtime::Current()->UseJitCompilation()); - if (direct_method != static_cast<uintptr_t>(-1)) { // Is the method pointer known now? - method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress; - method_load_data = direct_method; - } else { // The direct pointer will be known at link time. - method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup; - } - } else { // Use dex cache. - if (!Runtime::Current()->UseJitCompilation()) { - // Use PC-relative access to the dex cache arrays. - method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative; - DexCacheArraysLayout layout(GetInstructionSetPointerSize(codegen_->GetInstructionSet()), - &graph_->GetDexFile()); - method_load_data = layout.MethodOffset(invoke->GetDexMethodIndex()); - } else { // We must go through the ArtMethod's pointer to resolved methods. - method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod; - } - } - if (direct_code != 0u) { // Should we use a direct pointer to the code? 
-      // Note: For JIT, kCallPCRelative and kCallDirectWithFixup don't make sense at all and
-      // while kCallDirect would be fine for image methods, we don't support it at the moment.
-      DCHECK(!Runtime::Current()->UseJitCompilation());
-      const DexFile* dex_file_of_callee = invoke->GetTargetMethod().dex_file;
-      if (direct_code != static_cast<uintptr_t>(-1)) {  // Is the code pointer known now?
-        code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallDirect;
-        direct_code_ptr = direct_code;
-      } else if (ContainsElement(compiler_driver_->GetDexFilesForOatFile(), dex_file_of_callee)) {
-        // Use PC-relative calls for invokes within a multi-dex oat file.
-        code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative;
-      } else {  // The direct pointer will be known at link time.
-        // NOTE: This is used for app->boot calls when compiling an app against
-        // a relocatable but not yet relocated image.
-        code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup;
-      }
-    } else {  // We must use the code pointer from the ArtMethod.
-      code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
-    }
+    // Use PC-relative access to the dex cache arrays.
+    method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative;
+    DexCacheArraysLayout layout(GetInstructionSetPointerSize(codegen_->GetInstructionSet()),
+                                &graph_->GetDexFile());
+    method_load_data = layout.MethodOffset(invoke->GetDexMethodIndex());
+    code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
   }

   if (graph_->IsDebuggable()) {
@@ -132,7 +124,7 @@ void HSharpening::ProcessInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
   }

   HInvokeStaticOrDirect::DispatchInfo desired_dispatch_info = {
-      method_load_kind, code_ptr_location, method_load_data, direct_code_ptr
+      method_load_kind, code_ptr_location, method_load_data
   };
   HInvokeStaticOrDirect::DispatchInfo dispatch_info =
       codegen_->GetSupportedInvokeStaticOrDirectDispatch(desired_dispatch_info, invoke);
@@ -140,6 +132,25 @@ void HSharpening::ProcessInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
 }

 void HSharpening::ProcessLoadClass(HLoadClass* load_class) {
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScope<1> hs(soa.Self());
+  Runtime* runtime = Runtime::Current();
+  ClassLinker* class_linker = runtime->GetClassLinker();
+  const DexFile& dex_file = load_class->GetDexFile();
+  dex::TypeIndex type_index = load_class->GetTypeIndex();
+  Handle<mirror::DexCache> dex_cache = IsSameDexFile(dex_file, *compilation_unit_.GetDexFile())
+      ? compilation_unit_.GetDexCache()
+      : hs.NewHandle(class_linker->FindDexCache(soa.Self(), dex_file));
+  mirror::Class* cls = dex_cache->GetResolvedType(type_index);
+  SharpenClass(load_class, cls, handles_, codegen_, compiler_driver_);
+}
+
+void HSharpening::SharpenClass(HLoadClass* load_class,
+                               mirror::Class* klass,
+                               VariableSizedHandleScope* handles,
+                               CodeGenerator* codegen,
+                               CompilerDriver* compiler_driver) {
+  ScopedAssertNoThreadSuspension sants("Sharpening class in compiler");
   DCHECK(load_class->GetLoadKind() == HLoadClass::LoadKind::kDexCacheViaMethod ||
          load_class->GetLoadKind() == HLoadClass::LoadKind::kReferrersClass)
       << load_class->GetLoadKind();
@@ -153,69 +164,60 @@ void HSharpening::ProcessLoadClass(HLoadClass* load_class) {
   bool is_in_boot_image = false;
   HLoadClass::LoadKind desired_load_kind = static_cast<HLoadClass::LoadKind>(-1);
   uint64_t address = 0u;  // Class or dex cache element address.
-  {
-    ScopedObjectAccess soa(Thread::Current());
-    StackHandleScope<1> hs(soa.Self());
-    Runtime* runtime = Runtime::Current();
-    ClassLinker* class_linker = runtime->GetClassLinker();
-    Handle<mirror::DexCache> dex_cache = IsSameDexFile(dex_file, *compilation_unit_.GetDexFile())
-        ? compilation_unit_.GetDexCache()
-        : hs.NewHandle(class_linker->FindDexCache(soa.Self(), dex_file));
-    mirror::Class* klass = dex_cache->GetResolvedType(type_index);
-    if (codegen_->GetCompilerOptions().IsBootImage()) {
-      // Compiling boot image. Check if the class is a boot image class.
-      DCHECK(!runtime->UseJitCompilation());
-      if (!compiler_driver_->GetSupportBootImageFixup()) {
-        // MIPS64 or compiler_driver_test. Do not sharpen.
-        desired_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod;
-      } else if ((klass != nullptr) && compiler_driver_->IsImageClass(
-          dex_file.StringDataByIdx(dex_file.GetTypeId(type_index).descriptor_idx_))) {
-        is_in_boot_image = true;
-        is_in_dex_cache = true;
-        desired_load_kind = codegen_->GetCompilerOptions().GetCompilePic()
-            ? HLoadClass::LoadKind::kBootImageLinkTimePcRelative
-            : HLoadClass::LoadKind::kBootImageLinkTimeAddress;
-      } else {
-        // Not a boot image class. We must go through the dex cache.
-        DCHECK(ContainsElement(compiler_driver_->GetDexFilesForOatFile(), &dex_file));
-        desired_load_kind = HLoadClass::LoadKind::kDexCachePcRelative;
-      }
+  Runtime* runtime = Runtime::Current();
+  if (codegen->GetCompilerOptions().IsBootImage()) {
+    // Compiling boot image. Check if the class is a boot image class.
+    DCHECK(!runtime->UseJitCompilation());
+    if (!compiler_driver->GetSupportBootImageFixup()) {
+      // MIPS64 or compiler_driver_test. Do not sharpen.
+      desired_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod;
+    } else if ((klass != nullptr) && compiler_driver->IsImageClass(
+        dex_file.StringDataByIdx(dex_file.GetTypeId(type_index).descriptor_idx_))) {
+      is_in_boot_image = true;
+      is_in_dex_cache = true;
+      desired_load_kind = codegen->GetCompilerOptions().GetCompilePic()
+          ? HLoadClass::LoadKind::kBootImageLinkTimePcRelative
+          : HLoadClass::LoadKind::kBootImageLinkTimeAddress;
    } else {
-      is_in_boot_image = (klass != nullptr) && runtime->GetHeap()->ObjectIsInBootImageSpace(klass);
-      if (runtime->UseJitCompilation()) {
-        // TODO: Make sure we don't set the "compile PIC" flag for JIT as that's bogus.
-        // DCHECK(!codegen_->GetCompilerOptions().GetCompilePic());
-        is_in_dex_cache = (klass != nullptr);
-        if (is_in_boot_image) {
-          // TODO: Use direct pointers for all non-moving spaces, not just boot image. Bug: 29530787
-          desired_load_kind = HLoadClass::LoadKind::kBootImageAddress;
-          address = reinterpret_cast64<uint64_t>(klass);
-        } else if (is_in_dex_cache) {
-          desired_load_kind = HLoadClass::LoadKind::kJitTableAddress;
-          // We store in the address field the location of the stack reference maintained
-          // by the handle. We do this now so that the code generation does not need to figure
-          // out which class loader to use.
-          address = reinterpret_cast<uint64_t>(handles_->NewHandle(klass).GetReference());
-        } else {
-          // Class not loaded yet. This happens when the dex code requesting
-          // this `HLoadClass` hasn't been executed in the interpreter.
-          // Fallback to the dex cache.
-          // TODO(ngeoffray): Generate HDeoptimize instead.
-          desired_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod;
-        }
-      } else if (is_in_boot_image && !codegen_->GetCompilerOptions().GetCompilePic()) {
-        // AOT app compilation. Check if the class is in the boot image.
+      // Not a boot image class. We must go through the dex cache.
+      DCHECK(ContainsElement(compiler_driver->GetDexFilesForOatFile(), &dex_file));
+      desired_load_kind = HLoadClass::LoadKind::kDexCachePcRelative;
+    }
+  } else {
+    is_in_boot_image = (klass != nullptr) && runtime->GetHeap()->ObjectIsInBootImageSpace(klass);
+    if (runtime->UseJitCompilation()) {
+      // TODO: Make sure we don't set the "compile PIC" flag for JIT as that's bogus.
+      // DCHECK(!codegen_->GetCompilerOptions().GetCompilePic());
+      is_in_dex_cache = (klass != nullptr);
+      if (is_in_boot_image) {
+        // TODO: Use direct pointers for all non-moving spaces, not just boot image. Bug: 29530787
        desired_load_kind = HLoadClass::LoadKind::kBootImageAddress;
        address = reinterpret_cast64<uint64_t>(klass);
+      } else if (is_in_dex_cache) {
+        desired_load_kind = HLoadClass::LoadKind::kJitTableAddress;
+        // We store in the address field the location of the stack reference maintained
+        // by the handle. We do this now so that the code generation does not need to figure
+        // out which class loader to use.
+        address = reinterpret_cast<uint64_t>(handles->NewHandle(klass).GetReference());
      } else {
-        // Not JIT and either the klass is not in boot image or we are compiling in PIC mode.
-        // Use PC-relative load from the dex cache if the dex file belongs
-        // to the oat file that we're currently compiling.
-        desired_load_kind =
-            ContainsElement(compiler_driver_->GetDexFilesForOatFile(), &load_class->GetDexFile())
-                ? HLoadClass::LoadKind::kDexCachePcRelative
-                : HLoadClass::LoadKind::kDexCacheViaMethod;
+        // Class not loaded yet. This happens when the dex code requesting
+        // this `HLoadClass` hasn't been executed in the interpreter.
+        // Fallback to the dex cache.
+        // TODO(ngeoffray): Generate HDeoptimize instead.
+        desired_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod;
      }
+    } else if (is_in_boot_image && !codegen->GetCompilerOptions().GetCompilePic()) {
+      // AOT app compilation. Check if the class is in the boot image.
+      desired_load_kind = HLoadClass::LoadKind::kBootImageAddress;
+      address = reinterpret_cast64<uint64_t>(klass);
+    } else {
+      // Not JIT and either the klass is not in boot image or we are compiling in PIC mode.
+      // Use PC-relative load from the dex cache if the dex file belongs
+      // to the oat file that we're currently compiling.
+      desired_load_kind =
+          ContainsElement(compiler_driver->GetDexFilesForOatFile(), &load_class->GetDexFile())
+              ? HLoadClass::LoadKind::kDexCachePcRelative
+              : HLoadClass::LoadKind::kDexCacheViaMethod;
    }
  }
  DCHECK_NE(desired_load_kind, static_cast<HLoadClass::LoadKind>(-1));
@@ -241,7 +243,7 @@ void HSharpening::ProcessLoadClass(HLoadClass* load_class) {
     load_class->MarkInDexCache();
   }

-  HLoadClass::LoadKind load_kind = codegen_->GetSupportedLoadClassKind(desired_load_kind);
+  HLoadClass::LoadKind load_kind = codegen->GetSupportedLoadClassKind(desired_load_kind);
   switch (load_kind) {
     case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
@@ -254,7 +256,7 @@ void HSharpening::ProcessLoadClass(HLoadClass* load_class) {
       load_class->SetLoadKindWithAddress(load_kind, address);
       break;
     case HLoadClass::LoadKind::kDexCachePcRelative: {
-      PointerSize pointer_size = InstructionSetPointerSize(codegen_->GetInstructionSet());
+      PointerSize pointer_size = InstructionSetPointerSize(codegen->GetInstructionSet());
       DexCacheArraysLayout layout(pointer_size, &dex_file);
       size_t element_index = layout.TypeOffset(type_index);
       load_class->SetLoadKindWithDexCacheReference(load_kind, dex_file, element_index);
@@ -268,13 +270,11 @@ void HSharpening::ProcessLoadClass(HLoadClass* load_class) {

 void HSharpening::ProcessLoadString(HLoadString* load_string) {
   DCHECK_EQ(load_string->GetLoadKind(), HLoadString::LoadKind::kDexCacheViaMethod);
-  DCHECK(!load_string->IsInDexCache());

   const DexFile& dex_file = load_string->GetDexFile();
   dex::StringIndex string_index = load_string->GetStringIndex();

   HLoadString::LoadKind desired_load_kind = HLoadString::LoadKind::kDexCacheViaMethod;
-  uint64_t address = 0u;  // String or dex cache element address.
   {
     Runtime* runtime = Runtime::Current();
     ClassLinker* class_linker = runtime->GetClassLinker();
@@ -283,12 +283,13 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) {
     Handle<mirror::DexCache> dex_cache = IsSameDexFile(dex_file, *compilation_unit_.GetDexFile())
         ? compilation_unit_.GetDexCache()
         : hs.NewHandle(class_linker->FindDexCache(soa.Self(), dex_file));
+    mirror::String* string = nullptr;
     if (codegen_->GetCompilerOptions().IsBootImage()) {
       // Compiling boot image. Resolve the string and allocate it if needed, to ensure
       // the string will be added to the boot image.
       DCHECK(!runtime->UseJitCompilation());
-      mirror::String* string = class_linker->ResolveString(dex_file, string_index, dex_cache);
+      string = class_linker->ResolveString(dex_file, string_index, dex_cache);
       CHECK(string != nullptr);
       if (compiler_driver_->GetSupportBootImageFixup()) {
         DCHECK(ContainsElement(compiler_driver_->GetDexFilesForOatFile(), &dex_file));
@@ -302,43 +303,32 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) {
     } else if (runtime->UseJitCompilation()) {
       // TODO: Make sure we don't set the "compile PIC" flag for JIT as that's bogus.
       // DCHECK(!codegen_->GetCompilerOptions().GetCompilePic());
-      mirror::String* string = class_linker->LookupString(dex_file, string_index, dex_cache);
+      string = class_linker->LookupString(dex_file, string_index, dex_cache);
       if (string != nullptr) {
         if (runtime->GetHeap()->ObjectIsInBootImageSpace(string)) {
           desired_load_kind = HLoadString::LoadKind::kBootImageAddress;
-          address = reinterpret_cast64<uint64_t>(string);
         } else {
           desired_load_kind = HLoadString::LoadKind::kJitTableAddress;
         }
       }
     } else {
       // AOT app compilation. Try to lookup the string without allocating if not found.
-      mirror::String* string = class_linker->LookupString(dex_file, string_index, dex_cache);
+      string = class_linker->LookupString(dex_file, string_index, dex_cache);
       if (string != nullptr &&
           runtime->GetHeap()->ObjectIsInBootImageSpace(string) &&
           !codegen_->GetCompilerOptions().GetCompilePic()) {
         desired_load_kind = HLoadString::LoadKind::kBootImageAddress;
-        address = reinterpret_cast64<uint64_t>(string);
       } else {
         desired_load_kind = HLoadString::LoadKind::kBssEntry;
       }
     }
+    if (string != nullptr) {
+      load_string->SetString(handles_->NewHandle(string));
+    }
   }

   HLoadString::LoadKind load_kind = codegen_->GetSupportedLoadStringKind(desired_load_kind);
-  switch (load_kind) {
-    case HLoadString::LoadKind::kBootImageLinkTimeAddress:
-    case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
-    case HLoadString::LoadKind::kBssEntry:
-    case HLoadString::LoadKind::kDexCacheViaMethod:
-    case HLoadString::LoadKind::kJitTableAddress:
-      load_string->SetLoadKindWithStringReference(load_kind, dex_file, string_index);
-      break;
-    case HLoadString::LoadKind::kBootImageAddress:
-      DCHECK_NE(address, 0u);
-      load_string->SetLoadKindWithAddress(load_kind, address);
-      break;
-  }
+  load_string->SetLoadKind(load_kind);
 }

 }  // namespace art
diff --git a/compiler/optimizing/sharpening.h b/compiler/optimizing/sharpening.h
index 74189549fd..ae5ccb33ab 100644
--- a/compiler/optimizing/sharpening.h
+++ b/compiler/optimizing/sharpening.h
@@ -47,6 +47,14 @@ class HSharpening : public HOptimization {

   static constexpr const char* kSharpeningPassName = "sharpening";

+  // Used internally but also by the inliner.
+  static void SharpenClass(HLoadClass* load_class,
+                           mirror::Class* klass,
+                           VariableSizedHandleScope* handles,
+                           CodeGenerator* codegen,
+                           CompilerDriver* compiler_driver)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
  private:
   void ProcessInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke);
   void ProcessLoadClass(HLoadClass* load_class);
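Taken together, the sharpening.cc hunks above reduce the static/direct-call dispatch decision to three cases, with the call always going through the ArtMethod entrypoint except when a method calls itself. A simplified paraphrase of that decision is sketched below; the enum and the ChooseMethodLoadKind function are illustrative stand-ins, not ART code.

// Illustrative stand-ins for the ART method-load kinds used in the diff above.
enum class MethodLoadKind { kRecursive, kDirectAddress, kDexCachePcRelative };

// Paraphrase of the new ProcessInvokeStaticOrDirect() choice:
//  - a method calling itself needs no method load at all (kRecursive),
//  - JIT code, or AOT code allowed to embed a boot image method, references the
//    ArtMethod* by address (kDirectAddress),
//  - everything else loads the ArtMethod* PC-relatively from the dex cache arrays.
MethodLoadKind ChooseMethodLoadKind(bool is_recursive_call,
                                    bool use_jit,
                                    bool aot_can_embed_method) {
  if (is_recursive_call) {
    return MethodLoadKind::kRecursive;
  }
  if (use_jit || aot_can_embed_method) {
    return MethodLoadKind::kDirectAddress;
  }
  return MethodLoadKind::kDexCachePcRelative;
}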