diff options
Diffstat (limited to 'compiler/optimizing')
| -rw-r--r-- | compiler/optimizing/code_generator_arm.cc | 107 | ||||
| -rw-r--r-- | compiler/optimizing/code_generator_arm.h | 32 | ||||
| -rw-r--r-- | compiler/optimizing/code_generator_x86.cc | 3 | ||||
| -rw-r--r-- | compiler/optimizing/dex_cache_array_fixups_arm.cc | 92 | ||||
| -rw-r--r-- | compiler/optimizing/dex_cache_array_fixups_arm.h | 37 | ||||
| -rw-r--r-- | compiler/optimizing/nodes.cc | 53 | ||||
| -rw-r--r-- | compiler/optimizing/nodes.h | 16 | ||||
| -rw-r--r-- | compiler/optimizing/nodes_arm.h | 59 | ||||
| -rw-r--r-- | compiler/optimizing/optimizing_compiler.cc | 15 | ||||
| -rw-r--r-- | compiler/optimizing/pc_relative_fixups_x86.cc | 28 | ||||
| -rw-r--r-- | compiler/optimizing/reference_type_propagation.cc | 103 | ||||
| -rw-r--r-- | compiler/optimizing/reference_type_propagation.h | 1 |
12 files changed, 512 insertions, 34 deletions
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index a98d9c68b7..76bf951a47 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -724,7 +724,9 @@ CodeGeneratorARM::CodeGeneratorARM(HGraph* graph, graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), call_patches_(MethodReferenceComparator(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { + relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + dex_cache_arrays_base_labels_(std::less<HArmDexCacheArraysBase*>(), + graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { // Always save the LR register to mimic Quick. AddAllocatedRegister(Location::RegisterLocation(LR)); } @@ -1922,10 +1924,18 @@ void LocationsBuilderARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invok codegen_->GetAssembler(), codegen_->GetInstructionSetFeatures()); if (intrinsic.TryDispatch(invoke)) { + if (invoke->GetLocations()->CanCall() && invoke->HasPcRelativeDexCache()) { + invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any()); + } return; } HandleInvoke(invoke); + + // For PC-relative dex cache the invoke has an extra input, the PC-relative address base. 
+ if (invoke->HasPcRelativeDexCache()) { + invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister()); + } } static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM* codegen) { @@ -5818,16 +5828,6 @@ void CodeGeneratorARM::GenerateReadBarrierForRoot(HInstruction* instruction, HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM::GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, MethodReference target_method) { - if (desired_dispatch_info.method_load_kind == - HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative) { - // TODO: Implement this type. For the moment, we fall back to kDexCacheViaMethod. - return HInvokeStaticOrDirect::DispatchInfo { - HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod, - HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod, - 0u, - 0u - }; - } if (desired_dispatch_info.code_ptr_location == HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative) { const DexFile& outer_dex_file = GetGraph()->GetDexFile(); @@ -5850,6 +5850,32 @@ HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM::GetSupportedInvokeStaticOr return desired_dispatch_info; } +Register CodeGeneratorARM::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, + Register temp) { + DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u); + Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); + if (!invoke->GetLocations()->Intrinsified()) { + return location.AsRegister<Register>(); + } + // For intrinsics we allow any location, so it may be on the stack. + if (!location.IsRegister()) { + __ LoadFromOffset(kLoadWord, temp, SP, location.GetStackIndex()); + return temp; + } + // For register locations, check if the register was saved. If so, get it from the stack. + // Note: There is a chance that the register was saved but not overwritten, so we could + // save one load. 
However, since this is just an intrinsic slow path we prefer this + simple and more robust approach rather than trying to determine if that's the case. + SlowPathCode* slow_path = GetCurrentSlowPath(); + DCHECK(slow_path != nullptr); // For intrinsified invokes the call is emitted on the slow path. + if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) { + int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>()); + __ LoadFromOffset(kLoadWord, temp, SP, stack_offset); + return temp; + } + return location.AsRegister<Register>(); +} + void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) { // For better instruction scheduling we load the direct code pointer before the method pointer. switch (invoke->GetCodePtrLocation()) { @@ -5881,11 +5907,15 @@ void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, __ LoadLiteral(temp.AsRegister<Register>(), DeduplicateMethodAddressLiteral(invoke->GetTargetMethod())); break; - case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: - // TODO: Implement this type. - // Currently filtered out by GetSupportedInvokeStaticOrDirectDispatch(). 
- LOG(FATAL) << "Unsupported"; - UNREACHABLE(); + case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: { + HArmDexCacheArraysBase* base = + invoke->InputAt(invoke->GetSpecialInputIndex())->AsArmDexCacheArraysBase(); + Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke, + temp.AsRegister<Register>()); + int32_t offset = invoke->GetDexCacheArrayOffset() - base->GetElementOffset(); + __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), base_reg, offset); + break; + } case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); Register method_reg; @@ -5970,7 +6000,11 @@ void CodeGeneratorARM::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); - size_t size = method_patches_.size() + call_patches_.size() + relative_call_patches_.size(); + size_t size = + method_patches_.size() + + call_patches_.size() + + relative_call_patches_.size() + + /* MOVW+MOVT for each base */ 2u * dex_cache_arrays_base_labels_.size(); linker_patches->reserve(size); for (const auto& entry : method_patches_) { const MethodReference& target_method = entry.first; @@ -5996,6 +6030,28 @@ void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche info.target_method.dex_file, info.target_method.dex_method_index)); } + for (const auto& pair : dex_cache_arrays_base_labels_) { + HArmDexCacheArraysBase* base = pair.first; + const DexCacheArraysBaseLabels* labels = &pair.second; + const DexFile& dex_file = base->GetDexFile(); + size_t base_element_offset = base->GetElementOffset(); + DCHECK(labels->add_pc_label.IsBound()); + uint32_t add_pc_offset = dchecked_integral_cast<uint32_t>(labels->add_pc_label.Position()); + // Add MOVW patch. 
+ DCHECK(labels->movw_label.IsBound()); + uint32_t movw_offset = dchecked_integral_cast<uint32_t>(labels->movw_label.Position()); + linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(movw_offset, + &dex_file, + add_pc_offset, + base_element_offset)); + // Add MOVT patch. + DCHECK(labels->movt_label.IsBound()); + uint32_t movt_offset = dchecked_integral_cast<uint32_t>(labels->movt_label.Position()); + linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(movt_offset, + &dex_file, + add_pc_offset, + base_element_offset)); + } } Literal* CodeGeneratorARM::DeduplicateMethodLiteral(MethodReference target_method, @@ -6107,6 +6163,23 @@ void InstructionCodeGeneratorARM::VisitPackedSwitch(HPackedSwitch* switch_instr) } } +void LocationsBuilderARM::VisitArmDexCacheArraysBase(HArmDexCacheArraysBase* base) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(base); + locations->SetOut(Location::RequiresRegister()); + codegen_->AddDexCacheArraysBase(base); +} + +void InstructionCodeGeneratorARM::VisitArmDexCacheArraysBase(HArmDexCacheArraysBase* base) { + Register base_reg = base->GetLocations()->Out().AsRegister<Register>(); + CodeGeneratorARM::DexCacheArraysBaseLabels* labels = codegen_->GetDexCacheArraysBaseLabels(base); + __ BindTrackedLabel(&labels->movw_label); + __ movw(base_reg, 0u); + __ BindTrackedLabel(&labels->movt_label); + __ movt(base_reg, 0u); + __ BindTrackedLabel(&labels->add_pc_label); + __ add(base_reg, base_reg, ShifterOperand(PC)); +} + void CodeGeneratorARM::MoveFromReturnRegister(Location trg, Primitive::Type type) { if (!trg.IsValid()) { DCHECK(type == Primitive::kPrimVoid); diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 89de4f801d..193add2541 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -373,6 +373,31 @@ class CodeGeneratorARM : public CodeGenerator { void EmitLinkerPatches(ArenaVector<LinkerPatch>* 
linker_patches) OVERRIDE; + // The PC-relative base address is loaded with three instructions, MOVW+MOVT + // to load the offset to base_reg and then ADD base_reg, PC. The offset is + // calculated from the ADD's effective PC, i.e. PC+4 on Thumb2. Though we + // currently emit these 3 instructions together, instruction scheduling could + // split this sequence apart, so we keep separate labels for each of them. + struct DexCacheArraysBaseLabels { + DexCacheArraysBaseLabels() = default; + DexCacheArraysBaseLabels(DexCacheArraysBaseLabels&& other) = default; + + Label movw_label; + Label movt_label; + Label add_pc_label; + }; + + void AddDexCacheArraysBase(HArmDexCacheArraysBase* base) { + DexCacheArraysBaseLabels labels; + dex_cache_arrays_base_labels_.Put(base, std::move(labels)); + } + + DexCacheArraysBaseLabels* GetDexCacheArraysBaseLabels(HArmDexCacheArraysBase* base) { + auto it = dex_cache_arrays_base_labels_.find(base); + DCHECK(it != dex_cache_arrays_base_labels_.end()); + return &it->second; + } + // Generate a read barrier for a heap reference within `instruction`. // // A read barrier for an object reference read from the heap is @@ -419,7 +444,12 @@ class CodeGeneratorARM : public CodeGenerator { void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root); private: + Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp); + using MethodToLiteralMap = ArenaSafeMap<MethodReference, Literal*, MethodReferenceComparator>; + using DexCacheArraysBaseToLabelsMap = ArenaSafeMap<HArmDexCacheArraysBase*, + DexCacheArraysBaseLabels, + std::less<HArmDexCacheArraysBase*>>; Literal* DeduplicateMethodLiteral(MethodReference target_method, MethodToLiteralMap* map); Literal* DeduplicateMethodAddressLiteral(MethodReference target_method); @@ -441,6 +471,8 @@ class CodeGeneratorARM : public CodeGenerator { // Using ArenaDeque<> which retains element addresses on push/emplace_back(). 
ArenaDeque<MethodPatchInfo<Label>> relative_call_patches_; + DexCacheArraysBaseToLabelsMap dex_cache_arrays_base_labels_; + DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM); }; diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 1fc09a81bc..a0d31dad04 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -1929,8 +1929,7 @@ void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invok // For PC-relative dex cache the invoke has an extra input, the PC-relative address base. if (invoke->HasPcRelativeDexCache()) { - invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), - Location::RequiresRegister()); + invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister()); } if (codegen_->IsBaseline()) { diff --git a/compiler/optimizing/dex_cache_array_fixups_arm.cc b/compiler/optimizing/dex_cache_array_fixups_arm.cc new file mode 100644 index 0000000000..65820630f8 --- /dev/null +++ b/compiler/optimizing/dex_cache_array_fixups_arm.cc @@ -0,0 +1,92 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "dex_cache_array_fixups_arm.h" + +#include "base/arena_containers.h" +#include "utils/dex_cache_arrays_layout-inl.h" + +namespace art { +namespace arm { + +/** + * Finds instructions that need the dex cache arrays base as an input. 
+ */ +class DexCacheArrayFixupsVisitor : public HGraphVisitor { + public: + explicit DexCacheArrayFixupsVisitor(HGraph* graph) + : HGraphVisitor(graph), + dex_cache_array_bases_(std::less<const DexFile*>(), + // Attribute memory use to code generator. + graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {} + + void MoveBasesIfNeeded() { + for (const auto& entry : dex_cache_array_bases_) { + // Bring the base closer to the first use (previously, it was in the + // entry block) and relieve some pressure on the register allocator + // while avoiding recalculation of the base in a loop. + HArmDexCacheArraysBase* base = entry.second; + base->MoveBeforeFirstUserAndOutOfLoops(); + } + } + + private: + void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE { + // If this is an invoke with PC-relative access to the dex cache methods array, + // we need to add the dex cache arrays base as the special input. + if (invoke->HasPcRelativeDexCache()) { + // Initialize base for target method dex file if needed. + MethodReference target_method = invoke->GetTargetMethod(); + HArmDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(*target_method.dex_file); + // Update the element offset in base. + DexCacheArraysLayout layout(kArmPointerSize, target_method.dex_file); + base->UpdateElementOffset(layout.MethodOffset(target_method.dex_method_index)); + // Add the special argument base to the method. + DCHECK(!invoke->HasCurrentMethodInput()); + invoke->AddSpecialInput(base); + } + } + + HArmDexCacheArraysBase* GetOrCreateDexCacheArrayBase(const DexFile& dex_file) { + // Ensure we only initialize the pointer once for each dex file. + auto lb = dex_cache_array_bases_.lower_bound(&dex_file); + if (lb != dex_cache_array_bases_.end() && + !dex_cache_array_bases_.key_comp()(&dex_file, lb->first)) { + return lb->second; + } + + // Insert the base at the start of the entry block, move it to a better + // position later in MoveBasesIfNeeded(). 
+ HArmDexCacheArraysBase* base = new (GetGraph()->GetArena()) HArmDexCacheArraysBase(dex_file); + HBasicBlock* entry_block = GetGraph()->GetEntryBlock(); + entry_block->InsertInstructionBefore(base, entry_block->GetFirstInstruction()); + dex_cache_array_bases_.PutBefore(lb, &dex_file, base); + return base; + } + + using DexCacheArraysBaseMap = + ArenaSafeMap<const DexFile*, HArmDexCacheArraysBase*, std::less<const DexFile*>>; + DexCacheArraysBaseMap dex_cache_array_bases_; +}; + +void DexCacheArrayFixups::Run() { + DexCacheArrayFixupsVisitor visitor(graph_); + visitor.VisitInsertionOrder(); + visitor.MoveBasesIfNeeded(); +} + +} // namespace arm +} // namespace art diff --git a/compiler/optimizing/dex_cache_array_fixups_arm.h b/compiler/optimizing/dex_cache_array_fixups_arm.h new file mode 100644 index 0000000000..015f910328 --- /dev/null +++ b/compiler/optimizing/dex_cache_array_fixups_arm.h @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_ARM_H_ +#define ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_ARM_H_ + +#include "nodes.h" +#include "optimization.h" + +namespace art { +namespace arm { + +class DexCacheArrayFixups : public HOptimization { + public: + DexCacheArrayFixups(HGraph* graph, OptimizingCompilerStats* stats) + : HOptimization(graph, "dex_cache_array_fixups_arm", stats) {} + + void Run() OVERRIDE; +}; + +} // namespace arm +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_ARM_H_ diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index b5ac773505..847d147b69 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -1177,6 +1177,59 @@ void HInstruction::MoveBefore(HInstruction* cursor) { } } +void HInstruction::MoveBeforeFirstUserAndOutOfLoops() { + DCHECK(!CanThrow()); + DCHECK(!HasSideEffects()); + DCHECK(!HasEnvironmentUses()); + DCHECK(HasNonEnvironmentUses()); + DCHECK(!IsPhi()); // Makes no sense for Phi. + DCHECK_EQ(InputCount(), 0u); + + // Find the target block. + HUseIterator<HInstruction*> uses_it(GetUses()); + HBasicBlock* target_block = uses_it.Current()->GetUser()->GetBlock(); + uses_it.Advance(); + while (!uses_it.Done() && uses_it.Current()->GetUser()->GetBlock() == target_block) { + uses_it.Advance(); + } + if (!uses_it.Done()) { + // This instruction has uses in two or more blocks. Find the common dominator. + CommonDominator finder(target_block); + for (; !uses_it.Done(); uses_it.Advance()) { + finder.Update(uses_it.Current()->GetUser()->GetBlock()); + } + target_block = finder.Get(); + DCHECK(target_block != nullptr); + } + // Move to the first dominator not in a loop. + while (target_block->IsInLoop()) { + target_block = target_block->GetDominator(); + DCHECK(target_block != nullptr); + } + + // Find insertion position. 
+ HInstruction* insert_pos = nullptr; + for (HUseIterator<HInstruction*> uses_it2(GetUses()); !uses_it2.Done(); uses_it2.Advance()) { + if (uses_it2.Current()->GetUser()->GetBlock() == target_block && + (insert_pos == nullptr || uses_it2.Current()->GetUser()->StrictlyDominates(insert_pos))) { + insert_pos = uses_it2.Current()->GetUser(); + } + } + if (insert_pos == nullptr) { + // No user in `target_block`, insert before the control flow instruction. + insert_pos = target_block->GetLastInstruction(); + DCHECK(insert_pos->IsControlFlow()); + // Avoid splitting HCondition from HIf to prevent unnecessary materialization. + if (insert_pos->IsIf()) { + HInstruction* if_input = insert_pos->AsIf()->InputAt(0); + if (if_input == insert_pos->GetPrevious()) { + insert_pos = if_input; + } + } + } + MoveBefore(insert_pos); +} + HBasicBlock* HBasicBlock::SplitBefore(HInstruction* cursor) { DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented."; DCHECK_EQ(cursor->GetBlock(), this); diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index d5110a7172..441aa0493a 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -1095,7 +1095,12 @@ class HLoopInformationOutwardIterator : public ValueObject { M(UShr, BinaryOperation) \ M(Xor, BinaryOperation) \ +#ifndef ART_ENABLE_CODEGEN_arm #define FOR_EACH_CONCRETE_INSTRUCTION_ARM(M) +#else +#define FOR_EACH_CONCRETE_INSTRUCTION_ARM(M) \ + M(ArmDexCacheArraysBase, Instruction) +#endif #ifndef ART_ENABLE_CODEGEN_arm64 #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M) @@ -1952,6 +1957,14 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { // Move `this` instruction before `cursor`. void MoveBefore(HInstruction* cursor); + // Move `this` before its first user and out of any loops. If there is no + // out-of-loop user that dominates all other users, move the instruction + // to the end of the out-of-loop common dominator of the user's blocks. 
+ // + // This can be used only on non-throwing instructions with no side effects that + // have at least one use but no environment uses. + void MoveBeforeFirstUserAndOutOfLoops(); + #define INSTRUCTION_TYPE_CHECK(type, super) \ bool Is##type() const { return (As##type() != nullptr); } \ virtual const H##type* As##type() const { return nullptr; } \ @@ -5566,6 +5579,9 @@ class HParallelMove : public HTemplateInstruction<0> { } // namespace art +#ifdef ART_ENABLE_CODEGEN_arm +#include "nodes_arm.h" +#endif #ifdef ART_ENABLE_CODEGEN_arm64 #include "nodes_arm64.h" #endif diff --git a/compiler/optimizing/nodes_arm.h b/compiler/optimizing/nodes_arm.h new file mode 100644 index 0000000000..6a1dbb9e70 --- /dev/null +++ b/compiler/optimizing/nodes_arm.h @@ -0,0 +1,59 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_OPTIMIZING_NODES_ARM_H_ +#define ART_COMPILER_OPTIMIZING_NODES_ARM_H_ + +namespace art { + +class HArmDexCacheArraysBase : public HExpression<0> { + public: + explicit HArmDexCacheArraysBase(const DexFile& dex_file) + : HExpression(Primitive::kPrimInt, SideEffects::None(), kNoDexPc), + dex_file_(&dex_file), + element_offset_(static_cast<size_t>(-1)) { } + + void UpdateElementOffset(size_t element_offset) { + // Use the lowest offset from the requested elements so that all offsets from + // this base are non-negative because our assemblers emit negative-offset loads + // as a sequence of two or more instructions. (However, positive offsets beyond + // 4KiB also require two or more instructions, so this simple heuristic could + // be improved for cases where there is a dense cluster of elements far from + // the lowest offset. This is expected to be rare enough though, so we choose + // not to spend compile time on elaborate calculations.) + element_offset_ = std::min(element_offset_, element_offset); + } + + const DexFile& GetDexFile() const { + return *dex_file_; + } + + size_t GetElementOffset() const { + return element_offset_; + } + + DECLARE_INSTRUCTION(ArmDexCacheArraysBase); + + private: + const DexFile* dex_file_; + size_t element_offset_; + + DISALLOW_COPY_AND_ASSIGN(HArmDexCacheArraysBase); +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_NODES_ARM_H_ diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 8440813a87..4ee7fca760 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -20,6 +20,10 @@ #include <stdint.h> +#ifdef ART_ENABLE_CODEGEN_arm +#include "dex_cache_array_fixups_arm.h" +#endif + #ifdef ART_ENABLE_CODEGEN_arm64 #include "instruction_simplifier_arm64.h" #endif @@ -435,6 +439,17 @@ static void RunArchOptimizations(InstructionSet instruction_set, PassObserver* pass_observer) { ArenaAllocator*
arena = graph->GetArena(); switch (instruction_set) { +#ifdef ART_ENABLE_CODEGEN_arm + case kThumb2: + case kArm: { + arm::DexCacheArrayFixups* fixups = new (arena) arm::DexCacheArrayFixups(graph, stats); + HOptimization* arm_optimizations[] = { + fixups + }; + RunOptimizations(arm_optimizations, arraysize(arm_optimizations), pass_observer); + break; + } +#endif #ifdef ART_ENABLE_CODEGEN_arm64 case kArm64: { arm64::InstructionSimplifierArm64* simplifier = diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc index 808a1dc6c2..b383f1e1ad 100644 --- a/compiler/optimizing/pc_relative_fixups_x86.cc +++ b/compiler/optimizing/pc_relative_fixups_x86.cc @@ -26,6 +26,15 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { public: explicit PCRelativeHandlerVisitor(HGraph* graph) : HGraphVisitor(graph), base_(nullptr) {} + void MoveBaseIfNeeded() { + if (base_ != nullptr) { + // Bring the base closer to the first use (previously, it was in the + // entry block) and relieve some pressure on the register allocator + // while avoiding recalculation of the base in a loop. + base_->MoveBeforeFirstUserAndOutOfLoops(); + } + } + private: void VisitAdd(HAdd* add) OVERRIDE { BinaryFP(add); @@ -72,7 +81,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { void VisitPackedSwitch(HPackedSwitch* switch_insn) OVERRIDE { // We need to replace the HPackedSwitch with a HX86PackedSwitch in order to // address the constant area. 
- InitializePCRelativeBasePointer(switch_insn); + InitializePCRelativeBasePointer(); HGraph* graph = GetGraph(); HBasicBlock* block = switch_insn->GetBlock(); HX86PackedSwitch* x86_switch = new (graph->GetArena()) HX86PackedSwitch( @@ -84,22 +93,22 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { block->ReplaceAndRemoveInstructionWith(switch_insn, x86_switch); } - void InitializePCRelativeBasePointer(HInstruction* user) { + void InitializePCRelativeBasePointer() { // Ensure we only initialize the pointer once. if (base_ != nullptr) { return; } - HGraph* graph = GetGraph(); - HBasicBlock* entry = graph->GetEntryBlock(); - base_ = new (graph->GetArena()) HX86ComputeBaseMethodAddress(); - HInstruction* insert_pos = (user->GetBlock() == entry) ? user : entry->GetLastInstruction(); - entry->InsertInstructionBefore(base_, insert_pos); + // Insert the base at the start of the entry block, move it to a better + // position later in MoveBaseIfNeeded(). + base_ = new (GetGraph()->GetArena()) HX86ComputeBaseMethodAddress(); + HBasicBlock* entry_block = GetGraph()->GetEntryBlock(); + entry_block->InsertInstructionBefore(base_, entry_block->GetFirstInstruction()); DCHECK(base_ != nullptr); } void ReplaceInput(HInstruction* insn, HConstant* value, int input_index, bool materialize) { - InitializePCRelativeBasePointer(insn); + InitializePCRelativeBasePointer(); HX86LoadFromConstantTable* load_constant = new (GetGraph()->GetArena()) HX86LoadFromConstantTable(base_, value, materialize); insn->GetBlock()->InsertInstructionBefore(load_constant, insn); @@ -111,7 +120,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { // addressing, we need the PC-relative address base. HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect(); if (invoke_static_or_direct != nullptr && invoke_static_or_direct->HasPcRelativeDexCache()) { - InitializePCRelativeBasePointer(invoke); + InitializePCRelativeBasePointer(); // Add the extra parameter base_. 
DCHECK(!invoke_static_or_direct->HasCurrentMethodInput()); invoke_static_or_direct->AddSpecialInput(base_); @@ -133,6 +142,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { void PcRelativeFixups::Run() { PCRelativeHandlerVisitor visitor(graph_); visitor.VisitInsertionOrder(); + visitor.MoveBaseIfNeeded(); } } // namespace x86 diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc index 0d05c49fc5..bcdbeecfd7 100644 --- a/compiler/optimizing/reference_type_propagation.cc +++ b/compiler/optimizing/reference_type_propagation.cc @@ -127,6 +127,87 @@ void ReferenceTypePropagation::ValidateTypes() { } } +static void CheckHasNoTypedInputs(HInstruction* root_instr) { + ArenaAllocatorAdapter<void> adapter = + root_instr->GetBlock()->GetGraph()->GetArena()->Adapter(kArenaAllocReferenceTypePropagation); + + ArenaVector<HPhi*> visited_phis(adapter); + ArenaVector<HInstruction*> worklist(adapter); + worklist.push_back(root_instr); + + while (!worklist.empty()) { + HInstruction* instr = worklist.back(); + worklist.pop_back(); + + if (instr->IsPhi() || instr->IsBoundType() || instr->IsNullCheck()) { + // Expect that both `root_instr` and its inputs have invalid RTI. + ScopedObjectAccess soa(Thread::Current()); + DCHECK(!instr->GetReferenceTypeInfo().IsValid()) << "Instruction should not have valid RTI."; + + // Insert all unvisited inputs to the worklist. + for (HInputIterator it(instr); !it.Done(); it.Advance()) { + HInstruction* input = it.Current(); + if (input->IsPhi()) { + if (ContainsElement(visited_phis, input->AsPhi())) { + continue; + } else { + visited_phis.push_back(input->AsPhi()); + } + } + worklist.push_back(input); + } + } else if (instr->IsNullConstant()) { + // The only input of `root_instr` allowed to have valid RTI because it is ignored. 
+ } else { + LOG(FATAL) << "Unexpected input " << instr->DebugName() << instr->GetId() << " with RTI " + << instr->GetReferenceTypeInfo(); + UNREACHABLE(); + } + } +} + +template<typename Functor> +static void ForEachUntypedInstruction(HGraph* graph, Functor fn) { + ScopedObjectAccess soa(Thread::Current()); + for (HReversePostOrderIterator block_it(*graph); !block_it.Done(); block_it.Advance()) { + for (HInstructionIterator it(block_it.Current()->GetPhis()); !it.Done(); it.Advance()) { + HInstruction* instr = it.Current(); + if (instr->GetType() == Primitive::kPrimNot && !instr->GetReferenceTypeInfo().IsValid()) { + fn(instr); + } + } + for (HInstructionIterator it(block_it.Current()->GetInstructions()); !it.Done(); it.Advance()) { + HInstruction* instr = it.Current(); + if (instr->GetType() == Primitive::kPrimNot && !instr->GetReferenceTypeInfo().IsValid()) { + fn(instr); + } + } + } +} + +void ReferenceTypePropagation::SetUntypedInstructionsToObject() { + // In some cases, the fix-point iteration will leave kPrimNot instructions with + // invalid RTI because bytecode does not provide enough typing information. + // Set the RTI of such instructions to Object. + // Example: + // MyClass a = null, b = null; + // while (a == null) { + // if (cond) { a = b; } else { b = a; } + // } + + if (kIsDebugBuild) { + // Test that if we are going to set RTI from invalid to Object, that + // instruction did not have any typed instructions in its def-use chain + // and therefore its type could not be inferred. + ForEachUntypedInstruction(graph_, [](HInstruction* instr) { CheckHasNoTypedInputs(instr); }); + } + + ReferenceTypeInfo obj_rti = ReferenceTypeInfo::Create(object_class_handle_, /* is_exact */ false); + ForEachUntypedInstruction(graph_, [obj_rti](HInstruction* instr) { + instr->SetReferenceTypeInfo(obj_rti); + }); +} + void ReferenceTypePropagation::Run() { // To properly propagate type info we need to visit in the dominator-based order. 
// Reverse post order guarantees a node's dominators are visited first. @@ -136,6 +217,7 @@ void ReferenceTypePropagation::Run() { } ProcessWorklist(); + SetUntypedInstructionsToObject(); ValidateTypes(); } @@ -534,8 +616,9 @@ void RTPVisitor::VisitLoadException(HLoadException* instr) { void RTPVisitor::VisitNullCheck(HNullCheck* instr) { ScopedObjectAccess soa(Thread::Current()); ReferenceTypeInfo parent_rti = instr->InputAt(0)->GetReferenceTypeInfo(); - DCHECK(parent_rti.IsValid()); - instr->SetReferenceTypeInfo(parent_rti); + if (parent_rti.IsValid()) { + instr->SetReferenceTypeInfo(parent_rti); + } } void RTPVisitor::VisitFakeString(HFakeString* instr) { @@ -588,11 +671,16 @@ void ReferenceTypePropagation::VisitPhi(HPhi* phi) { } if (phi->GetBlock()->IsLoopHeader()) { + ScopedObjectAccess soa(Thread::Current()); // Set the initial type for the phi. Use the non back edge input for reaching // a fixed point faster. + HInstruction* first_input = phi->InputAt(0); + ReferenceTypeInfo first_input_rti = first_input->GetReferenceTypeInfo(); + if (first_input_rti.IsValid() && !first_input->IsNullConstant()) { + phi->SetCanBeNull(first_input->CanBeNull()); + phi->SetReferenceTypeInfo(first_input_rti); + } AddToWorklist(phi); - phi->SetCanBeNull(phi->InputAt(0)->CanBeNull()); - phi->SetReferenceTypeInfo(phi->InputAt(0)->GetReferenceTypeInfo()); } else { // Eagerly compute the type of the phi, for quicker convergence. Note // that we don't need to add users to the worklist because we are @@ -683,7 +771,7 @@ bool ReferenceTypePropagation::UpdateReferenceTypeInfo(HInstruction* instr) { instr->SetReferenceTypeInfo(parent_rti); } } else if (instr->IsArrayGet()) { - // TODO: consider if it's worth "looking back" and bounding the input object + // TODO: consider if it's worth "looking back" and binding the input object // to an array type. 
UpdateArrayGet(instr->AsArrayGet(), handles_, object_class_handle_); } else { @@ -770,7 +858,10 @@ void ReferenceTypePropagation::UpdatePhi(HPhi* instr) { } } } - instr->SetReferenceTypeInfo(new_rti); + + if (new_rti.IsValid()) { + instr->SetReferenceTypeInfo(new_rti); + } } // Re-computes and updates the nullability of the instruction. Returns whether or diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h index 5c05592726..21789e1331 100644 --- a/compiler/optimizing/reference_type_propagation.h +++ b/compiler/optimizing/reference_type_propagation.h @@ -57,6 +57,7 @@ class ReferenceTypePropagation : public HOptimization { SHARED_REQUIRES(Locks::mutator_lock_); void ValidateTypes(); + void SetUntypedInstructionsToObject(); StackHandleScopeCollection* handles_; |